1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package org.apache.james.transport.mailets;
21
22 import org.apache.mailet.base.GenericMailet;
23 import org.apache.mailet.Mail;
24 import org.apache.mailet.MailetException;
25
26 import javax.mail.Message;
27 import javax.mail.MessagingException;
28 import javax.mail.Multipart;
29 import javax.mail.Part;
30 import javax.mail.internet.ContentType;
31
32 import java.io.IOException;
33 import java.util.HashMap;
34
35
36
37
38
39
40
41
42
43 public class OnlyText extends GenericMailet {
44 private static final String PARAMETER_NAME_NOTEXT_PROCESSOR = "NoTextProcessor";
45
46 private String optionsNotextProcessor = null;
47 private final HashMap charMap = new HashMap();
48
49
50
51
52
53
54 public String getMailetInfo() {
55 return "OnlyText";
56 }
57
58 public void init() throws MailetException {
59 optionsNotextProcessor = getInitParameter(PARAMETER_NAME_NOTEXT_PROCESSOR);
60 initEntityTable();
61 }
62
63 private int[] process(Mail mail, Multipart mp, int found, int htmlPart, int stringPart) throws MessagingException, IOException {
64 for (int i = 0; found < 0 && i < mp.getCount(); i++) {
65 Object content = null;
66 try {
67 content = mp.getBodyPart(i).getContent();
68 } catch (java.io.UnsupportedEncodingException e) {
69 log("Caught error [" + e.getMessage() + "] in a text/plain part, skipping...");
70 }
71 if (content != null) {
72 if (mp.getBodyPart(i).isMimeType("text/plain")) {
73 setContentFromPart(mail.getMessage(), mp.getBodyPart(i), null, false);
74 found = 1;
75 }
76 else if (htmlPart == -1 && mp.getBodyPart(i).isMimeType("text/html"))
77 htmlPart = i;
78
79 else if (stringPart == -1 && content instanceof String)
80 stringPart = i;
81
82 else if (content instanceof Multipart) {
83 int[] res = process(mail, (Multipart) content, found, htmlPart, stringPart);
84 found = res[0];
85 htmlPart = res[1];
86 stringPart = res[2];
87 }
88 }
89 }
90
91 return new int[] {found, htmlPart, stringPart};
92
93 }
94
95 public void service(Mail mail) throws MailetException {
96 try {
97 Object content = mail.getMessage().getContent();
98 if (content instanceof Multipart) {
99 Multipart mp = (Multipart) content;
100
101 int found = -1;
102 int htmlPart = -1;
103 int stringPart = -1;
104 int[] res = process(mail, (Multipart) content, found, htmlPart, stringPart);
105 found = res[0];
106 htmlPart = res[1];
107 stringPart = res[2];
108
109 if (found < 0 && htmlPart != -1) {
110 setContentFromPart(mail.getMessage(), mp.getBodyPart(htmlPart), html2Text((String) mp.getBodyPart(htmlPart).getContent()), true);
111 found = 1;
112 }
113
114 if (found < 0 && stringPart != -1) {
115 setContentFromPart(mail.getMessage(), mp.getBodyPart(htmlPart), null, false);
116 found = 1;
117 }
118
119
120 if (found < 0 && optionsNotextProcessor != null) mail.setState(optionsNotextProcessor);
121
122 }
123
124 else if (!(content instanceof String) && optionsNotextProcessor != null) mail.setState(optionsNotextProcessor);
125
126 else if (mail.getMessage().isMimeType("text/html")) {
127 setContentFromPart(mail.getMessage(), mail.getMessage(), html2Text((String) mail.getMessage().getContent()), true);
128 }
129
130 } catch (IOException e) {
131 throw new MailetException("Failed fetching text part", e);
132
133 } catch (MessagingException e) {
134 throw new MailetException("Failed fetching text part", e);
135 }
136 }
137
138 private static void setContentFromPart(Message m, Part p, String newText, boolean setTextPlain) throws MessagingException, IOException {
139 String contentType = p.getContentType();
140 if (setTextPlain) {
141 ContentType ct = new ContentType(contentType);
142 ct.setPrimaryType("text");
143 ct.setSubType("plain");
144 contentType = ct.toString();
145 }
146 m.setContent(newText != null ? newText : p.getContent(), contentType);
147 String[] h = p.getHeader("Content-Transfer-Encoding");
148 if (h != null && h.length > 0) m.setHeader("Content-Transfer-Encoding", h[0]);
149 m.saveChanges();
150 }
151
152 public String html2Text(String html) {
153 return decodeEntities(html
154 .replaceAll("\\<([bB][rR]|[dD][lL])[ ]*[/]*[ ]*\\>", "\n")
155 .replaceAll("\\</([pP]|[hH]5|[dD][tT]|[dD][dD]|[dD][iI][vV])[ ]*\\>", "\n")
156 .replaceAll("\\<[lL][iI][ ]*[/]*[ ]*\\>", "\n* ")
157 .replaceAll("\\<[dD][dD][ ]*[/]*[ ]*\\>", " - ")
158 .replaceAll("\\<.*?\\>", ""));
159 }
160
161 public String decodeEntities(String data) {
162 StringBuffer buffer = new StringBuffer();
163 StringBuffer res = new StringBuffer();
164 int lastAmp = -1;
165 for (int i = 0; i < data.length(); i++) {
166 char c = data.charAt(i);
167
168 if (c == '&' && lastAmp == -1) lastAmp = buffer.length();
169 else if (c == ';' && (lastAmp > -1)) {
170 if (charMap.containsKey(buffer.toString())) res.append((String) charMap.get(buffer.toString()));
171 else res.append("&" + buffer.toString() + ";");
172 lastAmp = -1;
173 buffer = new StringBuffer();
174 }
175 else if (lastAmp == -1) res.append(c);
176 else buffer.append(c);
177 }
178 return res.toString();
179 }
180
181 private final void initEntityTable() {
182 for (int index = 11; index < 32; index++) charMap.put("#0" + index, String.valueOf((char) index));
183 for (int index = 32; index < 128; index++) charMap.put("#" + index, String.valueOf((char) index));
184 for (int index = 128; index < 256; index++) charMap.put("#" + index, String.valueOf((char) index));
185
186
187
188
189 charMap.put("#09", "\t");
190 charMap.put("#10", "\n");
191 charMap.put("#13", "\r");
192 charMap.put("#60", "<");
193 charMap.put("#62", ">");
194
195 charMap.put("lt", "<");
196 charMap.put("gt", ">");
197 charMap.put("amp", "&");
198 charMap.put("nbsp", " ");
199 charMap.put("quot", "\"");
200
201 charMap.put("iexcl", "\u00A1");
202 charMap.put("cent", "\u00A2");
203 charMap.put("pound", "\u00A3");
204 charMap.put("curren", "\u00A4");
205 charMap.put("yen", "\u00A5");
206 charMap.put("brvbar", "\u00A6");
207 charMap.put("sect", "\u00A7");
208 charMap.put("uml", "\u00A8");
209 charMap.put("copy", "\u00A9");
210 charMap.put("ordf", "\u00AA");
211 charMap.put("laquo", "\u00AB");
212 charMap.put("not", "\u00AC");
213 charMap.put("shy", "\u00AD");
214 charMap.put("reg", "\u00AE");
215 charMap.put("macr", "\u00AF");
216 charMap.put("deg", "\u00B0");
217 charMap.put("plusmn", "\u00B1");
218 charMap.put("sup2", "\u00B2");
219 charMap.put("sup3", "\u00B3");
220
221 charMap.put("acute", "\u00B4");
222 charMap.put("micro", "\u00B5");
223 charMap.put("para", "\u00B6");
224 charMap.put("middot", "\u00B7");
225 charMap.put("cedil", "\u00B8");
226 charMap.put("sup1", "\u00B9");
227 charMap.put("ordm", "\u00BA");
228 charMap.put("raquo", "\u00BB");
229 charMap.put("frac14", "\u00BC");
230 charMap.put("frac12", "\u00BD");
231 charMap.put("frac34", "\u00BE");
232 charMap.put("iquest", "\u00BF");
233
234 charMap.put("Agrave", "\u00C0");
235 charMap.put("Aacute", "\u00C1");
236 charMap.put("Acirc", "\u00C2");
237 charMap.put("Atilde", "\u00C3");
238 charMap.put("Auml", "\u00C4");
239 charMap.put("Aring", "\u00C5");
240 charMap.put("AElig", "\u00C6");
241 charMap.put("Ccedil", "\u00C7");
242 charMap.put("Egrave", "\u00C8");
243 charMap.put("Eacute", "\u00C9");
244 charMap.put("Ecirc", "\u00CA");
245 charMap.put("Euml", "\u00CB");
246 charMap.put("Igrave", "\u00CC");
247 charMap.put("Iacute", "\u00CD");
248 charMap.put("Icirc", "\u00CE");
249 charMap.put("Iuml", "\u00CF");
250
251 charMap.put("ETH", "\u00D0");
252 charMap.put("Ntilde", "\u00D1");
253 charMap.put("Ograve", "\u00D2");
254 charMap.put("Oacute", "\u00D3");
255 charMap.put("Ocirc", "\u00D4");
256 charMap.put("Otilde", "\u00D5");
257 charMap.put("Ouml", "\u00D6");
258 charMap.put("times", "\u00D7");
259 charMap.put("Oslash", "\u00D8");
260 charMap.put("Ugrave", "\u00D9");
261 charMap.put("Uacute", "\u00DA");
262 charMap.put("Ucirc", "\u00DB");
263 charMap.put("Uuml", "\u00DC");
264 charMap.put("Yacute", "\u00DD");
265 charMap.put("THORN", "\u00DE");
266 charMap.put("szlig", "\u00DF");
267
268 charMap.put("agrave", "\u00E0");
269 charMap.put("aacute", "\u00E1");
270 charMap.put("acirc", "\u00E2");
271 charMap.put("atilde", "\u00E3");
272 charMap.put("auml", "\u00E4");
273 charMap.put("aring", "\u00E5");
274 charMap.put("aelig", "\u00E6");
275 charMap.put("ccedil", "\u00E7");
276 charMap.put("egrave", "\u00E8");
277 charMap.put("eacute", "\u00E9");
278 charMap.put("ecirc", "\u00EA");
279 charMap.put("euml", "\u00EB");
280 charMap.put("igrave", "\u00EC");
281 charMap.put("iacute", "\u00ED");
282 charMap.put("icirc", "\u00EE");
283 charMap.put("iuml", "\u00EF");
284
285 charMap.put("eth", "\u00F0");
286 charMap.put("ntilde", "\u00F1");
287 charMap.put("ograve", "\u00F2");
288 charMap.put("oacute", "\u00F3");
289 charMap.put("ocirc", "\u00F4");
290 charMap.put("otilde", "\u00F5");
291 charMap.put("ouml", "\u00F6");
292 charMap.put("divid", "\u00F7");
293 charMap.put("oslash", "\u00F8");
294 charMap.put("ugrave", "\u00F9");
295 charMap.put("uacute", "\u00FA");
296 charMap.put("ucirc", "\u00FB");
297 charMap.put("uuml", "\u00FC");
298 charMap.put("yacute", "\u00FD");
299 charMap.put("thorn", "\u00FE");
300 charMap.put("yuml", "\u00FF");
301 charMap.put("euro", "\u0080");
302 }
303 }