1 /****************************************************************
2 * Licensed to the Apache Software Foundation (ASF) under one *
3 * or more contributor license agreements. See the NOTICE file *
4 * distributed with this work for additional information *
5 * regarding copyright ownership. The ASF licenses this file *
6 * to you under the Apache License, Version 2.0 (the *
7 * "License"); you may not use this file except in compliance *
8 * with the License. You may obtain a copy of the License at *
9 * *
10 * http://www.apache.org/licenses/LICENSE-2.0 *
11 * *
12 * Unless required by applicable law or agreed to in writing, *
13 * software distributed under the License is distributed on an *
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
15 * KIND, either express or implied. See the License for the *
16 * specific language governing permissions and limitations *
17 * under the License. *
18 ****************************************************************/
19
20 package org.apache.james.mime4j.util;
21
22 import java.text.DateFormat;
23 import java.text.FieldPosition;
24 import java.text.SimpleDateFormat;
25 import java.util.Date;
26 import java.util.GregorianCalendar;
27 import java.util.HashMap;
28 import java.util.Locale;
29 import java.util.Map;
30 import java.util.Random;
31 import java.util.TimeZone;
32
33 import org.apache.commons.logging.Log;
34 import org.apache.commons.logging.LogFactory;
35
36 /**
37 * A utility class, which provides some MIME related application logic.
38 */
39 public final class MimeUtil {
private static final Log log = LogFactory.getLog(MimeUtil.class);

/**
 * The <code>quoted-printable</code> encoding.
 */
public static final String ENC_QUOTED_PRINTABLE = "quoted-printable";
/**
 * The <code>binary</code> encoding.
 */
public static final String ENC_BINARY = "binary";
/**
 * The <code>base64</code> encoding.
 */
public static final String ENC_BASE64 = "base64";
/**
 * The <code>8bit</code> encoding.
 */
public static final String ENC_8BIT = "8bit";
/**
 * The <code>7bit</code> encoding.
 */
public static final String ENC_7BIT = "7bit";

/** <code>MIME-Version</code> header name (lowercase) */
public static final String MIME_HEADER_MIME_VERSION = "mime-version";
/** <code>Content-ID</code> header name (lowercase) */
public static final String MIME_HEADER_CONTENT_ID = "content-id";
/** <code>Content-Description</code> header name (lowercase) */
public static final String MIME_HEADER_CONTENT_DESCRIPTION = "content-description";
/**
 * <code>Content-Disposition</code> header name (lowercase).
 * See <a href='http://www.faqs.org/rfcs/rfc2183.html'>RFC2183</a>.
 */
public static final String MIME_HEADER_CONTENT_DISPOSITION = "content-disposition";
/**
 * <code>Content-Disposition</code> filename parameter (lowercase).
 * See <a href='http://www.faqs.org/rfcs/rfc2183.html'>RFC2183</a>.
 */
public static final String PARAM_FILENAME = "filename";
/**
 * <code>Content-Disposition</code> modification-date parameter (lowercase).
 * See <a href='http://www.faqs.org/rfcs/rfc2183.html'>RFC2183</a>.
 */
public static final String PARAM_MODIFICATION_DATE = "modification-date";
/**
 * <code>Content-Disposition</code> creation-date parameter (lowercase).
 * See <a href='http://www.faqs.org/rfcs/rfc2183.html'>RFC2183</a>.
 */
public static final String PARAM_CREATION_DATE = "creation-date";
/**
 * <code>Content-Disposition</code> read-date parameter (lowercase).
 * See <a href='http://www.faqs.org/rfcs/rfc2183.html'>RFC2183</a>.
 */
public static final String PARAM_READ_DATE = "read-date";
/**
 * <code>Content-Disposition</code> size parameter (lowercase).
 * See <a href='http://www.faqs.org/rfcs/rfc2183.html'>RFC2183</a>.
 */
public static final String PARAM_SIZE = "size";
/**
 * <code>Content-Language</code> header name (lowercase).
 * See <a href='http://www.faqs.org/rfcs/rfc4646.html'>RFC4646</a>.
 * Note that the identifier of this constant is spelled
 * <code>LANGAUGE</code>.
 */
public static final String MIME_HEADER_LANGAUGE = "content-language";
/**
 * <code>Content-Location</code> header name (lowercase).
 * See <a href='http://www.faqs.org/rfcs/rfc2557.html'>RFC2557</a>.
 */
public static final String MIME_HEADER_LOCATION = "content-location";
/**
 * <code>Content-MD5</code> header name (lowercase).
 * See <a href='http://www.faqs.org/rfcs/rfc1864.html'>RFC1864</a>.
 */
public static final String MIME_HEADER_MD5 = "content-md5";

// Source of the random components of generated boundaries and message ids.
private static final Random random = new Random();

// Running number used as the first component of generated boundaries and
// message ids; it is read and incremented through nextCounterValue().
private static int counter = 0;
120
private MimeUtil() {
    // This is a utility class that is only used statically;
    // the private constructor protects it from instantiation.
}
125
126 /**
127 * Returns, whether the given two MIME types are identical.
128 */
129 public static boolean isSameMimeType(String pType1, String pType2) {
130 return pType1 != null && pType2 != null && pType1.equalsIgnoreCase(pType2);
131 }
132
133 /**
134 * Returns true, if the given MIME type is that of a message.
135 */
136 public static boolean isMessage(String pMimeType) {
137 return pMimeType != null && pMimeType.equalsIgnoreCase("message/rfc822");
138 }
139
140 /**
141 * Return true, if the given MIME type indicates a multipart entity.
142 */
143 public static boolean isMultipart(String pMimeType) {
144 return pMimeType != null && pMimeType.toLowerCase().startsWith("multipart/");
145 }
146
147 /**
148 * Returns, whether the given transfer-encoding is "base64".
149 */
150 public static boolean isBase64Encoding(String pTransferEncoding) {
151 return ENC_BASE64.equalsIgnoreCase(pTransferEncoding);
152 }
153
154 /**
155 * Returns, whether the given transfer-encoding is "quoted-printable".
156 */
157 public static boolean isQuotedPrintableEncoded(String pTransferEncoding) {
158 return ENC_QUOTED_PRINTABLE.equalsIgnoreCase(pTransferEncoding);
159 }
160
161 /**
162 * <p>Parses a complex field value into a map of key/value pairs. You may
163 * use this, for example, to parse a definition like
164 * <pre>
165 * text/plain; charset=UTF-8; boundary=foobar
166 * </pre>
167 * The above example would return a map with the keys "", "charset",
168 * and "boundary", and the values "text/plain", "UTF-8", and "foobar".
169 * </p><p>
170 * Header value will be unfolded and excess white space trimmed.
171 * </p>
172 * @param pValue The field value to parse.
173 * @return The result map; use the key "" to retrieve the first value.
174 */
175 @SuppressWarnings("fallthrough")
176 public static Map<String, String> getHeaderParams(String pValue) {
177 pValue = pValue.trim();
178
179 pValue = unfold(pValue);
180
181 Map<String, String> result = new HashMap<String, String>();
182
183 // split main value and parameters
184 String main;
185 String rest;
186 if (pValue.indexOf(";") == -1) {
187 main = pValue;
188 rest = null;
189 } else {
190 main = pValue.substring(0, pValue.indexOf(";"));
191 rest = pValue.substring(main.length() + 1);
192 }
193
194 result.put("", main);
195 if (rest != null) {
196 char[] chars = rest.toCharArray();
197 StringBuilder paramName = new StringBuilder(64);
198 StringBuilder paramValue = new StringBuilder(64);
199
200 final byte READY_FOR_NAME = 0;
201 final byte IN_NAME = 1;
202 final byte READY_FOR_VALUE = 2;
203 final byte IN_VALUE = 3;
204 final byte IN_QUOTED_VALUE = 4;
205 final byte VALUE_DONE = 5;
206 final byte ERROR = 99;
207
208 byte state = READY_FOR_NAME;
209 boolean escaped = false;
210 for (char c : chars) {
211 switch (state) {
212 case ERROR:
213 if (c == ';')
214 state = READY_FOR_NAME;
215 break;
216
217 case READY_FOR_NAME:
218 if (c == '=') {
219 log.error("Expected header param name, got '='");
220 state = ERROR;
221 break;
222 }
223
224 paramName.setLength(0);
225 paramValue.setLength(0);
226
227 state = IN_NAME;
228 // fall-through
229
230 case IN_NAME:
231 if (c == '=') {
232 if (paramName.length() == 0)
233 state = ERROR;
234 else
235 state = READY_FOR_VALUE;
236 break;
237 }
238
239 // not '='... just add to name
240 paramName.append(c);
241 break;
242
243 case READY_FOR_VALUE:
244 boolean fallThrough = false;
245 switch (c) {
246 case ' ':
247 case '\t':
248 break; // ignore spaces, especially before '"'
249
250 case '"':
251 state = IN_QUOTED_VALUE;
252 break;
253
254 default:
255 state = IN_VALUE;
256 fallThrough = true;
257 break;
258 }
259 if (!fallThrough)
260 break;
261
262 // fall-through
263
264 case IN_VALUE:
265 fallThrough = false;
266 switch (c) {
267 case ';':
268 case ' ':
269 case '\t':
270 result.put(
271 paramName.toString().trim().toLowerCase(),
272 paramValue.toString().trim());
273 state = VALUE_DONE;
274 fallThrough = true;
275 break;
276 default:
277 paramValue.append(c);
278 break;
279 }
280 if (!fallThrough)
281 break;
282
283 case VALUE_DONE:
284 switch (c) {
285 case ';':
286 state = READY_FOR_NAME;
287 break;
288
289 case ' ':
290 case '\t':
291 break;
292
293 default:
294 state = ERROR;
295 break;
296 }
297 break;
298
299 case IN_QUOTED_VALUE:
300 switch (c) {
301 case '"':
302 if (!escaped) {
303 // don't trim quoted strings; the spaces could be intentional.
304 result.put(
305 paramName.toString().trim().toLowerCase(),
306 paramValue.toString());
307 state = VALUE_DONE;
308 } else {
309 escaped = false;
310 paramValue.append(c);
311 }
312 break;
313
314 case '\\':
315 if (escaped) {
316 paramValue.append('\\');
317 }
318 escaped = !escaped;
319 break;
320
321 default:
322 if (escaped) {
323 paramValue.append('\\');
324 }
325 escaped = false;
326 paramValue.append(c);
327 break;
328 }
329 break;
330
331 }
332 }
333
334 // done looping. check if anything is left over.
335 if (state == IN_VALUE) {
336 result.put(
337 paramName.toString().trim().toLowerCase(),
338 paramValue.toString().trim());
339 }
340 }
341
342 return result;
343 }
344
345 /**
346 * Creates a new unique message boundary string that can be used as boundary
347 * parameter for the Content-Type header field of a message.
348 *
349 * @return a new unique message boundary string.
350 */
351 public static String createUniqueBoundary() {
352 StringBuilder sb = new StringBuilder();
353 sb.append("-=Part.");
354 sb.append(Integer.toHexString(nextCounterValue()));
355 sb.append('.');
356 sb.append(Long.toHexString(random.nextLong()));
357 sb.append('.');
358 sb.append(Long.toHexString(System.currentTimeMillis()));
359 sb.append('.');
360 sb.append(Long.toHexString(random.nextLong()));
361 sb.append("=-");
362 return sb.toString();
363 }
364
365 /**
366 * Creates a new unique message identifier that can be used in message
367 * header field such as Message-ID or In-Reply-To. If the given host name is
368 * not <code>null</code> it will be used as suffix for the message ID
369 * (following an at sign).
370 *
371 * The resulting string is enclosed in angle brackets (< and >);
372 *
373 * @param hostName host name to be included in the message ID or
374 * <code>null</code> if no host name should be included.
375 * @return a new unique message identifier.
376 */
377 public static String createUniqueMessageId(String hostName) {
378 StringBuilder sb = new StringBuilder("<Mime4j.");
379 sb.append(Integer.toHexString(nextCounterValue()));
380 sb.append('.');
381 sb.append(Long.toHexString(random.nextLong()));
382 sb.append('.');
383 sb.append(Long.toHexString(System.currentTimeMillis()));
384 if (hostName != null) {
385 sb.append('@');
386 sb.append(hostName);
387 }
388 sb.append('>');
389 return sb.toString();
390 }
391
392 /**
393 * Formats the specified date into a RFC 822 date-time string.
394 *
395 * @param date
396 * date to be formatted into a string.
397 * @param zone
398 * the time zone to use or <code>null</code> to use the default
399 * time zone.
400 * @return the formatted time string.
401 */
402 public static String formatDate(Date date, TimeZone zone) {
403 DateFormat df = RFC822_DATE_FORMAT.get();
404
405 if (zone == null) {
406 df.setTimeZone(TimeZone.getDefault());
407 } else {
408 df.setTimeZone(zone);
409 }
410
411 return df.format(date);
412 }
413
414 /**
415 * Splits the specified string into a multiple-line representation with
416 * lines no longer than 76 characters (because the line might contain
417 * encoded words; see <a href='http://www.faqs.org/rfcs/rfc2047.html'>RFC
418 * 2047</a> section 2). If the string contains non-whitespace sequences
419 * longer than 76 characters a line break is inserted at the whitespace
420 * character following the sequence resulting in a line longer than 76
421 * characters.
422 *
423 * @param s
424 * string to split.
425 * @param usedCharacters
426 * number of characters already used up. Usually the number of
427 * characters for header field name plus colon and one space.
428 * @return a multiple-line representation of the given string.
429 */
430 public static String fold(String s, int usedCharacters) {
431 final int maxCharacters = 76;
432
433 final int length = s.length();
434 if (usedCharacters + length <= maxCharacters)
435 return s;
436
437 StringBuilder sb = new StringBuilder();
438
439 int lastLineBreak = -usedCharacters;
440 int wspIdx = indexOfWsp(s, 0);
441 while (true) {
442 if (wspIdx == length) {
443 sb.append(s.substring(Math.max(0, lastLineBreak)));
444 return sb.toString();
445 }
446
447 int nextWspIdx = indexOfWsp(s, wspIdx + 1);
448
449 if (nextWspIdx - lastLineBreak > maxCharacters) {
450 sb.append(s.substring(Math.max(0, lastLineBreak), wspIdx));
451 sb.append("\r\n");
452 lastLineBreak = wspIdx;
453 }
454
455 wspIdx = nextWspIdx;
456 }
457 }
458
459 /**
460 * Unfold a multiple-line representation into a single line.
461 *
462 * @param s
463 * string to unfold.
464 * @return unfolded string.
465 */
466 public static String unfold(String s) {
467 final int length = s.length();
468 for (int idx = 0; idx < length; idx++) {
469 char c = s.charAt(idx);
470 if (c == '\r' || c == '\n') {
471 return unfold0(s, idx);
472 }
473 }
474
475 return s;
476 }
477
478 private static String unfold0(String s, int crlfIdx) {
479 final int length = s.length();
480 StringBuilder sb = new StringBuilder(length);
481
482 if (crlfIdx > 0) {
483 sb.append(s.substring(0, crlfIdx));
484 }
485
486 for (int idx = crlfIdx + 1; idx < length; idx++) {
487 char c = s.charAt(idx);
488 if (c != '\r' && c != '\n') {
489 sb.append(c);
490 }
491 }
492
493 return sb.toString();
494 }
495
496 private static int indexOfWsp(String s, int fromIndex) {
497 final int len = s.length();
498 for (int index = fromIndex; index < len; index++) {
499 char c = s.charAt(index);
500 if (c == ' ' || c == '\t')
501 return index;
502 }
503 return len;
504 }
505
506 private static synchronized int nextCounterValue() {
507 return counter++;
508 }
509
/**
 * Holds one {@link Rfc822DateFormat} instance per thread. The instance is
 * created lazily on first access by a thread; {@link #formatDate} sets its
 * time zone before each use.
 */
private static final ThreadLocal<DateFormat> RFC822_DATE_FORMAT = new ThreadLocal<DateFormat>() {
    @Override
    protected DateFormat initialValue() {
        return new Rfc822DateFormat();
    }
};
516
517 private static final class Rfc822DateFormat extends SimpleDateFormat {
518 private static final long serialVersionUID = 1L;
519
520 public Rfc822DateFormat() {
521 super("EEE, d MMM yyyy HH:mm:ss ", Locale.US);
522 }
523
524 @Override
525 public StringBuffer format(Date date, StringBuffer toAppendTo,
526 FieldPosition pos) {
527 StringBuffer sb = super.format(date, toAppendTo, pos);
528
529 int zoneMillis = calendar.get(GregorianCalendar.ZONE_OFFSET);
530 int dstMillis = calendar.get(GregorianCalendar.DST_OFFSET);
531 int minutes = (zoneMillis + dstMillis) / 1000 / 60;
532
533 if (minutes < 0) {
534 sb.append('-');
535 minutes = -minutes;
536 } else {
537 sb.append('+');
538 }
539
540 sb.append(String.format("%02d%02d", minutes / 60, minutes % 60));
541
542 return sb;
543 }
544 }
545 }