View Javadoc

1   /****************************************************************
2    * Licensed to the Apache Software Foundation (ASF) under one   *
3    * or more contributor license agreements.  See the NOTICE file *
4    * distributed with this work for additional information        *
5    * regarding copyright ownership.  The ASF licenses this file   *
6    * to you under the Apache License, Version 2.0 (the            *
7    * "License"); you may not use this file except in compliance   *
8    * with the License.  You may obtain a copy of the License at   *
9    *                                                              *
10   *   http://www.apache.org/licenses/LICENSE-2.0                 *
11   *                                                              *
12   * Unless required by applicable law or agreed to in writing,   *
13   * software distributed under the License is distributed on an  *
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
15   * KIND, either express or implied.  See the License for the    *
16   * specific language governing permissions and limitations      *
17   * under the License.                                           *
18   ****************************************************************/
19  
20  package org.apache.james.mime4j.util;
21  
22  import java.text.DateFormat;
23  import java.text.FieldPosition;
24  import java.text.SimpleDateFormat;
25  import java.util.Date;
26  import java.util.GregorianCalendar;
27  import java.util.HashMap;
28  import java.util.Locale;
29  import java.util.Map;
30  import java.util.Random;
31  import java.util.TimeZone;
32  
33  import org.apache.commons.logging.Log;
34  import org.apache.commons.logging.LogFactory;
35  
36  /**
37   * A utility class, which provides some MIME related application logic.
38   */
39  public final class MimeUtil {
40      private static final Log log = LogFactory.getLog(MimeUtil.class);
41      
42      /**
43       * The <code>quoted-printable</code> encoding.
44       */
45      public static final String ENC_QUOTED_PRINTABLE = "quoted-printable";
46      /**
47       * The <code>binary</code> encoding.
48       */
49      public static final String ENC_BINARY = "binary";
50      /**
51       * The <code>base64</code> encoding.
52       */
53      public static final String ENC_BASE64 = "base64";
54      /**
55       * The <code>8bit</code> encoding.
56       */
57      public static final String ENC_8BIT = "8bit";
58      /**
59       * The <code>7bit</code> encoding.
60       */
61      public static final String ENC_7BIT = "7bit";
62  
63      /** <code>MIME-Version</code> header name (lowercase) */
64      public static final String MIME_HEADER_MIME_VERSION = "mime-version";
65      /** <code>Content-ID</code> header name (lowercase) */
66      public static final String MIME_HEADER_CONTENT_ID = "content-id";
67      /** <code>Content-Description</code> header name (lowercase) */
68      public static final String MIME_HEADER_CONTENT_DESCRIPTION = "content-description";
69      /** 
70       * <code>Content-Disposition</code> header name (lowercase). 
71       * See <a href='http://www.faqs.org/rfcs/rfc2183.html'>RFC2183</a>. 
72       */
73      public static final String MIME_HEADER_CONTENT_DISPOSITION = "content-disposition";
74      /** 
75       * <code>Content-Disposition</code> filename parameter (lowercase). 
76       * See <a href='http://www.faqs.org/rfcs/rfc2183.html'>RFC2183</a>. 
77       */
78      public static final String PARAM_FILENAME = "filename";
79      /** 
80       * <code>Content-Disposition</code> modification-date parameter (lowercase). 
81       * See <a href='http://www.faqs.org/rfcs/rfc2183.html'>RFC2183</a>. 
82       */
83      public static final String PARAM_MODIFICATION_DATE = "modification-date";
84      /** 
85       * <code>Content-Disposition</code> creation-date parameter (lowercase). 
86       * See <a href='http://www.faqs.org/rfcs/rfc2183.html'>RFC2183</a>. 
87       */
88      public static final String PARAM_CREATION_DATE = "creation-date";
89      /** 
90       * <code>Content-Disposition</code> read-date parameter (lowercase). 
91       * See <a href='http://www.faqs.org/rfcs/rfc2183.html'>RFC2183</a>. 
92       */
93      public static final String PARAM_READ_DATE = "read-date";
94      /** 
95       * <code>Content-Disposition</code> size parameter (lowercase). 
96       * See <a href='http://www.faqs.org/rfcs/rfc2183.html'>RFC2183</a>. 
97       */
98      public static final String PARAM_SIZE = "size";
99      /**
100      * <code>Content-Language</code> header (lower case).
101      * See <a href='http://www.faqs.org/rfcs/rfc4646.html'>RFC4646</a>.
102      */
103     public static final String MIME_HEADER_LANGAUGE = "content-language";
104     /**
105      * <code>Content-Location</code> header (lower case).
106      * See <a href='http://www.faqs.org/rfcs/rfc2557.html'>RFC2557</a>.
107      */
108     public static final String MIME_HEADER_LOCATION = "content-location";
109     /**
110      * <code>Content-MD5</code> header (lower case).
111      * See <a href='http://www.faqs.org/rfcs/rfc1864.html'>RFC1864</a>.
112      */
113     public static final String MIME_HEADER_MD5 = "content-md5";
114 
115     // used to create unique ids
116     private static final Random random = new Random();
117     
118     // used to create unique ids
119     private static int counter = 0;
120 
121     private MimeUtil() {
122         // This is a utility class to be used statically;
123         // the private constructor prevents instantiation.
124     }
125     
126     /**
127      * Returns, whether the given two MIME types are identical.
128      */
129     public static boolean isSameMimeType(String pType1, String pType2) {
130         return pType1 != null  &&  pType2 != null  &&  pType1.equalsIgnoreCase(pType2);
131     }
132 
133     /**
134      * Returns true, if the given MIME type is that of a message. 
135      */
136     public static boolean isMessage(String pMimeType) {
137         return pMimeType != null  &&  pMimeType.equalsIgnoreCase("message/rfc822");
138     }
139 
140     /**
141      * Return true, if the given MIME type indicates a multipart entity.
142      */
143     public static boolean isMultipart(String pMimeType) {
144         return pMimeType != null  &&  pMimeType.toLowerCase().startsWith("multipart/");
145     }
146 
147     /**
148      * Returns, whether the given transfer-encoding is "base64".
149      */
150     public static boolean isBase64Encoding(String pTransferEncoding) {
151         return ENC_BASE64.equalsIgnoreCase(pTransferEncoding);
152     }
153 
154     /**
155      * Returns, whether the given transfer-encoding is "quoted-printable".
156      */
157     public static boolean isQuotedPrintableEncoded(String pTransferEncoding) {
158         return ENC_QUOTED_PRINTABLE.equalsIgnoreCase(pTransferEncoding);
159     }
160 
161     /**
162      * <p>Parses a complex field value into a map of key/value pairs. You may
163      * use this, for example, to parse a definition like
164      * <pre>
165      *   text/plain; charset=UTF-8; boundary=foobar
166      * </pre>
167      * The above example would return a map with the keys "", "charset",
168      * and "boundary", and the values "text/plain", "UTF-8", and "foobar".
169      * </p><p>
170      * Header value will be unfolded and excess white space trimmed.
171      * </p>
172      * @param pValue The field value to parse.
173      * @return The result map; use the key "" to retrieve the first value.
174      */
175     @SuppressWarnings("fallthrough")
176     public static Map<String, String> getHeaderParams(String pValue) {
177         pValue = pValue.trim();
178 
179         pValue = unfold(pValue);
180         
181         Map<String, String> result = new HashMap<String, String>();
182 
183         // split main value and parameters
184         String main;
185         String rest;
186         if (pValue.indexOf(";") == -1) {
187             main = pValue;
188             rest = null;
189         } else {
190             main = pValue.substring(0, pValue.indexOf(";"));
191             rest = pValue.substring(main.length() + 1);
192         }
193 
194         result.put("", main);
195         if (rest != null) {
196             char[] chars = rest.toCharArray();
197             StringBuilder paramName = new StringBuilder(64);
198             StringBuilder paramValue = new StringBuilder(64);
199 
200             final byte READY_FOR_NAME = 0;
201             final byte IN_NAME = 1;
202             final byte READY_FOR_VALUE = 2;
203             final byte IN_VALUE = 3;
204             final byte IN_QUOTED_VALUE = 4;
205             final byte VALUE_DONE = 5;
206             final byte ERROR = 99;
207 
208             byte state = READY_FOR_NAME;
209             boolean escaped = false;
210             for (char c : chars) {
211                 switch (state) {
212                     case ERROR:
213                         if (c == ';')
214                             state = READY_FOR_NAME;
215                         break;
216 
217                     case READY_FOR_NAME:
218                         if (c == '=') {
219                             log.error("Expected header param name, got '='");
220                             state = ERROR;
221                             break;
222                         }
223 
224                         paramName.setLength(0);
225                         paramValue.setLength(0);
226 
227                         state = IN_NAME;
228                         // fall-through
229 
230                     case IN_NAME:
231                         if (c == '=') {
232                             if (paramName.length() == 0)
233                                 state = ERROR;
234                             else
235                                 state = READY_FOR_VALUE;
236                             break;
237                         }
238 
239                         // not '='... just add to name
240                         paramName.append(c);
241                         break;
242 
243                     case READY_FOR_VALUE:
244                         boolean fallThrough = false;
245                         switch (c) {
246                             case ' ':
247                             case '\t':
248                                 break;  // ignore spaces, especially before '"'
249 
250                             case '"':
251                                 state = IN_QUOTED_VALUE;
252                                 break;
253 
254                             default:
255                                 state = IN_VALUE;
256                                 fallThrough = true;
257                                 break;
258                         }
259                         if (!fallThrough)
260                             break;
261 
262                         // fall-through
263 
264                     case IN_VALUE:
265                         fallThrough = false;
266                         switch (c) {
267                             case ';':
268                             case ' ':
269                             case '\t':
270                                 result.put(
271                                    paramName.toString().trim().toLowerCase(),
272                                    paramValue.toString().trim());
273                                 state = VALUE_DONE;
274                                 fallThrough = true;
275                                 break;
276                             default:
277                                 paramValue.append(c);
278                                 break;
279                         }
280                         if (!fallThrough)
281                             break;
282 
283                     case VALUE_DONE:
284                         switch (c) {
285                             case ';':
286                                 state = READY_FOR_NAME;
287                                 break;
288 
289                             case ' ':
290                             case '\t':
291                                 break;
292 
293                             default:
294                                 state = ERROR;
295                                 break;
296                         }
297                         break;
298                         
299                     case IN_QUOTED_VALUE:
300                         switch (c) {
301                             case '"':
302                                 if (!escaped) {
303                                     // don't trim quoted strings; the spaces could be intentional.
304                                     result.put(
305                                             paramName.toString().trim().toLowerCase(),
306                                             paramValue.toString());
307                                     state = VALUE_DONE;
308                                 } else {
309                                     escaped = false;
310                                     paramValue.append(c);                                    
311                                 }
312                                 break;
313                                 
314                             case '\\':
315                                 if (escaped) {
316                                     paramValue.append('\\');
317                                 }
318                                 escaped = !escaped;
319                                 break;
320 
321                             default:
322                                 if (escaped) {
323                                     paramValue.append('\\');
324                                 }
325                                 escaped = false;
326                                 paramValue.append(c);
327                                 break;
328                         }
329                         break;
330 
331                 }
332             }
333 
334             // done looping.  check if anything is left over.
335             if (state == IN_VALUE) {
336                 result.put(
337                         paramName.toString().trim().toLowerCase(),
338                         paramValue.toString().trim());
339             }
340         }
341 
342         return result;
343     }
344 
345     /**
346      * Creates a new unique message boundary string that can be used as boundary
347      * parameter for the Content-Type header field of a message.
348      * 
349      * @return a new unique message boundary string.
350      */
351     public static String createUniqueBoundary() {
352         StringBuilder sb = new StringBuilder();
353         sb.append("-=Part.");
354         sb.append(Integer.toHexString(nextCounterValue()));
355         sb.append('.');
356         sb.append(Long.toHexString(random.nextLong()));
357         sb.append('.');
358         sb.append(Long.toHexString(System.currentTimeMillis()));
359         sb.append('.');
360         sb.append(Long.toHexString(random.nextLong()));
361         sb.append("=-");
362         return sb.toString();
363     }
364 
365     /**
366      * Creates a new unique message identifier that can be used in message
367      * header field such as Message-ID or In-Reply-To. If the given host name is
368      * not <code>null</code> it will be used as suffix for the message ID
369      * (following an at sign).
370      * 
371      * The resulting string is enclosed in angle brackets (&lt; and &gt;);
372      * 
373      * @param hostName host name to be included in the message ID or
374      *            <code>null</code> if no host name should be included.
375      * @return a new unique message identifier.
376      */
377     public static String createUniqueMessageId(String hostName) {
378         StringBuilder sb = new StringBuilder("<Mime4j.");
379         sb.append(Integer.toHexString(nextCounterValue()));
380         sb.append('.');
381         sb.append(Long.toHexString(random.nextLong()));
382         sb.append('.');
383         sb.append(Long.toHexString(System.currentTimeMillis()));
384         if (hostName != null) {
385             sb.append('@');
386             sb.append(hostName);
387         }
388         sb.append('>');
389         return sb.toString();
390     }
391 
392     /**
393      * Formats the specified date into a RFC 822 date-time string.
394      * 
395      * @param date
396      *            date to be formatted into a string.
397      * @param zone
398      *            the time zone to use or <code>null</code> to use the default
399      *            time zone.
400      * @return the formatted time string.
401      */
402     public static String formatDate(Date date, TimeZone zone) {
403         DateFormat df = RFC822_DATE_FORMAT.get();
404 
405         if (zone == null) {
406             df.setTimeZone(TimeZone.getDefault());
407         } else {
408             df.setTimeZone(zone);
409         }
410 
411         return df.format(date);
412     }
413 
414     /**
415      * Splits the specified string into a multiple-line representation with
416      * lines no longer than 76 characters (because the line might contain
417      * encoded words; see <a href='http://www.faqs.org/rfcs/rfc2047.html'>RFC
418      * 2047</a> section 2). If the string contains non-whitespace sequences
419      * longer than 76 characters a line break is inserted at the whitespace
420      * character following the sequence resulting in a line longer than 76
421      * characters.
422      * 
423      * @param s
424      *            string to split.
425      * @param usedCharacters
426      *            number of characters already used up. Usually the number of
427      *            characters for header field name plus colon and one space.
428      * @return a multiple-line representation of the given string.
429      */
430     public static String fold(String s, int usedCharacters) {
431         final int maxCharacters = 76;
432 
433         final int length = s.length();
434         if (usedCharacters + length <= maxCharacters)
435             return s;
436 
437         StringBuilder sb = new StringBuilder();
438 
439         int lastLineBreak = -usedCharacters;
440         int wspIdx = indexOfWsp(s, 0);
441         while (true) {
442             if (wspIdx == length) {
443                 sb.append(s.substring(Math.max(0, lastLineBreak)));
444                 return sb.toString();
445             }
446 
447             int nextWspIdx = indexOfWsp(s, wspIdx + 1);
448 
449             if (nextWspIdx - lastLineBreak > maxCharacters) {
450                 sb.append(s.substring(Math.max(0, lastLineBreak), wspIdx));
451                 sb.append("\r\n");
452                 lastLineBreak = wspIdx;
453             }
454 
455             wspIdx = nextWspIdx;
456         }
457     }
458 
459     /**
460      * Unfold a multiple-line representation into a single line.
461      * 
462      * @param s
463      *            string to unfold.
464      * @return unfolded string.
465      */
466     public static String unfold(String s) {
467         final int length = s.length();
468         for (int idx = 0; idx < length; idx++) {
469             char c = s.charAt(idx);
470             if (c == '\r' || c == '\n') {
471                 return unfold0(s, idx);
472             }
473         }
474 
475         return s;
476     }
477 
478     private static String unfold0(String s, int crlfIdx) {
479         final int length = s.length();
480         StringBuilder sb = new StringBuilder(length);
481 
482         if (crlfIdx > 0) {
483             sb.append(s.substring(0, crlfIdx));
484         }
485 
486         for (int idx = crlfIdx + 1; idx < length; idx++) {
487             char c = s.charAt(idx);
488             if (c != '\r' && c != '\n') {
489                 sb.append(c);
490             }
491         }
492 
493         return sb.toString();
494     }
495 
496     private static int indexOfWsp(String s, int fromIndex) {
497         final int len = s.length();
498         for (int index = fromIndex; index < len; index++) {
499             char c = s.charAt(index);
500             if (c == ' ' || c == '\t')
501                 return index;
502         }
503         return len;
504     }
505 
506     private static synchronized int nextCounterValue() {
507         return counter++;
508     }
509 
510     private static final ThreadLocal<DateFormat> RFC822_DATE_FORMAT = new ThreadLocal<DateFormat>() {
511         @Override
512         protected DateFormat initialValue() {
513             return new Rfc822DateFormat();
514         }
515     };
516 
517     private static final class Rfc822DateFormat extends SimpleDateFormat {
518         private static final long serialVersionUID = 1L;
519 
520         public Rfc822DateFormat() {
521             super("EEE, d MMM yyyy HH:mm:ss ", Locale.US);
522         }
523 
524         @Override
525         public StringBuffer format(Date date, StringBuffer toAppendTo,
526                 FieldPosition pos) {
527             StringBuffer sb = super.format(date, toAppendTo, pos);
528 
529             int zoneMillis = calendar.get(GregorianCalendar.ZONE_OFFSET);
530             int dstMillis = calendar.get(GregorianCalendar.DST_OFFSET);
531             int minutes = (zoneMillis + dstMillis) / 1000 / 60;
532 
533             if (minutes < 0) {
534                 sb.append('-');
535                 minutes = -minutes;
536             } else {
537                 sb.append('+');
538             }
539 
540             sb.append(String.format("%02d%02d", minutes / 60, minutes % 60));
541 
542             return sb;
543         }
544     }
545 }