View Javadoc

1   /****************************************************************
2    * Licensed to the Apache Software Foundation (ASF) under one   *
3    * or more contributor license agreements.  See the NOTICE file *
4    * distributed with this work for additional information        *
5    * regarding copyright ownership.  The ASF licenses this file   *
6    * to you under the Apache License, Version 2.0 (the            *
7    * "License"); you may not use this file except in compliance   *
8    * with the License.  You may obtain a copy of the License at   *
9    *                                                              *
10   *   http://www.apache.org/licenses/LICENSE-2.0                 *
11   *                                                              *
12   * Unless required by applicable law or agreed to in writing,   *
13   * software distributed under the License is distributed on an  *
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
15   * KIND, either express or implied.  See the License for the    *
16   * specific language governing permissions and limitations      *
17   * under the License.                                           *
18   ****************************************************************/
19  
20  package org.apache.james.mime4j.codec;
21  
22  import java.io.ByteArrayInputStream;
23  import java.io.ByteArrayOutputStream;
24  import java.io.IOException;
25  import java.io.UnsupportedEncodingException;
26  
27  import org.apache.commons.logging.Log;
28  import org.apache.commons.logging.LogFactory;
29  import org.apache.james.mime4j.util.CharsetUtil;
30  
31  /**
32   * Static methods for decoding strings, byte arrays and encoded words.
33   */
34  public class DecoderUtil {
35      private static Log log = LogFactory.getLog(DecoderUtil.class);
36      
37      /**
38       * Decodes a string containing quoted-printable encoded data. 
39       * 
40       * @param s the string to decode.
41       * @return the decoded bytes.
42       */
43      public static byte[] decodeBaseQuotedPrintable(String s) {
44          ByteArrayOutputStream baos = new ByteArrayOutputStream();
45          
46          try {
47              byte[] bytes = s.getBytes("US-ASCII");
48              
49              QuotedPrintableInputStream is = new QuotedPrintableInputStream(
50                                                 new ByteArrayInputStream(bytes));
51              
52              int b = 0;
53              while ((b = is.read()) != -1) {
54                  baos.write(b);
55              }
56          } catch (IOException e) {
57              /*
58               * This should never happen!
59               */
60              log.error(e);
61          }
62          
63          return baos.toByteArray();
64      }
65      
66      /**
67       * Decodes a string containing base64 encoded data. 
68       * 
69       * @param s the string to decode.
70       * @return the decoded bytes.
71       */
72      public static byte[] decodeBase64(String s) {
73          ByteArrayOutputStream baos = new ByteArrayOutputStream();
74          
75          try {
76              byte[] bytes = s.getBytes("US-ASCII");
77              
78              Base64InputStream is = new Base64InputStream(
79                                          new ByteArrayInputStream(bytes));
80              
81              int b = 0;
82              while ((b = is.read()) != -1) {
83                  baos.write(b);
84              }
85          } catch (IOException e) {
86              /*
87               * This should never happen!
88               */
89              log.error(e);
90          }
91          
92          return baos.toByteArray();
93      }
94      
95      /**
96       * Decodes an encoded word encoded with the 'B' encoding (described in 
97       * RFC 2047) found in a header field body.
98       * 
99       * @param encodedWord the encoded word to decode.
100      * @param charset the Java charset to use.
101      * @return the decoded string.
102      * @throws UnsupportedEncodingException if the given Java charset isn't 
103      *         supported.
104      */
105     public static String decodeB(String encodedWord, String charset) 
106             throws UnsupportedEncodingException {
107         
108         return new String(decodeBase64(encodedWord), charset);
109     }
110     
111     /**
112      * Decodes an encoded word encoded with the 'Q' encoding (described in 
113      * RFC 2047) found in a header field body.
114      * 
115      * @param encodedWord the encoded word to decode.
116      * @param charset the Java charset to use.
117      * @return the decoded string.
118      * @throws UnsupportedEncodingException if the given Java charset isn't 
119      *         supported.
120      */
121     public static String decodeQ(String encodedWord, String charset)
122             throws UnsupportedEncodingException {
123            
124         /*
125          * Replace _ with =20
126          */
127         StringBuilder sb = new StringBuilder(128);
128         for (int i = 0; i < encodedWord.length(); i++) {
129             char c = encodedWord.charAt(i);
130             if (c == '_') {
131                 sb.append("=20");
132             } else {
133                 sb.append(c);
134             }
135         }
136         
137         return new String(decodeBaseQuotedPrintable(sb.toString()), charset);
138     }
139     
140     /**
141      * Decodes a string containing encoded words as defined by RFC 2047.
142      * Encoded words in have the form 
143      * =?charset?enc?Encoded word?= where enc is either 'Q' or 'q' for 
144      * quoted-printable and 'B' or 'b' for Base64.
145      * 
146      * @param body the string to decode.
147      * @return the decoded string.
148      */
149     public static String decodeEncodedWords(String body) {
150         int previousEnd = 0;
151         boolean previousWasEncoded = false;
152 
153         StringBuilder sb = new StringBuilder();
154 
155         while (true) {
156             int begin = body.indexOf("=?", previousEnd);
157             int end = begin == -1 ? -1 : body.indexOf("?=", begin + 2);
158             if (end == -1) {
159                 if (previousEnd == 0)
160                     return body;
161 
162                 sb.append(body.substring(previousEnd));
163                 return sb.toString();
164             }
165             end += 2;
166 
167             String sep = body.substring(previousEnd, begin);
168 
169             String decoded = decodeEncodedWord(body, begin, end);
170             if (decoded == null) {
171                 sb.append(sep);
172                 sb.append(body.substring(begin, end));
173             } else {
174                 if (!previousWasEncoded || !CharsetUtil.isWhitespace(sep)) {
175                     sb.append(sep);
176                 }
177                 sb.append(decoded);
178             }
179 
180             previousEnd = end;
181             previousWasEncoded = decoded != null;
182         }
183     }
184 
185     // return null on error
186     private static String decodeEncodedWord(String body, int begin, int end) {
187         int qm1 = body.indexOf('?', begin + 2);
188         if (qm1 == end - 2)
189             return null;
190 
191         int qm2 = body.indexOf('?', qm1 + 1);
192         if (qm2 == end - 2)
193             return null;
194 
195         String mimeCharset = body.substring(begin + 2, qm1);
196         String encoding = body.substring(qm1 + 1, qm2);
197         String encodedText = body.substring(qm2 + 1, end - 2);
198 
199         String charset = CharsetUtil.toJavaCharset(mimeCharset);
200         if (charset == null) {
201             if (log.isWarnEnabled()) {
202                 log.warn("MIME charset '" + mimeCharset + "' in encoded word '"
203                         + body.substring(begin, end) + "' doesn't have a "
204                         + "corresponding Java charset");
205             }
206             return null;
207         } else if (!CharsetUtil.isDecodingSupported(charset)) {
208             if (log.isWarnEnabled()) {
209                 log.warn("Current JDK doesn't support decoding of charset '"
210                         + charset + "' (MIME charset '" + mimeCharset
211                         + "' in encoded word '" + body.substring(begin, end)
212                         + "')");
213             }
214             return null;
215         }
216 
217         if (encodedText.length() == 0) {
218             if (log.isWarnEnabled()) {
219                 log.warn("Missing encoded text in encoded word: '"
220                         + body.substring(begin, end) + "'");
221             }
222             return null;
223         }
224 
225         try {
226             if (encoding.equalsIgnoreCase("Q")) {
227                 return DecoderUtil.decodeQ(encodedText, charset);
228             } else if (encoding.equalsIgnoreCase("B")) {
229                 return DecoderUtil.decodeB(encodedText, charset);
230             } else {
231                 if (log.isWarnEnabled()) {
232                     log.warn("Warning: Unknown encoding in encoded word '"
233                             + body.substring(begin, end) + "'");
234                 }
235                 return null;
236             }
237         } catch (UnsupportedEncodingException e) {
238             // should not happen because of isDecodingSupported check above
239             if (log.isWarnEnabled()) {
240                 log.warn("Unsupported encoding in encoded word '"
241                         + body.substring(begin, end) + "'", e);
242             }
243             return null;
244         } catch (RuntimeException e) {
245             if (log.isWarnEnabled()) {
246                 log.warn("Could not decode encoded word '"
247                         + body.substring(begin, end) + "'", e);
248             }
249             return null;
250         }
251     }
252 }