1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package org.apache.james.mime4j.codec;
21
22 import java.io.ByteArrayInputStream;
23 import java.io.ByteArrayOutputStream;
24 import java.io.IOException;
25 import java.io.UnsupportedEncodingException;
26
27 import org.apache.commons.logging.Log;
28 import org.apache.commons.logging.LogFactory;
29 import org.apache.james.mime4j.util.CharsetUtil;
30
31
32
33
34 public class DecoderUtil {
35 private static Log log = LogFactory.getLog(DecoderUtil.class);
36
37
38
39
40
41
42
43 public static byte[] decodeBaseQuotedPrintable(String s) {
44 ByteArrayOutputStream baos = new ByteArrayOutputStream();
45
46 try {
47 byte[] bytes = s.getBytes("US-ASCII");
48
49 QuotedPrintableInputStream is = new QuotedPrintableInputStream(
50 new ByteArrayInputStream(bytes));
51
52 int b = 0;
53 while ((b = is.read()) != -1) {
54 baos.write(b);
55 }
56 } catch (IOException e) {
57
58
59
60 log.error(e);
61 }
62
63 return baos.toByteArray();
64 }
65
66
67
68
69
70
71
72 public static byte[] decodeBase64(String s) {
73 ByteArrayOutputStream baos = new ByteArrayOutputStream();
74
75 try {
76 byte[] bytes = s.getBytes("US-ASCII");
77
78 Base64InputStream is = new Base64InputStream(
79 new ByteArrayInputStream(bytes));
80
81 int b = 0;
82 while ((b = is.read()) != -1) {
83 baos.write(b);
84 }
85 } catch (IOException e) {
86
87
88
89 log.error(e);
90 }
91
92 return baos.toByteArray();
93 }
94
95
96
97
98
99
100
101
102
103
104
105 public static String decodeB(String encodedWord, String charset)
106 throws UnsupportedEncodingException {
107
108 return new String(decodeBase64(encodedWord), charset);
109 }
110
111
112
113
114
115
116
117
118
119
120
121 public static String decodeQ(String encodedWord, String charset)
122 throws UnsupportedEncodingException {
123
124
125
126
127 StringBuilder sb = new StringBuilder(128);
128 for (int i = 0; i < encodedWord.length(); i++) {
129 char c = encodedWord.charAt(i);
130 if (c == '_') {
131 sb.append("=20");
132 } else {
133 sb.append(c);
134 }
135 }
136
137 return new String(decodeBaseQuotedPrintable(sb.toString()), charset);
138 }
139
140
141
142
143
144
145
146
147
148
149 public static String decodeEncodedWords(String body) {
150 int previousEnd = 0;
151 boolean previousWasEncoded = false;
152
153 StringBuilder sb = new StringBuilder();
154
155 while (true) {
156 int begin = body.indexOf("=?", previousEnd);
157 int end = begin == -1 ? -1 : body.indexOf("?=", begin + 2);
158 if (end == -1) {
159 if (previousEnd == 0)
160 return body;
161
162 sb.append(body.substring(previousEnd));
163 return sb.toString();
164 }
165 end += 2;
166
167 String sep = body.substring(previousEnd, begin);
168
169 String decoded = decodeEncodedWord(body, begin, end);
170 if (decoded == null) {
171 sb.append(sep);
172 sb.append(body.substring(begin, end));
173 } else {
174 if (!previousWasEncoded || !CharsetUtil.isWhitespace(sep)) {
175 sb.append(sep);
176 }
177 sb.append(decoded);
178 }
179
180 previousEnd = end;
181 previousWasEncoded = decoded != null;
182 }
183 }
184
185
186 private static String decodeEncodedWord(String body, int begin, int end) {
187 int qm1 = body.indexOf('?', begin + 2);
188 if (qm1 == end - 2)
189 return null;
190
191 int qm2 = body.indexOf('?', qm1 + 1);
192 if (qm2 == end - 2)
193 return null;
194
195 String mimeCharset = body.substring(begin + 2, qm1);
196 String encoding = body.substring(qm1 + 1, qm2);
197 String encodedText = body.substring(qm2 + 1, end - 2);
198
199 String charset = CharsetUtil.toJavaCharset(mimeCharset);
200 if (charset == null) {
201 if (log.isWarnEnabled()) {
202 log.warn("MIME charset '" + mimeCharset + "' in encoded word '"
203 + body.substring(begin, end) + "' doesn't have a "
204 + "corresponding Java charset");
205 }
206 return null;
207 } else if (!CharsetUtil.isDecodingSupported(charset)) {
208 if (log.isWarnEnabled()) {
209 log.warn("Current JDK doesn't support decoding of charset '"
210 + charset + "' (MIME charset '" + mimeCharset
211 + "' in encoded word '" + body.substring(begin, end)
212 + "')");
213 }
214 return null;
215 }
216
217 if (encodedText.length() == 0) {
218 if (log.isWarnEnabled()) {
219 log.warn("Missing encoded text in encoded word: '"
220 + body.substring(begin, end) + "'");
221 }
222 return null;
223 }
224
225 try {
226 if (encoding.equalsIgnoreCase("Q")) {
227 return DecoderUtil.decodeQ(encodedText, charset);
228 } else if (encoding.equalsIgnoreCase("B")) {
229 return DecoderUtil.decodeB(encodedText, charset);
230 } else {
231 if (log.isWarnEnabled()) {
232 log.warn("Warning: Unknown encoding in encoded word '"
233 + body.substring(begin, end) + "'");
234 }
235 return null;
236 }
237 } catch (UnsupportedEncodingException e) {
238
239 if (log.isWarnEnabled()) {
240 log.warn("Unsupported encoding in encoded word '"
241 + body.substring(begin, end) + "'", e);
242 }
243 return null;
244 } catch (RuntimeException e) {
245 if (log.isWarnEnabled()) {
246 log.warn("Could not decode encoded word '"
247 + body.substring(begin, end) + "'", e);
248 }
249 return null;
250 }
251 }
252 }