1 /****************************************************************
2 * Licensed to the Apache Software Foundation (ASF) under one *
3 * or more contributor license agreements. See the NOTICE file *
4 * distributed with this work for additional information *
5 * regarding copyright ownership. The ASF licenses this file *
6 * to you under the Apache License, Version 2.0 (the *
7 * "License"); you may not use this file except in compliance *
8 * with the License. You may obtain a copy of the License at *
9 * *
10 * http://www.apache.org/licenses/LICENSE-2.0 *
11 * *
12 * Unless required by applicable law or agreed to in writing, *
13 * software distributed under the License is distributed on an *
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
15 * KIND, either express or implied. See the License for the *
16 * specific language governing permissions and limitations *
17 * under the License. *
18 ****************************************************************/
19
20 package org.apache.james.mime4j.codec;
21
22 import java.io.IOException;
23 import java.io.InputStream;
24
25 import org.apache.commons.logging.Log;
26 import org.apache.commons.logging.LogFactory;
27
28 /**
29 * Performs Quoted-Printable decoding on an underlying stream.
30 */
31 public class QuotedPrintableInputStream extends InputStream {
32 private static Log log = LogFactory.getLog(QuotedPrintableInputStream.class);
33
34 private InputStream stream;
35 ByteQueue byteq = new ByteQueue();
36 ByteQueue pushbackq = new ByteQueue();
37 private byte state = 0;
38 private boolean closed = false;
39
40 public QuotedPrintableInputStream(InputStream stream) {
41 this.stream = stream;
42 }
43
44 /**
45 * Terminates Quoted-Printable coded content. This method does NOT close
46 * the underlying input stream.
47 *
48 * @throws IOException on I/O errors.
49 */
50 @Override
51 public void close() throws IOException {
52 this.closed = true;
53 }
54
55 @Override
56 public int read() throws IOException {
57 if (closed) {
58 throw new IOException("QuotedPrintableInputStream has been closed");
59 }
60 fillBuffer();
61 if (byteq.count() == 0)
62 return -1;
63 else {
64 byte val = byteq.dequeue();
65 if (val >= 0)
66 return val;
67 else
68 return val & 0xFF;
69 }
70 }
71
72 /**
73 * Pulls bytes out of the underlying stream and places them in the
74 * pushback queue. This is necessary (vs. reading from the
75 * underlying stream directly) to detect and filter out "transport
76 * padding" whitespace, i.e., all whitespace that appears immediately
77 * before a CRLF.
78 *
79 * @throws IOException Underlying stream threw IOException.
80 */
81 private void populatePushbackQueue() throws IOException {
82 //Debug.verify(pushbackq.count() == 0, "PopulatePushbackQueue called when pushback queue was not empty!");
83
84 if (pushbackq.count() != 0)
85 return;
86
87 while (true) {
88 int i = stream.read();
89 switch (i) {
90 case -1:
91 // stream is done
92 pushbackq.clear(); // discard any whitespace preceding EOF
93 return;
94 case ' ':
95 case '\t':
96 pushbackq.enqueue((byte)i);
97 break;
98 case '\r':
99 case '\n':
100 pushbackq.clear(); // discard any whitespace preceding EOL
101 pushbackq.enqueue((byte)i);
102 return;
103 default:
104 pushbackq.enqueue((byte)i);
105 return;
106 }
107 }
108 }
109
110 /**
111 * Causes the pushback queue to get populated if it is empty, then
112 * consumes and decodes bytes out of it until one or more bytes are
113 * in the byte queue. This decoding step performs the actual QP
114 * decoding.
115 *
116 * @throws IOException Underlying stream threw IOException.
117 */
118 private void fillBuffer() throws IOException {
119 byte msdChar = 0; // first digit of escaped num
120 while (byteq.count() == 0) {
121 if (pushbackq.count() == 0) {
122 populatePushbackQueue();
123 if (pushbackq.count() == 0)
124 return;
125 }
126
127 byte b = pushbackq.dequeue();
128
129 switch (state) {
130 case 0: // start state, no bytes pending
131 if (b != '=') {
132 byteq.enqueue(b);
133 break; // state remains 0
134 } else {
135 state = 1;
136 break;
137 }
138 case 1: // encountered "=" so far
139 if (b == '\r') {
140 state = 2;
141 break;
142 } else if ((b >= '0' && b <= '9') || (b >= 'A' && b <= 'F') || (b >= 'a' && b <= 'f')) {
143 state = 3;
144 msdChar = b; // save until next digit encountered
145 break;
146 } else if (b == '=') {
147 /*
148 * Special case when == is encountered.
149 * Emit one = and stay in this state.
150 */
151 if (log.isWarnEnabled()) {
152 log.warn("Malformed MIME; got ==");
153 }
154 byteq.enqueue((byte)'=');
155 break;
156 } else {
157 if (log.isWarnEnabled()) {
158 log.warn("Malformed MIME; expected \\r or "
159 + "[0-9A-Z], got " + b);
160 }
161 state = 0;
162 byteq.enqueue((byte)'=');
163 byteq.enqueue(b);
164 break;
165 }
166 case 2: // encountered "=\r" so far
167 if (b == '\n') {
168 state = 0;
169 break;
170 } else {
171 if (log.isWarnEnabled()) {
172 log.warn("Malformed MIME; expected "
173 + (int)'\n' + ", got " + b);
174 }
175 state = 0;
176 byteq.enqueue((byte)'=');
177 byteq.enqueue((byte)'\r');
178 byteq.enqueue(b);
179 break;
180 }
181 case 3: // encountered =<digit> so far; expecting another <digit> to complete the octet
182 if ((b >= '0' && b <= '9') || (b >= 'A' && b <= 'F') || (b >= 'a' && b <= 'f')) {
183 byte msd = asciiCharToNumericValue(msdChar);
184 byte low = asciiCharToNumericValue(b);
185 state = 0;
186 byteq.enqueue((byte)((msd << 4) | low));
187 break;
188 } else {
189 if (log.isWarnEnabled()) {
190 log.warn("Malformed MIME; expected "
191 + "[0-9A-Z], got " + b);
192 }
193 state = 0;
194 byteq.enqueue((byte)'=');
195 byteq.enqueue(msdChar);
196 byteq.enqueue(b);
197 break;
198 }
199 default: // should never happen
200 log.error("Illegal state: " + state);
201 state = 0;
202 byteq.enqueue(b);
203 break;
204 }
205 }
206 }
207
208 /**
209 * Converts '0' => 0, 'A' => 10, etc.
210 * @param c ASCII character value.
211 * @return Numeric value of hexadecimal character.
212 */
213 private byte asciiCharToNumericValue(byte c) {
214 if (c >= '0' && c <= '9') {
215 return (byte)(c - '0');
216 } else if (c >= 'A' && c <= 'Z') {
217 return (byte)(0xA + (c - 'A'));
218 } else if (c >= 'a' && c <= 'z') {
219 return (byte)(0xA + (c - 'a'));
220 } else {
221 /*
222 * This should never happen since all calls to this method
223 * are preceded by a check that c is in [0-9A-Za-z]
224 */
225 throw new IllegalArgumentException((char) c
226 + " is not a hexadecimal digit");
227 }
228 }
229
230 }