View Javadoc

1   /****************************************************************
2    * Licensed to the Apache Software Foundation (ASF) under one   *
3    * or more contributor license agreements.  See the NOTICE file *
4    * distributed with this work for additional information        *
5    * regarding copyright ownership.  The ASF licenses this file   *
6    * to you under the Apache License, Version 2.0 (the            *
7    * "License"); you may not use this file except in compliance   *
8    * with the License.  You may obtain a copy of the License at   *
9    *                                                              *
10   *   http://www.apache.org/licenses/LICENSE-2.0                 *
11   *                                                              *
12   * Unless required by applicable law or agreed to in writing,   *
13   * software distributed under the License is distributed on an  *
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
15   * KIND, either express or implied.  See the License for the    *
16   * specific language governing permissions and limitations      *
17   * under the License.                                           *
18   ****************************************************************/
19  
20  package org.apache.james.mime4j.codec;
21  
22  import java.io.IOException;
23  import java.io.InputStream;
24  
25  import org.apache.commons.logging.Log;
26  import org.apache.commons.logging.LogFactory;
27  
28  /**
29   * Performs Quoted-Printable decoding on an underlying stream.
30   */
31  public class QuotedPrintableInputStream extends InputStream {
32      private static Log log = LogFactory.getLog(QuotedPrintableInputStream.class);
33      
34      private InputStream stream;
35      ByteQueue byteq = new ByteQueue();
36      ByteQueue pushbackq = new ByteQueue();
37      private byte state = 0;
38      private boolean closed = false;
39  
40      public QuotedPrintableInputStream(InputStream stream) {
41          this.stream = stream;
42      }
43      
44      /**
45       * Terminates Quoted-Printable coded content. This method does NOT close 
46       * the underlying input stream.
47       * 
48       * @throws IOException on I/O errors.
49       */
50      @Override
51      public void close() throws IOException {
52          this.closed = true;
53      }
54  
55      @Override
56      public int read() throws IOException {
57          if (closed) {
58              throw new IOException("QuotedPrintableInputStream has been closed");
59          }
60          fillBuffer();
61          if (byteq.count() == 0)
62              return -1;
63          else {
64              byte val = byteq.dequeue();
65              if (val >= 0)
66                  return val;
67              else
68                  return val & 0xFF;
69          }
70      }
71  
72      /**
73       * Pulls bytes out of the underlying stream and places them in the
74       * pushback queue.  This is necessary (vs. reading from the
75       * underlying stream directly) to detect and filter out "transport
76       * padding" whitespace, i.e., all whitespace that appears immediately
77       * before a CRLF.
78       *
79       * @throws IOException Underlying stream threw IOException.
80       */
81      private void populatePushbackQueue() throws IOException {
82          //Debug.verify(pushbackq.count() == 0, "PopulatePushbackQueue called when pushback queue was not empty!");
83  
84          if (pushbackq.count() != 0)
85              return;
86  
87          while (true) {
88              int i = stream.read();
89              switch (i) {
90                  case -1:
91                      // stream is done
92                      pushbackq.clear();  // discard any whitespace preceding EOF
93                      return;
94                  case ' ':
95                  case '\t':
96                      pushbackq.enqueue((byte)i);
97                      break;
98                  case '\r':
99                  case '\n':
100                     pushbackq.clear();  // discard any whitespace preceding EOL
101                     pushbackq.enqueue((byte)i);
102                     return;
103                 default:
104                     pushbackq.enqueue((byte)i);
105                     return;
106             }
107         }
108     }
109 
110     /**
111      * Causes the pushback queue to get populated if it is empty, then
112      * consumes and decodes bytes out of it until one or more bytes are
113      * in the byte queue.  This decoding step performs the actual QP
114      * decoding.
115      *
116      * @throws IOException Underlying stream threw IOException.
117      */
118     private void fillBuffer() throws IOException {
119         byte msdChar = 0;  // first digit of escaped num
120         while (byteq.count() == 0) {
121             if (pushbackq.count() == 0) {
122                 populatePushbackQueue();
123                 if (pushbackq.count() == 0)
124                     return;
125             }
126 
127             byte b = pushbackq.dequeue();
128 
129             switch (state) {
130                 case 0:  // start state, no bytes pending
131                     if (b != '=') {
132                         byteq.enqueue(b);
133                         break;  // state remains 0
134                     } else {
135                         state = 1;
136                         break;
137                     }
138                 case 1:  // encountered "=" so far
139                     if (b == '\r') {
140                         state = 2;
141                         break;
142                     } else if ((b >= '0' && b <= '9') || (b >= 'A' && b <= 'F') || (b >= 'a' && b <= 'f')) {
143                         state = 3;
144                         msdChar = b;  // save until next digit encountered
145                         break;
146                     } else if (b == '=') {
147                         /*
148                          * Special case when == is encountered.
149                          * Emit one = and stay in this state.
150                          */
151                         if (log.isWarnEnabled()) {
152                             log.warn("Malformed MIME; got ==");
153                         }
154                         byteq.enqueue((byte)'=');
155                         break;
156                     } else {
157                         if (log.isWarnEnabled()) {
158                             log.warn("Malformed MIME; expected \\r or "
159                                     + "[0-9A-Z], got " + b);
160                         }
161                         state = 0;
162                         byteq.enqueue((byte)'=');
163                         byteq.enqueue(b);
164                         break;
165                     }
166                 case 2:  // encountered "=\r" so far
167                     if (b == '\n') {
168                         state = 0;
169                         break;
170                     } else {
171                         if (log.isWarnEnabled()) {
172                             log.warn("Malformed MIME; expected " 
173                                     + (int)'\n' + ", got " + b);
174                         }
175                         state = 0;
176                         byteq.enqueue((byte)'=');
177                         byteq.enqueue((byte)'\r');
178                         byteq.enqueue(b);
179                         break;
180                     }
181                 case 3:  // encountered =<digit> so far; expecting another <digit> to complete the octet
182                     if ((b >= '0' && b <= '9') || (b >= 'A' && b <= 'F') || (b >= 'a' && b <= 'f')) {
183                         byte msd = asciiCharToNumericValue(msdChar);
184                         byte low = asciiCharToNumericValue(b);
185                         state = 0;
186                         byteq.enqueue((byte)((msd << 4) | low));
187                         break;
188                     } else {
189                         if (log.isWarnEnabled()) {
190                             log.warn("Malformed MIME; expected "
191                                      + "[0-9A-Z], got " + b);
192                         }
193                         state = 0;
194                         byteq.enqueue((byte)'=');
195                         byteq.enqueue(msdChar);
196                         byteq.enqueue(b);
197                         break;
198                     }
199                 default:  // should never happen
200                     log.error("Illegal state: " + state);
201                     state = 0;
202                     byteq.enqueue(b);
203                     break;
204             }
205         }
206     }
207 
208     /**
209      * Converts '0' => 0, 'A' => 10, etc.
210      * @param c ASCII character value.
211      * @return Numeric value of hexadecimal character.
212      */
213     private byte asciiCharToNumericValue(byte c) {
214         if (c >= '0' && c <= '9') {
215             return (byte)(c - '0');
216         } else if (c >= 'A' && c <= 'Z') {
217             return (byte)(0xA + (c - 'A'));
218         } else if (c >= 'a' && c <= 'z') {
219             return (byte)(0xA + (c - 'a'));
220         } else {
221             /*
222              * This should never happen since all calls to this method
223              * are preceded by a check that c is in [0-9A-Za-z]
224              */
225             throw new IllegalArgumentException((char) c 
226                     + " is not a hexadecimal digit");
227         }
228     }
229 
230 }