View Javadoc

/****************************************************************
 * Licensed to the Apache Software Foundation (ASF) under one   *
 * or more contributor license agreements.  See the NOTICE file *
 * distributed with this work for additional information        *
 * regarding copyright ownership.  The ASF licenses this file   *
 * to you under the Apache License, Version 2.0 (the            *
 * "License"); you may not use this file except in compliance   *
 * with the License.  You may obtain a copy of the License at   *
 *                                                              *
 *   http://www.apache.org/licenses/LICENSE-2.0                 *
 *                                                              *
 * Unless required by applicable law or agreed to in writing,   *
 * software distributed under the License is distributed on an  *
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
 * KIND, either express or implied.  See the License for the    *
 * specific language governing permissions and limitations      *
 * under the License.                                           *
 ****************************************************************/
19  
20  package org.apache.james.mime4j.parser;
21  
22  import java.io.IOException;
23  import java.util.BitSet;
24  
25  import org.apache.commons.logging.Log;
26  import org.apache.commons.logging.LogFactory;
27  import org.apache.james.mime4j.MimeException;
28  import org.apache.james.mime4j.descriptor.BodyDescriptor;
29  import org.apache.james.mime4j.descriptor.DefaultBodyDescriptor;
30  import org.apache.james.mime4j.descriptor.MaximalBodyDescriptor;
31  import org.apache.james.mime4j.descriptor.MutableBodyDescriptor;
32  import org.apache.james.mime4j.io.LineReaderInputStream;
33  import org.apache.james.mime4j.io.MaxHeaderLimitException;
34  import org.apache.james.mime4j.io.MaxLineLimitException;
35  import org.apache.james.mime4j.util.ByteArrayBuffer;
36  import org.apache.james.mime4j.util.CharsetUtil;
37  
38  /**
39   * Abstract MIME entity.
40   */
41  public abstract class AbstractEntity implements EntityStateMachine {
42  
43      protected final Log log;
44      
45      protected final BodyDescriptor parent;
46      protected final int startState;
47      protected final int endState;
48      protected final MimeEntityConfig config;
49      protected final MutableBodyDescriptor body;
50      
51      protected int state;
52  
53      private final ByteArrayBuffer linebuf;
54  
55      private int lineCount;
56      private Field field;
57      private boolean endOfHeader;
58      private int headerCount;
59  
60      private static final BitSet fieldChars = new BitSet();
61  
62      static {
63          for (int i = 0x21; i <= 0x39; i++) {
64              fieldChars.set(i);
65          }
66          for (int i = 0x3b; i <= 0x7e; i++) {
67              fieldChars.set(i);
68          }
69      }
70  
71      /**
72       * Internal state, not exposed.
73       */
74      private static final int T_IN_BODYPART = -2;
75      /**
76       * Internal state, not exposed.
77       */
78      private static final int T_IN_MESSAGE = -3;
79  
80      AbstractEntity(
81              BodyDescriptor parent,
82              int startState, 
83              int endState,
84              MimeEntityConfig config) {
85          this.log = LogFactory.getLog(getClass());        
86          this.parent = parent;
87          this.state = startState;
88          this.startState = startState;
89          this.endState = endState;
90          this.config = config;
91          this.body = newBodyDescriptor(parent);
92          this.linebuf = new ByteArrayBuffer(64);
93          this.lineCount = 0;
94          this.endOfHeader = false;
95          this.headerCount = 0;
96      }
97  
98      public int getState() {
99          return state;
100     }
101     
102     /**
103      * Creates a new instance of {@link BodyDescriptor}. Subclasses may override
104      * this in order to create body descriptors, that provide more specific
105      * information.
106      */
107     protected MutableBodyDescriptor newBodyDescriptor(BodyDescriptor pParent) {
108         final MutableBodyDescriptor result;
109         if (config.isMaximalBodyDescriptor()) {
110             result = new MaximalBodyDescriptor(pParent);
111         } else {
112             result = new DefaultBodyDescriptor(pParent);
113         }
114         return result;
115     }
116 
117     /**
118      * Returns the current line number or <code>-1</code> if line number
119      * information is not available.
120      */
121     protected abstract int getLineNumber();
122     
123     protected abstract LineReaderInputStream getDataStream();
124     
125     private ByteArrayBuffer fillFieldBuffer() throws IOException, MimeException {
126         if (endOfHeader) 
127             throw new IllegalStateException();
128 
129         int maxLineLen = config.getMaxLineLen();
130         LineReaderInputStream instream = getDataStream();
131         ByteArrayBuffer fieldbuf = new ByteArrayBuffer(64);
132 
133         for (;;) {
134             // If there's still data stuck in the line buffer
135             // copy it to the field buffer
136             int len = linebuf.length();
137             if (maxLineLen > 0 && fieldbuf.length() + len >= maxLineLen) {
138                 throw new MaxLineLimitException("Maximum line length limit exceeded");
139             }
140             if (len > 0) {
141                 fieldbuf.append(linebuf.buffer(), 0, len);
142             }
143             linebuf.clear();
144             if (instream.readLine(linebuf) == -1) {
145                 monitor(Event.HEADERS_PREMATURE_END);
146                 endOfHeader = true;
147                 break;
148             }
149             len = linebuf.length();
150             if (len > 0 && linebuf.byteAt(len - 1) == '\n') {
151                 len--;
152             }
153             if (len > 0 && linebuf.byteAt(len - 1) == '\r') {
154                 len--;
155             }
156             if (len == 0) {
157                 // empty line detected 
158                 endOfHeader = true;
159                 break;
160             }
161             lineCount++;
162             if (lineCount > 1) {
163                 int ch = linebuf.byteAt(0);
164                 if (ch != CharsetUtil.SP && ch != CharsetUtil.HT) {
165                     // new header detected
166                     break;
167                 }
168             }
169         }
170 
171         return fieldbuf;
172     }
173 
174     protected boolean parseField() throws MimeException, IOException {
175         int maxHeaderLimit = config.getMaxHeaderCount();
176         for (;;) {
177             if (endOfHeader) {
178                 return false;
179             }
180             if (headerCount >= maxHeaderLimit) {
181                 throw new MaxHeaderLimitException("Maximum header limit exceeded");
182             }
183 
184             ByteArrayBuffer fieldbuf = fillFieldBuffer();
185             headerCount++;
186 
187             // Strip away line delimiter
188             int len = fieldbuf.length();
189             if (len > 0 && fieldbuf.byteAt(len - 1) == '\n') {
190                 len--;
191             }
192             if (len > 0 && fieldbuf.byteAt(len - 1) == '\r') {
193                 len--;
194             }
195             fieldbuf.setLength(len);
196             
197             boolean valid = true;
198             
199             int pos = fieldbuf.indexOf((byte) ':');
200             if (pos <= 0) {
201                 monitor(Event.INALID_HEADER);
202                 valid = false;
203             } else {
204                 for (int i = 0; i < pos; i++) {
205                     if (!fieldChars.get(fieldbuf.byteAt(i) & 0xff)) {
206                         monitor(Event.INALID_HEADER);
207                         valid = false;
208                         break;
209                     }
210                 }
211             }
212             if (valid) {
213                 field = new RawField(fieldbuf, pos);
214                 body.addField(field);            
215                 return true;
216             }
217         }
218     }
219 
220     /**
221      * <p>Gets a descriptor for the current entity.
222      * This method is valid if {@link #getState()} returns:</p>
223      * <ul>
224      * <li>{@link EntityStates#T_BODY}</li>
225      * <li>{@link EntityStates#T_START_MULTIPART}</li>
226      * <li>{@link EntityStates#T_EPILOGUE}</li>
227      * <li>{@link EntityStates#T_PREAMBLE}</li>
228      * </ul>
229      * @return <code>BodyDescriptor</code>, not nulls
230      */
231     public BodyDescriptor getBodyDescriptor() {
232         switch (getState()) {
233         case EntityStates.T_BODY:
234         case EntityStates.T_START_MULTIPART:
235         case EntityStates.T_PREAMBLE:
236         case EntityStates.T_EPILOGUE:
237         case EntityStates.T_END_OF_STREAM:
238             return body;
239         default:
240             throw new IllegalStateException("Invalid state :" + stateToString(state));
241         }
242     }
243 
244     /**
245      * This method is valid, if {@link #getState()} returns {@link EntityStates#T_FIELD}.
246      * @return String with the fields raw contents.
247      * @throws IllegalStateException {@link #getState()} returns another
248      *   value than {@link EntityStates#T_FIELD}.
249      */
250     public Field getField() {
251         switch (getState()) {
252         case EntityStates.T_FIELD:
253             return field;
254         default:
255             throw new IllegalStateException("Invalid state :" + stateToString(state));
256         }
257     }
258 
259     /**
260      * Monitors the given event.
261      * Subclasses may override to perform actions upon events.
262      * Base implementation logs at warn.
263      * @param event <code>Event</code>, not null
264      * @throws MimeException subclasses may elect to throw this exception upon
265      * invalid content
266      * @throws IOException subclasses may elect to throw this exception
267      */
268     protected void monitor(Event event) throws MimeException, IOException {
269         if (config.isStrictParsing()) {
270             throw new MimeParseEventException(event);
271         } else {
272             warn(event);
273         }
274     }
275     
276     /**
277      * Creates an indicative message suitable for display
278      * based on the given event and the current state of the system.
279      * @param event <code>Event</code>, not null
280      * @return message suitable for use as a message in an exception
281      * or for logging
282      */
283     protected String message(Event event) {
284         final String message;
285         if (event == null) {
286             message = "Event is unexpectedly null.";
287         } else {
288             message = event.toString();
289         }
290 
291         int lineNumber = getLineNumber();
292         if (lineNumber <= 0)
293             return message;
294         else
295             return "Line " + lineNumber + ": " + message;
296     }
297     
298     /**
299      * Logs (at warn) an indicative message based on the given event 
300      * and the current state of the system.
301      * @param event <code>Event</code>, not null
302      */
303     protected void warn(Event event) {
304         if (log.isWarnEnabled()) {
305             log.warn(message(event));
306         }
307     }
308     
309     /**
310      * Logs (at debug) an indicative message based on the given event
311      * and the current state of the system.
312      * @param event <code>Event</code>, not null
313      */
314     protected void debug(Event event) {
315         if (log.isDebugEnabled()) {
316             log.debug(message(event));
317         }
318     }
319 
320     @Override
321     public String toString() {
322         return getClass().getName() + " [" + stateToString(state)
323         + "][" + body.getMimeType() + "][" + body.getBoundary() + "]";
324     }
325 
326     /**
327      * Renders a state as a string suitable for logging.
328      * @param state 
329      * @return rendered as string, not null
330      */
331     public static final String stateToString(int state) {
332         final String result;
333         switch (state) {
334             case EntityStates.T_END_OF_STREAM:
335                 result = "End of stream";
336                 break;
337             case EntityStates.T_START_MESSAGE:
338                 result = "Start message";
339                 break;
340             case EntityStates.T_END_MESSAGE:
341                 result = "End message";
342                 break;
343             case EntityStates.T_RAW_ENTITY:
344                 result = "Raw entity";
345                 break;
346             case EntityStates.T_START_HEADER:
347                 result = "Start header";
348                 break;
349             case EntityStates.T_FIELD:
350                 result = "Field";
351                 break;
352             case EntityStates.T_END_HEADER:
353                 result = "End header";
354                 break;
355             case EntityStates.T_START_MULTIPART:
356                 result = "Start multipart";
357                 break;
358             case EntityStates.T_END_MULTIPART:
359                 result = "End multipart";
360                 break;
361             case EntityStates.T_PREAMBLE:
362                 result = "Preamble";
363                 break;
364             case EntityStates.T_EPILOGUE:
365                 result = "Epilogue";
366                 break;
367             case EntityStates.T_START_BODYPART:
368                 result = "Start bodypart";
369                 break;
370             case EntityStates.T_END_BODYPART:
371                 result = "End bodypart";
372                 break;
373             case EntityStates.T_BODY:
374                 result = "Body";
375                 break;
376             case T_IN_BODYPART:
377                 result = "Bodypart";
378                 break;
379             case T_IN_MESSAGE:
380                 result = "In message";
381                 break;
382             default:
383                 result = "Unknown";
384                 break;
385         }
386         return result;
387     }
388     
389 }