View Javadoc

1   /****************************************************************
2    * Licensed to the Apache Software Foundation (ASF) under one   *
3    * or more contributor license agreements.  See the NOTICE file *
4    * distributed with this work for additional information        *
5    * regarding copyright ownership.  The ASF licenses this file   *
6    * to you under the Apache License, Version 2.0 (the            *
7    * "License"); you may not use this file except in compliance   *
8    * with the License.  You may obtain a copy of the License at   *
9    *                                                              *
10   *   http://www.apache.org/licenses/LICENSE-2.0                 *
11   *                                                              *
12   * Unless required by applicable law or agreed to in writing,   *
13   * software distributed under the License is distributed on an  *
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
15   * KIND, either express or implied.  See the License for the    *
16   * specific language governing permissions and limitations      *
17   * under the License.                                           *
18   ****************************************************************/
19  
20  package org.apache.james.transport.mailets;
21  
22  import org.apache.mailet.base.FlowedMessageUtils;
23  import org.apache.mailet.base.GenericMailet;
24  import org.apache.mailet.Mail;
25  import org.apache.mailet.MailetException;
26  
27  import javax.mail.MessagingException;
28  
29  import java.io.IOException;
30  import java.util.regex.Matcher;
31  import java.util.regex.Pattern;
32  
33  /**
34   * <p>Remove (best effort to) the hardcoded wrapping from a message.<br>
35   * If the text is  "format=flowed" then deflows the text. Otherwise it forces a dewrap of the text.
36   * </p>
37   * <p>Parameters:<br> 
38   * quotewidth - when we try to dewrap e quoted text it helps knowing the original
39   * with, so we can reconstruct "wrapped wraps" created by multiple wrappings by clients with
40   * different original width or simply to the add of the heading ">" that increase the line
41   * length.<br>
42   * The value should be "WIDTH+X" if the original length is known, "-X" otherwise.
43   * In the latter case the length of the longer line will be used.
44   * X is the tollerance needed for the quoting chars: if the original width is known the suggested
45   * value for X is 2 (because of "> " prefix), otherwise it is suggested to increase it to a value 
46   * like 10 (-10)</p>
47   * 
48   * <p>
49   * In summary, if the original wrap is known (for example 76, for flowed messages)<br>
50   *  <code>quotewidth = 78</code><br>
51   * Otherwise<br>
52   *  <code>quotewidth = -10</code>
53   * </p>
54   */
55  public class UnwrapText extends GenericMailet {
56      public final static String PARAMETER_NAME_QUOTEWIDTH = "quotewidth";
57      
58      private int quotewidth;
59      
60      /**
61       * returns a String describing this mailet.
62       * 
63       * @return A desciption of this mailet
64       */
65      public String getMailetInfo() {
66          return "UnwrapText";
67      }
68  
69      public void init() throws MailetException {
70          quotewidth = Integer.parseInt(getInitParameter(PARAMETER_NAME_QUOTEWIDTH, "-10"));
71      }
72  
73      public void service(Mail mail) throws MailetException {
74          try {
75              // TODO replace non standard quotes (at least "> " with ">", otherwise the widely used  "> > >" will not work.
76              
77              if (FlowedMessageUtils.isFlowedTextMessage(mail.getMessage()))
78                  FlowedMessageUtils.deflowMessage(mail.getMessage());
79              
80              else {
81                  Object o = mail.getMessage().getContent();
82                  if (o instanceof String) {
83                      String unwrapped = unwrap((String) o, quotewidth);
84                      mail.getMessage().setContent(unwrapped, mail.getMessage().getContentType());
85                      mail.getMessage().saveChanges();
86                  }
87              }
88              
89          } catch (MessagingException e) {
90              throw new MailetException("Could not unwrap message", e);
91              
92          } catch (IOException e) {
93              throw new MailetException("Could not unwrap message", e);
94          }
95          
96      }
97      
98      public static String unwrap(String text) {
99          return unwrap(text, - 10);
100     }
101 
102     public static String unwrap(String text, int qwidth) {
103         String[] lines = text.split("\r\n|\n", -1);
104         
105         //P1: Manage spaces without trims
106         Pattern p1 = Pattern.compile("([> ]*)(.*[^ .?!][ ]*)$", 0);
107         
108         //P2: Quotation char at the begin of a line and the first word starts with a lowercase char or a number. The word ends with a space, a tab or a lineend. 
109         Pattern p2 = Pattern.compile("^([> ]*)(([a-z\u00E0\u00E8\u00E9\u00EC\u00F2\u00F9][^ \t\r\n]*|[0-9][0-9,.]*)([ \t].*$|$))", 0);
110         
111         // Width computation
112         int width = 0;
113         for (int i = 0; i < lines.length - 1; i++) {
114             String l = lines[i].trim();
115             if (l.length() > width) width = l.length();
116         }
117         
118         if (width < 40) return text;
119         if (qwidth < 0) qwidth = width - qwidth;
120         
121         StringBuffer result = new StringBuffer();
122         int prevWrapped = 0;
123         for (int i = 0; i < lines.length; i++) {
124             if (prevWrapped != 0) {
125                 if (prevWrapped > 0 ) {
126                     if (result.charAt(result.length() - 1) != ' ') result.append(" ");
127                 }
128                 else result.append("\r\n");
129             }
130             String l = lines[i];
131             Matcher m1 = p1.matcher(l);
132             Matcher m2 = i < lines.length - 1 ? p2.matcher(lines[i + 1]) : null;
133             boolean b;
134             int w;
135             // if patterns match, the quote level are identical and if the line length added to the length of the following word is greater than width then it is a wrapped line.
136             if (m1.matches() && i < lines.length - 1 && m2.matches() && (
137                     // The following line has the same quoting of the previous.
138                     ((b = m1.group(1).trim().equals(m2.group(1).trim())) && l.length() + m2.group(3).length() + 1 > width)
139                     ||
140                     // The following line has no quoting (while the previous yes)
141                     (!b && m2.group(1).trim().equals("") && (w = l.length() + m2.group(2).trim().length() + 1) > width && w <= qwidth)
142                 )) {
143                 
144                 if (b) {
145                     if (prevWrapped > 0 && m1.groupCount() >= 2) result.append(m1.group(2));
146                     else result.append(l);
147                     prevWrapped = 1;
148                     
149                 } else {
150                     lines[i + 1] = l + (l.charAt(l.length() - 1) != ' ' ? " " : "") + m2.group(2).trim();
151                     // Revert the previous append
152                     if (prevWrapped != 0) {
153                         if (prevWrapped > 0) result.deleteCharAt(result.length() - 1);
154                         else result.delete(result.length() - 2, result.length());
155                     }
156                 }
157                 
158             } else {
159                 Matcher m3 = p2.matcher(l);
160                 if (prevWrapped > 0 && m3.matches()) result.append(m3.group(2));
161                 else result.append(lines[i]);
162                 prevWrapped = -1;
163             }
164         }
165         
166         return result.toString();
167     }
168     
169 }