1 /****************************************************************
2 * Licensed to the Apache Software Foundation (ASF) under one *
3 * or more contributor license agreements. See the NOTICE file *
4 * distributed with this work for additional information *
5 * regarding copyright ownership. The ASF licenses this file *
6 * to you under the Apache License, Version 2.0 (the *
7 * "License"); you may not use this file except in compliance *
8 * with the License. You may obtain a copy of the License at *
9 * *
10 * http://www.apache.org/licenses/LICENSE-2.0 *
11 * *
12 * Unless required by applicable law or agreed to in writing, *
13 * software distributed under the License is distributed on an *
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
15 * KIND, either express or implied. See the License for the *
16 * specific language governing permissions and limitations *
17 * under the License. *
18 ****************************************************************/
19
20 package org.apache.james.transport.mailets;
21
22 import org.apache.mailet.base.FlowedMessageUtils;
23 import org.apache.mailet.base.GenericMailet;
24 import org.apache.mailet.Mail;
25 import org.apache.mailet.MailetException;
26
27 import javax.mail.MessagingException;
28
29 import java.io.IOException;
30 import java.util.regex.Matcher;
31 import java.util.regex.Pattern;
32
/**
 * <p>Remove (best effort to) the hardcoded wrapping from a message.<br>
 * If the text is "format=flowed" then deflows the text. Otherwise it forces a dewrap of the text.
 * </p>
 * <p>Parameters:<br>
 * quotewidth - when we try to dewrap a quoted text it helps to know the original
 * width, so we can reconstruct "wrapped wraps" created by multiple wrappings by clients with
 * different original widths, or simply by the addition of the leading ">" that increases the line
 * length.<br>
 * The value should be "WIDTH+X" if the original length is known, "-X" otherwise.
 * In the latter case the length of the longest line will be used.
 * X is the tolerance needed for the quoting chars: if the original width is known the suggested
 * value for X is 2 (because of the "> " prefix), otherwise it is suggested to increase it to a value
 * like 10 (-10)</p>
 *
 * <p>
 * In summary, if the original wrap is known (for example 76, for flowed messages)<br>
 * <code>quotewidth = 78</code><br>
 * Otherwise<br>
 * <code>quotewidth = -10</code>
 * </p>
 */
55 public class UnwrapText extends GenericMailet {
56 public final static String PARAMETER_NAME_QUOTEWIDTH = "quotewidth";
57
58 private int quotewidth;
59
60 /**
61 * returns a String describing this mailet.
62 *
63 * @return A desciption of this mailet
64 */
65 public String getMailetInfo() {
66 return "UnwrapText";
67 }
68
69 public void init() throws MailetException {
70 quotewidth = Integer.parseInt(getInitParameter(PARAMETER_NAME_QUOTEWIDTH, "-10"));
71 }
72
73 public void service(Mail mail) throws MailetException {
74 try {
75 // TODO replace non standard quotes (at least "> " with ">", otherwise the widely used "> > >" will not work.
76
77 if (FlowedMessageUtils.isFlowedTextMessage(mail.getMessage()))
78 FlowedMessageUtils.deflowMessage(mail.getMessage());
79
80 else {
81 Object o = mail.getMessage().getContent();
82 if (o instanceof String) {
83 String unwrapped = unwrap((String) o, quotewidth);
84 mail.getMessage().setContent(unwrapped, mail.getMessage().getContentType());
85 mail.getMessage().saveChanges();
86 }
87 }
88
89 } catch (MessagingException e) {
90 throw new MailetException("Could not unwrap message", e);
91
92 } catch (IOException e) {
93 throw new MailetException("Could not unwrap message", e);
94 }
95
96 }
97
98 public static String unwrap(String text) {
99 return unwrap(text, - 10);
100 }
101
102 public static String unwrap(String text, int qwidth) {
103 String[] lines = text.split("\r\n|\n", -1);
104
105 //P1: Manage spaces without trims
106 Pattern p1 = Pattern.compile("([> ]*)(.*[^ .?!][ ]*)$", 0);
107
108 //P2: Quotation char at the begin of a line and the first word starts with a lowercase char or a number. The word ends with a space, a tab or a lineend.
109 Pattern p2 = Pattern.compile("^([> ]*)(([a-z\u00E0\u00E8\u00E9\u00EC\u00F2\u00F9][^ \t\r\n]*|[0-9][0-9,.]*)([ \t].*$|$))", 0);
110
111 // Width computation
112 int width = 0;
113 for (int i = 0; i < lines.length - 1; i++) {
114 String l = lines[i].trim();
115 if (l.length() > width) width = l.length();
116 }
117
118 if (width < 40) return text;
119 if (qwidth < 0) qwidth = width - qwidth;
120
121 StringBuffer result = new StringBuffer();
122 int prevWrapped = 0;
123 for (int i = 0; i < lines.length; i++) {
124 if (prevWrapped != 0) {
125 if (prevWrapped > 0 ) {
126 if (result.charAt(result.length() - 1) != ' ') result.append(" ");
127 }
128 else result.append("\r\n");
129 }
130 String l = lines[i];
131 Matcher m1 = p1.matcher(l);
132 Matcher m2 = i < lines.length - 1 ? p2.matcher(lines[i + 1]) : null;
133 boolean b;
134 int w;
135 // if patterns match, the quote level are identical and if the line length added to the length of the following word is greater than width then it is a wrapped line.
136 if (m1.matches() && i < lines.length - 1 && m2.matches() && (
137 // The following line has the same quoting of the previous.
138 ((b = m1.group(1).trim().equals(m2.group(1).trim())) && l.length() + m2.group(3).length() + 1 > width)
139 ||
140 // The following line has no quoting (while the previous yes)
141 (!b && m2.group(1).trim().equals("") && (w = l.length() + m2.group(2).trim().length() + 1) > width && w <= qwidth)
142 )) {
143
144 if (b) {
145 if (prevWrapped > 0 && m1.groupCount() >= 2) result.append(m1.group(2));
146 else result.append(l);
147 prevWrapped = 1;
148
149 } else {
150 lines[i + 1] = l + (l.charAt(l.length() - 1) != ' ' ? " " : "") + m2.group(2).trim();
151 // Revert the previous append
152 if (prevWrapped != 0) {
153 if (prevWrapped > 0) result.deleteCharAt(result.length() - 1);
154 else result.delete(result.length() - 2, result.length());
155 }
156 }
157
158 } else {
159 Matcher m3 = p2.matcher(l);
160 if (prevWrapped > 0 && m3.matches()) result.append(m3.group(2));
161 else result.append(lines[i]);
162 prevWrapped = -1;
163 }
164 }
165
166 return result.toString();
167 }
168
169 }