1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 package org.apache.james.management.impl;
24
25 import java.io.BufferedReader;
26 import java.io.File;
27 import java.io.FileNotFoundException;
28 import java.io.FileOutputStream;
29 import java.io.FileReader;
30 import java.io.IOException;
31 import java.io.InputStreamReader;
32 import java.io.PrintWriter;
33 import java.io.InputStream;
34 import java.sql.SQLException;
35 import java.sql.Connection;
36 import java.util.Map;
37
38 import net.fortuna.mstor.data.MboxFile;
39
40 import org.apache.avalon.cornerstone.services.datasources.DataSourceSelector;
41 import org.apache.avalon.excalibur.datasource.DataSourceComponent;
42 import org.apache.avalon.framework.activity.Initializable;
43 import org.apache.avalon.framework.configuration.Configurable;
44 import org.apache.avalon.framework.configuration.Configuration;
45 import org.apache.avalon.framework.configuration.ConfigurationException;
46 import org.apache.avalon.framework.service.ServiceException;
47 import org.apache.avalon.framework.service.ServiceManager;
48 import org.apache.avalon.framework.service.Serviceable;
49 import org.apache.james.management.BayesianAnalyzerManagementException;
50 import org.apache.james.management.BayesianAnalyzerManagementMBean;
51 import org.apache.james.management.BayesianAnalyzerManagementService;
52 import org.apache.james.services.FileSystem;
53 import org.apache.james.util.bayesian.JDBCBayesianAnalyzer;
54
55 import com.thoughtworks.xstream.XStream;
56 import com.thoughtworks.xstream.io.xml.DomDriver;
57
58
59
60
61 public class BayesianAnalyzerManagement implements BayesianAnalyzerManagementService, Serviceable, Initializable, Configurable, BayesianAnalyzerManagementMBean {
62
63 private final static String HAM = "HAM";
64 private final static String SPAM = "SPAM";
65 private DataSourceSelector selector;
66 private DataSourceComponent component;
67 private String repos;
68 private String sqlFileUrl;
69 private FileSystem fileSystem;
70
71
72
73
74 public void service(ServiceManager arg0) throws ServiceException {
75 DataSourceSelector selector = (DataSourceSelector) arg0.lookup(DataSourceSelector.ROLE);
76 setDataSourceSelector(selector);
77 setFileSystem((FileSystem) arg0.lookup(FileSystem.ROLE));
78 }
79
80
81
82
83
84
85 private void setFileSystem(FileSystem system) {
86 this.fileSystem = system;
87 }
88
89
90
91
92 public void initialize() throws Exception {
93 if (repos != null) {
94 setDataSourceComponent((DataSourceComponent) selector.select(repos));
95 File sqlFile = fileSystem.getFile(sqlFileUrl);
96 analyzer.initSqlQueries(component.getConnection(), sqlFile.getAbsolutePath());
97 }
98 }
99
100
101
102
103 public void configure(Configuration arg0) throws ConfigurationException {
104 Configuration reposPath = arg0.getChild("repositoryPath",false);
105 if (reposPath != null) {
106 setRepositoryPath(reposPath.getValue());
107 }
108 sqlFileUrl = arg0.getChild("sqlFile").getValue();
109 if (sqlFileUrl == null) sqlFileUrl = "file://conf/sqlResources.xml";
110 }
111
112
113
114
115
116
117 public void setRepositoryPath(String repositoryPath) {
118 repos = repositoryPath.substring(5);
119 }
120
121
122
123
124
125
126 public void setDataSourceSelector (DataSourceSelector selector) {
127 this.selector = selector;
128 }
129
130
131
132
133
134
135 public void setDataSourceComponent(DataSourceComponent component) {
136 this.component = component;
137 }
138
139
140
141
142 public int addHamFromDir(String dir) throws BayesianAnalyzerManagementException {
143 if (repos == null) throw new BayesianAnalyzerManagementException("RepositoryPath not configured");
144
145 return feedBayesianAnalyzerFromDir(dir,HAM);
146 }
147
148
149
150
151 public int addSpamFromDir(String dir) throws BayesianAnalyzerManagementException {
152 if (repos == null) throw new BayesianAnalyzerManagementException("RepositoryPath not configured");
153
154 return feedBayesianAnalyzerFromDir(dir,SPAM);
155 }
156
157
158
159
160 public int addHamFromMbox(String file) throws BayesianAnalyzerManagementException {
161 if (repos == null) throw new BayesianAnalyzerManagementException("RepositoryPath not configured");
162 return feedBayesianAnalyzerFromMbox(file,HAM);
163 }
164
165
166
167
168 public int addSpamFromMbox(String file) throws BayesianAnalyzerManagementException {
169 if (repos == null) throw new BayesianAnalyzerManagementException("RepositoryPath not configured");
170 return feedBayesianAnalyzerFromMbox(file,SPAM);
171 }
172
173
174
175
176
177
178
179
180
181
182 private int feedBayesianAnalyzerFromDir(String dir, String type) throws BayesianAnalyzerManagementException {
183
184
185 analyzer.clear();
186
187 File tmpFile = new File(dir);
188 int count = 0;
189
190 synchronized(JDBCBayesianAnalyzer.DATABASE_LOCK) {
191
192
193 if (tmpFile.isDirectory()) {
194 File[] files = tmpFile.listFiles();
195
196 for (int i = 0; i < files.length; i++) {
197 BufferedReader stream = null;
198 try {
199 stream = new BufferedReader(new FileReader(files[i]));
200 } catch (FileNotFoundException e) {
201 throw new BayesianAnalyzerManagementException("acessing mail file failed.", e);
202 }
203 addMailToCorpus(type, stream);
204 count++;
205 }
206
207 updateTokens(type);
208
209 } else {
210 throw new IllegalArgumentException("Please provide an valid directory");
211 }
212 }
213
214 return count;
215 }
216
217
218
219
220
221
222
223 private void updateTokens(String type) throws BayesianAnalyzerManagementException {
224
225 try {
226 Connection connection = component.getConnection();
227 if (type.equalsIgnoreCase(HAM)) {
228 analyzer.updateHamTokens(connection);
229 } else if (type.equalsIgnoreCase(SPAM)) {
230 analyzer.updateSpamTokens(connection);
231 }
232 } catch (SQLException e) {
233 throw new BayesianAnalyzerManagementException("updating tokens failed.", e);
234 }
235 }
236
237
238
239
240
241
242
243
244 private void addMailToCorpus(String type, BufferedReader stream) throws BayesianAnalyzerManagementException {
245 try {
246 if (type.equalsIgnoreCase(HAM)) {
247 analyzer.addHam(stream);
248 } else if (type.equalsIgnoreCase(SPAM)) {
249 analyzer.addSpam(stream);
250 }
251 } catch (IOException e) {
252 throw new BayesianAnalyzerManagementException("adding to corpus failed.", e);
253 }
254 }
255
256
257
258
259
260
261
262
263
264
265 private int feedBayesianAnalyzerFromMbox(String mboxFile, String type) throws BayesianAnalyzerManagementException {
266 int count = 0;
267
268
269 analyzer.clear();
270
271 File tmpFile = new File(mboxFile);
272
273 if (MboxFile.isValid(tmpFile)) {
274 MboxFile mbox = new MboxFile(tmpFile,MboxFile.READ_ONLY);
275
276 synchronized(JDBCBayesianAnalyzer.DATABASE_LOCK) {
277 int messageCount = 0;
278 try {
279 messageCount = mbox.getMessageCount();
280 } catch (IOException e) {
281 throw new BayesianAnalyzerManagementException(e);
282 }
283 for (int i = 0; i < messageCount; i++) {
284 InputStream message = null;
285 try {
286 message = mbox.getMessageAsStream(i);
287 } catch (IOException e) {
288 throw new BayesianAnalyzerManagementException("could not access mail from mbox streanm", e);
289 }
290 BufferedReader stream = new BufferedReader(new InputStreamReader(message));
291 addMailToCorpus(type, stream);
292 count++;
293 }
294
295
296 updateTokens(type);
297 }
298 } else {
299 throw new IllegalArgumentException("Please provide an valid mbox file");
300 }
301
302 return count;
303 }
304
305
306
307
308 public void exportData(String file) throws BayesianAnalyzerManagementException {
309 if (repos == null) throw new BayesianAnalyzerManagementException("RepositoryPath not configured");
310
311 synchronized(JDBCBayesianAnalyzer.DATABASE_LOCK) {
312 try {
313 analyzer.loadHamNSpam(component.getConnection());
314 } catch (SQLException e) {
315 throw new BayesianAnalyzerManagementException("loading ham and spam failed.", e);
316 }
317
318 int hamMessageCount = analyzer.getHamMessageCount();
319 int spamMessageCount = analyzer.getSpamMessageCount();
320 Map hamTokenCounts = analyzer.getHamTokenCounts();
321 Map spamTokenCounts = analyzer.getSpamTokenCounts();
322
323 XStream xstream = new XStream(new DomDriver());
324 xstream.alias("bayesianAnalyzer", BayesianAnalyzerXml.class);
325 FileOutputStream fileOutputStream = null;
326 try {
327 fileOutputStream = new FileOutputStream(file);
328 } catch (FileNotFoundException e) {
329 throw new BayesianAnalyzerManagementException("opening export file failed", e);
330 }
331 PrintWriter printwriter = new PrintWriter(fileOutputStream);
332 printwriter.println(xstream.toXML(new BayesianAnalyzerXml(hamMessageCount,spamMessageCount,hamTokenCounts,spamTokenCounts)));
333 printwriter.close();
334 }
335 }
336
337
338
339
340 public void importData(String file) throws BayesianAnalyzerManagementException {
341 if (repos == null) throw new BayesianAnalyzerManagementException("RepositoryPath not configured");
342
343 synchronized(JDBCBayesianAnalyzer.DATABASE_LOCK){
344 XStream xstream = new XStream(new DomDriver());
345
346 FileReader fileReader = null;
347 try {
348 fileReader = new FileReader(file);
349 } catch (FileNotFoundException e) {
350 throw new BayesianAnalyzerManagementException("opening input file failed", e);
351 }
352 BayesianAnalyzerXml bAnalyzerXml = (BayesianAnalyzerXml) xstream.fromXML(fileReader);
353
354
355 analyzer.clear();
356 analyzer.tokenCountsClear();
357
358
359
360
361 analyzer.setHamMessageCount(bAnalyzerXml.getHamMessageCount());
362 analyzer.setSpamMessageCount(bAnalyzerXml.getSpamMessageCount());
363 analyzer.setHamTokenCounts(bAnalyzerXml.getHamTokenCounts());
364 analyzer.setSpamTokenCounts(bAnalyzerXml.getSpamTokenCounts());
365 updateTokens(HAM);
366 updateTokens(SPAM);
367 }
368
369 }
370
/**
 * Analyzer used by every operation of this class. Its logging hook is overridden with
 * an empty body, so log strings passed to delegatedLog are discarded.
 */
private JDBCBayesianAnalyzer analyzer = new JDBCBayesianAnalyzer() {
    protected void delegatedLog(String logString) {
        // Intentionally empty: nothing is logged.
    }
};
376
377
378
379
380
381 public void resetData() throws BayesianAnalyzerManagementException {
382 synchronized(JDBCBayesianAnalyzer.DATABASE_LOCK) {
383 try {
384 analyzer.resetData(component.getConnection());
385 } catch (SQLException e) {
386 throw new BayesianAnalyzerManagementException(e.getMessage());
387 }
388 }
389
390 }
391
392
393
394
395 private static class BayesianAnalyzerXml {
396 private int hamMessageCount = 0;
397 private int spamMessageCount = 0;
398 private Map hamTokenCounts;
399 private Map spamTokenCounts;
400
401
402
403
404
405
406
407
408
409 public BayesianAnalyzerXml(int hamMessageCount, int spamMessageCount, Map hamTokenCounts, Map spamTokenCounts) {
410 this.hamMessageCount = hamMessageCount;
411 this.spamMessageCount = spamMessageCount;
412 this.hamTokenCounts = hamTokenCounts;
413 this.spamTokenCounts = spamTokenCounts;
414 }
415
416
417
418
419
420
421 public int getHamMessageCount() {
422 return hamMessageCount;
423 }
424
425
426
427
428
429
430 public int getSpamMessageCount() {
431 return spamMessageCount;
432 }
433
434
435
436
437
438
439 public Map getHamTokenCounts() {
440 return hamTokenCounts;
441 }
442
443
444
445
446
447
448 public Map getSpamTokenCounts() {
449 return spamTokenCounts;
450 }
451
452 }
453
454 }