View Javadoc

1   /****************************************************************
2    * Licensed to the Apache Software Foundation (ASF) under one   *
3    * or more contributor license agreements.  See the NOTICE file *
4    * distributed with this work for additional information        *
5    * regarding copyright ownership.  The ASF licenses this file   *
6    * to you under the Apache License, Version 2.0 (the            *
7    * "License"); you may not use this file except in compliance   *
8    * with the License.  You may obtain a copy of the License at   *
9    *                                                              *
10   *   http://www.apache.org/licenses/LICENSE-2.0                 *
11   *                                                              *
12   * Unless required by applicable law or agreed to in writing,   *
13   * software distributed under the License is distributed on an  *
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
15   * KIND, either express or implied.  See the License for the    *
16   * specific language governing permissions and limitations      *
17   * under the License.                                           *
18   ****************************************************************/
19  
20  
21  
22  package org.apache.james.util.bayesian;
23  
24  import org.apache.james.util.sql.JDBCUtil;
25  import org.apache.james.util.sql.SqlResources;
26  
27  import java.util.HashMap;
28  import java.util.Iterator;
29  import java.util.Map;
30  
31  import java.io.File;
32  
33  import java.sql.Connection;
34  import java.sql.PreparedStatement;
35  import java.sql.ResultSet;
36  import java.sql.SQLException;
37  import java.sql.DatabaseMetaData;
38  
39  /**
40   * Manages the persistence of the spam bayesian analysis corpus using a JDBC database.
41   *
42   * <p>This class is abstract to allow implementations to 
43   * take advantage of different logging capabilities/interfaces in
44   * different parts of the code.</p>
45  
46   * @version CVS $Revision: 684527 $ $Date: 2008-08-10 16:53:29 +0100 (Sun, 10 Aug 2008) $
47   * @since 2.3.0
48   */
49  
50  abstract public class JDBCBayesianAnalyzer
51  extends BayesianAnalyzer {
52      
53      /**
54       *Public object representing a lock on database activity.
55       */
56      public final static String DATABASE_LOCK = "database lock";
57      
58      /**
59       * An abstract method which child classes override to handle logging of
60       * errors in their particular environments.
61       *
62       * @param errorString the error message generated
63       */
64      abstract protected void delegatedLog(String errorString);
65  
66      /**
67       * The JDBCUtil helper class
68       */
69      private final JDBCUtil theJDBCUtil = new JDBCUtil() {
70          protected void delegatedLog(String logString) {
71              this.delegatedLog(logString);
72          }
73      };
74      
75      /**
76       * Contains all of the sql strings for this component.
77       */
78      private SqlResources sqlQueries = new SqlResources();
79  
80      /**
81       * Holds value of property sqlFileName.
82       */
83      private String sqlFileName;
84      
85      private File sqlFile;
86  
87      /**
88       * Holds value of property sqlParameters.
89       */
90      private Map sqlParameters = new HashMap();
91  
92      /**
93       * Holds value of property lastDatabaseUpdateTime.
94       */
95      private static long lastDatabaseUpdateTime;
96      
97      /**
98       * Getter for property sqlFileName.
99       * @return Value of property sqlFileName.
100      */
101     public String getSqlFileName() {
102 
103         return this.sqlFileName;
104     }
105 
106     /**
107      * Setter for property sqlFileName.
108      * @param sqlFileName New value of property sqlFileName.
109      */
110     public void setSqlFileName(String sqlFileName) {
111 
112         this.sqlFileName = sqlFileName;
113     }
114 
115     /**
116      * Getter for property sqlParameters.
117      * @return Value of property sqlParameters.
118      */
119     public Map getSqlParameters() {
120 
121         return this.sqlParameters;
122     }
123 
124     /**
125      * Setter for property sqlParameters.
126      * @param sqlParameters New value of property sqlParameters.
127      */
128     public void setSqlParameters(Map sqlParameters) {
129 
130         this.sqlParameters = sqlParameters;
131     }
132 
133     /**
134      * Getter for static lastDatabaseUpdateTime.
135      * @return Value of property lastDatabaseUpdateTime.
136      */
137     public static long getLastDatabaseUpdateTime() {
138 
139         return lastDatabaseUpdateTime;
140     }
141 
142     /**
143      * Sets static lastDatabaseUpdateTime to System.currentTimeMillis().
144      */
145     public static void touchLastDatabaseUpdateTime() {
146 
147         lastDatabaseUpdateTime = System.currentTimeMillis();
148     }
149 
150     /**
151      * Default constructor.
152      */
153     public JDBCBayesianAnalyzer() {
154     }
155         
156     /**
157      * Loads the token frequencies from the database.
158      * @param conn The connection for accessing the database
159      * @throws SQLException If a database error occurs
160      */
161     public void loadHamNSpam(Connection conn)
162     throws java.sql.SQLException {
163         PreparedStatement pstmt = null;
164         ResultSet rs = null;
165         
166         try {
167             pstmt = conn.prepareStatement(sqlQueries.getSqlString("selectHamTokens", true));
168             rs = pstmt.executeQuery();
169             
170             Map ham = getHamTokenCounts();
171             while (rs.next()) {
172                 String token = rs.getString(1);
173                 int count = rs.getInt(2);
174                 // to reduce memory, use the token only if the count is > 1
175                 if (count > 1) {
176                     ham.put(token, new Integer(count));
177                 }
178             }
179             //Verbose.
180             delegatedLog("Ham tokens count: " + ham.size());
181             
182             rs.close();
183             pstmt.close();
184                         
185             //Get the spam tokens/counts.
186             pstmt = conn.prepareStatement(sqlQueries.getSqlString("selectSpamTokens", true));
187             rs = pstmt.executeQuery();
188             
189             Map spam = getSpamTokenCounts();
190             while (rs.next()) {
191                 String token = rs.getString(1);
192                 int count = rs.getInt(2);
193                 // to reduce memory, use the token only if the count is > 1
194                 if (count > 1) {
195                     spam.put(token, new Integer(count));
196                 }
197             }
198             
199             //Verbose.
200             delegatedLog("Spam tokens count: " + spam.size());
201             
202             rs.close();
203             pstmt.close();
204                         
205             //Get the ham/spam message counts.
206             pstmt = conn.prepareStatement(sqlQueries.getSqlString("selectMessageCounts", true));
207             rs = pstmt.executeQuery();
208             if (rs.next()) {
209                 setHamMessageCount(rs.getInt(1));
210                 setSpamMessageCount(rs.getInt(2));
211             }
212             
213             rs.close();
214             pstmt.close();
215             
216         } finally {
217             if (rs != null) {
218                 try {
219                     rs.close();
220                 } catch (java.sql.SQLException se) {
221                 }
222                 
223                 rs = null;
224             }
225             
226             if (pstmt != null) {
227                 try {
228                     pstmt.close();
229                 } catch (java.sql.SQLException se) {
230                 }
231                 
232                 pstmt = null;
233             }
234         }
235     }
236     
237     /**
238      * Updates the database with new "ham" token frequencies.
239      * @param conn The connection for accessing the database
240      * @throws SQLException If a database error occurs
241      */
242     public void updateHamTokens(Connection conn)
243     throws java.sql.SQLException {
244         updateTokens(conn, getHamTokenCounts(),
245                 sqlQueries.getSqlString("insertHamToken", true),
246                 sqlQueries.getSqlString("updateHamToken", true));
247         
248         setMessageCount(conn, sqlQueries.getSqlString("updateHamMessageCounts", true), getHamMessageCount());
249     }
250     
251     /**
252      * Updates the database with new "spam" token frequencies.
253      * @param conn The connection for accessing the database
254      * @throws SQLException If a database error occurs
255      */
256     public void updateSpamTokens(Connection conn)
257     throws java.sql.SQLException {
258          updateTokens(conn, getSpamTokenCounts(),
259                 sqlQueries.getSqlString("insertSpamToken", true),
260                 sqlQueries.getSqlString("updateSpamToken", true));
261        
262         setMessageCount(conn, sqlQueries.getSqlString("updateSpamMessageCounts", true), getSpamMessageCount());
263     }
264     
265     /**
266      * Reset all trained data
267      * 
268      * @param conn The connection for accessing the database
269      * @throws SQLException If a dtabase error occours
270      */
271     public void resetData(Connection conn) throws SQLException {
272         deleteData(conn,sqlQueries.getSqlString("deleteHamTokens",true));
273         deleteData(conn,sqlQueries.getSqlString("deleteSpamTokens",true));
274         deleteData(conn,sqlQueries.getSqlString("deleteMessageCounts",true));
275     }
276     
277     private void setMessageCount(Connection conn, String sqlStatement, int count)
278     throws java.sql.SQLException {
279         PreparedStatement init = null;
280         PreparedStatement update = null;
281         
282         try {
283             //set the ham/spam message counts.
284             init = conn.prepareStatement(sqlQueries.getSqlString("initializeMessageCounts", true));
285             update = conn.prepareStatement(sqlStatement);
286             
287             update.setInt(1, count);
288             
289             if (update.executeUpdate() == 0) {
290                 init.executeUpdate();
291                 update.executeUpdate();
292             }
293 
294         } finally {
295             if (init != null) {
296                 try {
297                     init.close();
298                 } catch (java.sql.SQLException ignore) {
299                 }
300             }
301             if (update != null) {
302                 try {
303                     update.close();
304                 } catch (java.sql.SQLException ignore) {
305                 }
306             }
307         }
308     }
309     
310     private void updateTokens(Connection conn, Map tokens, String insertSqlStatement, String updateSqlStatement)
311     throws java.sql.SQLException {
312         PreparedStatement insert = null;
313         PreparedStatement update = null;
314         
315         try {
316             //Used to insert new token entries.
317             insert = conn.prepareStatement(insertSqlStatement);
318             
319             //Used to update existing token entries.
320             update = conn.prepareStatement(updateSqlStatement);
321             
322             Iterator i = tokens.keySet().iterator();
323             while (i.hasNext()) {
324                 String key = (String) i.next();
325                 int value = ((Integer) tokens.get(key)).intValue();
326                 
327                 update.setInt(1, value);
328                 update.setString(2, key);
329                 
330                 //If the update affected 0 (zero) rows, then the token hasn't been
331                 //encountered before, and we need to add it to the corpus.
332                 if (update.executeUpdate() == 0) {
333                     insert.setString(1, key);
334                     insert.setInt(2, value);
335                     
336                     insert.executeUpdate();
337                 }
338             }
339         } finally {
340             if (insert != null) {
341                 try {
342                     insert.close();
343                 } catch (java.sql.SQLException ignore) {
344                 }
345                 
346                 insert = null;
347             }
348             
349             if (update != null) {
350                 try {
351                     update.close();
352                 } catch (java.sql.SQLException ignore) {
353                 }
354                 
355                 update = null;
356             }
357         }
358     }
359     
360     /**
361      * Initializes the sql query environment from the SqlResources file.
362      * Will look for conf/sqlResources.xml.
363      * @param conn The connection for accessing the database
364      * @param file The sqlResources.xml file
365      * @throws Exception If any error occurs
366      */
367     public void initSqlQueries(Connection conn, String file) throws Exception {
368         try {
369             if (conn.getAutoCommit()) {
370                 conn.setAutoCommit(false);
371             }
372             
373             this.sqlFile = new File(file).getCanonicalFile();
374             sqlQueries.init(this.sqlFile, JDBCBayesianAnalyzer.class.getName() , conn, getSqlParameters());
375             
376             checkTables(conn);
377         } finally {
378             theJDBCUtil.closeJDBCConnection(conn);
379         }
380     }
381     
382     private void checkTables(Connection conn) throws SQLException {
383         DatabaseMetaData dbMetaData = conn.getMetaData();
384         // Need to ask in the case that identifiers are stored, ask the DatabaseMetaInfo.
385         // Try UPPER, lower, and MixedCase, to see if the table is there.
386         
387         boolean dbUpdated = false;
388         
389         dbUpdated = createTable(conn, "hamTableName", "createHamTable");
390         
391         dbUpdated = createTable(conn, "spamTableName", "createSpamTable");
392         
393         dbUpdated = createTable(conn, "messageCountsTableName", "createMessageCountsTable");
394         
395         //Commit our changes if necessary.
396         if (conn != null && dbUpdated && !conn.getAutoCommit()) {
397             conn.commit();
398             dbUpdated = false;
399         }
400             
401     }
402     
403     private boolean createTable(Connection conn, String tableNameSqlStringName, String createSqlStringName) throws SQLException {
404         String tableName = sqlQueries.getSqlString(tableNameSqlStringName, true);
405         
406         DatabaseMetaData dbMetaData = conn.getMetaData();
407 
408         // Try UPPER, lower, and MixedCase, to see if the table is there.
409         if (theJDBCUtil.tableExists(dbMetaData, tableName)) {
410             return false;
411         }
412         
413         PreparedStatement createStatement = null;
414         
415         try {
416             createStatement =
417                     conn.prepareStatement(sqlQueries.getSqlString(createSqlStringName, true));
418             createStatement.execute();
419             
420             StringBuffer logBuffer = null;
421             logBuffer =
422                     new StringBuffer(64)
423                     .append("Created table '")
424                     .append(tableName)
425                     .append("' using sqlResources string '")
426                     .append(createSqlStringName)
427                     .append("'.");
428             delegatedLog(logBuffer.toString());
429             
430         } finally {
431             theJDBCUtil.closeJDBCStatement(createStatement);
432         }
433         
434         return true;
435     }
436     
437     private void deleteData(Connection conn, String deleteSqlStatement) throws SQLException {
438         PreparedStatement delete = null;
439         
440         try {
441             //Used to delete ham tokens
442             delete = conn.prepareStatement(deleteSqlStatement);
443             delete.executeUpdate();
444         } finally {
445             if (delete != null) {
446                 try {
447                     delete.close();
448                 } catch (java.sql.SQLException ignore) {
449                 }
450                 
451                 delete = null;
452             }
453         }
454     }
455 }