View Javadoc

1   /****************************************************************
2    * Licensed to the Apache Software Foundation (ASF) under one   *
3    * or more contributor license agreements.  See the NOTICE file *
4    * distributed with this work for additional information        *
5    * regarding copyright ownership.  The ASF licenses this file   *
6    * to you under the Apache License, Version 2.0 (the            *
7    * "License"); you may not use this file except in compliance   *
8    * with the License.  You may obtain a copy of the License at   *
9    *                                                              *
10   *   http://www.apache.org/licenses/LICENSE-2.0                 *
11   *                                                              *
12   * Unless required by applicable law or agreed to in writing,   *
13   * software distributed under the License is distributed on an  *
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
15   * KIND, either express or implied.  See the License for the    *
16   * specific language governing permissions and limitations      *
17   * under the License.                                           *
18   ****************************************************************/
19  
20  package org.apache.james.ai.classic;
21  
22  
23  import java.sql.Connection;
24  import java.sql.DatabaseMetaData;
25  import java.sql.PreparedStatement;
26  import java.sql.ResultSet;
27  import java.sql.SQLException;
28  import java.util.HashMap;
29  import java.util.Map;
30  
31  import org.w3c.dom.Document;
32  
33  /**
34   * Manages the persistence of the spam bayesian analysis corpus using a JDBC
35   * database.
36   * 
37   * <p>
38   * This class is abstract to allow implementations to take advantage of
39   * different logging capabilities/interfaces in different parts of the code.
40   * </p>
41   * 
42   * @since 2.3.0
43   */
44  
45  abstract public class JDBCBayesianAnalyzer extends BayesianAnalyzer {
46  
47      /** Public object representing a lock on database activity. */
48      public final static String DATABASE_LOCK = "database lock";
49  
50      /**
51       * An abstract method which child classes override to handle logging of
52       * errors in their particular environments.
53       * 
54       * @param errorString
55       *            the error message generated
56       */
57      abstract protected void delegatedLog(String errorString);
58  
59      /**
60       * The JDBCUtil helper class
61       */
62      private final JDBCUtil theJDBCUtil = new JDBCUtil() {
63          protected void delegatedLog(String logString) {
64              this.delegatedLog(logString);
65          }
66      };
67  
68      /** Contains all of the sql strings for this component. */
69      private SqlResources sqlQueries = new SqlResources();
70  
71      /** Holds value of property sqlFileName. */
72      private String sqlFileName;
73  
74      /** Holds value of property sqlParameters. */
75      private Map<String, String> sqlParameters = new HashMap<String, String>();
76  
77      /** Holds value of property lastDatabaseUpdateTime. */
78      private static long lastDatabaseUpdateTime;
79  
80      /**
81       * Getter for property sqlFileName.
82       * 
83       * @return Value of property sqlFileName.
84       */
85      public String getSqlFileName() {
86  
87          return this.sqlFileName;
88      }
89  
90      /**
91       * Setter for property sqlFileName.
92       * 
93       * @param sqlFileName
94       *            New value of property sqlFileName.
95       */
96      public void setSqlFileName(String sqlFileName) {
97  
98          this.sqlFileName = sqlFileName;
99      }
100 
101     /**
102      * Getter for property sqlParameters.
103      * 
104      * @return Value of property sqlParameters.
105      */
106     public Map<String, String> getSqlParameters() {
107 
108         return this.sqlParameters;
109     }
110 
111     /**
112      * Setter for property sqlParameters.
113      * 
114      * @param sqlParameters
115      *            New value of property sqlParameters.
116      */
117     public void setSqlParameters(Map<String, String> sqlParameters) {
118 
119         this.sqlParameters = sqlParameters;
120     }
121 
122     /**
123      * Getter for static lastDatabaseUpdateTime.
124      * 
125      * @return Value of property lastDatabaseUpdateTime.
126      */
127     public static long getLastDatabaseUpdateTime() {
128 
129         return lastDatabaseUpdateTime;
130     }
131 
132     /**
133      * Sets static lastDatabaseUpdateTime to System.currentTimeMillis().
134      */
135     public static void touchLastDatabaseUpdateTime() {
136 
137         lastDatabaseUpdateTime = System.currentTimeMillis();
138     }
139 
140     /**
141      * Default constructor.
142      */
143     public JDBCBayesianAnalyzer() {
144     }
145 
146     /**
147      * Loads the token frequencies from the database.
148      * 
149      * @param conn
150      *            The connection for accessing the database
151      * @throws SQLException
152      *             If a database error occurs
153      */
154     public void loadHamNSpam(Connection conn) throws java.sql.SQLException {
155         PreparedStatement pstmt = null;
156         ResultSet rs = null;
157 
158         try {
159             pstmt = conn.prepareStatement(sqlQueries.getSqlString("selectHamTokens", true));
160             rs = pstmt.executeQuery();
161 
162             Map<String, Integer> ham = getHamTokenCounts();
163             while (rs.next()) {
164                 String token = rs.getString(1);
165                 int count = rs.getInt(2);
166                 // to reduce memory, use the token only if the count is > 1
167                 if (count > 1) {
168                     ham.put(token, Integer.valueOf(count));
169                 }
170             }
171             // Verbose.
172             delegatedLog("Ham tokens count: " + ham.size());
173 
174             rs.close();
175             pstmt.close();
176 
177             // Get the spam tokens/counts.
178             pstmt = conn.prepareStatement(sqlQueries.getSqlString("selectSpamTokens", true));
179             rs = pstmt.executeQuery();
180 
181             Map<String, Integer> spam = getSpamTokenCounts();
182             while (rs.next()) {
183                 String token = rs.getString(1);
184                 int count = rs.getInt(2);
185                 // to reduce memory, use the token only if the count is > 1
186                 if (count > 1) {
187                     spam.put(token, new Integer(count));
188                 }
189             }
190 
191             // Verbose.
192             delegatedLog("Spam tokens count: " + spam.size());
193 
194             rs.close();
195             pstmt.close();
196 
197             // Get the ham/spam message counts.
198             pstmt = conn.prepareStatement(sqlQueries.getSqlString("selectMessageCounts", true));
199             rs = pstmt.executeQuery();
200             if (rs.next()) {
201                 setHamMessageCount(rs.getInt(1));
202                 setSpamMessageCount(rs.getInt(2));
203             }
204 
205             rs.close();
206             pstmt.close();
207 
208         } finally {
209             if (rs != null) {
210                 try {
211                     rs.close();
212                 } catch (java.sql.SQLException se) {
213                 }
214 
215                 rs = null;
216             }
217 
218             if (pstmt != null) {
219                 try {
220                     pstmt.close();
221                 } catch (java.sql.SQLException se) {
222                 }
223 
224                 pstmt = null;
225             }
226         }
227     }
228 
229     /**
230      * Updates the database with new "ham" token frequencies.
231      * 
232      * @param conn
233      *            The connection for accessing the database
234      * @throws SQLException
235      *             If a database error occurs
236      */
237     public void updateHamTokens(Connection conn) throws java.sql.SQLException {
238         updateTokens(conn, getHamTokenCounts(), sqlQueries.getSqlString("insertHamToken", true), sqlQueries.getSqlString("updateHamToken", true));
239 
240         setMessageCount(conn, sqlQueries.getSqlString("updateHamMessageCounts", true), getHamMessageCount());
241     }
242 
243     /**
244      * Updates the database with new "spam" token frequencies.
245      * 
246      * @param conn
247      *            The connection for accessing the database
248      * @throws SQLException
249      *             If a database error occurs
250      */
251     public void updateSpamTokens(Connection conn) throws java.sql.SQLException {
252         updateTokens(conn, getSpamTokenCounts(), sqlQueries.getSqlString("insertSpamToken", true), sqlQueries.getSqlString("updateSpamToken", true));
253 
254         setMessageCount(conn, sqlQueries.getSqlString("updateSpamMessageCounts", true), getSpamMessageCount());
255     }
256 
257     /**
258      * Reset all trained data
259      * 
260      * @param conn
261      *            The connection for accessing the database
262      * @throws SQLException
263      *             If a database error occours
264      */
265     public void resetData(Connection conn) throws SQLException {
266         deleteData(conn, sqlQueries.getSqlString("deleteHamTokens", true));
267         deleteData(conn, sqlQueries.getSqlString("deleteSpamTokens", true));
268         deleteData(conn, sqlQueries.getSqlString("deleteMessageCounts", true));
269     }
270 
271     private void setMessageCount(Connection conn, String sqlStatement, int count) throws java.sql.SQLException {
272         PreparedStatement init = null;
273         PreparedStatement update = null;
274 
275         try {
276             // set the ham/spam message counts.
277             init = conn.prepareStatement(sqlQueries.getSqlString("initializeMessageCounts", true));
278             update = conn.prepareStatement(sqlStatement);
279 
280             update.setInt(1, count);
281 
282             if (update.executeUpdate() == 0) {
283                 init.executeUpdate();
284                 update.executeUpdate();
285             }
286 
287         } finally {
288             if (init != null) {
289                 try {
290                     init.close();
291                 } catch (java.sql.SQLException ignore) {
292                 }
293             }
294             if (update != null) {
295                 try {
296                     update.close();
297                 } catch (java.sql.SQLException ignore) {
298                 }
299             }
300         }
301     }
302 
303     private void updateTokens(Connection conn, Map<String, Integer> tokens, String insertSqlStatement, String updateSqlStatement) throws java.sql.SQLException {
304         PreparedStatement insert = null;
305         PreparedStatement update = null;
306 
307         try {
308             // Used to insert new token entries.
309             insert = conn.prepareStatement(insertSqlStatement);
310 
311             // Used to update existing token entries.
312             update = conn.prepareStatement(updateSqlStatement);
313 
314             for (Map.Entry<String, Integer> entry : tokens.entrySet()) {
315                 update.setInt(1, entry.getValue());
316                 update.setString(2, entry.getKey());
317 
318                 // If the update affected 0 (zero) rows, then the token hasn't
319                 // been
320                 // encountered before, and we need to add it to the corpus.
321                 if (update.executeUpdate() == 0) {
322                     insert.setString(1, entry.getKey());
323                     insert.setInt(2, entry.getValue());
324 
325                     insert.executeUpdate();
326                 }
327             }
328         } finally {
329             if (insert != null) {
330                 try {
331                     insert.close();
332                 } catch (java.sql.SQLException ignore) {
333                 }
334 
335                 insert = null;
336             }
337 
338             if (update != null) {
339                 try {
340                     update.close();
341                 } catch (java.sql.SQLException ignore) {
342                 }
343 
344                 update = null;
345             }
346         }
347     }
348 
349     /**
350      * Initializes the sql query environment from the SqlResources file. Will
351      * look for conf/sqlResources.xml.
352      * 
353      * @param conn
354      *            The connection for accessing the database
355      * @param sqlConfiguration
356      *            The sqlResources configuration document
357      * @throws Exception
358      *             If any error occurs
359      */
360     public void initSqlQueries(Connection conn, Document sqlConfiguration) throws Exception {
361         try {
362             if (conn.getAutoCommit()) {
363                 conn.setAutoCommit(false);
364             }
365 
366             sqlQueries.init(sqlConfiguration, JDBCBayesianAnalyzer.class.getName(), conn, getSqlParameters());
367 
368             checkTables(conn);
369         } finally {
370             theJDBCUtil.closeJDBCConnection(conn);
371         }
372     }
373 
374     private void checkTables(Connection conn) throws SQLException {
375         // DatabaseMetaData dbMetaData = conn.getMetaData();
376         // Need to ask in the case that identifiers are stored, ask the
377         // DatabaseMetaInfo.
378         // Try UPPER, lower, and MixedCase, to see if the table is there.
379 
380         boolean dbUpdated = false;
381 
382         dbUpdated = createTable(conn, "hamTableName", "createHamTable");
383 
384         dbUpdated = createTable(conn, "spamTableName", "createSpamTable");
385 
386         dbUpdated = createTable(conn, "messageCountsTableName", "createMessageCountsTable");
387 
388         // Commit our changes if necessary.
389         if (conn != null && dbUpdated && !conn.getAutoCommit()) {
390             conn.commit();
391             dbUpdated = false;
392         }
393 
394     }
395 
396     private boolean createTable(Connection conn, String tableNameSqlStringName, String createSqlStringName) throws SQLException {
397         String tableName = sqlQueries.getSqlString(tableNameSqlStringName, true);
398 
399         DatabaseMetaData dbMetaData = conn.getMetaData();
400 
401         // Try UPPER, lower, and MixedCase, to see if the table is there.
402         if (theJDBCUtil.tableExists(dbMetaData, tableName)) {
403             return false;
404         }
405 
406         PreparedStatement createStatement = null;
407 
408         try {
409             createStatement = conn.prepareStatement(sqlQueries.getSqlString(createSqlStringName, true));
410             createStatement.execute();
411 
412             StringBuffer logBuffer = null;
413             logBuffer = new StringBuffer(64).append("Created table '").append(tableName).append("' using sqlResources string '").append(createSqlStringName).append("'.");
414             delegatedLog(logBuffer.toString());
415 
416         } finally {
417             theJDBCUtil.closeJDBCStatement(createStatement);
418         }
419 
420         return true;
421     }
422 
423     private void deleteData(Connection conn, String deleteSqlStatement) throws SQLException {
424         PreparedStatement delete = null;
425 
426         try {
427             // Used to delete ham tokens
428             delete = conn.prepareStatement(deleteSqlStatement);
429             delete.executeUpdate();
430         } finally {
431             if (delete != null) {
432                 try {
433                     delete.close();
434                 } catch (java.sql.SQLException ignore) {
435                 }
436 
437                 delete = null;
438             }
439         }
440     }
441 }