001 /**************************************************************** 002 * Licensed to the Apache Software Foundation (ASF) under one * 003 * or more contributor license agreements. See the NOTICE file * 004 * distributed with this work for additional information * 005 * regarding copyright ownership. The ASF licenses this file * 006 * to you under the Apache License, Version 2.0 (the * 007 * "License"); you may not use this file except in compliance * 008 * with the License. You may obtain a copy of the License at * 009 * * 010 * http://www.apache.org/licenses/LICENSE-2.0 * 011 * * 012 * Unless required by applicable law or agreed to in writing, * 013 * software distributed under the License is distributed on an * 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * 015 * KIND, either express or implied. See the License for the * 016 * specific language governing permissions and limitations * 017 * under the License. * 018 ****************************************************************/ 019 020 package org.apache.james.ai.classic; 021 022 023 import java.sql.Connection; 024 import java.sql.DatabaseMetaData; 025 import java.sql.PreparedStatement; 026 import java.sql.ResultSet; 027 import java.sql.SQLException; 028 import java.util.HashMap; 029 import java.util.Map; 030 031 import org.w3c.dom.Document; 032 033 /** 034 * Manages the persistence of the spam bayesian analysis corpus using a JDBC 035 * database. 036 * 037 * <p> 038 * This class is abstract to allow implementations to take advantage of 039 * different logging capabilities/interfaces in different parts of the code. 040 * </p> 041 * 042 * @since 2.3.0 043 */ 044 045 abstract public class JDBCBayesianAnalyzer extends BayesianAnalyzer { 046 047 /** Public object representing a lock on database activity. */ 048 public final static String DATABASE_LOCK = "database lock"; 049 050 /** 051 * An abstract method which child classes override to handle logging of 052 * errors in their particular environments. 053 * 054 * @param errorString 055 * the error message generated 056 */ 057 abstract protected void delegatedLog(String errorString); 058 059 /** 060 * The JDBCUtil helper class 061 */ 062 private final JDBCUtil theJDBCUtil = new JDBCUtil() { 063 protected void delegatedLog(String logString) { 064 this.delegatedLog(logString); 065 } 066 }; 067 068 /** Contains all of the sql strings for this component. */ 069 private SqlResources sqlQueries = new SqlResources(); 070 071 /** Holds value of property sqlFileName. */ 072 private String sqlFileName; 073 074 /** Holds value of property sqlParameters. */ 075 private Map<String, String> sqlParameters = new HashMap<String, String>(); 076 077 /** Holds value of property lastDatabaseUpdateTime. */ 078 private static long lastDatabaseUpdateTime; 079 080 /** 081 * Getter for property sqlFileName. 082 * 083 * @return Value of property sqlFileName. 084 */ 085 public String getSqlFileName() { 086 087 return this.sqlFileName; 088 } 089 090 /** 091 * Setter for property sqlFileName. 092 * 093 * @param sqlFileName 094 * New value of property sqlFileName. 095 */ 096 public void setSqlFileName(String sqlFileName) { 097 098 this.sqlFileName = sqlFileName; 099 } 100 101 /** 102 * Getter for property sqlParameters. 103 * 104 * @return Value of property sqlParameters. 105 */ 106 public Map<String, String> getSqlParameters() { 107 108 return this.sqlParameters; 109 } 110 111 /** 112 * Setter for property sqlParameters. 113 * 114 * @param sqlParameters 115 * New value of property sqlParameters. 116 */ 117 public void setSqlParameters(Map<String, String> sqlParameters) { 118 119 this.sqlParameters = sqlParameters; 120 } 121 122 /** 123 * Getter for static lastDatabaseUpdateTime. 124 * 125 * @return Value of property lastDatabaseUpdateTime. 126 */ 127 public static long getLastDatabaseUpdateTime() { 128 129 return lastDatabaseUpdateTime; 130 } 131 132 /** 133 * Sets static lastDatabaseUpdateTime to System.currentTimeMillis(). 134 */ 135 public static void touchLastDatabaseUpdateTime() { 136 137 lastDatabaseUpdateTime = System.currentTimeMillis(); 138 } 139 140 /** 141 * Default constructor. 142 */ 143 public JDBCBayesianAnalyzer() { 144 } 145 146 /** 147 * Loads the token frequencies from the database. 148 * 149 * @param conn 150 * The connection for accessing the database 151 * @throws SQLException 152 * If a database error occurs 153 */ 154 public void loadHamNSpam(Connection conn) throws java.sql.SQLException { 155 PreparedStatement pstmt = null; 156 ResultSet rs = null; 157 158 try { 159 pstmt = conn.prepareStatement(sqlQueries.getSqlString("selectHamTokens", true)); 160 rs = pstmt.executeQuery(); 161 162 Map<String, Integer> ham = getHamTokenCounts(); 163 while (rs.next()) { 164 String token = rs.getString(1); 165 int count = rs.getInt(2); 166 // to reduce memory, use the token only if the count is > 1 167 if (count > 1) { 168 ham.put(token, Integer.valueOf(count)); 169 } 170 } 171 // Verbose. 172 delegatedLog("Ham tokens count: " + ham.size()); 173 174 rs.close(); 175 pstmt.close(); 176 177 // Get the spam tokens/counts. 178 pstmt = conn.prepareStatement(sqlQueries.getSqlString("selectSpamTokens", true)); 179 rs = pstmt.executeQuery(); 180 181 Map<String, Integer> spam = getSpamTokenCounts(); 182 while (rs.next()) { 183 String token = rs.getString(1); 184 int count = rs.getInt(2); 185 // to reduce memory, use the token only if the count is > 1 186 if (count > 1) { 187 spam.put(token, new Integer(count)); 188 } 189 } 190 191 // Verbose. 192 delegatedLog("Spam tokens count: " + spam.size()); 193 194 rs.close(); 195 pstmt.close(); 196 197 // Get the ham/spam message counts. 198 pstmt = conn.prepareStatement(sqlQueries.getSqlString("selectMessageCounts", true)); 199 rs = pstmt.executeQuery(); 200 if (rs.next()) { 201 setHamMessageCount(rs.getInt(1)); 202 setSpamMessageCount(rs.getInt(2)); 203 } 204 205 rs.close(); 206 pstmt.close(); 207 208 } finally { 209 if (rs != null) { 210 try { 211 rs.close(); 212 } catch (java.sql.SQLException se) { 213 } 214 215 rs = null; 216 } 217 218 if (pstmt != null) { 219 try { 220 pstmt.close(); 221 } catch (java.sql.SQLException se) { 222 } 223 224 pstmt = null; 225 } 226 } 227 } 228 229 /** 230 * Updates the database with new "ham" token frequencies. 231 * 232 * @param conn 233 * The connection for accessing the database 234 * @throws SQLException 235 * If a database error occurs 236 */ 237 public void updateHamTokens(Connection conn) throws java.sql.SQLException { 238 updateTokens(conn, getHamTokenCounts(), sqlQueries.getSqlString("insertHamToken", true), sqlQueries.getSqlString("updateHamToken", true)); 239 240 setMessageCount(conn, sqlQueries.getSqlString("updateHamMessageCounts", true), getHamMessageCount()); 241 } 242 243 /** 244 * Updates the database with new "spam" token frequencies. 245 * 246 * @param conn 247 * The connection for accessing the database 248 * @throws SQLException 249 * If a database error occurs 250 */ 251 public void updateSpamTokens(Connection conn) throws java.sql.SQLException { 252 updateTokens(conn, getSpamTokenCounts(), sqlQueries.getSqlString("insertSpamToken", true), sqlQueries.getSqlString("updateSpamToken", true)); 253 254 setMessageCount(conn, sqlQueries.getSqlString("updateSpamMessageCounts", true), getSpamMessageCount()); 255 } 256 257 /** 258 * Reset all trained data 259 * 260 * @param conn 261 * The connection for accessing the database 262 * @throws SQLException 263 * If a database error occours 264 */ 265 public void resetData(Connection conn) throws SQLException { 266 deleteData(conn, sqlQueries.getSqlString("deleteHamTokens", true)); 267 deleteData(conn, sqlQueries.getSqlString("deleteSpamTokens", true)); 268 deleteData(conn, sqlQueries.getSqlString("deleteMessageCounts", true)); 269 } 270 271 private void setMessageCount(Connection conn, String sqlStatement, int count) throws java.sql.SQLException { 272 PreparedStatement init = null; 273 PreparedStatement update = null; 274 275 try { 276 // set the ham/spam message counts. 277 init = conn.prepareStatement(sqlQueries.getSqlString("initializeMessageCounts", true)); 278 update = conn.prepareStatement(sqlStatement); 279 280 update.setInt(1, count); 281 282 if (update.executeUpdate() == 0) { 283 init.executeUpdate(); 284 update.executeUpdate(); 285 } 286 287 } finally { 288 if (init != null) { 289 try { 290 init.close(); 291 } catch (java.sql.SQLException ignore) { 292 } 293 } 294 if (update != null) { 295 try { 296 update.close(); 297 } catch (java.sql.SQLException ignore) { 298 } 299 } 300 } 301 } 302 303 private void updateTokens(Connection conn, Map<String, Integer> tokens, String insertSqlStatement, String updateSqlStatement) throws java.sql.SQLException { 304 PreparedStatement insert = null; 305 PreparedStatement update = null; 306 307 try { 308 // Used to insert new token entries. 309 insert = conn.prepareStatement(insertSqlStatement); 310 311 // Used to update existing token entries. 312 update = conn.prepareStatement(updateSqlStatement); 313 314 for (Map.Entry<String, Integer> entry : tokens.entrySet()) { 315 update.setInt(1, entry.getValue()); 316 update.setString(2, entry.getKey()); 317 318 // If the update affected 0 (zero) rows, then the token hasn't 319 // been 320 // encountered before, and we need to add it to the corpus. 321 if (update.executeUpdate() == 0) { 322 insert.setString(1, entry.getKey()); 323 insert.setInt(2, entry.getValue()); 324 325 insert.executeUpdate(); 326 } 327 } 328 } finally { 329 if (insert != null) { 330 try { 331 insert.close(); 332 } catch (java.sql.SQLException ignore) { 333 } 334 335 insert = null; 336 } 337 338 if (update != null) { 339 try { 340 update.close(); 341 } catch (java.sql.SQLException ignore) { 342 } 343 344 update = null; 345 } 346 } 347 } 348 349 /** 350 * Initializes the sql query environment from the SqlResources file. Will 351 * look for conf/sqlResources.xml. 352 * 353 * @param conn 354 * The connection for accessing the database 355 * @param sqlConfiguration 356 * The sqlResources configuration document 357 * @throws Exception 358 * If any error occurs 359 */ 360 public void initSqlQueries(Connection conn, Document sqlConfiguration) throws Exception { 361 try { 362 if (conn.getAutoCommit()) { 363 conn.setAutoCommit(false); 364 } 365 366 sqlQueries.init(sqlConfiguration, JDBCBayesianAnalyzer.class.getName(), conn, getSqlParameters()); 367 368 checkTables(conn); 369 } finally { 370 theJDBCUtil.closeJDBCConnection(conn); 371 } 372 } 373 374 private void checkTables(Connection conn) throws SQLException { 375 // DatabaseMetaData dbMetaData = conn.getMetaData(); 376 // Need to ask in the case that identifiers are stored, ask the 377 // DatabaseMetaInfo. 378 // Try UPPER, lower, and MixedCase, to see if the table is there. 379 380 boolean dbUpdated = false; 381 382 dbUpdated = createTable(conn, "hamTableName", "createHamTable"); 383 384 dbUpdated = createTable(conn, "spamTableName", "createSpamTable"); 385 386 dbUpdated = createTable(conn, "messageCountsTableName", "createMessageCountsTable"); 387 388 // Commit our changes if necessary. 389 if (conn != null && dbUpdated && !conn.getAutoCommit()) { 390 conn.commit(); 391 dbUpdated = false; 392 } 393 394 } 395 396 private boolean createTable(Connection conn, String tableNameSqlStringName, String createSqlStringName) throws SQLException { 397 String tableName = sqlQueries.getSqlString(tableNameSqlStringName, true); 398 399 DatabaseMetaData dbMetaData = conn.getMetaData(); 400 401 // Try UPPER, lower, and MixedCase, to see if the table is there. 402 if (theJDBCUtil.tableExists(dbMetaData, tableName)) { 403 return false; 404 } 405 406 PreparedStatement createStatement = null; 407 408 try { 409 createStatement = conn.prepareStatement(sqlQueries.getSqlString(createSqlStringName, true)); 410 createStatement.execute(); 411 412 StringBuffer logBuffer = null; 413 logBuffer = new StringBuffer(64).append("Created table '").append(tableName).append("' using sqlResources string '").append(createSqlStringName).append("'."); 414 delegatedLog(logBuffer.toString()); 415 416 } finally { 417 theJDBCUtil.closeJDBCStatement(createStatement); 418 } 419 420 return true; 421 } 422 423 private void deleteData(Connection conn, String deleteSqlStatement) throws SQLException { 424 PreparedStatement delete = null; 425 426 try { 427 // Used to delete ham tokens 428 delete = conn.prepareStatement(deleteSqlStatement); 429 delete.executeUpdate(); 430 } finally { 431 if (delete != null) { 432 try { 433 delete.close(); 434 } catch (java.sql.SQLException ignore) { 435 } 436 437 delete = null; 438 } 439 } 440 } 441 }