001 /****************************************************************
002 * Licensed to the Apache Software Foundation (ASF) under one *
003 * or more contributor license agreements. See the NOTICE file *
004 * distributed with this work for additional information *
005 * regarding copyright ownership. The ASF licenses this file *
006 * to you under the Apache License, Version 2.0 (the *
007 * "License"); you may not use this file except in compliance *
008 * with the License. You may obtain a copy of the License at *
009 * *
010 * http://www.apache.org/licenses/LICENSE-2.0 *
011 * *
012 * Unless required by applicable law or agreed to in writing, *
013 * software distributed under the License is distributed on an *
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
015 * KIND, either express or implied. See the License for the *
016 * specific language governing permissions and limitations *
017 * under the License. *
018 ****************************************************************/
019
020 package org.apache.james.ai.classic;
021
022
023 import java.sql.Connection;
024 import java.sql.DatabaseMetaData;
025 import java.sql.PreparedStatement;
026 import java.sql.ResultSet;
027 import java.sql.SQLException;
028 import java.util.HashMap;
029 import java.util.Map;
030
031 import org.w3c.dom.Document;
032
033 /**
034 * Manages the persistence of the spam bayesian analysis corpus using a JDBC
035 * database.
036 *
037 * <p>
038 * This class is abstract to allow implementations to take advantage of
039 * different logging capabilities/interfaces in different parts of the code.
040 * </p>
041 *
042 * @since 2.3.0
043 */
044
045 abstract public class JDBCBayesianAnalyzer extends BayesianAnalyzer {
046
047 /** Public object representing a lock on database activity. */
048 public final static String DATABASE_LOCK = "database lock";
049
050 /**
051 * An abstract method which child classes override to handle logging of
052 * errors in their particular environments.
053 *
054 * @param errorString
055 * the error message generated
056 */
057 abstract protected void delegatedLog(String errorString);
058
059 /**
060 * The JDBCUtil helper class
061 */
062 private final JDBCUtil theJDBCUtil = new JDBCUtil() {
063 protected void delegatedLog(String logString) {
064 this.delegatedLog(logString);
065 }
066 };
067
068 /** Contains all of the sql strings for this component. */
069 private SqlResources sqlQueries = new SqlResources();
070
071 /** Holds value of property sqlFileName. */
072 private String sqlFileName;
073
074 /** Holds value of property sqlParameters. */
075 private Map<String, String> sqlParameters = new HashMap<String, String>();
076
077 /** Holds value of property lastDatabaseUpdateTime. */
078 private static long lastDatabaseUpdateTime;
079
080 /**
081 * Getter for property sqlFileName.
082 *
083 * @return Value of property sqlFileName.
084 */
085 public String getSqlFileName() {
086
087 return this.sqlFileName;
088 }
089
090 /**
091 * Setter for property sqlFileName.
092 *
093 * @param sqlFileName
094 * New value of property sqlFileName.
095 */
096 public void setSqlFileName(String sqlFileName) {
097
098 this.sqlFileName = sqlFileName;
099 }
100
101 /**
102 * Getter for property sqlParameters.
103 *
104 * @return Value of property sqlParameters.
105 */
106 public Map<String, String> getSqlParameters() {
107
108 return this.sqlParameters;
109 }
110
111 /**
112 * Setter for property sqlParameters.
113 *
114 * @param sqlParameters
115 * New value of property sqlParameters.
116 */
117 public void setSqlParameters(Map<String, String> sqlParameters) {
118
119 this.sqlParameters = sqlParameters;
120 }
121
122 /**
123 * Getter for static lastDatabaseUpdateTime.
124 *
125 * @return Value of property lastDatabaseUpdateTime.
126 */
127 public static long getLastDatabaseUpdateTime() {
128
129 return lastDatabaseUpdateTime;
130 }
131
132 /**
133 * Sets static lastDatabaseUpdateTime to System.currentTimeMillis().
134 */
135 public static void touchLastDatabaseUpdateTime() {
136
137 lastDatabaseUpdateTime = System.currentTimeMillis();
138 }
139
140 /**
141 * Default constructor.
142 */
143 public JDBCBayesianAnalyzer() {
144 }
145
146 /**
147 * Loads the token frequencies from the database.
148 *
149 * @param conn
150 * The connection for accessing the database
151 * @throws SQLException
152 * If a database error occurs
153 */
154 public void loadHamNSpam(Connection conn) throws java.sql.SQLException {
155 PreparedStatement pstmt = null;
156 ResultSet rs = null;
157
158 try {
159 pstmt = conn.prepareStatement(sqlQueries.getSqlString("selectHamTokens", true));
160 rs = pstmt.executeQuery();
161
162 Map<String, Integer> ham = getHamTokenCounts();
163 while (rs.next()) {
164 String token = rs.getString(1);
165 int count = rs.getInt(2);
166 // to reduce memory, use the token only if the count is > 1
167 if (count > 1) {
168 ham.put(token, Integer.valueOf(count));
169 }
170 }
171 // Verbose.
172 delegatedLog("Ham tokens count: " + ham.size());
173
174 rs.close();
175 pstmt.close();
176
177 // Get the spam tokens/counts.
178 pstmt = conn.prepareStatement(sqlQueries.getSqlString("selectSpamTokens", true));
179 rs = pstmt.executeQuery();
180
181 Map<String, Integer> spam = getSpamTokenCounts();
182 while (rs.next()) {
183 String token = rs.getString(1);
184 int count = rs.getInt(2);
185 // to reduce memory, use the token only if the count is > 1
186 if (count > 1) {
187 spam.put(token, new Integer(count));
188 }
189 }
190
191 // Verbose.
192 delegatedLog("Spam tokens count: " + spam.size());
193
194 rs.close();
195 pstmt.close();
196
197 // Get the ham/spam message counts.
198 pstmt = conn.prepareStatement(sqlQueries.getSqlString("selectMessageCounts", true));
199 rs = pstmt.executeQuery();
200 if (rs.next()) {
201 setHamMessageCount(rs.getInt(1));
202 setSpamMessageCount(rs.getInt(2));
203 }
204
205 rs.close();
206 pstmt.close();
207
208 } finally {
209 if (rs != null) {
210 try {
211 rs.close();
212 } catch (java.sql.SQLException se) {
213 }
214
215 rs = null;
216 }
217
218 if (pstmt != null) {
219 try {
220 pstmt.close();
221 } catch (java.sql.SQLException se) {
222 }
223
224 pstmt = null;
225 }
226 }
227 }
228
229 /**
230 * Updates the database with new "ham" token frequencies.
231 *
232 * @param conn
233 * The connection for accessing the database
234 * @throws SQLException
235 * If a database error occurs
236 */
237 public void updateHamTokens(Connection conn) throws java.sql.SQLException {
238 updateTokens(conn, getHamTokenCounts(), sqlQueries.getSqlString("insertHamToken", true), sqlQueries.getSqlString("updateHamToken", true));
239
240 setMessageCount(conn, sqlQueries.getSqlString("updateHamMessageCounts", true), getHamMessageCount());
241 }
242
243 /**
244 * Updates the database with new "spam" token frequencies.
245 *
246 * @param conn
247 * The connection for accessing the database
248 * @throws SQLException
249 * If a database error occurs
250 */
251 public void updateSpamTokens(Connection conn) throws java.sql.SQLException {
252 updateTokens(conn, getSpamTokenCounts(), sqlQueries.getSqlString("insertSpamToken", true), sqlQueries.getSqlString("updateSpamToken", true));
253
254 setMessageCount(conn, sqlQueries.getSqlString("updateSpamMessageCounts", true), getSpamMessageCount());
255 }
256
257 /**
258 * Reset all trained data
259 *
260 * @param conn
261 * The connection for accessing the database
262 * @throws SQLException
263 * If a database error occours
264 */
265 public void resetData(Connection conn) throws SQLException {
266 deleteData(conn, sqlQueries.getSqlString("deleteHamTokens", true));
267 deleteData(conn, sqlQueries.getSqlString("deleteSpamTokens", true));
268 deleteData(conn, sqlQueries.getSqlString("deleteMessageCounts", true));
269 }
270
271 private void setMessageCount(Connection conn, String sqlStatement, int count) throws java.sql.SQLException {
272 PreparedStatement init = null;
273 PreparedStatement update = null;
274
275 try {
276 // set the ham/spam message counts.
277 init = conn.prepareStatement(sqlQueries.getSqlString("initializeMessageCounts", true));
278 update = conn.prepareStatement(sqlStatement);
279
280 update.setInt(1, count);
281
282 if (update.executeUpdate() == 0) {
283 init.executeUpdate();
284 update.executeUpdate();
285 }
286
287 } finally {
288 if (init != null) {
289 try {
290 init.close();
291 } catch (java.sql.SQLException ignore) {
292 }
293 }
294 if (update != null) {
295 try {
296 update.close();
297 } catch (java.sql.SQLException ignore) {
298 }
299 }
300 }
301 }
302
303 private void updateTokens(Connection conn, Map<String, Integer> tokens, String insertSqlStatement, String updateSqlStatement) throws java.sql.SQLException {
304 PreparedStatement insert = null;
305 PreparedStatement update = null;
306
307 try {
308 // Used to insert new token entries.
309 insert = conn.prepareStatement(insertSqlStatement);
310
311 // Used to update existing token entries.
312 update = conn.prepareStatement(updateSqlStatement);
313
314 for (Map.Entry<String, Integer> entry : tokens.entrySet()) {
315 update.setInt(1, entry.getValue());
316 update.setString(2, entry.getKey());
317
318 // If the update affected 0 (zero) rows, then the token hasn't
319 // been
320 // encountered before, and we need to add it to the corpus.
321 if (update.executeUpdate() == 0) {
322 insert.setString(1, entry.getKey());
323 insert.setInt(2, entry.getValue());
324
325 insert.executeUpdate();
326 }
327 }
328 } finally {
329 if (insert != null) {
330 try {
331 insert.close();
332 } catch (java.sql.SQLException ignore) {
333 }
334
335 insert = null;
336 }
337
338 if (update != null) {
339 try {
340 update.close();
341 } catch (java.sql.SQLException ignore) {
342 }
343
344 update = null;
345 }
346 }
347 }
348
349 /**
350 * Initializes the sql query environment from the SqlResources file. Will
351 * look for conf/sqlResources.xml.
352 *
353 * @param conn
354 * The connection for accessing the database
355 * @param sqlConfiguration
356 * The sqlResources configuration document
357 * @throws Exception
358 * If any error occurs
359 */
360 public void initSqlQueries(Connection conn, Document sqlConfiguration) throws Exception {
361 try {
362 if (conn.getAutoCommit()) {
363 conn.setAutoCommit(false);
364 }
365
366 sqlQueries.init(sqlConfiguration, JDBCBayesianAnalyzer.class.getName(), conn, getSqlParameters());
367
368 checkTables(conn);
369 } finally {
370 theJDBCUtil.closeJDBCConnection(conn);
371 }
372 }
373
374 private void checkTables(Connection conn) throws SQLException {
375 // DatabaseMetaData dbMetaData = conn.getMetaData();
376 // Need to ask in the case that identifiers are stored, ask the
377 // DatabaseMetaInfo.
378 // Try UPPER, lower, and MixedCase, to see if the table is there.
379
380 boolean dbUpdated = false;
381
382 dbUpdated = createTable(conn, "hamTableName", "createHamTable");
383
384 dbUpdated = createTable(conn, "spamTableName", "createSpamTable");
385
386 dbUpdated = createTable(conn, "messageCountsTableName", "createMessageCountsTable");
387
388 // Commit our changes if necessary.
389 if (conn != null && dbUpdated && !conn.getAutoCommit()) {
390 conn.commit();
391 dbUpdated = false;
392 }
393
394 }
395
396 private boolean createTable(Connection conn, String tableNameSqlStringName, String createSqlStringName) throws SQLException {
397 String tableName = sqlQueries.getSqlString(tableNameSqlStringName, true);
398
399 DatabaseMetaData dbMetaData = conn.getMetaData();
400
401 // Try UPPER, lower, and MixedCase, to see if the table is there.
402 if (theJDBCUtil.tableExists(dbMetaData, tableName)) {
403 return false;
404 }
405
406 PreparedStatement createStatement = null;
407
408 try {
409 createStatement = conn.prepareStatement(sqlQueries.getSqlString(createSqlStringName, true));
410 createStatement.execute();
411
412 StringBuffer logBuffer = null;
413 logBuffer = new StringBuffer(64).append("Created table '").append(tableName).append("' using sqlResources string '").append(createSqlStringName).append("'.");
414 delegatedLog(logBuffer.toString());
415
416 } finally {
417 theJDBCUtil.closeJDBCStatement(createStatement);
418 }
419
420 return true;
421 }
422
423 private void deleteData(Connection conn, String deleteSqlStatement) throws SQLException {
424 PreparedStatement delete = null;
425
426 try {
427 // Used to delete ham tokens
428 delete = conn.prepareStatement(deleteSqlStatement);
429 delete.executeUpdate();
430 } finally {
431 if (delete != null) {
432 try {
433 delete.close();
434 } catch (java.sql.SQLException ignore) {
435 }
436
437 delete = null;
438 }
439 }
440 }
441 }