001    /****************************************************************
002     * Licensed to the Apache Software Foundation (ASF) under one   *
003     * or more contributor license agreements.  See the NOTICE file *
004     * distributed with this work for additional information        *
005     * regarding copyright ownership.  The ASF licenses this file   *
006     * to you under the Apache License, Version 2.0 (the            *
007     * "License"); you may not use this file except in compliance   *
008     * with the License.  You may obtain a copy of the License at   *
009     *                                                              *
010     *   http://www.apache.org/licenses/LICENSE-2.0                 *
011     *                                                              *
012     * Unless required by applicable law or agreed to in writing,   *
013     * software distributed under the License is distributed on an  *
014     * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
015     * KIND, either express or implied.  See the License for the    *
016     * specific language governing permissions and limitations      *
017     * under the License.                                           *
018     ****************************************************************/
019    
020    package org.apache.james.ai.classic;
021    
022    
023    import java.sql.Connection;
024    import java.sql.DatabaseMetaData;
025    import java.sql.PreparedStatement;
026    import java.sql.ResultSet;
027    import java.sql.SQLException;
028    import java.util.HashMap;
029    import java.util.Map;
030    
031    import org.w3c.dom.Document;
032    
033    /**
034     * Manages the persistence of the spam bayesian analysis corpus using a JDBC
035     * database.
036     * 
037     * <p>
038     * This class is abstract to allow implementations to take advantage of
039     * different logging capabilities/interfaces in different parts of the code.
040     * </p>
041     * 
042     * @since 2.3.0
043     */
044    
045    abstract public class JDBCBayesianAnalyzer extends BayesianAnalyzer {
046    
047        /** Public object representing a lock on database activity. */
048        public final static String DATABASE_LOCK = "database lock";
049    
050        /**
051         * An abstract method which child classes override to handle logging of
052         * errors in their particular environments.
053         * 
054         * @param errorString
055         *            the error message generated
056         */
057        abstract protected void delegatedLog(String errorString);
058    
059        /**
060         * The JDBCUtil helper class
061         */
062        private final JDBCUtil theJDBCUtil = new JDBCUtil() {
063            protected void delegatedLog(String logString) {
064                this.delegatedLog(logString);
065            }
066        };
067    
068        /** Contains all of the sql strings for this component. */
069        private SqlResources sqlQueries = new SqlResources();
070    
071        /** Holds value of property sqlFileName. */
072        private String sqlFileName;
073    
074        /** Holds value of property sqlParameters. */
075        private Map<String, String> sqlParameters = new HashMap<String, String>();
076    
077        /** Holds value of property lastDatabaseUpdateTime. */
078        private static long lastDatabaseUpdateTime;
079    
080        /**
081         * Getter for property sqlFileName.
082         * 
083         * @return Value of property sqlFileName.
084         */
085        public String getSqlFileName() {
086    
087            return this.sqlFileName;
088        }
089    
090        /**
091         * Setter for property sqlFileName.
092         * 
093         * @param sqlFileName
094         *            New value of property sqlFileName.
095         */
096        public void setSqlFileName(String sqlFileName) {
097    
098            this.sqlFileName = sqlFileName;
099        }
100    
101        /**
102         * Getter for property sqlParameters.
103         * 
104         * @return Value of property sqlParameters.
105         */
106        public Map<String, String> getSqlParameters() {
107    
108            return this.sqlParameters;
109        }
110    
111        /**
112         * Setter for property sqlParameters.
113         * 
114         * @param sqlParameters
115         *            New value of property sqlParameters.
116         */
117        public void setSqlParameters(Map<String, String> sqlParameters) {
118    
119            this.sqlParameters = sqlParameters;
120        }
121    
122        /**
123         * Getter for static lastDatabaseUpdateTime.
124         * 
125         * @return Value of property lastDatabaseUpdateTime.
126         */
127        public static long getLastDatabaseUpdateTime() {
128    
129            return lastDatabaseUpdateTime;
130        }
131    
132        /**
133         * Sets static lastDatabaseUpdateTime to System.currentTimeMillis().
134         */
135        public static void touchLastDatabaseUpdateTime() {
136    
137            lastDatabaseUpdateTime = System.currentTimeMillis();
138        }
139    
140        /**
141         * Default constructor.
142         */
143        public JDBCBayesianAnalyzer() {
144        }
145    
146        /**
147         * Loads the token frequencies from the database.
148         * 
149         * @param conn
150         *            The connection for accessing the database
151         * @throws SQLException
152         *             If a database error occurs
153         */
154        public void loadHamNSpam(Connection conn) throws java.sql.SQLException {
155            PreparedStatement pstmt = null;
156            ResultSet rs = null;
157    
158            try {
159                pstmt = conn.prepareStatement(sqlQueries.getSqlString("selectHamTokens", true));
160                rs = pstmt.executeQuery();
161    
162                Map<String, Integer> ham = getHamTokenCounts();
163                while (rs.next()) {
164                    String token = rs.getString(1);
165                    int count = rs.getInt(2);
166                    // to reduce memory, use the token only if the count is > 1
167                    if (count > 1) {
168                        ham.put(token, Integer.valueOf(count));
169                    }
170                }
171                // Verbose.
172                delegatedLog("Ham tokens count: " + ham.size());
173    
174                rs.close();
175                pstmt.close();
176    
177                // Get the spam tokens/counts.
178                pstmt = conn.prepareStatement(sqlQueries.getSqlString("selectSpamTokens", true));
179                rs = pstmt.executeQuery();
180    
181                Map<String, Integer> spam = getSpamTokenCounts();
182                while (rs.next()) {
183                    String token = rs.getString(1);
184                    int count = rs.getInt(2);
185                    // to reduce memory, use the token only if the count is > 1
186                    if (count > 1) {
187                        spam.put(token, new Integer(count));
188                    }
189                }
190    
191                // Verbose.
192                delegatedLog("Spam tokens count: " + spam.size());
193    
194                rs.close();
195                pstmt.close();
196    
197                // Get the ham/spam message counts.
198                pstmt = conn.prepareStatement(sqlQueries.getSqlString("selectMessageCounts", true));
199                rs = pstmt.executeQuery();
200                if (rs.next()) {
201                    setHamMessageCount(rs.getInt(1));
202                    setSpamMessageCount(rs.getInt(2));
203                }
204    
205                rs.close();
206                pstmt.close();
207    
208            } finally {
209                if (rs != null) {
210                    try {
211                        rs.close();
212                    } catch (java.sql.SQLException se) {
213                    }
214    
215                    rs = null;
216                }
217    
218                if (pstmt != null) {
219                    try {
220                        pstmt.close();
221                    } catch (java.sql.SQLException se) {
222                    }
223    
224                    pstmt = null;
225                }
226            }
227        }
228    
229        /**
230         * Updates the database with new "ham" token frequencies.
231         * 
232         * @param conn
233         *            The connection for accessing the database
234         * @throws SQLException
235         *             If a database error occurs
236         */
237        public void updateHamTokens(Connection conn) throws java.sql.SQLException {
238            updateTokens(conn, getHamTokenCounts(), sqlQueries.getSqlString("insertHamToken", true), sqlQueries.getSqlString("updateHamToken", true));
239    
240            setMessageCount(conn, sqlQueries.getSqlString("updateHamMessageCounts", true), getHamMessageCount());
241        }
242    
243        /**
244         * Updates the database with new "spam" token frequencies.
245         * 
246         * @param conn
247         *            The connection for accessing the database
248         * @throws SQLException
249         *             If a database error occurs
250         */
251        public void updateSpamTokens(Connection conn) throws java.sql.SQLException {
252            updateTokens(conn, getSpamTokenCounts(), sqlQueries.getSqlString("insertSpamToken", true), sqlQueries.getSqlString("updateSpamToken", true));
253    
254            setMessageCount(conn, sqlQueries.getSqlString("updateSpamMessageCounts", true), getSpamMessageCount());
255        }
256    
257        /**
258         * Reset all trained data
259         * 
260         * @param conn
261         *            The connection for accessing the database
262         * @throws SQLException
263         *             If a database error occours
264         */
265        public void resetData(Connection conn) throws SQLException {
266            deleteData(conn, sqlQueries.getSqlString("deleteHamTokens", true));
267            deleteData(conn, sqlQueries.getSqlString("deleteSpamTokens", true));
268            deleteData(conn, sqlQueries.getSqlString("deleteMessageCounts", true));
269        }
270    
271        private void setMessageCount(Connection conn, String sqlStatement, int count) throws java.sql.SQLException {
272            PreparedStatement init = null;
273            PreparedStatement update = null;
274    
275            try {
276                // set the ham/spam message counts.
277                init = conn.prepareStatement(sqlQueries.getSqlString("initializeMessageCounts", true));
278                update = conn.prepareStatement(sqlStatement);
279    
280                update.setInt(1, count);
281    
282                if (update.executeUpdate() == 0) {
283                    init.executeUpdate();
284                    update.executeUpdate();
285                }
286    
287            } finally {
288                if (init != null) {
289                    try {
290                        init.close();
291                    } catch (java.sql.SQLException ignore) {
292                    }
293                }
294                if (update != null) {
295                    try {
296                        update.close();
297                    } catch (java.sql.SQLException ignore) {
298                    }
299                }
300            }
301        }
302    
303        private void updateTokens(Connection conn, Map<String, Integer> tokens, String insertSqlStatement, String updateSqlStatement) throws java.sql.SQLException {
304            PreparedStatement insert = null;
305            PreparedStatement update = null;
306    
307            try {
308                // Used to insert new token entries.
309                insert = conn.prepareStatement(insertSqlStatement);
310    
311                // Used to update existing token entries.
312                update = conn.prepareStatement(updateSqlStatement);
313    
314                for (Map.Entry<String, Integer> entry : tokens.entrySet()) {
315                    update.setInt(1, entry.getValue());
316                    update.setString(2, entry.getKey());
317    
318                    // If the update affected 0 (zero) rows, then the token hasn't
319                    // been
320                    // encountered before, and we need to add it to the corpus.
321                    if (update.executeUpdate() == 0) {
322                        insert.setString(1, entry.getKey());
323                        insert.setInt(2, entry.getValue());
324    
325                        insert.executeUpdate();
326                    }
327                }
328            } finally {
329                if (insert != null) {
330                    try {
331                        insert.close();
332                    } catch (java.sql.SQLException ignore) {
333                    }
334    
335                    insert = null;
336                }
337    
338                if (update != null) {
339                    try {
340                        update.close();
341                    } catch (java.sql.SQLException ignore) {
342                    }
343    
344                    update = null;
345                }
346            }
347        }
348    
349        /**
350         * Initializes the sql query environment from the SqlResources file. Will
351         * look for conf/sqlResources.xml.
352         * 
353         * @param conn
354         *            The connection for accessing the database
355         * @param sqlConfiguration
356         *            The sqlResources configuration document
357         * @throws Exception
358         *             If any error occurs
359         */
360        public void initSqlQueries(Connection conn, Document sqlConfiguration) throws Exception {
361            try {
362                if (conn.getAutoCommit()) {
363                    conn.setAutoCommit(false);
364                }
365    
366                sqlQueries.init(sqlConfiguration, JDBCBayesianAnalyzer.class.getName(), conn, getSqlParameters());
367    
368                checkTables(conn);
369            } finally {
370                theJDBCUtil.closeJDBCConnection(conn);
371            }
372        }
373    
374        private void checkTables(Connection conn) throws SQLException {
375            // DatabaseMetaData dbMetaData = conn.getMetaData();
376            // Need to ask in the case that identifiers are stored, ask the
377            // DatabaseMetaInfo.
378            // Try UPPER, lower, and MixedCase, to see if the table is there.
379    
380            boolean dbUpdated = false;
381    
382            dbUpdated = createTable(conn, "hamTableName", "createHamTable");
383    
384            dbUpdated = createTable(conn, "spamTableName", "createSpamTable");
385    
386            dbUpdated = createTable(conn, "messageCountsTableName", "createMessageCountsTable");
387    
388            // Commit our changes if necessary.
389            if (conn != null && dbUpdated && !conn.getAutoCommit()) {
390                conn.commit();
391                dbUpdated = false;
392            }
393    
394        }
395    
396        private boolean createTable(Connection conn, String tableNameSqlStringName, String createSqlStringName) throws SQLException {
397            String tableName = sqlQueries.getSqlString(tableNameSqlStringName, true);
398    
399            DatabaseMetaData dbMetaData = conn.getMetaData();
400    
401            // Try UPPER, lower, and MixedCase, to see if the table is there.
402            if (theJDBCUtil.tableExists(dbMetaData, tableName)) {
403                return false;
404            }
405    
406            PreparedStatement createStatement = null;
407    
408            try {
409                createStatement = conn.prepareStatement(sqlQueries.getSqlString(createSqlStringName, true));
410                createStatement.execute();
411    
412                StringBuffer logBuffer = null;
413                logBuffer = new StringBuffer(64).append("Created table '").append(tableName).append("' using sqlResources string '").append(createSqlStringName).append("'.");
414                delegatedLog(logBuffer.toString());
415    
416            } finally {
417                theJDBCUtil.closeJDBCStatement(createStatement);
418            }
419    
420            return true;
421        }
422    
423        private void deleteData(Connection conn, String deleteSqlStatement) throws SQLException {
424            PreparedStatement delete = null;
425    
426            try {
427                // Used to delete ham tokens
428                delete = conn.prepareStatement(deleteSqlStatement);
429                delete.executeUpdate();
430            } finally {
431                if (delete != null) {
432                    try {
433                        delete.close();
434                    } catch (java.sql.SQLException ignore) {
435                    }
436    
437                    delete = null;
438                }
439            }
440        }
441    }