package com.laozizhu.article.util;
import java.io.IOException;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import javax.sql.DataSource;
import net.paoding.analysis.analyzer.PaodingAnalyzer;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocCollector;
/**
* 基于庖丁解牛的Lucene 2.4的全文搜索代碼。
*
* @author 老紫竹研究室(laozizhu.com)
*/
public class LucenePaoDing {
private static final String indexPath = "d:/indexpaoding/www.laozizhu.com";
/**
?? * @param args
?? * @throws Exception
?? */
public static void main(String[] args) throws Exception {
??? rebuildAll();
??? String keyword = "Spring.jar";
??? LucenePaoDing l = new LucenePaoDing();
??? System.out.println("索引搜索\n------------------------------");
??? System.out.println(l.seacherIndex(keyword));
}
public static void rebuildAll() {
??? synchronized (indexPath) {
????? LucenePaoDing l = new LucenePaoDing();
????? DataSource ds = (DataSource) Factory.getBean("dataSource");
????? Connection con = null;
????? Statement stat = null;
????? ResultSet rs = null;
????? try {
??????? con = ds.getConnection();
??????? stat = con.createStatement();
??????? rs = stat.executeQuery("select id,subject,content from t_article");
??????? if (rs != null) {
????????? l.Index(rs);
??????? }
????? } catch (Exception ex) {
??????? ex.printStackTrace();
????? } finally {
??????? if (rs != null) {
????????? try {
??????????? rs.close();
????????? } catch (Exception ex) {}
??????? }
??????? if (stat != null) {
????????? try {
??????????? stat.close();
????????? } catch (Exception ex) {}
??????? }
??????? if (con != null) {
????????? try {
??????????? con.close();
????????? } catch (Exception ex) {}
??????? }
????? }
??? }
}
public synchronized Analyzer getAnalyzer() {
??? return new PaodingAnalyzer();
}
private synchronized void Index(ResultSet rs) {// 通過結(jié)果集就可以獲得數(shù)據(jù)源了
??? try {
????? IndexWriter writer = new IndexWriter(indexPath, getAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);
????? writer.setMaxFieldLength(10000000);
????? Date start = new Date();
????? int index = 1;
????? while (rs.next()) {
??????? Document doc = new Document();// 一個文檔相當(dāng)與表的一條記錄
??????? doc.add(new Field("id", rs.getString("id"), Field.Store.YES, Field.Index.NOT_ANALYZED));// 字段id放的是數(shù)據(jù)庫表中的id,lucene的一條記錄的一個字段下的數(shù)據(jù)可以放多個值,這點與數(shù)據(jù)庫表不同
??????? doc.add(new Field("subject", rs.getString("subject"), Field.Store.YES, Field.Index.ANALYZED));
??????? doc.add(new Field("content", rs.getString("content"), Field.Store.YES, Field.Index.ANALYZED));
??????? writer.addDocument(doc);
??????? if (index++ == 1000) {
????????? writer.commit();
????????? index = 0;
??????? }
????? }
????? writer.commit();
????? writer.optimize();// 優(yōu)化
????? writer.close();// 一定要關(guān)閉,否則不能把內(nèi)存中的數(shù)據(jù)寫到文件
????? Date end = new Date();
????? System.out.println("重建索引成功?。。。? + "用時" + (end.getTime() - start.getTime()) + "毫秒");
??? } catch (IOException e) {
????? System.out.println(e);
??? } catch (SQLException e) {
????? System.out.println(e);
??? }
}
public void IndexSingle(long id, String subject, String content) {// 通過結(jié)果集就可以獲得數(shù)據(jù)源了
??? synchronized (indexPath) {
????? try {
??????? IndexWriter writer = new IndexWriter(indexPath, getAnalyzer(), false, IndexWriter.MaxFieldLength.UNLIMITED);
??????? writer.setMaxFieldLength(10000000);
??????? Date start = new Date();
??????? Document doc = new Document();// 一個文檔相當(dāng)與表的一條記錄
??????? doc.add(new Field("id", Long.toString(id), Field.Store.YES, Field.Index.NOT_ANALYZED));// 字段id放的是數(shù)據(jù)庫表中的id,lucene的一條記錄的一個字段下的數(shù)據(jù)可以放多個值,這點與數(shù)據(jù)庫表不同
??????? doc.add(new Field("subject", subject, Field.Store.YES, Field.Index.ANALYZED));
??????? doc.add(new Field("content", content, Field.Store.YES, Field.Index.ANALYZED));
??????? writer.addDocument(doc);
??????? // writer.optimize();// 優(yōu)化
??????? writer.close();// 一定要關(guān)閉,否則不能把內(nèi)存中的數(shù)據(jù)寫到文件
??????? Date end = new Date();
??????? System.out.println("索引建立成功?。。?!" + "用時" + (end.getTime() - start.getTime()) + "毫秒");
????? } catch (IOException e) {
??????? System.out.println(e);
????? }
??? }
}
/**
?? * 最主要的搜索方法。
?? *
?? * @param queryString
?? * @return
?? */
public List<Long> seacherIndex(String queryString) {// 根據(jù)關(guān)鍵字搜索
??? try {
????? IndexSearcher isearcher = new IndexSearcher(indexPath);
????? /* 下面這個表示要同時搜索這兩個域,而且只要一個域里面有滿足我們搜索的內(nèi)容就行 */
????? BooleanClause.Occur[] clauses = { BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD };
????? TopDocCollector collector = new TopDocCollector(10);
????? Query query = MultiFieldQueryParser.parse(queryString, new String[] { "subject", "content" }, clauses, getAnalyzer());
????? isearcher.search(query, collector);
????? ScoreDoc[] hits = collector.topDocs().scoreDocs;
????? List<Long> rtn = new ArrayList<Long>();
????? Long id;
????? int docId;
????? for (int i = 0; i < hits.length; i++) {
??????? docId = hits[i].doc;
??????? Document doc = isearcher.doc(docId);
??????? id = Long.parseLong(doc.get("id").trim());
??????? if (!rtn.contains(id)) {
????????? rtn.add(id);
??????? }
????? }
????? isearcher.close();
????? return rtn;
??? } catch (Exception e) {
????? e.printStackTrace();
????? return null;
??? }
}
}
// Blog page footer (not part of the program): posted on 2009-03-09 17:24
// by Lanshan; views (482)
// comments (0); edit; bookmark