用 Lucene 來建立搜索程序,檢索時的效率大大提高了,但這是以建立索引為代價的:建立索引本身就是個耗內存大、時間長的過程(前提是數據量比較大;數據少又何必用 Lucene 來建立全文檢索呢,個人拙見),因此索引的建立就成了瓶頸。如果我們建立好索引,然後每次更新數據後都重新建立索引,無疑是不合理的;為什麼不能在原先索引文件的基礎上,再把新更新的數據加上去呢?增量索引就是在建完索引之後,將數據庫最後一條記錄的 ID 存儲起來,下次建立索引時先取得這個 ID,從而只取出其後更新的數據,並把這些更新數據的索引追加到原先的索引文件裡面。下面來看個簡單的例子:
數據庫表有兩個字段 id 和 title,話不多說,直接上代碼,一看便知:
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.Statement;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
/**
 * Minimal incremental-indexing example for Lucene.
 *
 * <p>After each run the ID of the last indexed row is saved to a small text
 * file (the "checkpoint"). The next run reads that ID, selects only rows with
 * a larger ID, and appends them to the existing index instead of rebuilding
 * it from scratch.
 */
public class Index {

    /** Checkpoint value used when no ID has been stored yet; selects every row. */
    private static final String NO_STORED_ID = "0";

    /**
     * Runs one indexing pass: reads the checkpoint, queries the rows added
     * since then, indexes them, and prints the checkpoint stored by this run.
     */
    public static void main(String[] args) {
        String path = "d:\\index";// directory holding the index files
        String storeIdPath = "d:\\storeId.txt";// file holding the last indexed ID
        ResultSet rs = null;
        try {
            Index index = new Index();
            String storeId = getStoreId(storeIdPath);
            rs = index.getResult(storeId);
            index.indexBuilding(path, storeIdPath, rs);
            storeId = getStoreId(storeIdPath);
            System.out.println(storeId);// print the ID stored by this run
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // getResult() hands back an open ResultSet; release it together
            // with the statement and connection that produced it.
            closeResult(rs);
        }
    }

    /**
     * Selects every row of {@code newitem} whose id is greater than
     * {@code storeId}, ordered by id.
     *
     * <p>The returned ResultSet keeps its statement and connection open; the
     * caller is responsible for closing them (reachable through
     * {@code rs.getStatement().getConnection()}).
     *
     * @param storeId last indexed ID; {@code null} is treated as "0"
     * @return an open ResultSet positioned before the first new row
     * @throws Exception if the driver cannot be loaded or the query fails
     */
    public ResultSet getResult(String storeId) throws Exception {
        Class.forName("com.mysql.jdbc.Driver").newInstance();
        String url = "jdbc:mysql://localhost:3306/ding";
        String userName = "root";
        String password = "ding";
        Connection conn = DriverManager.getConnection(url, userName, password);
        PreparedStatement stmt = null;
        try {
            // Bind the ID as a parameter instead of concatenating it into the SQL text.
            stmt = conn.prepareStatement("select * from newitem where id > ? order by id");
            stmt.setString(1, storeId == null ? NO_STORED_ID : storeId);
            return stmt.executeQuery();
        } catch (Exception e) {
            // Nothing was handed to the caller, so nothing else can close these.
            closeJdbc(null, stmt, conn);
            throw e;
        }
    }

    /**
     * Adds every row of {@code rs} to the index at {@code path} and, if at
     * least one row was indexed, stores the last row's ID in
     * {@code storeIdPath}. (A List could be used in place of the ResultSet;
     * the principle is the same.)
     *
     * <p>The index is created from scratch when the checkpoint file holds no
     * ID, and appended to otherwise. If the checkpoint cannot be read the
     * method fails without opening the index, so a read error never causes an
     * existing index to be recreated.
     *
     * @param path        directory of the index
     * @param storeIdPath checkpoint file holding the last indexed ID
     * @param rs          rows to index; must expose columns "id" and "title"
     * @return {@code true} if indexing succeeded and the checkpoint (when one
     *         had to be written) was saved; {@code false} otherwise
     */
    public boolean indexBuilding(String path, String storeIdPath, ResultSet rs) {
        try {
            Analyzer luceneAnalyzer = new StandardAnalyzer();
            // The stored ID decides between a full rebuild and an incremental run.
            boolean isEmpty = readStoredId(storeIdPath) == null;
            // create == false means incremental indexing
            IndexWriter writer = new IndexWriter(path, luceneAnalyzer, isEmpty);
            String storeId = "";
            boolean indexFlag = false;
            try {
                while (rs.next()) {
                    String id = rs.getString("id");
                    String title = rs.getString("title");
                    writer.addDocument(Document(id, title));
                    // Rows arrive ordered by id, so the last one seen is the new
                    // checkpoint (a simplification kept from the original example).
                    storeId = id;
                    indexFlag = true;
                }
                writer.optimize();
            } finally {
                // Always close the writer so the index is not left locked on failure.
                writer.close();
            }
            if (indexFlag) {
                // Persist the last ID only after the index has been closed successfully.
                return writeStoreId(storeIdPath, storeId);
            }
            return true;
        } catch (Exception e) {
            e.printStackTrace();
            System.out.println("出錯了" + e.getClass() + "\n 錯誤信息為: "
                    + e.getMessage());
            return false;
        }
    }

    /**
     * Builds the document for one row: fields "ID" and "TITLE", both stored
     * and tokenized.
     */
    public static Document Document(String id, String title) {
        Document doc = new Document();
        doc.add(new Field("ID", id, Field.Store.YES, Field.Index.TOKENIZED));
        doc.add(new Field("TITLE", title, Field.Store.YES,
                Field.Index.TOKENIZED));
        return doc;
    }

    /**
     * Returns the ID stored in the checkpoint file.
     *
     * @param path checkpoint file
     * @return the stored ID with surrounding whitespace removed, or "0" when
     *         the file is missing, blank, or cannot be read
     */
    public static String getStoreId(String path) {
        try {
            String stored = readStoredId(path);
            return stored == null ? NO_STORED_ID : stored;
        } catch (Exception e) {
            e.printStackTrace();
            return NO_STORED_ID;
        }
    }

    /**
     * Writes {@code storeId} to the checkpoint file, replacing its content.
     *
     * @param path    checkpoint file
     * @param storeId ID to store
     * @return {@code true} only if the ID was written without an I/O error
     */
    public static boolean writeStoreId(String path, String storeId) {
        PrintWriter out = null;
        try {
            out = new PrintWriter(new FileWriter(path));
            out.write(storeId);
            // PrintWriter never throws on write; checkError() flushes and
            // reports whether any write failed.
            return !out.checkError();
        } catch (IOException e) {
            e.printStackTrace();
            return false;
        } finally {
            if (out != null) {
                out.close();
            }
        }
    }

    /**
     * Reads the first line of the checkpoint file. Does not create the file.
     *
     * @return the trimmed first line, or {@code null} when the file does not
     *         exist or its first line is missing or blank
     * @throws IOException if the file exists but cannot be read
     */
    private static String readStoredId(String path) throws IOException {
        File file = new File(path);
        if (!file.exists()) {
            return null;
        }
        BufferedReader reader = new BufferedReader(new FileReader(file));
        try {
            String line = reader.readLine();
            if (line == null) {
                return null;
            }
            line = line.trim();
            return line.length() == 0 ? null : line;
        } finally {
            reader.close();
        }
    }

    /** Closes a ResultSet together with the statement and connection behind it. */
    private static void closeResult(ResultSet rs) {
        if (rs == null) {
            return;
        }
        Statement stmt = null;
        Connection conn = null;
        try {
            // Look these up before closing rs; a closed ResultSet may refuse.
            stmt = rs.getStatement();
            if (stmt != null) {
                conn = stmt.getConnection();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        closeJdbc(rs, stmt, conn);
    }

    /** Closes the given JDBC objects in order, logging any failure and continuing. */
    private static void closeJdbc(ResultSet rs, Statement stmt, Connection conn) {
        if (rs != null) {
            try {
                rs.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
        if (stmt != null) {
            try {
                stmt.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
        if (conn != null) {
            try {
                conn.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }
}
這裡的代碼寫得比較簡單,還有很多需要改進的地方,自己改進就行了;這裡只是說明增量索引的原理,望指正。
posted on 2009-05-31 16:37
老丁 閱讀(10725)
評論(11) 編輯 收藏 所屬分類:
搜索引擎 lucene