<?xml version="1.0" encoding="utf-8" standalone="yes"?> 首先,基于一个简单的新闻系统,要想做全文检索.新闻系统的管理等在这里不再具体提出,下面列出新闻对象的类:
注:程序会用到一些工具类,不在此列出,用户可以自己实现.
package com.jscud.website.newsinfo.bean;
import java.sql.Timestamp;
import com.jscud.util.DateTime;
import com.jscud.util.StringFunc;
import com.jscud.website.newsinfo.NewsConst;
/**
 * A single news item.
 *
 * @author scud (飞云小侠) http://www.jscud.com
 */
public class NewsItem
{
    /** News id. */
    private int nid;

    /** Category id. */
    private int cid;

    /** Headline. */
    private String title;

    /** Content type; url and html are currently supported. */
    private int showtype;

    /** Body content. */
    private String content;

    /** Target address, used when the content type is url. */
    private String url;

    /** Time the item was added. */
    private Timestamp addtime;

    /** Click count. */
    private int click;

    // The matching get/set accessors are numerous and are not listed here;
    // they can be generated with a tool.
    // ......

    /**
     * Returns the content formatted according to the content type.
     *
     * @return the content; this listing applies no html-specific formatting,
     *         so the stored content is returned as it is
     */
    public String getShowContent()
    {
        if (showtype == NewsConst.ShowType_HTML)
        {
            // No html-specific formatting is present in this listing.
        }
        return content;
    }

    /**
     * Returns the link target for this item.
     *
     * @return "_blank" when the content type is url, otherwise an empty string
     */
    public String getTarget()
    {
        return (showtype == NewsConst.ShowType_URL) ? "_blank" : "";
    }

    /**
     * Returns the path and name of the static html file.
     *
     * @return "/news/" followed by category id, year, month and news id,
     *         separated by "/" and ending in ".htm"
     */
    public String getHtmlFileName()
    {
        int year = DateTime.getYear_Date(getAddtime());
        int month = DateTime.getMonth_Date(getAddtime());

        StringBuilder fileName = new StringBuilder("/news/");
        fileName.append(getCid()).append("/");
        fileName.append(year).append("/");
        fileName.append(month).append("/");
        fileName.append(getNid()).append(".htm");
        return fileName.toString();
    }

    /**
     * Returns the path segment of the static html file.
     *
     * @return category id, year and month joined with "_"
     */
    public String getHtmlFilePath()
    {
        int year = DateTime.getYear_Date(getAddtime());
        int month = DateTime.getMonth_Date(getAddtime());

        StringBuilder filePath = new StringBuilder();
        filePath.append(getCid()).append("_").append(year).append("_").append(month);
        return filePath.toString();
    }
}
可以看到,我们需要对标题和内容进行检索,为了这个目的,我们首先需要来研究一下lucene.
在Lucene中,如果要进行全文检索,必须要先建立索引然后才能进行检索,当然实际工作中还会有删除索引和更新索引的工作.
在此之前,介绍一个最基本的类(摘抄自<a href="http://www.tkk7.com/cap/archive/2005/07/17/7849.html">http://www.tkk7.com/cap/archive/2005/07/17/7849.html</a>):
Analyzer 文件的分析器（听起来别扭，还是叫Analyzer好了)的抽象，这个类用来处理分词(对中文尤其重要)，转换大小写(Computer->computer,实现查询大小写无关)，转换词根(computers->computer),消除stop words等,还负责把其他格式文档转换为纯文本等.
在lucene中,一般会使用StandardAnalyzer来分析内容,它支持中文等多字节语言,当然可以自己实现特殊的解析器.StandardAnalyzer目前对中文的处理是按照单字来处理的,这是最简单的办法,但是也有缺点,会组合出一些没有意义的结果来.
首先我们来了解建立索引,建立索引包含2种情况,一种是给一条新闻建立索引,另外的情况是在开始或者一定的时间给批量的新闻建立索引,所以为了通用,我们写一个通用的建立索引的函数:
(一般一类的索引都放在一个目录下,这个配置可以在函数中定义,也可以写在配置文件中,通过参数传递给函数.)
/** * 鐢熸垚绱㈠紩. * * @param doc 鐩爣鏂囨。 * @param indexDir 绱㈠紩鐩綍 */ public static void makeIndex(Document doc, String indexDir) { List aList = new ArrayList(); aList.add(doc); makeIndex(aList, indexDir); } /** * 鐢熸垚绱㈠紩. * * @param doc 鐢熸垚鐨刣ocument. * @param indexDir 绱㈠紩鐩綍 */ public static void makeIndex(List docs, String indexDir) { if (null == docs) { return; } boolean indexExist = indexExist(indexDir); IndexWriter writer = null; //娣誨姞涓鏉℃枃妗?br />
for (int i = 0; i < docs.size(); i++) //绱㈠紩瀹屾垚鍚庣殑澶勭悊 |
可以看到,建立索引用到的类是IndexWriter,它可以新建索引或者追加索引,但是需要自己判断.判断是通过IndexReader这个类来实现的,函数如下:
/** * 媯鏌ョ儲寮曟槸鍚﹀瓨鍦? * @param indexDir * @return */ public static boolean indexExist(String indexDir) { return IndexReader.indexExists(indexDir); } |
如果每次都是新建索引的话,会把原来的记录删除,我在使用的时候一开始就没有注意到,后来观察了一下索引文件,才发现这个问题.
还可以看到,建立索引是给用户的Document对象建立索引,Document表示索引中的一条文档记录.那么我们如何建立一个文档呢?以新闻系统为例,代码如下:
/** * 鐢熸垚鏂伴椈鐨凞ocument. * * @param aNews 涓鏉℃柊闂? * * @return lucene鐨勬枃妗e璞?br /> */ public static Document makeNewsSearchDocument(NewsItem aNews) { Document doc = new Document(); doc.add(Field.Keyword("nid", String.valueOf(aNews.getNid()))); doc.add(Field.Text("title", aNews.getTitle())); //瀵笻tml榪涜瑙f瀽,濡傛灉涓嶆槸html,鍒欎笉闇瑕佽В鏋?鎴栬呮牴鎹牸寮忚皟鐢ㄨ嚜宸辯殑瑙f瀽鏂規硶 String content = parseHtmlContent(aNews.getContent()); doc.add(Field.UnStored("content", content)); doc.add(Field.Keyword("addtime", aNews.getAddtime())); //鍙互鍔犲叆鍏朵粬鐨勫唴瀹?渚嬪鏂伴椈鐨勮瘎璁虹瓑 doc.add(Field.UnStored("other", "")); //璁塊棶url String newsUrl = "/srun/news/viewhtml/" + aNews.getHtmlFilePath() + "/" + aNews.getNid() + ".htm"; doc.add(Field.UnIndexed("visiturl", newsUrl)); return doc; } |
通过上面的代码,我们把一条新闻转换为lucene的Document对象,从而进行索引工作.在上面的代码中,我们又引入了lucene中的Field(字段)类.Document文档就像数据库中的一条记录,它有很多字段,每个字段是一个Field对象.
从别的文章摘抄一段关于Field的说明(摘抄自<a href="http://www.tkk7.com/cap/archive/2005/07/17/7849.html">http://www.tkk7.com/cap/archive/2005/07/17/7849.html</a>):
[quote]
类型 Analyzed Indexed Stored 说明
Field.Keyword(String,String/Date) N Y Y 这个Field用来储存会直接用来检索的比如(编号,姓名,日期等)
Field.UnIndexed(String,String) N N Y 不会用来检索的信息,但是检索后需要显示的,比如,硬件序列号,文档的url地址
Field.UnStored(String,String) Y Y N 大段文本内容,会用来检索,但是检索后不需要从index中取内容,可以根据url去load真实的内容
Field.Text(String,String) Y Y Y 检索,获取都需要的内容,直接放index中,不过这样会增大index
Field.Text(String,Reader) Y Y N 如果是一个Reader, lucene猜测内容比较大,会采用Unstored的策略.
[/quote]
我们可以看到新闻的编号是直接用来检索的,所以是Keyword类型的字段,新闻的标题是需要检索和显示用的,所以是Text类型,而新闻的内容因为是Html格式的,所以在经过解析器的处理后,使用的UnStored的格式,而新闻的时间是直接用来检索的,所以是Keyword类型.为了在新闻索引后用户可以访问到完整的新闻页面,还设置了一个UnIndexed类型的访问地址字段.
(对Html进行解析的处理稍后再进行讲解)
为一条新闻建立索引需要两个步骤:获取Document,传给makeIndex函数,代码如下:
public static void makeNewsInfoIndex(NewsItem aNews) { if (null == aNews) { return; } makeIndex(makeNewsSearchDocument(aNews),indexDir); } |
建立索引的工作就进行完了,只要在增加新闻后调用 makeNewsInfoIndex(newsitem); 就可以建立索引了.
如果需要删除新闻,那么也要删除对应的索引,删除索引是通过IndexReader类来完成的:
/** * 鍒犻櫎绱㈠紩. * @param aTerm 绱㈠紩鍒犻櫎鏉′歡 * @param indexDir 绱㈠紩鐩綍 */ public static void deleteIndex(Term aTerm, String indexDir) { List aList = new ArrayList(); aList.add(aTerm); deleteIndex(aList, indexDir); } /** IndexReader reader = null; |
删除索引需要一个条件,类似数据库中的字段条件,例如删除一条新闻的代码如下:
public static void deleteNewsInfoIndex(int nid) { Term aTerm = new Term("nid", String.valueOf(nid)); deleteIndex(aTerm,indexDir); } |
通过新闻的ID,就可以删除一条新闻.
如果需要更新新闻,如何更新索引呢? 更新索引需要先删除索引然后新建索引2个步骤,其实就是把上面的代码组合起来,例如更新一条新闻:
public static void updateNewsInfoIndex(NewsItem aNews) { if (null == aNews) { return; } deleteNewsInfoIndex(aNews.getNid()); makeNewsInfoIndex(aNews); } |
至此,索引的建立更新和删除就告一段落了.其中批量更新新闻的代码如下:
(批量更新应该在访问人数少或者后台程序在夜间执行)
public static void makeAllNewsInfoIndex(List newsList) { List terms = new ArrayList(); List docs = new ArrayList(); for (int i = 0; i < newsList.size(); i++) deleteIndex(terms,indexDir); |