锘??xml version="1.0" encoding="utf-8" standalone="yes"?>亚洲熟妇少妇任你躁在线观看无码,国产偷国产偷亚洲高清日韩,男人的天堂av亚洲一区2区http://www.tkk7.com/baoyaer/category/20033.htmljavazh-cnFri, 18 Jan 2008 15:25:23 GMTFri, 18 Jan 2008 15:25:23 GMT60lucene鍏ㄦ枃媯(gè)绱㈠簲鐢ㄧず渚嬪強(qiáng)浠g爜綆鏋?http://www.tkk7.com/baoyaer/articles/175099.html澶х敯鏂?/dc:creator>澶х敯鏂?/author>Mon, 14 Jan 2008 02:38:00 GMThttp://www.tkk7.com/baoyaer/articles/175099.htmlhttp://www.tkk7.com/baoyaer/comments/175099.htmlhttp://www.tkk7.com/baoyaer/articles/175099.html#Feedback0http://www.tkk7.com/baoyaer/comments/commentRss/175099.htmlhttp://www.tkk7.com/baoyaer/services/trackbacks/175099.html闃呰鍏ㄦ枃

]]>
lucene瀹炰緥浣跨敤http://www.tkk7.com/baoyaer/articles/175096.html澶х敯鏂?/dc:creator>澶х敯鏂?/author>Mon, 14 Jan 2008 02:32:00 GMThttp://www.tkk7.com/baoyaer/articles/175096.htmlhttp://www.tkk7.com/baoyaer/comments/175096.htmlhttp://www.tkk7.com/baoyaer/articles/175096.html#Feedback0http://www.tkk7.com/baoyaer/comments/commentRss/175096.htmlhttp://www.tkk7.com/baoyaer/services/trackbacks/175096.html闃呰鍏ㄦ枃

]]>
Lucene鍩烘湰浣跨敤浠嬬粛http://www.tkk7.com/baoyaer/articles/99661.html澶х敯鏂?/dc:creator>澶х敯鏂?/author>Tue, 13 Feb 2007 03:50:00 GMThttp://www.tkk7.com/baoyaer/articles/99661.htmlhttp://www.tkk7.com/baoyaer/comments/99661.htmlhttp://www.tkk7.com/baoyaer/articles/99661.html#Feedback0http://www.tkk7.com/baoyaer/comments/commentRss/99661.htmlhttp://www.tkk7.com/baoyaer/services/trackbacks/99661.html

浠婂ぉ鐢ㄤ簡涓婰ucene錛屽彂鐜扮綉涓婅櫧鐒朵篃鏈変笉灝戜粙緇嶅畠鐨勬枃妗o紝涓嶈繃寰堝閮藉亸鍚戜粙緇嶆蹇靛憖銆佽璁℃垨鑰呮槸涓浜涙洿涓烘繁鍏ョ殑涓滆タ錛屽浜庡叾鍏ラ棬浣跨敤鐨勪粙緇嶆х殑鏂囨。騫朵笉澶氾紝灝卞啓浜嗚繖涔堜竴綃囥?br />

Lucene 基本使用介绍

鏈枃鐨勭洰鐨勪笉鍦ㄤ簬瀵筁ucene鐨勬蹇靛拰璁捐榪欎簺榪涜浠嬬粛錛屼粎鍦ㄤ簬浠嬬粛鎬庝箞鏍峰幓浣跨敤Lucene鏉ヨ揪鍒拌嚜宸辨兂瑕佺殑鍑犵甯歌鐨勫叏鏂囨绱㈢殑闇姹傦紝濡傛灉鎯蟲繁鍏ヤ簡瑙ucene鐨勮瘽鏈枃涓嶄細(xì)甯︾粰浣犱粈涔堟敹鑾風(fēng)殑銆傜湅瀹屾湰鏂囧悗鎯蟲洿娣卞叆鐨勪簡瑙ucene璇瘋闂細(xì)http://lucene.apache.org

一.  概述

闅忕潃緋葷粺淇℃伅鐨勮秺鏉ヨ秺澶氾紝鎬庝箞鏍蜂粠榪欎簺淇℃伅嫻鋒磱涓崬璧瘋嚜宸辨兂瑕佺殑閭d竴鏍歸拡灝卞彉寰楅潪甯擱噸瑕佷簡錛屽叏鏂囨绱㈡槸閫氬父鐢ㄤ簬瑙e喅姝ょ被闂鐨勬柟妗堬紝鑰孡ucene鍒欎負(fù)瀹炵幇鍏ㄦ枃媯(gè)绱㈢殑宸ュ叿錛屼換浣曞簲鐢ㄩ兘鍙氳繃宓屽叆瀹冩潵瀹炵幇鍏ㄦ枃媯(gè)绱€?/p>

二.  环境搭建

浠巐ucene.apache.org涓婁笅杞芥渶鏂扮増鏈殑lucene.jar錛屽皢姝ar浣滀負(fù)欏圭洰鐨刡uild path錛岄偅涔堝湪欏圭洰涓氨鍙互鐩存帴浣跨敤lucene浜嗐?/p>

三.  使用说明

3.1.       基本概念

榪欓噷浠嬬粛鐨勪富瑕佷負(fù)鍦ㄤ嬌鐢ㄤ腑緇忓父紕板埌涓浜涙蹇碉紝浠ュぇ瀹墮兘姣旇緝鐔熸?zhèn)夌殑鏁版嵁搴撴潵杩涜绫绘瘮鐨勮瑙eQ屼嬌鐢↙ucene榪涜鍏ㄦ枃媯(gè)绱㈢殑榪囩▼鏈夌偣綾諱技鏁版嵁搴撶殑榪欎釜榪囩▼錛宼able---脿鏌ヨ鐩稿簲鐨勫瓧孌墊垨鏌ヨ鏉′歡----脿榪斿洖鐩稿簲鐨勮褰曪紝棣栧厛鏄疘ndexWriter錛岄氳繃瀹冨緩绔嬬浉搴旂殑绱㈠紩琛紝鐩稿綋浜庢暟鎹簱涓殑table錛屽湪鏋勫緩姝ょ儲(chǔ)寮曡〃鏃墮渶鎸囧畾鐨勪負(fù)璇ョ儲(chǔ)寮曡〃閲囩敤浣曠鏂瑰紡榪涜鏋勫緩錛屼篃灝辨槸璇村浜庡叾涓殑璁板綍鐨勫瓧孌典互浠涔堟柟寮忔潵榪涜鏍煎紡鐨勫垝鍒嗭紝榪欎釜鍦↙ucene涓О涓篈nalyzer錛孡ucene鎻愪緵浜嗗嚑縐嶇幆澧冧笅浣跨敤鐨凙nalyzer錛歋impleAnalyzer銆丼tandardAnalyzer銆丟ermanAnalyzer絳夛紝鍏朵腑StandardAnalyzer鏄粡甯鎬嬌鐢ㄧ殑錛屽洜涓哄畠鎻愪緵浜嗗浜庝腑鏂囩殑鏀寔錛屽湪琛ㄥ緩濂藉悗鎴戜滑灝遍渶瑕佸線閲岄潰鎻掑叆鐢ㄤ簬绱㈠紩鐨勮褰曪紝鍦↙ucene涓繖涓О涓篋ocument錛屾湁鐐圭被浼兼暟鎹簱涓璽able鐨勪竴琛岃褰曪紝璁板綍涓殑瀛楁鐨勬坊鍔犳柟娉曪紝鍦↙ucene涓О涓篎ield錛岃繖涓拰鏁版嵁搴撲腑鍩烘湰涓鏍鳳紝瀵逛簬Field Lucene鍒嗕負(fù)鍙绱㈠紩鐨勶紝鍙垏鍒嗙殑錛屼笉鍙鍒囧垎鐨勶紝涓嶅彲琚儲(chǔ)寮曠殑鍑犵緇勫悎綾誨瀷錛岄氳繃榪欏嚑涓厓绱犲熀鏈笂灝卞彲浠ュ緩绔嬭搗绱㈠紩浜嗐傚湪鏌ヨ鏃剁粡甯哥鍒扮殑涓哄彟澶栧嚑涓蹇碉紝棣栧厛鏄疩uery錛孡ucene鎻愪緵浜嗗嚑縐嶇粡甯稿彲浠ョ敤鍒扮殑Query錛歍ermQuery銆丮ultiTermQuery銆丅ooleanQuery銆乄ildcardQuery銆丳hraseQuery銆丳refixQuery銆丳hrasePrefixQuery銆丗uzzyQuery銆丷angeQuery銆丼panQuery錛孮uery鍏跺疄涔熷氨鏄寚瀵逛簬闇瑕佹煡璇㈢殑瀛楁閲囩敤浠涔堟牱鐨勬柟寮忚繘琛屾煡璇紝濡傛ā緋婃煡璇€佽涔夋煡璇€佺煭璇煡璇€佽寖鍥存煡璇€佺粍鍚堟煡璇㈢瓑錛岃繕鏈夊氨鏄疩ueryParser錛孮ueryParser鍙敤浜庡垱寤轟笉鍚岀殑Query錛岃繕鏈変竴涓狹ultiFieldQueryParser鏀寔瀵逛簬澶氫釜瀛楁榪涜鍚屼竴鍏抽敭瀛楃殑鏌ヨ錛孖ndexSearcher姒傚康鎸囩殑涓洪渶瑕佸浣曠洰褰曚笅鐨勭儲(chǔ)寮曟枃浠惰繘琛屼綍縐嶆柟寮忕殑鍒嗘瀽鐨勬煡璇紝鏈夌偣璞″鏁版嵁搴撶殑鍝绱㈠紩琛ㄨ繘琛屾煡璇㈠茍鎸変竴瀹氭柟寮忚繘琛岃褰曚腑瀛楁鐨勫垎瑙f煡璇㈢殑姒傚康錛岄氳繃IndexSearcher浠ュ強(qiáng)Query鍗沖彲鏌ヨ鍑洪渶瑕佺殑緇撴灉錛孡ucene榪斿洖鐨勪負(fù)Hits.閫氳繃閬嶅巻Hits鍙幏鍙栬繑鍥炵殑緇撴灉鐨凞ocument錛岄氳繃Document鍒欏彲鑾峰彇Field涓殑鐩稿叧淇℃伅浜嗐?/p>

閫氳繃瀵逛簬涓婇潰鍦ㄥ緩绔嬬儲(chǔ)寮曞拰鍏ㄦ枃媯(gè)绱㈢殑鍩烘湰姒傚康鐨勪粙緇嶅笇鏈涜兘璁╀綘瀵筁ucene寤虹珛涓瀹氱殑浜嗚В銆?/p>

3.2.       全文检索需求的实现

索引建立部分的代码：


private void createIndex(String indexFilePath) throws Exception{

聽聽聽聽聽聽聽 IndexWriter iwriter=getWriter(indexFilePath);

聽聽聽聽聽聽聽 Document doc=new Document();

聽聽聽聽聽聽聽 doc.add(Field.Keyword("name","jerry"));

聽聽聽聽聽聽聽 doc.add(Field.Text("sender","bluedavy@gmail.com"));

聽聽聽聽聽聽聽 doc.add(Field.Text("receiver","google@gmail.com"));

聽聽聽聽聽聽聽 doc.add(Field.Text("title","鐢ㄤ簬绱㈠紩鐨勬爣棰?));

聽聽聽聽聽聽聽 doc.add(Field.UnIndexed("content","涓嶅緩绔嬬儲(chǔ)寮曠殑鍐呭"));

聽聽聽聽聽聽聽 Document doc2=new Document();

聽聽聽聽聽聽聽 doc2.add(Field.Keyword("name","jerry.lin"));

聽聽聽聽聽聽聽 doc2.add(Field.Text("sender","bluedavy@hotmail.com"));

聽聽聽聽聽聽聽 doc2.add(Field.Text("receiver","msn@hotmail.com"));

聽聽聽聽聽聽聽 doc2.add(Field.Text("title","鐢ㄤ簬绱㈠紩鐨勭浜屼釜鏍囬"));

聽聽聽聽聽聽聽 doc2.add(Field.Text("content","寤虹珛绱㈠紩鐨勫唴瀹?));

聽聽聽聽聽聽聽 iwriter.addDocument(doc);

聽聽聽聽聽聽聽 iwriter.addDocument(doc2);

聽聽聽聽聽聽聽 iwriter.optimize();

聽聽聽聽聽聽聽 iwriter.close();

聽聽聽 }

聽聽聽

聽聽聽 private IndexWriter getWriter(String indexFilePath) throws Exception{

聽聽聽聽聽聽聽 boolean append=true;

聽聽聽聽聽聽聽 File file=new File(indexFilePath+File.separator+"segments");

聽聽聽聽聽聽聽 if(file.exists())

聽聽聽聽聽聽聽聽聽聽聽 append=false;

聽聽聽聽聽聽聽 return new IndexWriter(indexFilePath,analyzer,append);

聽聽聽 }


3.2.1.       对于某字段的关键字的模糊查询


Query query=new WildcardQuery(new Term("sender","*davy*"));

聽聽聽聽聽聽聽

聽聽聽聽聽聽聽 Searcher searcher=new IndexSearcher(indexFilePath);

聽聽聽聽聽聽聽 Hits hits=searcher.search(query);

聽聽聽聽聽聽聽 for (int i = 0; i < hits.length(); i++) {

聽聽聽聽聽聽聽聽聽聽聽 System.out.println(hits.doc(i).get("name"));

聽聽聽聽聽聽聽 }


3.2.2.       对于某字段的关键字的语义查询


Query query=QueryParser.parse("绱㈠紩","title",analyzer);

聽聽聽聽聽聽聽

聽聽聽聽聽聽聽 Searcher searcher=new IndexSearcher(indexFilePath);

聽聽聽聽聽聽聽 Hits hits=searcher.search(query);

聽聽聽聽聽聽聽 for (int i = 0; i < hits.length(); i++) {

聽聽聽聽聽聽聽聽聽聽聽 System.out.println(hits.doc(i).get("name"));

聽聽聽聽聽聽聽 }


3.2.3.       对于多字段的关键字的查询


Query query=MultiFieldQueryParser.parse("绱㈠紩",new String[]{"title","content"},analyzer);

聽聽聽聽聽聽聽

聽聽聽聽聽聽聽 Searcher searcher=new IndexSearcher(indexFilePath);

聽聽聽聽聽聽聽 Hits hits=searcher.search(query);

聽聽聽聽聽聽聽 for (int i = 0; i < hits.length(); i++) {

聽聽聽聽聽聽聽聽聽聽聽 System.out.println(hits.doc(i).get("name"));

聽聽聽聽聽聽聽 }


3.2.4.       复合查询(多种查询条件的综合查询)


Query query=MultiFieldQueryParser.parse("绱㈠紩",new String[]{"title","content"},analyzer);

聽聽聽聽聽聽聽 Query mquery=new WildcardQuery(new Term("sender","bluedavy*"));

聽聽聽聽聽聽聽 TermQuery tquery=new TermQuery(new Term("name","jerry"));

聽聽聽聽聽聽聽

聽聽聽聽聽聽聽 BooleanQuery bquery=new BooleanQuery();

聽聽聽聽聽聽聽 bquery.add(query,true,false);

聽聽聽聽聽聽聽 bquery.add(mquery,true,false);

聽聽聽聽聽聽聽 bquery.add(tquery,true,false);

聽聽聽聽聽聽聽

聽聽聽聽聽聽聽 Searcher searcher=new IndexSearcher(indexFilePath);

聽聽聽聽聽聽聽 Hits hits=searcher.search(bquery);

聽聽聽聽聽聽聽 for (int i = 0; i < hits.length(); i++) {

聽聽聽聽聽聽聽聽聽聽聽 System.out.println(hits.doc(i).get("name"));

聽聽聽聽聽聽聽 }


四.  总结

鐩鎬俊澶у閫氳繃涓婇潰鐨勮鏄庤兘鐭ラ亾Lucene鐨勪竴涓熀鏈殑浣跨敤鏂規(guī)硶錛屽湪鍏ㄦ枃媯(gè)绱㈡椂寤鴻澶у鍏堥噰鐢ㄨ涔夋椂鐨勬悳绱紝鍏堟悳绱㈠嚭鏈夋剰涔夌殑鍐呭錛屼箣鍚庡啀榪涜妯$硦涔嬬被鐨勬悳绱紝^_^錛岃繖涓繕鏄渶瑕佹牴鎹悳绱㈢殑闇姹傛墠鑳藉畾浜嗭紝Lucene榪樻彁渚涗簡寰堝鍏朵粬鏇村ソ鐢ㄧ殑鏂規(guī)硶錛岃繖涓氨絳夊緟澶у鍦ㄤ嬌鐢ㄧ殑榪囩▼涓嚜宸卞幓榪涗竴姝ョ殑鎽哥儲(chǔ)浜嗭紝姣斿瀵逛簬Lucene鏈韓鎻愪緵鐨凲uery鐨勬洿鐔熺粌鐨勬帉鎻★紝瀵逛簬Filter銆丼orter鐨勪嬌鐢紝鑷繁鎵╁睍瀹炵幇Analyzer錛岃嚜宸卞疄鐜癚uery絳夌瓑錛岀敋鑷沖彲浠ュ幓浜嗚В涓浜涘叧浜庢悳绱㈠紩鎿庣殑鎶鏈?鍒囪瘝銆佺儲(chǔ)寮曟帓搴?etc)絳夌瓑銆?br />



]]>
Lucene In Action ch 4 絎旇(I)--Analysishttp://www.tkk7.com/baoyaer/articles/99658.html澶х敯鏂?/dc:creator>澶х敯鏂?/author>Tue, 13 Feb 2007 03:32:00 GMThttp://www.tkk7.com/baoyaer/articles/99658.htmlhttp://www.tkk7.com/baoyaer/comments/99658.htmlhttp://www.tkk7.com/baoyaer/articles/99658.html#Feedback0http://www.tkk7.com/baoyaer/comments/commentRss/99658.htmlhttp://www.tkk7.com/baoyaer/services/trackbacks/99658.html鏈珷璇︾粏鐨勮璁轟簡 Lucene鐨勫垎鏋愬鐞嗚繃紼嬪拰鍑犱釜Analyzer.

鍦╥ndexing榪囩▼涓?瑕佹妸闇瑕乮ndexing鐨則ext鍒嗘瀽澶勭悊涓涓? 緇忚繃澶勭悊鍜屽垏璇?鐒跺悗寤虹珛index. 鑰屼笉閫氱殑Analyzer鏈変笉鍚岀殑鍒嗘瀽瑙勫垯, 鍥犳鍦ㄧ▼搴忎腑浣跨敤Lucene鏃?閫夋嫨姝g‘鐨凙nalyzer鏄緢閲嶈鐨?

1.Using Analyzers

鍦ㄤ嬌鐢ˋnalyzer浠ュ墠 鍏堟潵鐪嬬湅text緇忚繃Analyzer鍒嗘瀽鍚庣殑鏁堟灉鍚?

Listing 4.1 Visualizing analyzer effects
Analyzing "The quick brown fox jumped over the lazy dogs"
聽 WhitespaceAnalyzer:
聽聽聽 [The] [quick] [brown] [fox] [jumped] [over] [the] [lazy] [dogs]
聽 SimpleAnalyzer:
聽聽聽 [the] [quick] [brown] [fox] [jumped] [over] [the] [lazy] [dogs]
聽 StopAnalyzer:
聽聽聽 [quick] [brown] [fox] [jumped] [over] [lazy] [dogs]
聽 StandardAnalyzer:
聽聽聽 [quick] [brown] [fox] [jumped] [over] [lazy] [dogs]

Analyzing "XY&Z Corporation - xyz@example.com"
聽 WhitespaceAnalyzer:
聽聽聽 [XY&Z] [Corporation] [-] [xyz@example.com]
聽 SimpleAnalyzer:
聽聽聽 [xy] [z] [corporation] [xyz] [example] [com]
聽 StopAnalyzer:
聽聽聽 [xy] [z] [corporation] [xyz] [example] [com]
聽 StandardAnalyzer:
聽聽聽 [xy&z] [corporation] [xyz@example.com]

涓婇潰鏄湪涓嬮潰鎴戜滑瑕佹彁鍒扮殑涓涓緥瀛愮殑榪愯緇撴灉. 鍙互鐪嬪嚭涓嶅悓鐨凙nalyzer 鏄浣曟潵鍒嗘瀽text鐨?鍦ㄥ垎鏋怲he quick brown fox jumped over the lazy dogs 鏃? WhitespaceAnalyzer鍜?SimpleAnalyzer鍙槸綆鍗曠殑鎶婅瘝鍒嗗紑,寤虹珛Term灝卞彲浠ヤ簡;鑰屽彟澶栦袱涓狝nalyzer鍒欏幓鎺変簡stop word. 鑰屽湪鍒嗘瀽XY&Z Corporation - xyz@example.com 鐨勬椂鍊?涓嶅悓鐨凙nalyzer 瀵瑰緟 & 鍜?- 鐨勬柟寮忎篃鏄笉涓鏍風(fēng)殑 . 鐜板湪瀵笰nalysis鏈変釜鎰熸х殑浜嗚В,涓嬮潰鏉ョ湅鐪嬩笉鍚屽鐞嗛樁孌電殑鍒嗘瀽榪囩▼.

I. Indexing Analysis

榪樿寰楀湪ch2 indexing 涓?璁插埌 ,鍦ㄥ緩绔媔ndex鏃?浣跨敤IndexWriter 鍦ㄦ瀯閫營ndexWriter鏃?瑕佷嬌鐢ㄥ埌Analyser.濡備笅鎵紺?

Analyzer analyzer = new StandardAnalyzer();

IndexWriter writer = new IndexWriter(directory,

analyzer, true);

鐒跺悗灝卞彲浠ヤ嬌鐢╳riter瀵?document 鏉ndexing浜?濡備笅

Document doc = new Document();

doc.add(

Field.Text("title", "This is the title"));

doc.add(

Field.UnStored("contents", "...document contents..."));

writer.addDocument(doc);

浣跨敤鐨勬槸鍦ㄦ瀯閫營ndexWriter鏃?鎸囧畾鐨凙nalyzer. 濡傛灉瑕佺粰涓涓枃妗e崟鐙寚瀹氫竴涓狝nalyzer 鍙互鐢ㄤ笅闈㈢殑涓涓柟娉?

聽writer.addDocument(doc,analyzer);

II.QueryParser Analysis

聽 Analysis 鏄痶erm鎼滅儲(chǔ)鐨勫叧閿?瑕佺‘淇濈粡榪嘇nalyzer鍒嗘瀽鍚庣殑term鍜岃绱㈠紩鐨勪竴鏍?榪欐牱鎵嶅彲浠ュ緱鍒版悳绱㈢粨鏋?鍦ㄤ嬌鐢≦ueryParser parse 鐢ㄦ埛杈撳叆鐨勬悳绱㈣〃杈懼紡鏃跺彲浠?鎸囧畾涓涓狝nalyzer 濡備笅鎵紺?

Query query = QueryParser.parse(expression, "contents",

analyzer);

閫氳繃QueryParser鐨勯潤鎬佹柟娉曞疄鐜? 濡傛灉浣跨敤QueryParser瀹炰緥, 鍒欏彲浠ュ湪鏋勯燪ueryParser鏃跺?鎻愪緵涓涓狝nalyzer 濡備笅:

QueryParser parser = new QueryParser("contents",

analyzer);

query = parser.parse(expression);

QueryParser

analyzes individual pieces of the expression, not the expression as a

whole, which may include operators, parenthesis, and other special expression

syntax to denote range, wildcard, and fuzzy searches.

QueryParser 騫崇瓑鐨勫垎鏋愭墍鏈夌殑text,濂瑰茍涓嶇煡閬撲粬浠槸濡備綍姣廼ndxed, 榪欐椂濡傛灉褰撴悳绱竴涓绱㈠紩涓篕eyword鐨刦iled鏃?灝卞彲鑳戒細(xì)閬囧埌闂.

榪樻湁涓涓棶棰樺氨鏄湪鍒嗘瀽涓浜涘寘鍚叾浠栧厓绱犵殑text鏃惰濡備綍澶勭悊 ,濡?Html xml 鏂囨。, 浠栦滑閮藉甫鏈夊厓绱犳爣絳?鑰岃繖浜涙爣絳句竴鑸槸涓嶇儲(chǔ)寮曠殑.浠ュ強(qiáng)濡備綍澶勭悊鍒嗗煙(field)绱㈠紩, 濡?Html 鏈塇eader 鍜?Body鍩?濡備綍鍒嗗紑鎼滅儲(chǔ) 榪欎釜闂Analyzer鐜板湪涔熶笉鑳借В鍐崇殑, 鍥犱負(fù)鍦ㄦ瘡嬈nalyzer閮藉鐞嗗崟涓煙. 鍦ㄥ悗闈㈡垜浠湪榪涗竴姝ヨ璁鴻闂.

2. Analyzing the Analyzer

瑕佽緇嗕簡瑙ucene鍒嗘瀽鏂囨湰鐨勮繃紼嬪氨瑕佺煡閬揂nalyzer鏄浣曞伐浣滅殑,涓嬮潰灝辨潵鐪嬬湅Analyzer鏄庝箞宸ヤ綔鐨勫惂. Analyzer鏄悇涓猉XXAnalyzer鐨勫熀綾?,璇ョ被鍑哄鐨勭畝鍗?姣旀垜鎯寵薄鐨勮綆鍗曞浜? 鍙涓涓柟娉?tokenStream(String fieldName, Reader reader); fieldName 鍙傛暟瀵規(guī)湁浜汚nalyzer瀹炵幇鏄病鏈変綔鐢ㄧ殑,濡係impleAnalyzer, 璇ョ被鐨勪唬鐮佸涓?

public final class SimpleAnalyzer extends Analyzer {

聽 public TokenStream tokenStream(String fieldName, Reader reader) {

聽聽聽 return new LowerCaseTokenizer(reader);

聽 }

}

鍙互鐪嬪埌璇ョ被涔熸槸鍑哄鐨勭畝鍗? 鍙敤鍒頒簡LowerCaseTokenizer; 浣哃owerCaseTokenizer鏄共浠涔堢殑鍛? 鐪嬬湅鍚嶅瓧灝卞彲浠ョ寽涓樊涓嶅鍟?,

璇ョ被鎶奣ext 涓潪瀛楁瘝(nonletters)鐨勫瓧絎﹀幓鎺?騫舵妸鎵鏈塗ext杞崲涓哄皬鍐?

鑰岃繑鍥炵殑

TokenStream 鏄竴涓?enumerator-like class ,閫氳繃濂瑰彲浠ュ緱鍒拌繛緇殑 Tokens,褰撳埌杈炬湯灝炬椂鍊欒繑鍥瀗ull.



]]>
Lucene In Action ch 3 絎旇--Add searchhttp://www.tkk7.com/baoyaer/articles/99657.html澶х敯鏂?/dc:creator>澶х敯鏂?/author>Tue, 13 Feb 2007 03:31:00 GMThttp://www.tkk7.com/baoyaer/articles/99657.htmlhttp://www.tkk7.com/baoyaer/comments/99657.htmlhttp://www.tkk7.com/baoyaer/articles/99657.html#Feedback0http://www.tkk7.com/baoyaer/comments/commentRss/99657.htmlhttp://www.tkk7.com/baoyaer/services/trackbacks/99657.html浠婂ぉ鐪嬬湅 ch3, Add search to your Application. 鐪熸寮濮嬩嬌鐢?Lucene search 鏉ユ悳绱綘鐨勭洰鏍囦簡.

1. 实现一个简单的search feature

聽聽 鍦ㄦ湰绔犱腑鍙檺浜庤璁虹畝鍗昄ucene 鎼滅儲(chǔ)API, 鏈変笅闈㈠嚑涓浉鍏崇殑綾?

聽Lucene 鍩烘湰鎼滅儲(chǔ)API:

綾?/p>

鍔熻兘

IndexSearcher鎼滅儲(chǔ)涓涓猧ndex鐨勫叆鍙?鎵鏈夌殑searches閮芥槸閫氳繃IndexSearcher 瀹炰緥鐨勫嚑涓噸杞界殑鏂規(guī)硶瀹炵幇鐨?
Query (and subclasses)鍚勪釜瀛愮被灝佽浜嗙壒瀹氭悳绱㈢被鍨嬬殑閫昏緫(logic),Query瀹炰緥浼犻掔粰IndexSearcher鐨剆earch鏂規(guī)硶.
QueryParser 澶勭悊涓涓彲璇葷殑琛ㄨ揪寮?杞崲涓轟竴涓叿浣撶殑Query瀹炰緥.
Hits 鍖呭惈浜嗘悳绱㈢殑緇撴灉.鏈塈ndexSearcher鐨剆earch鍑芥暟榪斿洖.

涓嬮潰鎴戜滑鏉ョ湅鍑犱釜涔︿腑鐨勪緥瀛?

LiaTestCase.java聽 涓涓戶鎵胯嚜TestCase 騫朵笖鎵╁睍浜員estCase鐨勭被, 涓嬮潰鐨勫嚑涓緥瀛愰兘緇ф壙鑷綾?

01package聽lia.common;
02
03import聽junit.framework.TestCase;
04import聽org.apache.lucene.store.FSDirectory;
05import聽org.apache.lucene.store.Directory;
06import聽org.apache.lucene.search.Hits;
07import聽org.apache.lucene.document.Document;
08
09import聽java.io.IOException;
10import聽java.util.Date;
11import聽java.text.ParseException;
12import聽java.text.SimpleDateFormat;
13
14/**
15*聽LIA聽base聽class聽for聽test聽cases.
16*/
17public聽abstract聽class聽LiaTestCase聽extends聽TestCase聽{
18聽聽private聽String聽indexDir聽=聽System.getProperty("index.dir");聽 // 嫻嬭瘯 index 宸茬粡寤虹珛濂戒簡
19聽聽protected聽Directory聽directory;
20
21聽聽protected聽void聽setUp()聽throws聽Exception聽{
22聽聽聽聽directory聽=聽FSDirectory.getDirectory(indexDir,聽false);
23聽聽}
24
25聽聽protected聽void聽tearDown()聽throws聽Exception聽{
26聽聽聽聽directory.close();
27聽聽}
28
29聽聽/**
30聽聽聽*聽For聽troubleshooting 涓轟簡 瑙e喅闂鐨勬柟娉?/font>
31聽聽聽*/
32聽聽protected聽final聽void聽dumpHits(Hits聽hits)聽throws聽IOException聽{
33聽聽聽聽if聽(hits.length()聽==聽0)聽{
34聽聽聽聽聽聽System.out.println("No聽hits");
35聽聽聽聽}
36
37聽聽聽聽for聽(int聽i=0;聽i聽<聽hits.length();聽i++)聽{
38聽聽聽聽聽聽Document聽doc聽=聽hits.doc(i);
39聽聽聽聽聽聽System.out.println(hits.score(i)聽+聽":"聽+聽doc.get("title"));
40聽聽聽聽}
41聽聽}
42
43聽聽protected聽final聽void聽assertHitsIncludeTitle(
44聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽Hits聽hits,聽String聽title)
45聽聽聽聽throws聽IOException聽{
46聽聽聽聽for聽(int聽i=0;聽i聽<聽hits.length();聽i++)聽{
47聽聽聽聽聽聽Document聽doc聽=聽hits.doc(i);
48聽聽聽聽聽聽if聽(title.equals(doc.get("title")))聽{
49聽聽聽聽聽聽聽聽assertTrue(true);
50聽聽聽聽聽聽聽聽return;
51聽聽聽聽聽聽}
52聽聽聽聽}
53
54聽聽聽聽fail("title聽'"聽+聽title聽+聽"'聽not聽found");
55聽聽}
56
57聽聽protected聽final聽Date聽parseDate(String聽s)聽throws聽ParseException聽{
58聽聽聽聽聽聽return聽new聽SimpleDateFormat("yyyy-MM-dd").parse(s);
59聽聽}
60}

聽 I.鎼滅儲(chǔ)涓涓壒瀹氱殑Term 鍜屽埄鐢≦ueryParser 瑙f瀽鐢ㄦ埛杈撳叆鐨勮〃杈懼紡

聽 瑕佸埄鐢ㄤ竴涓壒瀹氱殑term鎼滅儲(chǔ),浣跨敤TermQuery灝卞彲浠ヤ簡,鍗曚釜term 灝ゅ叾閫傚悎Keyword鎼滅儲(chǔ). 瑙f瀽鐢ㄦ埛杈撳叆鐨勮〃杈懼紡鍙互鏇撮傚悎鐢ㄦ埛鐨勪嬌鐢ㄦ柟寮?鎼滅儲(chǔ)琛ㄨ揪寮忕殑瑙f瀽鏈塓ueryParser鏉ュ畬鎴?濡傛灉琛ㄨ揪寮忚В鏋愰敊璇?浼?xì)鏈夊紓甯告姏鍑? 鍙互鍙栧緱鐩鎬俊鐨勯敊璇俊鎭?浠ヤ究緇欑敤鎴烽傚綋鐨勬彁紺?鍦ㄨВ鏋愯〃杈懼紡鏃?榪橀渶瑕佷竴涓狝nalyzer 鏉ュ垎鏋愮敤鎴風(fēng)殑杈撳叆, 騫舵牴鎹笉鍚岀殑Analyzer鏉ョ敓浜х浉搴旂殑Term鐒跺悗鏋勬垚Query瀹炰緥.

涓嬮潰鐪嬩釜渚嬪瓙鍚?BasicSearchingTest.java

01package聽lia.searching;
02
03import聽lia.common.LiaTestCase;
04import聽org.apache.lucene.analysis.SimpleAnalyzer;
05import聽org.apache.lucene.document.Document;
06import聽org.apache.lucene.index.Term;
07import聽org.apache.lucene.queryParser.QueryParser;
08import聽org.apache.lucene.search.Hits;
09import聽org.apache.lucene.search.IndexSearcher;
10import聽org.apache.lucene.search.Query;
11import聽org.apache.lucene.search.TermQuery;
12
13public聽class聽BasicSearchingTest聽extends聽LiaTestCase聽{
14
15聽聽public聽void聽testTerm()聽throws聽Exception聽{
16聽聽聽聽IndexSearcher聽searcher聽=聽new聽IndexSearcher(directory);
17聽聽聽聽Term聽t聽=聽new聽Term("subject",聽"ant");聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽 // 鏋勯犱竴涓猅erm
18聽聽聽聽Query聽query聽=聽new聽TermQuery(t);
19聽聽聽聽Hits聽hits聽=聽searcher.search(query);聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽 // 鎼滅儲(chǔ)
20聽聽聽聽assertEquals("JDwA",聽1,聽hits.length());聽聽聽聽聽聽聽聽聽聽聽聽 //嫻嬭瘯緇撴灉
21
22聽聽聽聽t聽=聽new聽Term("subject",聽"junit");
23聽聽聽聽hits聽=聽searcher.search(new聽TermQuery(t));聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽
24聽聽聽聽assertEquals(2,聽hits.length());
25
26聽聽聽聽searcher.close();
27聽聽}
28
29聽聽public聽void聽testKeyword()聽throws聽Exception聽{聽 // 嫻嬭瘯鍏抽敭瀛楁悳绱?/font>
30聽聽聽聽IndexSearcher聽searcher聽=聽new聽IndexSearcher(directory);
31聽聽聽聽Term聽t聽=聽new聽Term("isbn",聽"1930110995");聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽 // 鍏抽敭瀛?term
32聽聽聽聽Query聽query聽=聽new聽TermQuery(t);
33聽聽聽聽Hits聽hits聽=聽searcher.search(query);
34聽聽聽聽assertEquals("JUnit聽in聽Action",聽1,聽hits.length());
35聽聽}
36
37聽聽public聽void聽testQueryParser()聽throws聽Exception聽{聽 // 嫻嬭瘯 QueryParser.
38聽聽聽聽IndexSearcher聽searcher聽=聽new聽IndexSearcher(directory);
39
40聽聽聽聽Query聽query聽=聽QueryParser.parse("+JUNIT聽+ANT聽-MOCK",
41聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽"contents",
42聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽new聽SimpleAnalyzer());聽 // 閫氳繃瑙f瀽鎼滅儲(chǔ)琛ㄨ揪寮?榪斿洖涓涓猀uery瀹炰緥
43聽聽聽聽Hits聽hits聽=聽searcher.search(query);
44聽聽聽聽assertEquals(1,聽hits.length());
45聽聽聽聽Document聽d聽=聽hits.doc(0);
46聽聽聽聽assertEquals("Java聽Development聽with聽Ant",聽d.get("title"));
47
48聽聽聽聽query聽=聽QueryParser.parse("mock聽OR聽junit",
49聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽"contents",
50聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽new聽SimpleAnalyzer());聽聽聽聽聽聽聽聽聽聽聽聽聽
// 閫氳繃瑙f瀽鎼滅儲(chǔ)琛ㄨ揪寮?榪斿洖涓涓猀uery瀹炰緥
51聽聽聽聽hits聽=聽searcher.search(query);
52聽聽聽聽assertEquals("JDwA聽and聽JIA",聽2,聽hits.length());
53聽聽}
54}



]]>
Lucene In Action ch 2 絎旇--indexing璇﹁В http://www.tkk7.com/baoyaer/articles/99655.html澶х敯鏂?/dc:creator>澶х敯鏂?/author>Tue, 13 Feb 2007 03:29:00 GMThttp://www.tkk7.com/baoyaer/articles/99655.htmlhttp://www.tkk7.com/baoyaer/comments/99655.htmlhttp://www.tkk7.com/baoyaer/articles/99655.html#Feedback0http://www.tkk7.com/baoyaer/comments/commentRss/99655.htmlhttp://www.tkk7.com/baoyaer/services/trackbacks/99655.htmlLucene In Action ch2 緋葷粺鐨勮瑙d簡 indexing,涓嬮潰灝辨潵鐪嬬湅鍚?

1,indexing的处理过程:

聽 棣栧厛瑕佹妸indexing鐨勬暟鎹漿鎹負(fù)text,鍥犱負(fù)Lucene鍙兘绱㈠紩text,鐒跺悗鐢盇nalysis鏉ヨ繃铏憈ext,鎶婁竴浜沜h1涓彁鍒扮殑鎵璋撶殑stop words 榪囨護(hù)鎺? 鐒跺悗寤虹珛index.寤虹珛鐨刬ndex涓?font face="NewBaskervilleITCbyBT-Italic" size="3">inverted index 涔熷氨鏄墍璋撶殑鍊掓帓绱㈠紩.

2,基本的index操作

聽聽 鍩烘湰鐨勬搷浣?鍖呮嫭 :娣誨姞 鍒犻櫎 鏇存柊.

I . 添加

涓嬮潰鎴戜滑鐪嬩釜渚嬪瓙浠g爜 BaseIndexingTestCase.class

01package聽lia.indexing;
02
03import聽org.apache.lucene.store.Directory;
04import聽org.apache.lucene.store.FSDirectory;
05import聽org.apache.lucene.document.Document;
06import聽org.apache.lucene.document.Field;
07import聽org.apache.lucene.index.IndexWriter;
08import聽org.apache.lucene.index.IndexReader;
09import聽org.apache.lucene.analysis.Analyzer;
10import聽org.apache.lucene.analysis.SimpleAnalyzer;
11
12import聽junit.framework.TestCase;
13import聽java.io.IOException;
14
15/**
16*
17*/
18public聽abstract聽class聽BaseIndexingTestCase聽extends聽TestCase聽{
19聽聽protected聽String[]聽keywords聽=聽{"1",聽"2"};
20聽聽protected聽String[]聽unindexed聽=聽{"Netherlands",聽"Italy"};
21聽聽protected聽String[]聽unstored聽=聽{"Amsterdam聽has聽lots聽of聽bridges",
22聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽"Venice聽has聽lots聽of聽canals"};
23聽聽protected聽String[]聽text聽=聽{"Amsterdam",聽"Venice"};
24聽聽protected聽Directory聽dir;
25聽聽 // setUp 鏂規(guī)硶
26聽聽protected聽void聽setUp()聽throws聽IOException聽{
27聽聽聽聽String聽indexDir聽=
28聽聽聽聽聽聽System.getProperty("java.io.tmpdir",聽"tmp")聽+
29聽聽聽聽聽聽System.getProperty("file.separator")聽+聽"index-dir";
30聽聽聽聽dir聽=聽FSDirectory.getDirectory(indexDir,聽true);
31聽聽聽聽addDocuments(dir);
32聽聽}
33
34聽聽protected聽void聽addDocuments(Directory聽dir)
35聽聽聽聽throws聽IOException聽{
36聽聽聽聽IndexWriter聽writer聽=聽new聽IndexWriter(dir,聽getAnalyzer(),
37聽聽聽聽聽聽true);聽聽聽 // 寰楀埌indexWriter 瀹炰緥
38聽聽聽聽writer.setUseCompoundFile(isCompound());
39聽聽聽聽for聽(int聽i聽=聽0;聽i聽<聽keywords.length;聽i++)聽{
40聽聽聽聽聽聽Document聽doc聽=聽new聽Document();聽聽聽聽聽聽聽 // 娣誨姞鏂囨。
41聽聽聽聽聽聽doc.add(Field.Keyword("id",聽keywords[i]));
42聽聽聽聽聽聽doc.add(Field.UnIndexed("country",聽unindexed[i]));
43聽聽聽聽聽聽doc.add(Field.UnStored("contents",聽unstored[i]));
44聽聽聽聽聽聽doc.add(Field.Text("city",聽text[i]));
45聽聽聽聽聽聽writer.addDocument(doc);
46聽聽聽聽}
47聽聽聽聽writer.optimize();聽聽 // 浼樺寲index
48聽聽聽聽writer.close();
49聽聽}
50聽聽 // 鍙互瑕嗙洊璇ユ柟娉曟彁渚涗笉鍚岀殑Analyzer
51聽聽protected聽Analyzer聽getAnalyzer()聽{
52聽聽聽聽return聽new聽SimpleAnalyzer();
53聽聽}
54聽聽 // 涔熷彲浠ヨ鐩栬鏂規(guī)硶 鎸囧嚭Compound灞炴?鏄惁鏄?
Heterogeneous Documents
55聽聽protected聽boolean聽isCompound()聽{
56聽聽聽聽return聽true;
57聽聽}
58聽聽 // 嫻嬭瘯娣誨姞鏂囨。
59聽聽public聽void聽testIndexWriter()聽throws聽IOException聽{
60聽聽聽聽IndexWriter聽writer聽=聽new聽IndexWriter(dir,聽getAnalyzer(),
61聽聽聽聽聽聽false);
62聽聽聽聽assertEquals(keywords.length,聽writer.docCount());
63聽聽聽聽writer.close();
64聽聽}
65聽聽 // 嫻嬭瘯IndexReader
66聽聽public聽void聽testIndexReader()聽throws聽IOException聽{
67聽聽聽聽IndexReader聽reader聽=聽IndexReader.open(dir);
68聽聽聽聽assertEquals(keywords.length,聽reader.maxDoc());
69聽聽聽聽assertEquals(keywords.length,聽reader.numDocs());
70聽聽聽聽reader.close();
71聽聽}
72}

榪欐槸涓涓祴璇曡秴綾?鍙互琚叾浠栫殑嫻嬭瘯鐢ㄤ緥緇ф壙 鏉ユ祴璇曚笉鍚岀殑鍔熻兘.涓婇潰甯︽湁璇︾粏鐨勬敞閲?

鍦ㄦ坊鍔燜ield鏃? 浼?xì)閬囧埌鍚屼箟璇嶇殑鎯呭?娣誨姞鍚屼箟璇嶇敱涓ょ鏂瑰紡:

聽a.鍒涘緩涓涓悓涔夎瘝璇嶇粍,寰幆娣誨姞鍒癝ingle Strng鐨勪笉鍚孎ield涓?

聽b.鎶婂悓涔夎瘝娣誨姞鍒頒竴涓狟ase word鐨刦ield涓?濡備笅:

String baseWord = "fast";

String synonyms[] = new String[] {"quick", "rapid", "speedy"};

Document doc = new Document();

doc.add(Field.Text("word", baseWord));

for (int i = 0; i < synonyms.length; i++) {

doc.add(Field.Text("word", synonyms[i]));

}

榪欐牱 鍦?/font>Lucene鍐呴儴鎶婃瘡涓瘝閮芥坊鍔犵殑涓涓悕涓簑ord鐨凢ield涓?鍦ㄦ悳绱㈡椂 浣犲彲浠ヤ嬌鐢ㄤ換浣曚竴涓粰瀹氱殑璇嶈.



澶х敯鏂?/a> 2007-02-13 11:29 鍙戣〃璇勮
]]>Lucene In Action ch 1 絎旇 -- 鍩烘湰姒傚康http://www.tkk7.com/baoyaer/articles/99653.html澶х敯鏂?/dc:creator>澶х敯鏂?/author>Tue, 13 Feb 2007 03:28:00 GMThttp://www.tkk7.com/baoyaer/articles/99653.htmlhttp://www.tkk7.com/baoyaer/comments/99653.htmlhttp://www.tkk7.com/baoyaer/articles/99653.html#Feedback0http://www.tkk7.com/baoyaer/comments/commentRss/99653.htmlhttp://www.tkk7.com/baoyaer/services/trackbacks/99653.html鍦ㄧ涓绔犱腑浣滆?涓昏璁蹭簡Lucene 鏄粈涔?鑳界敤鏉ュ共浠涔? 浠ュ強(qiáng)涓涓?indexing 鍜?searching 鐨勪緥瀛? 閫氳繃渚嬪瓙璁茶В浜嗕竴鐐瑰熀鏈?鏍稿績)姒傚康.緇欒鑰呬竴涓熀鏈殑Lucene 姒傚喌. 鐒跺悗鍙堜粙緇嶄簡鐜板湪嫻佽鐨?鎼滅儲(chǔ)妗嗘灦.

鎴戜滑涓昏鏉ョ湅鐪?榪欎釜 indexing and searching 渚嬪瓙 鐒跺悗浜嗚В涓浜涘熀鏈蹇?

package聽lia.meetlucene;

import聽org.apache.lucene.index.IndexWriter;
import聽org.apache.lucene.analysis.standard.StandardAnalyzer;
import聽org.apache.lucene.document.Document;
import聽org.apache.lucene.document.Field;

import聽java.io.File;
import聽java.io.IOException;
import聽java.io.FileReader;
import聽java.util.Date;

/**
*聽This聽code聽was聽originally聽written聽for
*聽Erik's聽Lucene聽intro聽java.net聽article
*/
public聽class聽Indexer聽{

聽聽public聽static聽void聽main(String[]聽args)聽throws聽Exception聽{
聽聽聽聽if聽(args.length聽!=聽2)聽{
聽聽聽聽聽聽throw聽new聽Exception("Usage:聽java聽"聽+聽Indexer.class.getName()
聽聽聽聽聽聽聽聽+聽"聽<index聽dir>聽<data聽dir>");
聽聽聽聽}
聽聽聽聽File聽indexDir聽=聽new聽File(args[0]); // 鍦ㄨ鐩綍涓垱寤篖ucene Incex
聽聽聽聽File聽dataDir聽=聽new聽File(args[1]); // 璇ョ洰褰曚腑瀛樻斁澶囩儲(chǔ)寮曠殑鏂囦歡

聽聽聽聽long聽start聽=聽new聽Date().getTime();
聽聽聽聽int聽numIndexed聽=聽index(indexDir,聽dataDir);
聽聽聽聽long聽end聽=聽new聽Date().getTime();

聽聽聽聽System.out.println("Indexing聽"聽+聽numIndexed聽+聽"聽files聽took聽"
聽聽聽聽聽聽+聽(end聽-聽start)聽+聽"聽milliseconds");
聽聽}

聽聽public聽static聽int聽index(File聽indexDir,聽File聽dataDir)
聽聽聽聽throws聽IOException聽{

聽聽聽聽if聽(!dataDir.exists()聽||聽!dataDir.isDirectory())聽{
聽聽聽聽聽聽throw聽new聽IOException(dataDir
聽聽聽聽聽聽聽聽+聽"聽does聽not聽exist聽or聽is聽not聽a聽directory");
聽聽聽聽}

聽聽聽聽IndexWriter聽writer聽=聽new聽IndexWriter(indexDir,
聽聽聽聽聽聽new聽StandardAnalyzer(),聽true);聽聽聽聽聽聽聽聽聽聽聽聽聽聽 //(1)鍒涘緩 Lucene Index
聽聽聽聽writer.setUseCompoundFile(false);

聽聽聽聽indexDirectory(writer,聽dataDir);

聽聽聽聽int聽numIndexed聽=聽writer.docCount();
聽聽聽聽writer.optimize();
聽聽聽聽writer.close();聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽 // close index
聽聽聽聽return聽numIndexed;
聽聽}

聽聽private聽static聽void聽indexDirectory(IndexWriter聽writer,聽File聽dir)
聽聽聽聽throws聽IOException聽{

聽聽聽聽File[]聽files聽=聽dir.listFiles();

聽聽聽聽for聽(int聽i聽=聽0;聽i聽<聽files.length;聽i++)聽{
聽聽聽聽聽聽File聽f聽=聽files[i];
聽聽聽聽聽聽if聽(f.isDirectory())聽{
聽聽聽聽聽聽聽聽indexDirectory(writer,聽f);聽聽//(2)聽recurse
聽聽聽聽聽聽}聽else聽if聽(f.getName().endsWith(".txt"))聽{
聽聽聽聽聽聽聽聽indexFile(writer,聽f);
聽聽聽聽聽聽}
聽聽聽聽}
聽聽}

聽聽private聽static聽void聽indexFile(IndexWriter聽writer,聽File聽f)
聽聽聽聽throws聽IOException聽{

聽聽聽聽if聽(f.isHidden()聽||聽!f.exists()聽||聽!f.canRead())聽{
聽聽聽聽聽聽return;
聽聽聽聽}

聽聽聽聽System.out.println("Indexing聽"聽+聽f.getCanonicalPath());

聽聽聽聽Document聽doc聽=聽new聽Document();
聽聽聽聽doc.add(Field.Text("contents",聽new聽FileReader(f)));聽 // (3) index file content
聽聽聽聽doc.add(Field.Keyword("filename",聽f.getCanonicalPath())); // (4) index file name
聽聽聽聽writer.addDocument(doc);聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽聽 //(5) add document in Lucene index
聽聽}
}

涓婇潰鐨処ndexer 浣跨敤浜嗗嚑琛?Lucene鐨凙PI, 鏉ndexing 涓涓洰褰曚笅闈㈢殑鏂囦歡. 榪愯鏃跺?闇瑕佷袱涓弬鏁?, 涓涓繚瀛榠ndex鐨勭洰褰曞拰瑕佺儲(chǔ)寮曠殑鏂囦歡鐩綍.

鍦ㄤ笂闈㈢殑綾諱腑,闇瑕佷笅闈㈢殑涓浜汱ucene classes 鏉ユ墽琛?indexing 澶勭悊:

鈻?

IndexWriter

鈻?

Directory

鈻?

Analyzer

鈻?

Document

鈻?

Field

IndexWriter 鏄痠ndexing 澶勭悊鏃剁敤鍒扮殑涓績緇勪歡,璇ョ被create 鏂癷ndex 騫朵笖娣誨姞documents 鍒板凡緇忓瓨鍦ㄧ殑index, BTW,鍦↙ucene涓繕鏈夊埆鐨勬柟娉曟潵鏇存柊index.

Directory: 鐢ㄦ潵瀛樻斁index鏂囦歡鐨勬枃浠剁洰褰?璇ョ被鏄釜鎶借薄綾?鐢ㄥ嚑涓瓙綾誨彲浠ヤ嬌鐢?涓婇潰浣跨敤浜咶ile鏉ヤ唬琛ㄦ枃浠惰礬寰?鍦↙ucene涓敤涓や釜涓昏鐨凞irectory瀛愮被,涓涓狥SDirectory,涓涓?RAMDirectory,鍓嶈呮槸鎶奿ndex淇濆瓨鍒扮‖鐩樹腑鐨?鍚庤呮槸淇濆瓨鍦ㄥ唴瀛樹腑鐨?鍦ㄥ唴瀛樹腑澶勭悊鏁板害褰撶劧灝辯浉搴旂殑蹇竴浜?浜嗕絾鍙傚悎浜庡皬鏂囦歡.

Analyzer: 鍦ㄦ枃浠跺绱㈠紩浠ュ墠瑕佸厛閫氳繃Analyzer鍒嗘瀽,鍘繪帀涓浜涘search鏃犵敤鐨勮瘝璇?濡傝嫳璇腑 鐨勫皬璇?in at a 絳夌瓑,鍦↙ucene涓縐頒負(fù)stop words 鐨勮瘝),榪樺彲浠ュ鐞嗗ぇ灝忓啓鐨勯棶棰?鏄ぇ灝忓啓鐩稿叧鍟?榪樻槸涓嶇浉鍏?,浣跨敤Lucene鏃跺?閫夋嫨Analyzer鏄叧閿?

Document: 浠h〃涓浜汧ields鐨勯泦鍚?鍙互鎯寵薄涓轟竴浜涙暟鎹殑闆嗗悎.

Field: 鍦╥ndex涓殑姣忎竴涓狣ocument涓兘鍖呭惈涓浜?鍛藉悕鐨凢ields 鐢‵ield鏉ユ瀯閫? 姣忎竴涓猣ield閮芥槸鐨勬悳绱㈡槸絎﹀悎瑕佹眰鍜屼笉絎﹀悎瑕佹眰鐨刬ndex涓殑涓浜涙暟鎹?Lucene鎻愪緵浜嗗洓縐嶄笉鍚岀殑Field,

1,Keyword聽 涓嶅垎鏋?鍙儲(chǔ)寮曞拰淇濆瓨,璞′竴浜涚壒孌婁俊鎭?涓嶅彲浠ュ垎鍓茬殑 濡?鐢?shù)璇濆忥L(fēng)爜 緗戠珯 Email 絳?

2,UnIndexed 鏃笉绱㈠紩涔熶笉鍒嗘瀽,鍙槸鎶婂間繚瀛樺湪index涓?璇ョ被鍨嬮傚悎鐢ㄦ潵鏄劇ず鎼滅儲(chǔ)緇撴灉鐨刦ield,浣嗘槸浣犱粠鏉ヤ笉鎼滅儲(chǔ)璇ユ樉紺虹殑鏁版嵁,濡俇RL

3,UnStored UnIndexed鐨勫绔嬮潰, 鍒嗘瀽鍜岀儲(chǔ)寮曚絾鏄笉淇濆瓨鍦╥ndex涓?閫傚悎澶у瀷鏁版嵁 鍙悳绱絾鏄笉鏄劇ず鍘熷鏁版嵁.

4,Text 分析且索引,如果索引数据是String则也保存在index中, 如果是Reader则不保存.



]]>
主站蜘蛛池模板: 亚洲乱码在线观看| 大香人蕉免费视频75| 成人精品综合免费视频| 亚洲国产精品成人精品软件| 国产成人综合亚洲AV第一页 | 国产最新凸凹视频免费| 免费看污成人午夜网站| 一级毛片免费播放| 成人免费一区二区三区| 一区二区免费在线观看| 亚洲丁香婷婷综合久久| 91在线亚洲综合在线| 亚洲av无码专区在线| 亚洲美女视频免费| 久久精品a亚洲国产v高清不卡| 亚洲人成图片小说网站| 亚洲人成国产精品无码| 亚洲国产成人久久综合一区77| 日本免费网站在线观看| 午夜影视在线免费观看| 四虎影视www四虎免费| 成年性生交大片免费看| 97无码免费人妻超级碰碰碰碰| 在线看片韩国免费人成视频| 1000部夫妻午夜免费| 在线人成精品免费视频| 2021在线永久免费视频| 亚洲黄色免费在线观看| 在线a免费观看最新网站| 四虎在线视频免费观看视频| 国产妇乱子伦视频免费| 18禁网站免费无遮挡无码中文| 国产高清免费视频| 亚洲第一成年免费网站| 搡女人免费视频大全| 日韩免费在线观看| www亚洲精品少妇裸乳一区二区| 亚洲精品成人在线| 亚洲色精品88色婷婷七月丁香| 亚洲国产成人片在线观看无码 | 又大又硬又粗又黄的视频免费看|