protected Query getFieldQuery(String field, String queryText) throws ParseException { //需要用analyzer對文本進行分詞 TokenStream source; try { source = analyzer.reusableTokenStream(field, new StringReader(queryText)); source.reset(); } catch (IOException e) { source = analyzer.tokenStream(field, new StringReader(queryText)); } CachingTokenFilter buffer = new CachingTokenFilter(source); TermAttribute termAtt = null; PositionIncrementAttribute posIncrAtt = null; int numTokens = 0; boolean success = false; try { buffer.reset(); success = true; } catch (IOException e) { } //得到TermAttribute和PositionIncrementAttribute,此兩項將決定到底產生什么樣的Query對象 if (success) { if (buffer.hasAttribute(TermAttribute.class)) { termAtt = buffer.getAttribute(TermAttribute.class); } if (buffer.hasAttribute(PositionIncrementAttribute.class)) { posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class); } } int positionCount = 0; boolean severalTokensAtSamePosition = false; boolean hasMoreTokens = false; if (termAtt != null) { try { //遍歷分詞后的所有Token,統計Tokens的個數numTokens,以及positionIncrement的總數,即positionCount。 //當有一次positionIncrement為0的時候,severalTokensAtSamePosition設為true,表示有多個Token處在同一個位置。 hasMoreTokens = buffer.incrementToken(); while (hasMoreTokens) { numTokens++; int positionIncrement = (posIncrAtt != null) ? posIncrAtt.getPositionIncrement() : 1; if (positionIncrement != 0) { positionCount += positionIncrement; } else { severalTokensAtSamePosition = true; } hasMoreTokens = buffer.incrementToken(); } } catch (IOException e) { } } try { //重設buffer,以便生成phrase查詢的時候,term和position可以重新遍歷。 buffer.reset(); source.close(); } catch (IOException e) { } if (numTokens == 0) return null; else if (numTokens == 1) { //如果分詞后只有一個Token,則生成TermQuery String term = null; try { boolean hasNext = buffer.incrementToken(); term = termAtt.term(); } catch (IOException e) { } return newTermQuery(new Term(field, term)); } else { //如果分詞后不只有一個Token if (severalTokensAtSamePosition) { //如果有多個Token處于同一個位置 if (positionCount == 1) { //并且處于同一位置的Token還全部處于第一個位置,則生成BooleanQuery,處于同一位置的Token之間是OR的關系 BooleanQuery q = newBooleanQuery(true); for (int i = 0; i < numTokens; i++) { String term = null; try { boolean hasNext = buffer.incrementToken(); term = termAtt.term(); } catch (IOException e) { } Query currentQuery = newTermQuery(new Term(field, term)); q.add(currentQuery, BooleanClause.Occur.SHOULD); } return q; } else { //如果有多個Token處于同一位置,但不是第一個位置,則生成MultiPhraseQuery。 //所謂MultiPhraseQuery即其可以包含多個phrase,其又一個ArrayList<Term[]> termArrays,每一項都是一個Term的數組,屬于同一個數組的Term表示在同一個位置。它有函數void add(Term[] terms)一次添加一個數組的Term。比如我們要搜索"microsoft app*",其表示多個phrase,"microsoft apple","microsoft application"都算。此時用QueryParser.parse("\"microsoft app*\"")從而生成PhraseQuery是搜不出microsoft apple和microsoft application的,也不能搜出microsoft app,因為*一旦被引號所引,就不算通配符了。所以必須生成MultiPhraseQuery,首先用add(new Term[]{new Term("field", "microsoft")})將microsoft作為一個Term數組添加進去,然后用add(new Term[]{new Term("field", "app"), new Term("field", "apple"), new Term("field", "application")})作為一個Term數組添加進去(算作同一個位置的),則三者都能搜的出來。 MultiPhraseQuery mpq = newMultiPhraseQuery(); mpq.setSlop(phraseSlop); List<Term> multiTerms = new ArrayList<Term>(); int position = -1; for (int i = 0; i < numTokens; i++) { String term = null; int positionIncrement = 1; try { boolean hasNext = buffer.incrementToken(); assert hasNext == true; term = termAtt.term(); if (posIncrAtt != null) { positionIncrement = posIncrAtt.getPositionIncrement(); } } catch (IOException e) { } if (positionIncrement > 0 && multiTerms.size() > 0) { //如果positionIncrement大于零,說明此Term和前一個Term已經不是同一個位置 了,所以原來收集在multiTerms中的Term都算作同一個位置,添加到MultiPhraseQuery中作為一項。并清除 multiTerms,以便重新收集相同位置的Term。 if (enablePositionIncrements) { mpq.add(multiTerms.toArray(new Term[0]),position); } else { mpq.add(multiTerms.toArray(new Term[0])); } multiTerms.clear(); } //將此Term收集到multiTerms中。 position += positionIncrement; multiTerms.add(new Term(field, term)); } //當遍歷完所有的Token,同處于最后一個位置的Term已經收集到multiTerms中了,把他們加到MultiPhraseQuery中作為一項。 if (enablePositionIncrements) { mpq.add(multiTerms.toArray(new Term[0]),position); } else { mpq.add(multiTerms.toArray(new Term[0])); } return mpq; } } else { //如果不存在多個Token處于同一個位置的情況,則直接生成PhraseQuery PhraseQuery pq = newPhraseQuery(); pq.setSlop(phraseSlop); int position = -1; for (int i = 0; i < numTokens; i++) { String term = null; int positionIncrement = 1; try { boolean hasNext = buffer.incrementToken(); assert hasNext == true; term = termAtt.term(); if (posIncrAtt != null) { positionIncrement = posIncrAtt.getPositionIncrement(); } } catch (IOException e) { } if (enablePositionIncrements) { position += positionIncrement; pq.add(new Term(field, term),position); } else { pq.add(new Term(field, term)); } } return pq; } } } |