private void initBiggerDiagonal(IndexReader reader) throws IOException {
  logger.info("Initializing Spatial Indexes for Queries Strategies");
  if (biggerDiagonal == null) {
    biggerDiagonal = (Double) IndexReaderPersistentCache.get(reader, biggerDiagonalCacheKey);
    twiceBiggerDiagonal = (Double) IndexReaderPersistentCache.get(reader, twiceBiggerDiagonalCacheKey);
    if (biggerDiagonal == null || twiceBiggerDiagonal == null) {
      biggerDiagonal = 0.0;
      Term last = null;
      TermEnum termEnum = reader.terms(new Term(Globals.LUCENE_DIAGONAL_INDEX, ""));
      if (termEnum.term() != null && termEnum.term().field().equals(Globals.LUCENE_DIAGONAL_INDEX))
        last = termEnum.term();
      if (termEnum.term() != null)
        while (termEnum.next())
          if (termEnum.term().field().equals(Globals.LUCENE_DIAGONAL_INDEX))
            last = termEnum.term();
      if (last != null) {
        biggerDiagonal = NumberUtils.SortableStr2double(last.text());
        logger.info("Found bigger spatial width:" + biggerDiagonal);
      }
      twiceBiggerDiagonal = 2 * biggerDiagonal;
      halfBiggerDiagonal = biggerDiagonal / 2.0;
      logger.info("Defining twice bigger spatial width:" + twiceBiggerDiagonal);
      termEnum.close();
      IndexReaderPersistentCache.put(biggerDiagonalCacheKey, biggerDiagonal, reader);
      IndexReaderPersistentCache.put(twiceBiggerDiagonalCacheKey, twiceBiggerDiagonal, reader);
    }
  }
  if (biggerInternalCircleRadium == null) {
    biggerInternalCircleRadium = (Double) IndexReaderPersistentCache.get(reader, biggerRadiumCacheKey);
    if (biggerInternalCircleRadium == null) {
      biggerInternalCircleRadium = 0.0;
      Term last = null;
      TermEnum termEnum = reader.terms(new Term(Globals.LUCENE_RADIUM_INDEX, ""));
      if (termEnum.term() != null && termEnum.term().field().equals(Globals.LUCENE_RADIUM_INDEX))
        last = termEnum.term();
      if (termEnum.term() != null)
        while (termEnum.next())
          if (termEnum.term().field().equals(Globals.LUCENE_RADIUM_INDEX))
            last = termEnum.term();
      if (last != null) {
        biggerInternalCircleRadium = NumberUtils.SortableStr2double(last.text());
        logger.info("Found bigger internal circle radius:" + biggerInternalCircleRadium);
      }
      termEnum.close();
      IndexReaderPersistentCache.put(biggerRadiumCacheKey, biggerInternalCircleRadium, reader);
    }
  }
}
@Override
public void writeTo(StreamOutput out) throws IOException {
  out.writeVInt(1); // version
  out.writeUTF(uid.field());
  out.writeUTF(uid.text());
  out.writeLong(version);
}
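// A hedged sketch of the matching read side (hypothetical helper; the real deserializer
// may live elsewhere and differ): it reads the fields in exactly the order writeTo wrote
// them, which is the contract this kind of stream serialization relies on.
public static void readFrom(StreamInput in) throws IOException {
  int formatVersion = in.readVInt(); // the version marker written first above
  String field = in.readUTF();
  String text = in.readUTF();
  long docVersion = in.readLong();
  // reconstruct state: new Term(field, text) plus docVersion
}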
@Override
public void visitMatchingTerms(IndexReader reader, String fieldName, MatchingTermVisitor mtv)
    throws IOException {
  boolean expanded = false;
  int prefixLength = prefix.length();
  TermEnum enumerator = reader.terms(new Term(fieldName, prefix));
  Matcher matcher = pattern.matcher("");
  try {
    do {
      Term term = enumerator.term();
      if (term != null) {
        String text = term.text();
        if ((!text.startsWith(prefix)) || (!term.field().equals(fieldName))) {
          break;
        } else {
          matcher.reset(text.substring(prefixLength));
          if (matcher.matches()) {
            mtv.visitMatchingTerm(term);
            expanded = true;
          }
        }
      }
    } while (enumerator.next());
  } finally {
    enumerator.close();
    matcher.reset();
  }
  if (!expanded) {
    System.out.println("No terms in " + fieldName + " field for: " + toString());
  }
}
public void testSimpleSkip() throws IOException {
  Directory dir = new CountingRAMDirectory(new RAMDirectory());
  IndexWriter writer =
      new IndexWriter(
          dir,
          newIndexWriterConfig(TEST_VERSION_CURRENT, new PayloadAnalyzer())
              .setCodec(_TestUtil.alwaysPostingsFormat(new Lucene40PostingsFormat()))
              .setMergePolicy(newLogMergePolicy()));
  Term term = new Term("test", "a");
  for (int i = 0; i < 5000; i++) {
    Document d1 = new Document();
    d1.add(newTextField(term.field(), term.text(), Field.Store.NO));
    writer.addDocument(d1);
  }
  writer.commit();
  writer.forceMerge(1);
  writer.close();
  AtomicReader reader = getOnlySegmentReader(DirectoryReader.open(dir));
  for (int i = 0; i < 2; i++) {
    counter = 0;
    DocsAndPositionsEnum tp = reader.termPositionsEnum(term);
    checkSkipTo(tp, 14, 185); // no skips
    checkSkipTo(tp, 17, 190); // one skip on level 0
    checkSkipTo(tp, 287, 200); // one skip on level 1, two on level 0
    // this test would fail if we had only one skip level,
    // because then more bytes would be read from the freqStream
    checkSkipTo(tp, 4800, 250); // one skip on level 2
  }
}
public float queryScore(float idf) {
  return (float) Math.log(1 + term.text().length())
      * dictidf * dictidf
      * (fromfreq + boost(fromfield))
      * idf;
}
/**
 * This is best effort only: the PhraseQuery may contain multiple terms at the same position
 * (think synonyms) or gaps (think stopwords), in which case it is impossible to translate it
 * into a correct Elasticsearch query.
 */
private static JsonObject convertPhraseQuery(PhraseQuery query) {
  Term[] terms = query.getTerms();
  if (terms.length == 0) {
    throw LOG.cannotQueryOnEmptyPhraseQuery();
  }
  String field = terms[0].field(); // phrase queries only support one field
  StringBuilder phrase = new StringBuilder();
  for (Term term : terms) {
    phrase.append(" ").append(term.text());
  }
  JsonObject phraseQuery =
      JsonBuilder.object()
          .add(
              "match_phrase",
              JsonBuilder.object()
                  .add(
                      field,
                      JsonBuilder.object()
                          .addProperty("query", phrase.toString().trim())
                          .addProperty("slop", query.getSlop())
                          .addProperty("boost", query.getBoost())))
          .build();
  return wrapQueryForNestedIfRequired(field, phraseQuery);
}
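// A minimal usage sketch, assuming the older Lucene PhraseQuery API with add()/setSlop()
// (consistent with the getBoost() call above). For the phrase "hello world" on field
// "title" with slop 1, convertPhraseQuery would produce roughly:
//   {"match_phrase": {"title": {"query": "hello world", "slop": 1, "boost": 1.0}}}
PhraseQuery pq = new PhraseQuery();
pq.add(new Term("title", "hello"));
pq.add(new Term("title", "world"));
pq.setSlop(1);
JsonObject json = convertPhraseQuery(pq);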
private static TInfo parseTerm(FunctionQParser fp) throws SyntaxError {
  TInfo tinfo = new TInfo();
  tinfo.indexedField = tinfo.field = fp.parseArg();
  tinfo.val = fp.parseArg();
  tinfo.indexedBytes = new BytesRef();
  FieldType ft = fp.getReq().getSchema().getFieldTypeNoEx(tinfo.field);
  if (ft == null) ft = new StrField();
  if (ft instanceof TextField) {
    // need to do analysis on the term
    String indexedVal = tinfo.val;
    Query q = ft.getFieldQuery(fp, fp.getReq().getSchema().getFieldOrNull(tinfo.field), tinfo.val);
    if (q instanceof TermQuery) {
      Term term = ((TermQuery) q).getTerm();
      tinfo.indexedField = term.field();
      indexedVal = term.text();
    }
    UnicodeUtil.UTF16toUTF8(indexedVal, 0, indexedVal.length(), tinfo.indexedBytes);
  } else {
    ft.readableToIndexed(tinfo.val, tinfo.indexedBytes);
  }
  return tinfo;
}
/**
 * @param clause the boolean clause the query came from, or null
 * @param clauseQuery the query to extract terms from
 * @param ands accumulator for terms that must all match
 * @param ors accumulator for terms of which any may match
 */
private void extractTerms(
    BooleanClause clause,
    org.apache.lucene.search.Query clauseQuery,
    Map<String, Object> ands,
    Map<String, Object> ors) {
  Set<Term> terms = Sets.newHashSet();
  clauseQuery.extractTerms(terms);
  for (Term term : terms) {
    if (clause != null && clause.getOccur() == Occur.SHOULD) {
      accumulateValue(ors, term.field(), term.text());
    } else {
      accumulateValue(ands, term.field(), term.text());
    }
  }
}
@Override
protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException {
  if (maxEdits == 0 || prefixLength >= term.text().length()) {
    // can only match if it's exact
    return new SingleTermsEnum(terms.iterator(), term.bytes());
  }
  return new FuzzyTermsEnum(terms, atts, getTerm(), maxEdits, prefixLength, transpositions);
}
public boolean skipTo(Term target) throws IOException {
  // already here
  if (t != null && t.equals(target)) return true;

  int startIdx = tindex.index.search(target.text());

  if (startIdx >= 0) {
    // we hit the term exactly... lucky us!
    if (tenum != null) tenum.close();
    tenum = reader.terms(target);
    pos = startIdx << tindex.intervalBits;
    return setTerm();
  }

  // we didn't hit the term exactly
  startIdx = -startIdx - 1;

  if (startIdx == 0) {
    // our target occurs *before* the first term
    if (tenum != null) tenum.close();
    tenum = reader.terms(target);
    pos = 0;
    return setTerm();
  }

  // back up to the start of the block
  startIdx--;

  if ((pos >> tindex.intervalBits) == startIdx
      && t != null
      && t.text().compareTo(target.text()) <= 0) {
    // we are already in the right block and the current term is before the term we want,
    // so we don't need to seek.
  } else {
    // seek to the right block
    if (tenum != null) tenum.close();
    tenum = reader.terms(target.createTerm(tindex.index.get(startIdx)));
    pos = startIdx << tindex.intervalBits;
    setTerm(); // should be true since it's in the index
  }

  while (t != null && t.text().compareTo(target.text()) < 0) {
    next();
  }

  return t != null;
}
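// A worked example of the block arithmetic above (a sketch; the values are made up,
// assuming intervalBits = 7, i.e. one indexed term every 1 << 7 = 128 terms): a
// binary-search miss of -3 maps to insertion point 2, we back up to block 1, and the
// enum lands on term ordinal 128, from which next() walks forward to the target.
public static void main(String[] args) {
  int intervalBits = 7;               // hypothetical index interval
  int startIdx = -3;                  // hypothetical miss returned by the binary search
  startIdx = -startIdx - 1;           // insertion point: 2
  startIdx--;                         // start of the containing block: 1
  int pos = startIdx << intervalBits; // term ordinal of the block start
  System.out.println(pos);            // 128
}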
protected Query blendTermQuery(Term term, MappedFieldType fieldType) {
  if (fuzziness != null) {
    if (fieldType != null) {
      try {
        Query query =
            fieldType.fuzzyQuery(
                term.text(), fuzziness, fuzzyPrefixLength, maxExpansions, transpositions);
        if (query instanceof FuzzyQuery) {
          QueryParsers.setRewriteMethod((FuzzyQuery) query, fuzzyRewriteMethod);
        }
        return query;
      } catch (RuntimeException e) {
        return new TermQuery(term); // See long comment below about why we're lenient here.
      }
    }
    int edits = fuzziness.asDistance(term.text());
    FuzzyQuery query = new FuzzyQuery(term, edits, fuzzyPrefixLength, maxExpansions, transpositions);
    QueryParsers.setRewriteMethod(query, fuzzyRewriteMethod);
    return query;
  }
  if (fieldType != null) {
    /*
     * It's a bit weird to default to lenient here, but it's the backwards
     * compatible behavior. It makes some sense when you think about what we are
     * doing here: at this point the user has forced an analyzer and
     * passed some string to the match query. We cut it up using the
     * analyzer and then tried to cram whatever we got into the field.
     * lenient=true here means that we try the terms in the query and on
     * the off chance that they are actually valid terms we
     * actually try them. lenient=false would mean that we blow up the
     * query if they aren't valid terms. "valid" in this context means
     * "parses properly to something of the type being queried." So "1"
     * is a valid number, etc.
     *
     * We use the text form here because we've received the term from
     * an analyzer that cut some string into text.
     */
    Query query = termQuery(fieldType, term.bytes(), true);
    if (query != null) {
      return query;
    }
  }
  return new TermQuery(term);
}
/**
 * Gets the global term frequency of a term, i.e. how many times it occurs in the whole corpus.
 *
 * @param term whose frequency you want
 * @return Global term frequency of term, or 1 if unavailable.
 */
private int getGlobalTermFreq(Term term) {
  int tf = 0;
  try {
    TermDocs tDocs = this.indexReader.termDocs(term);
    if (tDocs == null) {
      logger.info("Couldn't get term frequency for term " + term.text());
      return 1;
    }
    while (tDocs.next()) {
      tf += tDocs.freq();
    }
    tDocs.close();
  } catch (IOException e) {
    logger.info("Couldn't get term frequency for term " + term.text());
    return 1;
  }
  return tf;
}
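// A hedged usage sketch of the distinction this method cares about (assumes an
// `indexReader` over a two-document corpus, "cat cat dog" and "cat", in field "f"):
// docFreq counts documents containing the term, while this method sums occurrences.
Term cat = new Term("f", "cat");
int df = indexReader.docFreq(cat); // 2: documents containing "cat"
int gtf = getGlobalTermFreq(cat);  // 3: total occurrences across the corpus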
protected boolean setTerm() {
  t = tenum.term();
  if (t == null
      || t.field() != tindex.fterm.field() // intern'd compare
      || (tindex.prefix != null && !t.text().startsWith(tindex.prefix, 0))) {
    t = null;
    return false;
  }
  return true;
}
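// Why the identity compare above can work (a sketch): in old Lucene, Term field names
// were interned, so == is a cheap equality check for terms from the same reader.
String a = "body".intern();
String b = new String("body").intern();
System.out.println(a == b); // true: interned strings share one pooled instance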
/* (non-Javadoc)
 * @see org.apache.hadoop.io.Writable#write(java.io.DataOutput)
 */
public void write(DataOutput out) throws IOException {
  out.writeInt(deleteList.size());
  for (Term term : deleteList) {
    Text.writeString(out, term.field());
    Text.writeString(out, term.text());
  }
  String[] files = dir.list();
  RAMDirectoryUtil.writeRAMFiles(out, dir, files);
}
@Override
public String toString(String field) {
  StringBuilder buffer = new StringBuilder();
  if (!term.field().equals(field)) {
    buffer.append(term.field());
    buffer.append(":");
  }
  buffer.append(term.text());
  buffer.append(ToStringUtils.boost(getBoost()));
  return buffer.toString();
}
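// A usage sketch (assuming this is the toString(String) of a term-based query such as
// TermQuery): the field prefix is printed only when it differs from the default field,
// and ToStringUtils.boost() appends nothing for the default boost of 1.0.
Query q = new TermQuery(new Term("title", "lucene"));
System.out.println(q.toString("title")); // lucene
System.out.println(q.toString("body"));  // title:lucene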
public int[] toDocsArray(Term term, Bits bits, IndexReader reader) throws IOException {
  Fields fields = MultiFields.getFields(reader);
  Terms cterms = fields.terms(term.field());
  TermsEnum ctermsEnum = cterms.iterator();
  if (ctermsEnum.seekExact(new BytesRef(term.text()))) {
    PostingsEnum postingsEnum = TestUtil.docs(random(), ctermsEnum, bits, null, PostingsEnum.NONE);
    return toArray(postingsEnum);
  }
  return null;
}
private static SimpleOrderedMap<Object> getDocumentFieldsInfo(
    Document doc, int docId, IndexReader reader, IndexSchema schema) throws IOException {
  SimpleOrderedMap<Object> finfo = new SimpleOrderedMap<Object>();
  for (Object o : doc.getFields()) {
    Fieldable fieldable = (Fieldable) o;
    SimpleOrderedMap<Object> f = new SimpleOrderedMap<Object>();
    SchemaField sfield = schema.getFieldOrNull(fieldable.name());
    FieldType ftype = (sfield == null) ? null : sfield.getType();
    f.add("type", (ftype == null) ? null : ftype.getTypeName());
    f.add("schema", getFieldFlags(sfield));
    f.add("flags", getFieldFlags(fieldable));
    Term t =
        new Term(
            fieldable.name(),
            ftype != null ? ftype.storedToIndexed(fieldable) : fieldable.stringValue());
    // TODO: this really should be "stored"
    f.add("value", (ftype == null) ? null : ftype.toExternal(fieldable));
    f.add("internal", fieldable.stringValue()); // may be a binary number
    byte[] arr = fieldable.getBinaryValue();
    if (arr != null) {
      f.add("binary", Base64.byteArrayToBase64(arr, 0, arr.length));
    }
    f.add("boost", fieldable.getBoost());
    // this can be 0 for non-indexed fields
    f.add("docFreq", t.text() == null ? 0 : reader.docFreq(t));
    // If we have a term vector, return that
    if (fieldable.isTermVectorStored()) {
      try {
        TermFreqVector v = reader.getTermFreqVector(docId, fieldable.name());
        if (v != null) {
          SimpleOrderedMap<Integer> tfv = new SimpleOrderedMap<Integer>();
          for (int i = 0; i < v.size(); i++) {
            tfv.add(v.getTerms()[i], v.getTermFrequencies()[i]);
          }
          f.add("termVector", tfv);
        }
      } catch (Exception ex) {
        log.warn("error writing term vector", ex);
      }
    }
    finfo.add(fieldable.name(), f);
  }
  return finfo;
}
protected Term getAnalyzedTerm(TokenType tokenType, String termString) throws IOException {
  Term term = getTerm(termString, tokenType); // first ensure that we've stripped any prefixes
  TokenStream tokenStream = analyzer.tokenStream(term.field(), new StringReader(term.text()));
  tokenStream.reset();
  CharTermAttribute termAtt = tokenStream.addAttribute(CharTermAttribute.class);
  StringBuilder sb = new StringBuilder();
  while (tokenStream.incrementToken()) {
    sb.append(termAtt.toString());
  }
  tokenStream.end();
  tokenStream.close();
  return new Term(term.field(), sb.toString());
}
/**
 * Computes a term frequency map for the index at the specified location. "Most frequent" is
 * defined as the terms whose frequencies are greater than or equal to topTermCutoff * the
 * frequency of the top term, where topTermCutoff is a number between 0 and 1.
 *
 * @return the list of most frequent terms, in descending frequency order
 * @throws CorruptIndexException
 * @throws IOException
 */
protected ArrayList<String> retrieveTopTerms() throws CorruptIndexException, IOException {
  final Map<String, Integer> frequencyMap = new HashMap<String, Integer>();
  List<String> termlist = new ArrayList<String>();
  IndexReader reader = IndexReader.open(ramdir);
  TermEnum terms = reader.terms();
  while (terms.next()) {
    Term term = terms.term();
    String termText = term.text();
    int frequency = reader.docFreq(term);
    frequencyMap.put(termText, frequency);
    termlist.add(termText);
  }
  reader.close();
  // sort the term list by frequency descending
  Collections.sort(
      termlist,
      new Comparator<String>() {
        @Override
        public int compare(String term1, String term2) {
          int term1Freq = frequencyMap.get(term1);
          int term2Freq = frequencyMap.get(term2);
          if (term1Freq < term2Freq) return 1;
          if (term1Freq > term2Freq) return -1;
          return 0;
        }
      });
  // retrieve the top terms based on topTermCutoff
  ArrayList<String> topTerms = new ArrayList<String>();
  double topFreq = -1.0;
  for (String term : termlist) {
    if (topFreq < 0.0) {
      // first term, capture the value
      topFreq = frequencyMap.get(term);
      topTerms.add(term);
    } else {
      // not the first term; compute the ratio and discard if below the topTermCutoff score
      double ratio = frequencyMap.get(term) / topFreq;
      if (ratio >= topTermCutoff) {
        topTerms.add(term);
      } else {
        break;
      }
    }
  }
  return topTerms;
}
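// A worked example of the cutoff rule above (a sketch; the frequencies are made up).
// With topTermCutoff = 0.4 and a top term occurring in 100 docs, every kept term must
// occur in at least 0.4 * 100 = 40 docs; the first term below that ends the scan.
public static void main(String[] args) {
  double topTermCutoff = 0.4;
  int[] sortedFreqs = {100, 80, 45, 39, 20}; // already in descending order
  double topFreq = sortedFreqs[0];
  for (int freq : sortedFreqs) {
    if (freq / topFreq < topTermCutoff) break; // 39/100 = 0.39 < 0.4: stop here
    System.out.println(freq);                  // prints 100, 80, 45
  }
}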
@SuppressWarnings({"StringEquality"}) @Override public void run() { TermDocs termDocs = null; TermEnum termEnum = null; try { BloomFilter filter = BloomFilterFactory.getFilter(reader.numDocs(), 15); termDocs = reader.termDocs(); termEnum = reader.terms(new Term(field)); do { Term term = termEnum.term(); if (term == null || term.field() != field) break; // LUCENE MONITOR: 4.0, move to use bytes! UnicodeUtil.UTF8Result utf8Result = Unicode.fromStringAsUtf8(term.text()); termDocs.seek(termEnum); while (termDocs.next()) { // when traversing, make sure to ignore deleted docs, so the key->docId will be correct if (!reader.isDeleted(termDocs.doc())) { filter.add(utf8Result.result, 0, utf8Result.length); } } } while (termEnum.next()); ConcurrentMap<String, BloomFilterEntry> fieldCache = cache.get(reader.getFieldCacheKey()); if (fieldCache != null) { if (fieldCache.containsKey(field)) { BloomFilterEntry filterEntry = new BloomFilterEntry(reader.numDocs(), filter); filterEntry.loading.set(false); fieldCache.put(field, filterEntry); } } } catch (Exception e) { logger.warn("failed to load bloom filter for [{}]", e, field); } finally { try { if (termDocs != null) { termDocs.close(); } } catch (IOException e) { // ignore } try { if (termEnum != null) { termEnum.close(); } } catch (IOException e) { // ignore } } }
/**
 * Gets the global term frequencies and writes them to the index directory.
 *
 * @throws Exception the exception
 */
public void getGlobalTermFrequencies() throws Exception {
  String parentDir =
      Flags.rootDir + (Flags.positional ? "/positional-" : "/") + "lucene/" + Flags.suffix;
  File file = new File(parentDir);
  indexReader = IndexReader.open(FSDirectory.open(file));
  TermEnum terms = indexReader.terms();
  BufferedWriter out =
      new BufferedWriter(new FileWriter(new File(parentDir + "/globalTermFreq.txt")));
  while (terms.next()) {
    org.apache.lucene.index.Term term = terms.term();
    out.write(term.text() + " " + getGlobalTermFreq(term) + "\n");
  }
  out.close();
  indexReader.close();
}
private UidField.DocIdAndVersion loadCurrentVersionFromIndex(
    BloomCache bloomCache, Engine.Searcher searcher, Term uid) {
  UnicodeUtil.UTF8Result utf8 = Unicode.fromStringAsUtf8(uid.text());
  for (IndexReader reader : searcher.searcher().subReaders()) {
    BloomFilter filter = bloomCache.filter(reader, UidFieldMapper.NAME, true);
    // we know that it's not there...
    if (!filter.isPresent(utf8.result, 0, utf8.length)) {
      continue;
    }
    UidField.DocIdAndVersion docIdAndVersion = UidField.loadDocIdAndVersion(reader, uid);
    // either -2 (it's there, but no version associated), or an actual version
    if (docIdAndVersion.docId != -1) {
      return docIdAndVersion;
    }
  }
  return null;
}
@Override
public Query blendTerm(Term term, MappedFieldType fieldType) {
  if (blendedFields == null) {
    return super.blendTerm(term, fieldType);
  }
  final Term[] terms = new Term[blendedFields.length];
  float[] blendedBoost = new float[blendedFields.length];
  for (int i = 0; i < blendedFields.length; i++) {
    terms[i] = blendedFields[i].newTerm(term.text());
    blendedBoost[i] = blendedFields[i].boost;
  }
  if (commonTermsCutoff != null) {
    return BlendedTermQuery.commonTermsBlendedQuery(terms, blendedBoost, false, commonTermsCutoff);
  }
  if (tieBreaker == 1.0f) {
    return BlendedTermQuery.booleanBlendedQuery(terms, blendedBoost, false);
  }
  return BlendedTermQuery.dismaxBlendedQuery(terms, blendedBoost, tieBreaker);
}
/**
 * Process properties to query sparse content directly.
 *
 * @param request the request from which the storage session is resolved
 * @param query the query whose terms are turned into sparse match properties
 * @param asAnon whether to execute the search as the anonymous user
 * @return the search result set backed by the matching sparse content items
 * @throws StorageClientException
 * @throws AccessDeniedException
 */
private SolrSearchResultSet processSparseQuery(
    SlingHttpServletRequest request, Query query, boolean asAnon)
    throws StorageClientException, AccessDeniedException, ParseException {
  // use solr parsing to get the terms from the query string
  QueryParser parser =
      new QueryParser(Version.LUCENE_40, "id", new TextField().getQueryAnalyzer());
  org.apache.lucene.search.Query luceneQuery = parser.parse(query.getQueryString());
  Set<Term> terms = Sets.newHashSet();
  luceneQuery.extractTerms(terms);
  Map<String, Object> props = Maps.newHashMap();
  for (Term term : terms) {
    props.put(term.field(), term.text());
  }
  Session session =
      StorageClientUtils.adaptToSession(
          request.getResourceResolver().adaptTo(javax.jcr.Session.class));
  ContentManager cm = session.getContentManager();
  Iterable<Content> items = cm.find(props);
  SolrSearchResultSet rs = new SparseSearchResultSet(items);
  return rs;
}
/**
 * Utility method to dump out all fields (name and terms) for a given index.
 *
 * @param outFile File to dump to.
 * @throws IOException
 */
public void dumpFields(File outFile) throws IOException {
  FileWriter writer = null;
  try {
    writer = new FileWriter(outFile);
    PrintWriter out = new PrintWriter(writer);
    Set<String> fieldNames = getFacetNames();
    for (String fieldName : fieldNames) {
      TermEnum te = terms(new Term(fieldName, ""));
      out.write(fieldName + ":\n");
      while (te.next()) {
        Term term = te.term();
        if (!fieldName.equals(term.field())) {
          break;
        }
        out.write(term.text() + "\n");
      }
      te.close();
      out.write("\n\n");
    }
    out.flush(); // PrintWriter buffers; flush before the underlying writer is closed
  } finally {
    if (writer != null) {
      writer.close();
    }
  }
}
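// An example of the dump format the method above produces (a sketch for a hypothetical
// index with a "color" facet field): the field name, its terms one per line, then a
// blank-line separator before the next field.
//
//   color:
//   blue
//   green
//   red
//
//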
private Map<String, Query> getSingleTermQueries(
    String termQuery, TokenType tokenType, boolean collapse) throws IOException {
  Map<String, Query> queriesMap = new HashMap<String, Query>();
  if (termQuery.contains(WILDCARD_ASTERISK) || termQuery.contains(WILDCARD_QUESTION)) {
    // contains a wildcard
    Term term = getTerm(termQuery, tokenType);
    Query query = getWildCardQuery(term);
    if (collapse) {
      // treat all wildcard variants as a single term
      queriesMap.put(termQuery, query);
    } else {
      // separate each wildcard term into its own query
      Set<Term> terms = new HashSet<Term>();
      Weight weight = query.createWeight(indexSearcher, false);
      weight.extractTerms(terms);
      for (Term t : terms) {
        // we don't need to analyze the term here since it's already from the index
        queriesMap.put(t.text(), getTermQuery(t));
      }
    }
  } else {
    // regular term (we hope)
    Term term = getAnalyzedTerm(tokenType, termQuery); // analyze it first
    queriesMap.put(termQuery, getTermQuery(term));
  }
  return queriesMap;
}
DumbRegexpQuery(Term term, int flags) {
  super(term.field());
  RegExp re = new RegExp(term.text(), flags);
  automaton = re.toAutomaton();
}
private void dumpTerms() throws IOException {
  outputBanner("Terms (in Term.compareTo() order)");
  TermEnum terms = mIndexReader.terms();
  int order = 0;
  while (terms.next()) {
    order++;
    Term term = terms.term();
    String field = term.field();
    String text = term.text();
    if (!wantThisTerm(field, text)) {
      continue;
    }
    outputLn(order + " " + field + ": " + text);
    /*
     * for each term, print the
     * <document, frequency, <position>*> tuples for the term.
     *
     * document: document in which the Term appears
     * frequency: number of times the Term appears in the document
     * position: position of each appearance in the document
     *
     * e.g. doc.add(new Field("field", "one two three two four five",
     *          Field.Store.YES, Field.Index.ANALYZED));
     * then the tuple for Term("field", "two") in this document would be:
     * 88, 2, <2, 4>
     * where
     * 88 is the document number
     * 2 is the frequency this term appears in the document
     * <2, 4> are the positions for each appearance in the document
     */
    // by TermPositions
    outputLn("  document, frequency, <position>*");
    // keep track of docs that appear in all terms that are filtered in.
    Set<Integer> docNums = null;
    if (hasFilters()) {
      docNums = new HashSet<Integer>();
    }
    TermPositions termPos = mIndexReader.termPositions(term);
    while (termPos.next()) {
      int docNum = termPos.doc();
      int freq = termPos.freq();
      if (docNums != null) {
        docNums.add(docNum);
      }
      output("  " + docNum + ", " + freq + ", <");
      boolean first = true;
      for (int f = 0; f < freq; f++) {
        int positionInDoc = termPos.nextPosition();
        if (!first) {
          output(" ");
        } else {
          first = false;
        }
        output(positionInDoc + "");
      }
      outputLn(">");
    }
    termPos.close();
    if (docNums != null) {
      computeDocsIntersection(docNums);
    }
    outputLn();
    if (order % 1000 == 0) {
      mConsole.debug("Dumped " + order + " terms");
    }
  }
  terms.close();
}
private void _includeIfUnique(
    BooleanQuery booleanQuery,
    boolean like,
    QueryParser queryParser,
    Query query,
    BooleanClause.Occur occur) {
  if (query instanceof TermQuery) {
    Set<Term> terms = new HashSet<Term>();
    TermQuery termQuery = (TermQuery) query;
    termQuery.extractTerms(terms);
    float boost = termQuery.getBoost();
    for (Term term : terms) {
      String termValue = term.text();
      if (like) {
        termValue = termValue.toLowerCase(queryParser.getLocale());
        term = term.createTerm(StringPool.STAR.concat(termValue).concat(StringPool.STAR));
        query = new WildcardQuery(term);
      } else {
        query = new TermQuery(term);
      }
      query.setBoost(boost);
      boolean included = false;
      for (BooleanClause booleanClause : booleanQuery.getClauses()) {
        if (query.equals(booleanClause.getQuery())) {
          included = true;
        }
      }
      if (!included) {
        booleanQuery.add(query, occur);
      }
    }
  } else if (query instanceof BooleanQuery) {
    BooleanQuery curBooleanQuery = (BooleanQuery) query;
    BooleanQuery containerBooleanQuery = new BooleanQuery();
    for (BooleanClause booleanClause : curBooleanQuery.getClauses()) {
      _includeIfUnique(
          containerBooleanQuery,
          like,
          queryParser,
          booleanClause.getQuery(),
          booleanClause.getOccur());
    }
    if (containerBooleanQuery.getClauses().length > 0) {
      booleanQuery.add(containerBooleanQuery, occur);
    }
  } else {
    boolean included = false;
    for (BooleanClause booleanClause : booleanQuery.getClauses()) {
      if (query.equals(booleanClause.getQuery())) {
        included = true;
      }
    }
    if (!included) {
      booleanQuery.add(query, occur);
    }
  }
}
@Override
public boolean reload(String collectionName, String topRankingField) {
  if (collectionName == null) {
    return false;
  }
  CrescentCollectionHandler collectionHandler =
      SpringApplicationContext.getBean(
          "crescentCollectionHandler", CrescentCollectionHandler.class);
  CrescentCollection collection =
      collectionHandler.getCrescentCollections().getCrescentCollection(collectionName);
  if (collection == null) {
    logger.debug("no collection info => {}", collectionName);
    init(View.Overview);
    return false;
  }
  if (topRankingField == null) {
    if (collection.getDefaultSearchFields().get(0) != null) {
      topRankingField = collection.getDefaultSearchFields().get(0).getName();
    } else {
      logger.debug("no defaultSearchField => {}", collectionName);
      init(View.Overview);
      return false;
    }
  }
  List<String> fieldName = new ArrayList<String>();
  for (CrescentCollectionField field : collection.getFields()) {
    fieldName.add(field.getName());
  }
  TopRankingQueue topRankingQueue =
      new TopRankingQueue(DEFAULT_TOPRANKING_TERM, new RankingTermComparator());
  try {
    Directory directory = FSDirectory.open(new File(collection.getIndexingDirectory()));
    IndexReader reader = IndexReader.open(directory);
    TermEnum terms = reader.terms();
    int termFreq = 0;
    int termCount = 0;
    Term beforeTerm = null;
    // init term count
    fieldTermCount.clear();
    for (CrescentCollectionField field : collection.getFields()) {
      fieldTermCount.put(field.getName(), 0);
    }
    topRankingQueue.clear();
    while (terms.next()) {
      Term currTerm = terms.term();
      if (beforeTerm == null) {
        beforeTerm = currTerm;
      }
      // compare by value; field names are not guaranteed to be interned here
      if (beforeTerm.field().equals(currTerm.field())) {
        termCount++;
      } else {
        fieldTermCount.put(beforeTerm.field(), termCount);
        termCount = 1;
        beforeTerm = currTerm;
      }
      TermDocs termDocs = reader.termDocs(currTerm);
      while (termDocs.next()) {
        if (currTerm.field().equals(topRankingField)) {
          RankingTerm e = new RankingTerm(currTerm.text(), currTerm.field(), termDocs.freq());
          topRankingQueue.add(e);
        }
      }
      termFreq++;
    }
    if (beforeTerm != null) {
      fieldTermCount.put(beforeTerm.field(), termCount);
    }
    terms.close();
    result.put("numOfTerm", termFreq);
    result.put("numOfDoc", reader.numDocs());
    result.put("hasDel", reader.hasDeletions());
    result.put("isOptimize", reader.isOptimized());
    result.put("indexVersion", reader.getVersion());
    result.put("lastModify", new Date(IndexReader.lastModified(directory)));
  } catch (IOException e) {
    e.printStackTrace();
    return false;
  }
  if (topRankingQueue.size() != 0) {
    topRankingTerms = topRankingQueue.toArray();
    Arrays.sort(topRankingTerms);
  }
  result.put("collectionName", collectionName);
  result.put("indexName", collection.getIndexingDirectory());
  result.put("numOfField", collection.getFields().size());
  result.put("termCount", fieldTermCount);
  result.put("topRanking", topRankingTerms);
  result.put("fieldName", fieldName);
  return true;
}