public void testSeekCeilNotFound() throws Exception {
  Directory dir = newDirectory();
  RandomIndexWriter w = new RandomIndexWriter(random(), dir);
  Document doc = new Document();
  // Get empty string in there!
  doc.add(newStringField("field", "", Field.Store.NO));
  w.addDocument(doc);
  for (int i = 0; i < 36; i++) {
    doc = new Document();
    String term = "" + (char) (97 + i);
    String term2 = "a" + (char) (97 + i);
    doc.add(newTextField("field", term + " " + term2, Field.Store.NO));
    w.addDocument(doc);
  }
  w.forceMerge(1);
  IndexReader r = w.getReader();
  TermsEnum te = MultiFields.getTerms(r, "field").iterator(null);
  assertEquals(TermsEnum.SeekStatus.NOT_FOUND, te.seekCeil(new BytesRef(new byte[] {0x22})));
  assertEquals("a", te.term().utf8ToString());
  assertEquals(1L, te.ord());
  r.close();
  w.close();
  dir.close();
}
/*
 * listTermDictionary displays the term dictionary for a field.
 */
static void listTermDictionary(IndexReader reader, String fieldName) throws IOException {
  System.out.println("\nTerm Dictionary: field " + fieldName);
  /* Grant says: MultiFields.getTerms(IndexReader, fieldName) */
  Terms terms = MultiFields.getTerms(reader, fieldName);
  // size() returns -1 when the number of distinct terms is unknown
  if ((terms == null) || (terms.size() == -1)) {
    System.out.println(" The term dictionary is empty.");
  } else {
    System.out.println(" Vocabulary size: " + terms.size() + " terms");
    TermsEnum ithTerm = terms.iterator(null);
    /*
     * Iterate over the terms in this field. Information about a term's
     * occurrences (tf and positions) is accessed via the indexing API,
     * which returns inverted lists that describe (only) the current
     * document.
     */
    while (ithTerm.next() != null) {
      System.out.format(
          " %-30s %d %d\n",
          ithTerm.term().utf8ToString(), ithTerm.docFreq(), ithTerm.totalTermFreq());
    }
  }
}
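// A hypothetical call site for the helper above (not from the source; the
// index path and field name are assumptions for illustration): open a reader
// over an existing on-disk index and dump its term dictionary.
try (Directory dictDir = FSDirectory.open(new File("/path/to/index"));
     IndexReader dictReader = DirectoryReader.open(dictDir)) {
  listTermDictionary(dictReader, "body");
}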
public void test10kPulsed() throws Exception {
  // we always run this test with pulsing codec.
  Codec cp = _TestUtil.alwaysPostingsFormat(new Pulsing41PostingsFormat(1));
  File f = _TestUtil.getTempDir("10kpulsed");
  BaseDirectoryWrapper dir = newFSDirectory(f);
  dir.setCheckIndexOnClose(false); // we do this ourselves explicitly
  RandomIndexWriter iw =
      new RandomIndexWriter(
          random(),
          dir,
          newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())).setCodec(cp));
  Document document = new Document();
  FieldType ft = new FieldType(TextField.TYPE_STORED);
  switch (_TestUtil.nextInt(random(), 0, 2)) {
    case 0:
      ft.setIndexOptions(IndexOptions.DOCS_ONLY);
      break;
    case 1:
      ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
      break;
    default:
      ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
      break;
  }
  Field field = newField("field", "", ft);
  document.add(field);
  NumberFormat df = new DecimalFormat("00000", new DecimalFormatSymbols(Locale.ROOT));
  for (int i = 0; i < 10050; i++) {
    field.setStringValue(df.format(i));
    iw.addDocument(document);
  }
  IndexReader ir = iw.getReader();
  iw.close();
  TermsEnum te = MultiFields.getTerms(ir, "field").iterator(null);
  DocsEnum de = null;
  for (int i = 0; i < 10050; i++) {
    String expected = df.format(i);
    assertEquals(expected, te.next().utf8ToString());
    de = _TestUtil.docs(random(), te, null, de, 0);
    assertTrue(de.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, de.nextDoc());
  }
  ir.close();
  _TestUtil.checkIndex(dir);
  dir.close();
}
public void testThreeBlocks() throws Exception {
  Directory dir = newDirectory();
  RandomIndexWriter w = new RandomIndexWriter(random(), dir);
  List<String> terms = new ArrayList<>();
  for (int i = 0; i < 36; i++) {
    Document doc = new Document();
    String term = "" + (char) (97 + i);
    terms.add(term);
    if (VERBOSE) {
      System.out.println("i=" + i + " term=" + term);
    }
    doc.add(newTextField("field", term, Field.Store.NO));
    w.addDocument(doc);
  }
  for (int i = 0; i < 36; i++) {
    Document doc = new Document();
    String term = "m" + (char) (97 + i);
    terms.add(term);
    if (VERBOSE) {
      System.out.println("i=" + i + " term=" + term);
    }
    doc.add(newTextField("field", term, Field.Store.NO));
    w.addDocument(doc);
  }
  for (int i = 0; i < 36; i++) {
    Document doc = new Document();
    String term = "mo" + (char) (97 + i);
    terms.add(term);
    if (VERBOSE) {
      System.out.println("i=" + i + " term=" + term);
    }
    doc.add(newTextField("field", term, Field.Store.NO));
    w.addDocument(doc);
  }
  w.forceMerge(1);
  IndexReader r = w.getReader();
  TermsEnum te = MultiFields.getTerms(r, "field").iterator(null);
  if (VERBOSE) {
    while (te.next() != null) {
      System.out.println("TERM: " + te.ord() + " " + te.term().utf8ToString());
    }
  }
  assertTrue(te.seekExact(new BytesRef("mo")));
  assertEquals(27, te.ord());
  te.seekExact(90);
  assertEquals(new BytesRef("s"), te.term());
  testEnum(te, terms);
  r.close();
  w.close();
  dir.close();
}
/* Copied from lucene 4.2.x core */
private static long totalTermFreq(IndexReader r, String field, BytesRef text) throws IOException {
  final Terms terms = MultiFields.getTerms(r, field);
  if (terms != null) {
    final TermsEnum termsEnum = terms.iterator(null);
    if (termsEnum.seekExact(text, true)) {
      return termsEnum.totalTermFreq();
    }
  }
  return 0;
}
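// A hedged companion sketch (not in the source): the same null-tolerant
// seekExact pattern, returning document frequency instead of total term
// frequency. "docFreqOf" is an assumed name for illustration only.
private static long docFreqOf(IndexReader r, String field, BytesRef text) throws IOException {
  final Terms terms = MultiFields.getTerms(r, field);
  if (terms != null) {
    final TermsEnum termsEnum = terms.iterator(null);
    if (termsEnum.seekExact(text, true)) {
      return termsEnum.docFreq();
    }
  }
  return 0;
}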
private long getSumTermFrequency(IndexReader reader, String fieldName) {
  Terms collectionTermVector = null;
  try {
    collectionTermVector = MultiFields.getTerms(reader, fieldName);
    if (collectionTermVector == null) {
      // MultiFields.getTerms returns null when the field is absent; only
      // IOException is caught below, so this would otherwise throw an NPE.
      return 0;
    }
    long totalTermFreq = collectionTermVector.getSumTotalTermFreq();
    return totalTermFreq;
  } catch (IOException e) {
    LOG.warn("Unable to get total term frequency, it might not be indexed");
  }
  return 0;
}
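// Hedged sanity-check sketch (not from the source): getSumTotalTermFreq()
// equals the sum of totalTermFreq() over all terms of the field, assuming
// term frequencies were indexed (both report -1 otherwise).
Terms sanity = MultiFields.getTerms(reader, fieldName);
if (sanity != null) {
  long sum = 0;
  TermsEnum it = sanity.iterator(null);
  while (it.next() != null) {
    sum += it.totalTermFreq();
  }
  assert sum == sanity.getSumTotalTermFreq();
}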
public void testNonRootFloorBlocks() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
  IndexWriter w = new IndexWriter(dir, iwc);
  List<String> terms = new ArrayList<>();
  for (int i = 0; i < 36; i++) {
    Document doc = new Document();
    String term = "" + (char) (97 + i);
    terms.add(term);
    if (VERBOSE) {
      System.out.println("i=" + i + " term=" + term);
    }
    doc.add(newTextField("field", term, Field.Store.NO));
    w.addDocument(doc);
  }
  for (int i = 0; i < 128; i++) {
    Document doc = new Document();
    String term = "m" + (char) i;
    terms.add(term);
    if (VERBOSE) {
      System.out.println("i=" + i + " term=" + term + " bytes=" + new BytesRef(term));
    }
    doc.add(newStringField("field", term, Field.Store.NO));
    w.addDocument(doc);
  }
  w.forceMerge(1);
  IndexReader r = DirectoryReader.open(w, true);
  TermsEnum te = MultiFields.getTerms(r, "field").iterator(null);
  BytesRef term;
  int ord = 0;
  while ((term = te.next()) != null) {
    if (VERBOSE) {
      System.out.println("TEST: " + te.ord() + ": " + term.utf8ToString());
    }
    assertEquals(ord, te.ord());
    ord++;
  }
  testEnum(te, terms);
  r.close();
  w.close();
  dir.close();
}
private int countTerms(MultiTermQuery q) throws Exception {
  final Terms terms = MultiFields.getTerms(reader, q.getField());
  if (terms == null) return 0;
  final TermsEnum termEnum = q.getTermsEnum(terms);
  assertNotNull(termEnum);
  int count = 0;
  BytesRef cur, last = null;
  while ((cur = termEnum.next()) != null) {
    count++;
    if (last != null) {
      assertTrue(last.compareTo(cur) < 0);
    }
    last = BytesRef.deepCopyOf(cur);
  }
  // LUCENE-3314: the results after next() already returned null are undefined,
  // assertNull(termEnum.next());
  return count;
}
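// Hypothetical call site (not from the source; field and prefix are assumed
// values): count how many terms a MultiTermQuery would visit, here using a
// PrefixQuery, which is one concrete MultiTermQuery.
int visited = countTerms(new PrefixQuery(new Term("field", "mo")));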
@Override
public FieldStats.Long stats(IndexReader reader) throws IOException {
  int maxDoc = reader.maxDoc();
  Terms terms = org.apache.lucene.index.MultiFields.getTerms(reader, name());
  if (terms == null) {
    return null;
  }
  long minValue = LegacyNumericUtils.getMinInt(terms);
  long maxValue = LegacyNumericUtils.getMaxInt(terms);
  return new FieldStats.Long(
      maxDoc,
      terms.getDocCount(),
      terms.getSumDocFreq(),
      terms.getSumTotalTermFreq(),
      isSearchable(),
      isAggregatable(),
      minValue,
      maxValue);
}
/** Test the WordScorer emitted by the smoothing model */
public void testBuildWordScorer() throws IOException {
  SmoothingModel testModel = createTestModel();
  Map<String, Analyzer> mapping = new HashMap<>();
  mapping.put("field", new WhitespaceAnalyzer());
  PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer(), mapping);
  IndexWriter writer = new IndexWriter(new RAMDirectory(), new IndexWriterConfig(wrapper));
  Document doc = new Document();
  doc.add(new Field("field", "someText", TextField.TYPE_NOT_STORED));
  writer.addDocument(doc);
  DirectoryReader ir = DirectoryReader.open(writer);

  WordScorer wordScorer =
      testModel
          .buildWordScorerFactory()
          .newScorer(
              ir, MultiFields.getTerms(ir, "field"), "field", 0.9d, BytesRefs.toBytesRef(" "));
  assertWordScorer(wordScorer, testModel);
}
public void testSeveralNonRootBlocks() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
  IndexWriter w = new IndexWriter(dir, iwc);
  List<String> terms = new ArrayList<>();
  for (int i = 0; i < 30; i++) {
    for (int j = 0; j < 30; j++) {
      Document doc = new Document();
      String term = "" + (char) (97 + i) + (char) (97 + j);
      terms.add(term);
      if (VERBOSE) {
        System.out.println("term=" + term);
      }
      doc.add(newTextField("body", term, Field.Store.NO));
      w.addDocument(doc);
    }
  }
  w.forceMerge(1);
  IndexReader r = DirectoryReader.open(w, true);
  TermsEnum te = MultiFields.getTerms(r, "body").iterator(null);
  for (int i = 0; i < 30; i++) {
    for (int j = 0; j < 30; j++) {
      String term = "" + (char) (97 + i) + (char) (97 + j);
      if (VERBOSE) {
        System.out.println("TEST: check term=" + term);
      }
      assertEquals(term, te.next().utf8ToString());
      assertEquals(30 * i + j, te.ord());
    }
  }
  testEnum(te, terms);
  te.seekExact(0);
  assertEquals("aa", te.term().utf8ToString());
  r.close();
  w.close();
  dir.close();
}
public void testBasic() throws Exception {
  Directory dir = newDirectory();
  RandomIndexWriter w = new RandomIndexWriter(random(), dir);
  Document doc = new Document();
  doc.add(newTextField("field", "a b c", Field.Store.NO));
  w.addDocument(doc);
  IndexReader r = w.getReader();
  TermsEnum te = MultiFields.getTerms(r, "field").iterator(null);

  // Test next()
  assertEquals(new BytesRef("a"), te.next());
  assertEquals(0L, te.ord());
  assertEquals(new BytesRef("b"), te.next());
  assertEquals(1L, te.ord());
  assertEquals(new BytesRef("c"), te.next());
  assertEquals(2L, te.ord());
  assertNull(te.next());

  // Test seekExact by term
  assertTrue(te.seekExact(new BytesRef("b")));
  assertEquals(1, te.ord());
  assertTrue(te.seekExact(new BytesRef("a")));
  assertEquals(0, te.ord());
  assertTrue(te.seekExact(new BytesRef("c")));
  assertEquals(2, te.ord());

  // Test seekExact by ord
  te.seekExact(1);
  assertEquals(new BytesRef("b"), te.term());
  te.seekExact(0);
  assertEquals(new BytesRef("a"), te.term());
  te.seekExact(2);
  assertEquals(new BytesRef("c"), te.term());

  r.close();
  w.close();
  dir.close();
}
@Override
public void visitMatchingTerms(IndexReader reader, String fieldName, MatchingTermVisitor mtv)
    throws IOException {
  int prefixLength = prefix.length();
  Terms terms = MultiFields.getTerms(reader, fieldName);
  if (terms != null) {
    Matcher matcher = pattern.matcher("");
    try {
      TermsEnum termsEnum = terms.iterator(null);
      TermsEnum.SeekStatus status = termsEnum.seekCeil(prefixRef);
      BytesRef text;
      if (status == TermsEnum.SeekStatus.FOUND) {
        text = prefixRef;
      } else if (status == TermsEnum.SeekStatus.NOT_FOUND) {
        text = termsEnum.term();
      } else {
        text = null;
      }
      while (text != null) {
        if (StringHelper.startsWith(text, prefixRef)) {
          String textString = text.utf8ToString();
          matcher.reset(textString.substring(prefixLength));
          if (matcher.matches()) {
            mtv.visitMatchingTerm(new Term(fieldName, textString));
          }
        } else {
          break;
        }
        text = termsEnum.next();
      }
    } finally {
      matcher.reset();
    }
  }
}
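// A minimal standalone sketch of the seekCeil/startsWith prefix walk used
// above (not from the source; the reader, field, and prefix are assumptions):
Terms prefixTerms = MultiFields.getTerms(reader, "field");
if (prefixTerms != null) {
  TermsEnum e = prefixTerms.iterator(null);
  BytesRef prefix = new BytesRef("mo");
  if (e.seekCeil(prefix) != TermsEnum.SeekStatus.END) {
    for (BytesRef t = e.term(); t != null && StringHelper.startsWith(t, prefix); t = e.next()) {
      System.out.println(t.utf8ToString());
    }
  }
}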
public void testFloorBlocks() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
  IndexWriter w = new IndexWriter(dir, iwc);
  for (int i = 0; i < 128; i++) {
    Document doc = new Document();
    String term = "" + (char) i;
    if (VERBOSE) {
      System.out.println("i=" + i + " term=" + term + " bytes=" + new BytesRef(term));
    }
    doc.add(newStringField("field", term, Field.Store.NO));
    w.addDocument(doc);
  }
  w.forceMerge(1);
  IndexReader r = DirectoryReader.open(w, true);
  TermsEnum te = MultiFields.getTerms(r, "field").iterator(null);
  if (VERBOSE) {
    BytesRef term;
    while ((term = te.next()) != null) {
      System.out.println(" " + te.ord() + ": " + term.utf8ToString());
    }
  }
  assertTrue(te.seekExact(new BytesRef("a")));
  assertEquals(97, te.ord());
  te.seekExact(98);
  assertEquals(new BytesRef("b"), te.term());
  assertTrue(te.seekExact(new BytesRef("z")));
  assertEquals(122, te.ord());
  r.close();
  w.close();
  dir.close();
}
/**
 * Reconstruct document fields.
 *
 * @param docNum document number. If this document is deleted, but the index is not optimized yet,
 *     the reconstruction process may still yield the reconstructed field content even from
 *     deleted documents.
 * @return reconstructed document
 * @throws Exception if the document number is out of range or the document is deleted
 */
public Reconstructed reconstruct(int docNum) throws Exception {
  // maxDoc() is one past the largest valid document number, so use >= here
  if (docNum < 0 || docNum >= reader.maxDoc()) {
    throw new Exception("Document number outside of valid range.");
  }
  Reconstructed res = new Reconstructed();
  if (deleted != null && deleted.get(docNum)) {
    throw new Exception("Document is deleted.");
  } else {
    Document doc = reader.document(docNum);
    for (int i = 0; i < fieldNames.length; i++) {
      Field[] fs = doc.getFields(fieldNames[i]);
      if (fs != null && fs.length > 0) {
        res.getStoredFields().put(fieldNames[i], fs);
      }
    }
  }
  // collect values from unstored fields
  HashSet<String> fields = new HashSet<String>(Arrays.asList(fieldNames));
  // try to use term vectors if available
  progress.maxValue = fieldNames.length;
  progress.curValue = 0;
  progress.minValue = 0;
  for (int i = 0; i < fieldNames.length; i++) {
    TermFreqVector tvf = reader.getTermFreqVector(docNum, fieldNames[i]);
    if (tvf != null && tvf.size() > 0 && (tvf instanceof TermPositionVector)) {
      TermPositionVector tpv = (TermPositionVector) tvf;
      progress.message = "Reading term vectors ...";
      progress.curValue = i;
      setChanged();
      notifyObservers(progress);
      BytesRef[] tv = tpv.getTerms();
      for (int k = 0; k < tv.length; k++) {
        // do we have positions?
        int[] posArr = tpv.getTermPositions(k);
        if (posArr == null) {
          // only offsets
          TermVectorOffsetInfo[] offsets = tpv.getOffsets(k);
          if (offsets.length == 0) {
            continue;
          }
          // convert offsets into positions
          posArr = convertOffsets(offsets);
        }
        GrowableStringArray gsa = res.getReconstructedFields().get(fieldNames[i]);
        if (gsa == null) {
          gsa = new GrowableStringArray();
          res.getReconstructedFields().put(fieldNames[i], gsa);
        }
        for (int m = 0; m < posArr.length; m++) {
          gsa.append(posArr[m], "|", tv[k].utf8ToString());
        }
      }
      fields.remove(fieldNames[i]); // got what we wanted
    }
  }
  // this loop collects data only from left-over fields
  // not yet collected through term vectors
  progress.maxValue = fields.size();
  progress.curValue = 0;
  progress.minValue = 0;
  for (String fld : fields) {
    progress.message = "Collecting terms in " + fld + " ...";
    progress.curValue++;
    setChanged();
    notifyObservers(progress);
    Terms terms = MultiFields.getTerms(reader, fld);
    if (terms == null) {
      // no terms in this field
      continue;
    }
    TermsEnum te = terms.iterator();
    while (te.next() != null) {
      DocsAndPositionsEnum dpe = te.docsAndPositions(deleted, null);
      if (dpe == null) {
        // no position info for this field
        break;
      }
      int num = dpe.advance(docNum);
      if (num != docNum) {
        // either greater than docNum or NO_MORE_DOCS
        continue; // no data for this term in this doc
      }
      String term = te.term().utf8ToString();
      GrowableStringArray gsa = (GrowableStringArray) res.getReconstructedFields().get(fld);
      if (gsa == null) {
        gsa = new GrowableStringArray();
        res.getReconstructedFields().put(fld, gsa);
      }
      for (int k = 0; k < dpe.freq(); k++) {
        int pos = dpe.nextPosition();
        gsa.append(pos, "|", term);
      }
    }
  }
  progress.message = "Done.";
  progress.curValue = 100;
  setChanged();
  notifyObservers(progress);
  return res;
}
private void addTerms(IndexReader reader, FieldVals f, ScoreTermQueue q) throws IOException {
  if (f.queryString == null) return;
  final Terms terms = MultiFields.getTerms(reader, f.fieldName);
  if (terms == null) {
    return;
  }
  try (TokenStream ts = analyzer.tokenStream(f.fieldName, f.queryString)) {
    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);

    int corpusNumDocs = reader.numDocs();
    HashSet<String> processedTerms = new HashSet<>();
    ts.reset();
    while (ts.incrementToken()) {
      String term = termAtt.toString();
      if (!processedTerms.contains(term)) {
        processedTerms.add(term);
        ScoreTermQueue variantsQ =
            new ScoreTermQueue(MAX_VARIANTS_PER_TERM); // maxNum variants considered for any one term
        float minScore = 0;
        Term startTerm = new Term(f.fieldName, term);
        AttributeSource atts = new AttributeSource();
        MaxNonCompetitiveBoostAttribute maxBoostAtt =
            atts.addAttribute(MaxNonCompetitiveBoostAttribute.class);
        SlowFuzzyTermsEnum fe =
            new SlowFuzzyTermsEnum(terms, atts, startTerm, f.minSimilarity, f.prefixLength);
        // store the df so all variants use same idf
        int df = reader.docFreq(startTerm);
        int numVariants = 0;
        int totalVariantDocFreqs = 0;
        BytesRef possibleMatch;
        BoostAttribute boostAtt = fe.attributes().addAttribute(BoostAttribute.class);
        while ((possibleMatch = fe.next()) != null) {
          numVariants++;
          totalVariantDocFreqs += fe.docFreq();
          float score = boostAtt.getBoost();
          if (variantsQ.size() < MAX_VARIANTS_PER_TERM || score > minScore) {
            ScoreTerm st =
                new ScoreTerm(
                    new Term(startTerm.field(), BytesRef.deepCopyOf(possibleMatch)),
                    score,
                    startTerm);
            variantsQ.insertWithOverflow(st);
            minScore = variantsQ.top().score; // maintain minScore
          }
          maxBoostAtt.setMaxNonCompetitiveBoost(
              variantsQ.size() >= MAX_VARIANTS_PER_TERM ? minScore : Float.NEGATIVE_INFINITY);
        }

        if (numVariants > 0) {
          int avgDf = totalVariantDocFreqs / numVariants;
          if (df == 0) { // no direct match we can use as df for all variants
            df = avgDf; // use avg df of all variants
          }
          // take the top variants (scored by edit distance) and reset the score
          // to include an IDF factor then add to the global queue for ranking
          // overall top query terms
          int size = variantsQ.size();
          for (int i = 0; i < size; i++) {
            ScoreTerm st = variantsQ.pop();
            st.score = (st.score * st.score) * sim.idf(df, corpusNumDocs);
            q.insertWithOverflow(st);
          }
        }
      }
    }
    ts.end();
  }
}
protected void fillFloatValues(FloatValues vals, IndexReader reader, String field)
    throws IOException {
  if (parser == null) {
    try {
      parser = FieldCache.DEFAULT_FLOAT_PARSER;
      fillFloatValues(vals, reader, field);
      return;
    } catch (NumberFormatException ne) {
      vals.parserHashCode = null; // wipe the previous one
      parser = FieldCache.NUMERIC_UTILS_FLOAT_PARSER;
      fillFloatValues(vals, reader, field);
      return;
    }
  }
  setParserAndResetCounts(vals, parser);
  Terms terms = MultiFields.getTerms(reader, field);
  int maxDoc = reader.maxDoc();
  vals.values = null;
  if (terms != null) {
    final TermsEnum termsEnum = terms.iterator();
    OpenBitSet validBits = (hasOption(OPTION_CACHE_BITS)) ? new OpenBitSet(maxDoc) : null;
    DocsEnum docs = null;
    try {
      while (true) {
        final BytesRef term = termsEnum.next();
        if (term == null) {
          break;
        }
        final float termval = parser.parseFloat(term);
        docs = termsEnum.docs(null, docs);
        while (true) {
          final int docID = docs.nextDoc();
          if (docID == DocIdSetIterator.NO_MORE_DOCS) {
            break;
          }
          if (vals.values == null) {
            vals.values = new float[maxDoc];
          }
          vals.values[docID] = termval;
          vals.numDocs++;
          if (validBits != null) {
            validBits.set(docID);
          }
        }
        vals.numTerms++;
      }
    } catch (FieldCache.StopFillCacheException stop) {
      // expected: the parser signals that no more terms should be consumed
    }
    if (vals.valid == null) {
      vals.valid = checkMatchAllBits(validBits, vals.numDocs, maxDoc);
    }
  }
  if (vals.values == null) {
    vals.values = new float[maxDoc];
  }
  if (vals.valid == null && vals.numDocs < 1) {
    vals.valid = new Bits.MatchNoBits(maxDoc);
  }
}
/*
 * More Ideas:
 *  - add ability to find whitespace problems -> we can build a poor mans decompounder with our index based on a automaton?
 *  - add ability to build different error models maybe based on a confusion matrix?
 *  - try to combine a token with its subsequent token to find / detect word splits (optional)
 *      - for this to work we need some way to defined the position length of a candidate
 *  - phonetic filters could be interesting here too for candidate selection
 */
@Override
public Suggestion<? extends Entry<? extends Option>> innerExecute(
    String name, PhraseSuggestionContext suggestion, IndexSearcher searcher, CharsRefBuilder spare)
    throws IOException {
  double realWordErrorLikelihood = suggestion.realworldErrorLikelyhood();
  final PhraseSuggestion response = new PhraseSuggestion(name, suggestion.getSize());
  final IndexReader indexReader = searcher.getIndexReader();
  List<PhraseSuggestionContext.DirectCandidateGenerator> generators = suggestion.generators();
  final int numGenerators = generators.size();
  final List<CandidateGenerator> gens = new ArrayList<>(generators.size());
  for (int i = 0; i < numGenerators; i++) {
    PhraseSuggestionContext.DirectCandidateGenerator generator = generators.get(i);
    DirectSpellChecker directSpellChecker = SuggestUtils.getDirectSpellChecker(generator);
    Terms terms = MultiFields.getTerms(indexReader, generator.field());
    if (terms != null) {
      gens.add(
          new DirectCandidateGenerator(
              directSpellChecker,
              generator.field(),
              generator.suggestMode(),
              indexReader,
              realWordErrorLikelihood,
              generator.size(),
              generator.preFilter(),
              generator.postFilter(),
              terms));
    }
  }
  final String suggestField = suggestion.getField();
  final Terms suggestTerms = MultiFields.getTerms(indexReader, suggestField);
  if (gens.size() > 0 && suggestTerms != null) {
    final NoisyChannelSpellChecker checker =
        new NoisyChannelSpellChecker(
            realWordErrorLikelihood, suggestion.getRequireUnigram(), suggestion.getTokenLimit());
    final BytesRef separator = suggestion.separator();
    TokenStream stream =
        checker.tokenStream(
            suggestion.getAnalyzer(), suggestion.getText(), spare, suggestion.getField());

    WordScorer wordScorer =
        suggestion
            .model()
            .newScorer(indexReader, suggestTerms, suggestField, realWordErrorLikelihood, separator);
    Result checkerResult =
        checker.getCorrections(
            stream,
            new MultiCandidateGeneratorWrapper(
                suggestion.getShardSize(), gens.toArray(new CandidateGenerator[gens.size()])),
            suggestion.maxErrors(),
            suggestion.getShardSize(),
            wordScorer,
            suggestion.confidence(),
            suggestion.gramSize());

    PhraseSuggestion.Entry resultEntry =
        buildResultEntry(suggestion, spare, checkerResult.cutoffScore);
    response.addTerm(resultEntry);

    final BytesRefBuilder byteSpare = new BytesRefBuilder();
    final EarlyTerminatingCollector collector = Lucene.createExistsCollector();
    final CompiledScript collateScript;
    if (suggestion.getCollateQueryScript() != null) {
      collateScript = suggestion.getCollateQueryScript();
    } else if (suggestion.getCollateFilterScript() != null) {
      collateScript = suggestion.getCollateFilterScript();
    } else {
      collateScript = null;
    }
    final boolean collatePrune = (collateScript != null) && suggestion.collatePrune();
    for (int i = 0; i < checkerResult.corrections.length; i++) {
      Correction correction = checkerResult.corrections[i];
      spare.copyUTF8Bytes(correction.join(SEPARATOR, byteSpare, null, null));
      boolean collateMatch = true;
      if (collateScript != null) {
        // Checks if the template query collateScript yields any documents
        // from the index for a correction, collateMatch is updated
        final Map<String, Object> vars = suggestion.getCollateScriptParams();
        vars.put(SUGGESTION_TEMPLATE_VAR_NAME, spare.toString());
        final ExecutableScript executable = scriptService.executable(collateScript, vars);
        final BytesReference querySource = (BytesReference) executable.run();
        final ParsedQuery parsedQuery;
        if (suggestion.getCollateFilterScript() != null) {
          parsedQuery =
              suggestion
                  .getQueryParserService()
                  .parse(QueryBuilders.constantScoreQuery(QueryBuilders.wrapperQuery(querySource)));
        } else {
          parsedQuery = suggestion.getQueryParserService().parse(querySource);
        }
        collateMatch = Lucene.exists(searcher, parsedQuery.query(), collector);
      }
      if (!collateMatch && !collatePrune) {
        continue;
      }
      Text phrase = new StringText(spare.toString());
      Text highlighted = null;
      if (suggestion.getPreTag() != null) {
        spare.copyUTF8Bytes(
            correction.join(SEPARATOR, byteSpare, suggestion.getPreTag(), suggestion.getPostTag()));
        highlighted = new StringText(spare.toString());
      }
      if (collatePrune) {
        resultEntry.addOption(
            new Suggestion.Entry.Option(
                phrase, highlighted, (float) (correction.score), collateMatch));
      } else {
        resultEntry.addOption(
            new Suggestion.Entry.Option(phrase, highlighted, (float) (correction.score)));
      }
    }
  } else {
    response.addTerm(buildResultEntry(suggestion, spare, Double.MIN_VALUE));
  }
  return response;
}
public void testTwoBlocks() throws Exception {
  Directory dir = newDirectory();
  RandomIndexWriter w = new RandomIndexWriter(random(), dir);
  List<String> terms = new ArrayList<>();
  for (int i = 0; i < 36; i++) {
    Document doc = new Document();
    String term = "" + (char) (97 + i);
    terms.add(term);
    if (VERBOSE) {
      System.out.println("i=" + i + " term=" + term);
    }
    doc.add(newTextField("field", term, Field.Store.NO));
    w.addDocument(doc);
  }
  for (int i = 0; i < 36; i++) {
    Document doc = new Document();
    String term = "m" + (char) (97 + i);
    terms.add(term);
    if (VERBOSE) {
      System.out.println("i=" + i + " term=" + term);
    }
    doc.add(newTextField("field", term, Field.Store.NO));
    w.addDocument(doc);
  }
  if (VERBOSE) {
    System.out.println("TEST: now forceMerge");
  }
  w.forceMerge(1);
  IndexReader r = w.getReader();
  TermsEnum te = MultiFields.getTerms(r, "field").iterator(null);
  assertTrue(te.seekExact(new BytesRef("mo")));
  assertEquals(27, te.ord());
  te.seekExact(54);
  assertEquals(new BytesRef("s"), te.term());
  Collections.sort(terms);
  for (int i = terms.size() - 1; i >= 0; i--) {
    te.seekExact(i);
    assertEquals(i, te.ord());
    assertEquals(terms.get(i), te.term().utf8ToString());
  }
  int iters = atLeast(1000);
  for (int iter = 0; iter < iters; iter++) {
    int ord = random().nextInt(terms.size());
    BytesRef term = new BytesRef(terms.get(ord));
    if (random().nextBoolean()) {
      if (VERBOSE) {
        System.out.println("TEST: iter=" + iter + " seek to ord=" + ord + " of " + terms.size());
      }
      te.seekExact(ord);
    } else {
      if (VERBOSE) {
        System.out.println(
            "TEST: iter="
                + iter
                + " seek to term="
                + terms.get(ord)
                + " ord="
                + ord
                + " of "
                + terms.size());
      }
      te.seekExact(term);
    }
    assertEquals(ord, te.ord());
    assertEquals(term, te.term());
  }
  r.close();
  w.close();
  dir.close();
}