public void testReuseDocsEnumNoReuse() throws IOException {
  Directory dir = newDirectory();
  Codec cp = TestUtil.alwaysPostingsFormat(new Lucene40RWPostingsFormat());
  RandomIndexWriter writer =
      new RandomIndexWriter(
          random(), dir, newIndexWriterConfig(new MockAnalyzer(random())).setCodec(cp));
  int numdocs = atLeast(20);
  createRandomIndex(numdocs, writer, random());
  writer.commit();

  DirectoryReader open = DirectoryReader.open(dir);
  for (LeafReaderContext ctx : open.leaves()) {
    LeafReader indexReader = ctx.reader();
    Terms terms = indexReader.terms("body");
    TermsEnum iterator = terms.iterator();
    IdentityHashMap<PostingsEnum, Boolean> enums = new IdentityHashMap<>();
    MatchNoBits bits = new Bits.MatchNoBits(indexReader.maxDoc());
    while ((iterator.next()) != null) {
      PostingsEnum docs =
          iterator.postings(
              random().nextBoolean() ? bits : new Bits.MatchNoBits(indexReader.maxDoc()),
              null,
              random().nextBoolean() ? PostingsEnum.FREQS : PostingsEnum.NONE);
      enums.put(docs, true);
    }
    assertEquals(terms.size(), enums.size());
  }
  writer.commit();
  IOUtils.close(writer, open, dir);
}
public synchronized ShapeFieldCache<T> getCache(LeafReader reader) throws IOException {
  ShapeFieldCache<T> idx = sidx.get(reader);
  if (idx != null) {
    return idx;
  }
  long startTime = System.currentTimeMillis();

  log.fine("Building Cache [" + reader.maxDoc() + "]");
  idx = new ShapeFieldCache<>(reader.maxDoc(), defaultSize);
  int count = 0;
  DocsEnum docs = null;
  Terms terms = reader.terms(shapeField);
  TermsEnum te = null;
  if (terms != null) {
    te = terms.iterator(te);
    BytesRef term = te.next();
    while (term != null) {
      T shape = readShape(term);
      if (shape != null) {
        docs = te.docs(null, docs, DocsEnum.FLAG_NONE);
        int docid = docs.nextDoc(); // use int, not Integer: avoids boxing on every doc
        while (docid != DocIdSetIterator.NO_MORE_DOCS) {
          idx.add(docid, shape);
          docid = docs.nextDoc();
          count++;
        }
      }
      term = te.next();
    }
  }
  sidx.put(reader, idx);
  long elapsed = System.currentTimeMillis() - startTime;
  log.fine("Cached: [" + count + " in " + elapsed + "ms] " + idx);
  return idx;
}
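// Hedged usage sketch (not from the original source): fetch the lazily-built per-reader cache
// and read back the shapes indexed for a single document. ShapeFieldCache#getShapes(int) is the
// accessor this sketch assumes.
void printShapeCountForDoc(LeafReader leafReader, int docId) throws IOException {
  ShapeFieldCache<T> cache = getCache(leafReader); // builds on first call, memoized per reader
  List<T> shapes = cache.getShapes(docId);
  log.fine("doc " + docId + " has " + (shapes == null ? 0 : shapes.size()) + " shapes");
}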
public TermInfo collect(String term) throws IOException {
  TermInfo info = new TermInfo();
  // Encode via BytesRef(CharSequence) so the term is always UTF-8; String#getBytes() would
  // use the platform default charset.
  BytesRef luceneTerm = new BytesRef(term);
  // this gives documents in which the term is found, but no offset information can be retrieved
  PostingsEnum postings = MultiFields.getTermDocsEnum(indexReader, ngramInfoFieldname, luceneTerm);
  if (postings == null) {
    return info; // term does not occur in this field; avoid NPE below
  }
  // now go through each document
  int docId = postings.nextDoc();
  while (docId != PostingsEnum.NO_MORE_DOCS) {
    // get the term vector for that document.
    TermsEnum it = indexReader.getTermVector(docId, ngramInfoFieldname).iterator();

    // find the term of interest
    it.seekExact(luceneTerm);
    // get its posting info. this will contain offset info
    PostingsEnum postingsInDoc = it.postings(null, PostingsEnum.OFFSETS);
    postingsInDoc.nextDoc();

    Document doc = indexReader.document(docId);
    String id = doc.get(idFieldname);
    JATEDocument jd = new JATEDocument(id);
    Set<int[]> offsets = new HashSet<>();
    int totalFreq = postingsInDoc.freq();
    for (int i = 0; i < totalFreq; i++) {
      postingsInDoc.nextPosition();
      offsets.add(new int[] {postingsInDoc.startOffset(), postingsInDoc.endOffset()});
    }
    info.getOffsets().put(jd, offsets);
    docId = postings.nextDoc();
  }
  return info;
}
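// Hedged usage sketch (names assumed): walk the per-document offsets gathered by collect().
// JATEDocument#getId() is assumed here to expose the stored id passed to its constructor.
void printOffsets(String ngram) throws IOException {
  TermInfo info = collect(ngram);
  for (Map.Entry<JATEDocument, Set<int[]>> e : info.getOffsets().entrySet()) {
    for (int[] span : e.getValue()) {
      System.out.println(e.getKey().getId() + " -> [" + span[0] + ", " + span[1] + ")");
    }
  }
}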
public void testNormsWithDocValues() throws Exception {
  MemoryIndex mi = new MemoryIndex(true, true);
  MockAnalyzer mockAnalyzer = new MockAnalyzer(random());

  mi.addField(new BinaryDocValuesField("text", new BytesRef("quick brown fox")), mockAnalyzer, 5f);
  mi.addField(new TextField("text", "quick brown fox", Field.Store.NO), mockAnalyzer, 5f);
  LeafReader leafReader = mi.createSearcher().getIndexReader().leaves().get(0).reader();

  Document doc = new Document();
  doc.add(new BinaryDocValuesField("text", new BytesRef("quick brown fox")));
  Field field = new TextField("text", "quick brown fox", Field.Store.NO);
  field.setBoost(5f);
  doc.add(field);
  Directory dir = newDirectory();
  IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(random(), mockAnalyzer));
  writer.addDocument(doc);
  writer.close();

  IndexReader controlIndexReader = DirectoryReader.open(dir);
  LeafReader controlLeafReader = controlIndexReader.leaves().get(0).reader();

  assertEquals(
      controlLeafReader.getNormValues("text").get(0), leafReader.getNormValues("text").get(0));

  controlIndexReader.close();
  dir.close();
}
/**
 * Returns a mapping from the old document ID to its new location in the sorted index.
 * Implementations can use the auxiliary {@link #sort(int, DocComparator)} to compute the
 * old-to-new permutation given a list of documents and their corresponding values.
 *
 * <p>A return value of <tt>null</tt> is allowed and means that <code>reader</code> is already
 * sorted.
 *
 * <p><b>NOTE:</b> deleted documents are expected to appear in the mapping as well; they will,
 * however, be marked as deleted in the sorted view.
 */
DocMap sort(LeafReader reader) throws IOException {
  SortField fields[] = sort.getSort();
  final int reverseMul[] = new int[fields.length];
  final LeafFieldComparator comparators[] = new LeafFieldComparator[fields.length];

  for (int i = 0; i < fields.length; i++) {
    reverseMul[i] = fields[i].getReverse() ? -1 : 1;
    comparators[i] = fields[i].getComparator(1, i).getLeafComparator(reader.getContext());
    comparators[i].setScorer(FAKESCORER);
  }
  final DocComparator comparator =
      new DocComparator() {
        @Override
        public int compare(int docID1, int docID2) {
          try {
            for (int i = 0; i < comparators.length; i++) {
              // TODO: would be better if copy() didnt cause a term lookup in TermOrdVal & co,
              // the segments are always the same here...
              comparators[i].copy(0, docID1);
              comparators[i].setBottom(0);
              int comp = reverseMul[i] * comparators[i].compareBottom(docID2);
              if (comp != 0) {
                return comp;
              }
            }
            return Integer.compare(docID1, docID2); // docid order tiebreak
          } catch (IOException e) {
            throw new RuntimeException(e);
          }
        }
      };
  return sort(reader.maxDoc(), comparator);
}
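// Hedged usage sketch (an assumed caller, not part of the method above): how the returned
// DocMap contract is typically consumed. oldToNew/newToOld are the standard Sorter.DocMap
// accessors in this era of Lucene.
void checkPermutation(LeafReader leafReader) throws IOException {
  DocMap docMap = sort(leafReader);
  if (docMap == null) {
    return; // reader is already in sorted order, per the javadoc above
  }
  for (int oldDoc = 0; oldDoc < leafReader.maxDoc(); oldDoc++) {
    int newDoc = docMap.oldToNew(oldDoc);
    assert docMap.newToOld(newDoc) == oldDoc; // the permutation must round-trip
  }
}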
public void testDocsAndPositionsEnumStart() throws Exception {
  Analyzer analyzer = new MockAnalyzer(random());
  int numIters = atLeast(3);
  MemoryIndex memory = new MemoryIndex(true, false, random().nextInt(50) * 1024 * 1024);
  for (int i = 0; i < numIters; i++) { // check reuse
    memory.addField("foo", "bar", analyzer);
    LeafReader reader = (LeafReader) memory.createSearcher().getIndexReader();
    TestUtil.checkReader(reader);
    assertEquals(1, reader.terms("foo").getSumTotalTermFreq());
    PostingsEnum disi = reader.postings(new Term("foo", "bar"), PostingsEnum.ALL);
    int docid = disi.docID();
    assertEquals(-1, docid);
    assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
    assertEquals(0, disi.nextPosition());
    assertEquals(0, disi.startOffset());
    assertEquals(3, disi.endOffset());

    // now reuse and check again
    TermsEnum te = reader.terms("foo").iterator();
    assertTrue(te.seekExact(new BytesRef("bar")));
    disi = te.postings(disi);
    docid = disi.docID();
    assertEquals(-1, docid);
    assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
    reader.close();
    memory.reset();
  }
}
public void testNumerics() throws Exception {
  Directory dir = newDirectory();
  Document doc = new Document();
  Field field = new NumericDocValuesField("numbers", 0);
  doc.add(field);

  IndexWriterConfig iwc = newIndexWriterConfig(random(), null);
  iwc.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);

  int numDocs = TEST_NIGHTLY ? atLeast(500) : atLeast(50);
  for (int i = 0; i < numDocs; i++) {
    field.setLongValue(random().nextLong());
    iw.addDocument(doc);
    if (random().nextInt(17) == 0) {
      iw.commit();
    }
  }
  DirectoryReader ir = iw.getReader();
  iw.forceMerge(1);
  DirectoryReader ir2 = iw.getReader();
  LeafReader merged = getOnlyLeafReader(ir2);
  iw.close();

  NumericDocValues multi = MultiDocValues.getNumericValues(ir, "numbers");
  NumericDocValues single = merged.getNumericDocValues("numbers");
  for (int i = 0; i < numDocs; i++) {
    assertEquals(single.get(i), multi.get(i));
  }
  ir.close();
  ir2.close();
  dir.close();
}
Query createCandidateQuery(IndexReader indexReader) throws IOException {
  List<Term> extractedTerms = new ArrayList<>();
  // include extractionResultField:failed, because docs with this term have no extractedTermsField
  // and otherwise we would fail to return these docs. Docs that failed query term extraction
  // always need to be verified by MemoryIndex:
  extractedTerms.add(new Term(extractionResultField.name(), EXTRACTION_FAILED));

  LeafReader reader = indexReader.leaves().get(0).reader();
  Fields fields = reader.fields();
  for (String field : fields) {
    Terms terms = fields.terms(field);
    if (terms == null) {
      continue;
    }

    BytesRef fieldBr = new BytesRef(field);
    TermsEnum tenum = terms.iterator();
    for (BytesRef term = tenum.next(); term != null; term = tenum.next()) {
      BytesRefBuilder builder = new BytesRefBuilder();
      builder.append(fieldBr);
      builder.append(FIELD_VALUE_SEPARATOR);
      builder.append(term);
      extractedTerms.add(new Term(queryTermsField.name(), builder.toBytesRef()));
    }
  }
  return new TermsQuery(extractedTerms);
}
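// Hedged usage sketch: the candidate query produced above is meant to pre-select registered
// percolator queries cheaply before per-query verification. 'percolatorSearcher' (an
// IndexSearcher over the query index) and 'memoryIndexReader' (the reader over the document
// being percolated) are assumed names for illustration.
TopDocs selectCandidates(IndexSearcher percolatorSearcher, IndexReader memoryIndexReader)
    throws IOException {
  Query candidates = createCandidateQuery(memoryIndexReader);
  return percolatorSearcher.search(candidates, 10);
}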
protected LeafReaderContext refreshReader() throws Exception {
  if (readerContext != null) {
    readerContext.reader().close();
  }
  LeafReader reader =
      SlowCompositeReaderWrapper.wrap(topLevelReader = DirectoryReader.open(writer, true));
  readerContext = reader.getContext();
  return readerContext;
}
@Override
public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
  LeafReader reader = context.reader();
  FieldInfo info = reader.getFieldInfos().fieldInfo(field);
  if (info != null) {
    Geo3DDocValuesField.checkCompatible(info);
  }
  currentDocs = DocValues.getSortedNumeric(reader, field);
  return this;
}
private void tstFilterCard(String mes, int expected, Filter filt) throws Exception {
  final DocIdSet docIdSet = filt.getDocIdSet(reader.getContext(), reader.getLiveDocs());
  int actual = 0;
  if (docIdSet != null) {
    DocIdSetIterator disi = docIdSet.iterator();
    while (disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
      actual++;
    }
  }
  assertEquals(mes, expected, actual);
}
public void testChangeGaps() throws Exception {
  // LUCENE-5324: check that it is possible to change the wrapper's gaps
  final int positionGap = random().nextInt(1000);
  final int offsetGap = random().nextInt(1000);
  final Analyzer delegate = new MockAnalyzer(random());
  final Analyzer a =
      new DelegatingAnalyzerWrapper(delegate.getReuseStrategy()) {
        @Override
        protected Analyzer getWrappedAnalyzer(String fieldName) {
          return delegate;
        }

        @Override
        public int getPositionIncrementGap(String fieldName) {
          return positionGap;
        }

        @Override
        public int getOffsetGap(String fieldName) {
          return offsetGap;
        }
      };

  final RandomIndexWriter writer = new RandomIndexWriter(random(), newDirectory(), a);
  final Document doc = new Document();
  final FieldType ft = new FieldType();
  ft.setIndexOptions(IndexOptions.DOCS);
  ft.setTokenized(true);
  ft.setStoreTermVectors(true);
  ft.setStoreTermVectorPositions(true);
  ft.setStoreTermVectorOffsets(true);
  doc.add(new Field("f", "a", ft));
  doc.add(new Field("f", "a", ft));
  writer.addDocument(doc);

  final LeafReader reader = getOnlySegmentReader(writer.getReader());
  final Fields fields = reader.getTermVectors(0);
  final Terms terms = fields.terms("f");
  final TermsEnum te = terms.iterator();
  assertEquals(new BytesRef("a"), te.next());
  final PostingsEnum dpe = te.postings(null, PostingsEnum.ALL);
  assertEquals(0, dpe.nextDoc());
  assertEquals(2, dpe.freq());
  assertEquals(0, dpe.nextPosition());
  assertEquals(0, dpe.startOffset());
  final int endOffset = dpe.endOffset();
  assertEquals(1 + positionGap, dpe.nextPosition());
  assertEquals(1 + endOffset + offsetGap, dpe.endOffset());
  assertEquals(null, te.next());
  reader.close();
  writer.close();
  writer.w.getDirectory().close();
}
public void testSortedNumeric() throws Exception {
  Directory dir = newDirectory();

  IndexWriterConfig iwc = newIndexWriterConfig(random(), null);
  iwc.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);

  int numDocs = TEST_NIGHTLY ? atLeast(500) : atLeast(50);
  for (int i = 0; i < numDocs; i++) {
    Document doc = new Document();
    int numValues = random().nextInt(5);
    for (int j = 0; j < numValues; j++) {
      doc.add(
          new SortedNumericDocValuesField(
              "nums", TestUtil.nextLong(random(), Long.MIN_VALUE, Long.MAX_VALUE)));
    }
    iw.addDocument(doc);
    if (random().nextInt(17) == 0) {
      iw.commit();
    }
  }
  DirectoryReader ir = iw.getReader();
  iw.forceMerge(1);
  DirectoryReader ir2 = iw.getReader();
  LeafReader merged = getOnlyLeafReader(ir2);
  iw.close();

  SortedNumericDocValues multi = MultiDocValues.getSortedNumericValues(ir, "nums");
  SortedNumericDocValues single = merged.getSortedNumericDocValues("nums");
  if (multi == null) {
    assertNull(single);
  } else {
    // check values
    for (int i = 0; i < numDocs; i++) {
      single.setDocument(i);
      ArrayList<Long> expectedList = new ArrayList<>();
      for (int j = 0; j < single.count(); j++) {
        expectedList.add(single.valueAt(j));
      }

      multi.setDocument(i);
      assertEquals(expectedList.size(), multi.count());
      for (int j = 0; j < single.count(); j++) {
        assertEquals(expectedList.get(j).longValue(), multi.valueAt(j));
      }
    }
  }

  ir.close();
  ir2.close();
  dir.close();
}
InsaneReader(LeafReader in, String insaneField) {
  super(in);
  this.insaneField = insaneField;
  ArrayList<FieldInfo> filteredInfos = new ArrayList<>();
  for (FieldInfo fi : in.getFieldInfos()) {
    if (fi.name.equals(insaneField)) {
      filteredInfos.add(
          new FieldInfo(
              fi.name,
              fi.number,
              fi.hasVectors(),
              fi.omitsNorms(),
              fi.hasPayloads(),
              fi.getIndexOptions(),
              DocValuesType.NONE,
              -1,
              Collections.emptyMap(),
              fi.getPointDimensionCount(),
              fi.getPointNumBytes()));
    } else {
      filteredInfos.add(fi);
    }
  }
  fieldInfos = new FieldInfos(filteredInfos.toArray(new FieldInfo[filteredInfos.size()]));
}
private FieldLookup loadFieldData(String name) {
  FieldLookup data = cachedFieldData.get(name);
  if (data == null) {
    MappedFieldType fieldType = mapperService.smartNameFieldType(name, types);
    if (fieldType == null) {
      throw new IllegalArgumentException(
          "No field found for [" + name + "] in mapping with types " + Arrays.toString(types));
    }
    data = new FieldLookup(fieldType);
    cachedFieldData.put(name, data);
  }
  if (data.fields() == null) {
    String fieldName = data.fieldType().names().indexName();
    fieldVisitor.reset(fieldName);
    try {
      reader.document(docId, fieldVisitor);
      fieldVisitor.postProcess(data.fieldType());
      data.fields(
          ImmutableMap.of(name, fieldVisitor.fields().get(data.fieldType().names().indexName())));
    } catch (IOException e) {
      throw new ElasticsearchParseException("failed to load field [{}]", e, name);
    }
  }
  return data;
}
/**
 * Find terms in the index based on a prefix. Useful for autocomplete.
 *
 * @param index the index
 * @param fieldName the field
 * @param prefix the prefix we're looking for (null or empty string for all terms)
 * @param sensitive match case-sensitively or not?
 * @param maxResults max. number of results to return (or -1 for all)
 * @return the matching terms
 */
public static List<String> findTermsByPrefix(
    LeafReader index, String fieldName, String prefix, boolean sensitive, int maxResults) {
  boolean allTerms = prefix == null || prefix.length() == 0;
  if (allTerms) {
    prefix = "";
    sensitive = true; // don't do unnecessary work in this case
  }
  try {
    if (!sensitive) prefix = StringUtil.removeAccents(prefix).toLowerCase();
    org.apache.lucene.index.Terms terms = index.terms(fieldName);
    List<String> results = new ArrayList<>();
    TermsEnum termsEnum = terms.iterator();
    BytesRef brPrefix = new BytesRef(prefix.getBytes(LUCENE_DEFAULT_CHARSET));
    if (termsEnum.seekCeil(brPrefix) == TermsEnum.SeekStatus.END) {
      return results; // no term at or after the prefix
    }
    // seekCeil leaves the enum positioned ON the first term >= prefix, so start from term()
    // instead of next(); the old loop advanced first and silently skipped the first match.
    for (BytesRef term = termsEnum.term();
        term != null && (maxResults < 0 || results.size() < maxResults);
        term = termsEnum.next()) {
      String termText = term.utf8ToString();
      String optDesensitized = termText;
      if (!sensitive) optDesensitized = StringUtil.removeAccents(termText).toLowerCase();
      if (!allTerms && !optDesensitized.startsWith(prefix)) {
        // startsWith also handles terms shorter than the prefix, where the old
        // substring(0, prefix.length()) call would have thrown. No more matches.
        break;
      }
      // Match, add term
      results.add(termText);
    }
    return results;
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
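// Hedged usage sketch: case-insensitive autocomplete over an already-open LeafReader; the
// field name "title" and the prefix "luc" are assumptions for illustration.
static void printSuggestions(LeafReader leafReader) {
  for (String suggestion : findTermsByPrefix(leafReader, "title", "luc", false, 10)) {
    System.out.println(suggestion);
  }
}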
public void testDocsWithField() throws Exception {
  Directory dir = newDirectory();

  IndexWriterConfig iwc = newIndexWriterConfig(random(), null);
  iwc.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);

  int numDocs = TEST_NIGHTLY ? atLeast(500) : atLeast(50);
  for (int i = 0; i < numDocs; i++) {
    Document doc = new Document();
    // add "numbers" only most of the time, so that some docs genuinely lack the field
    // (the previous `random().nextInt(4) >= 0` was always true, making "numbers"
    // indistinguishable from "numbersAlways")
    if (random().nextInt(4) > 0) {
      doc.add(new NumericDocValuesField("numbers", random().nextLong()));
    }
    doc.add(new NumericDocValuesField("numbersAlways", random().nextLong()));
    iw.addDocument(doc);
    if (random().nextInt(17) == 0) {
      iw.commit();
    }
  }
  DirectoryReader ir = iw.getReader();
  iw.forceMerge(1);
  DirectoryReader ir2 = iw.getReader();
  LeafReader merged = getOnlyLeafReader(ir2);
  iw.close();

  Bits multi = MultiDocValues.getDocsWithField(ir, "numbers");
  Bits single = merged.getDocsWithField("numbers");
  if (multi == null) {
    assertNull(single);
  } else {
    assertEquals(single.length(), multi.length());
    for (int i = 0; i < numDocs; i++) {
      assertEquals(single.get(i), multi.get(i));
    }
  }

  multi = MultiDocValues.getDocsWithField(ir, "numbersAlways");
  single = merged.getDocsWithField("numbersAlways");
  assertEquals(single.length(), multi.length());
  for (int i = 0; i < numDocs; i++) {
    assertEquals(single.get(i), multi.get(i));
  }
  ir.close();
  ir2.close();
  dir.close();
}
private Set<Uid> getShardDocUIDs(final IndexShard shard) throws IOException {
  shard.refresh("get_uids");
  try (Engine.Searcher searcher = shard.acquireSearcher("test")) {
    Set<Uid> ids = new HashSet<>();
    for (LeafReaderContext leafContext : searcher.reader().leaves()) {
      LeafReader reader = leafContext.reader();
      Bits liveDocs = reader.getLiveDocs();
      for (int i = 0; i < reader.maxDoc(); i++) {
        if (liveDocs == null || liveDocs.get(i)) {
          Document uuid = reader.document(i, Collections.singleton(UidFieldMapper.NAME));
          ids.add(Uid.createUid(uuid.get(UidFieldMapper.NAME)));
        }
      }
    }
    return ids;
  }
}
public PostingsEnum randomDocsEnum(
    String field, BytesRef term, List<LeafReaderContext> readers, Bits bits) throws IOException {
  if (random().nextInt(10) == 0) {
    return null;
  }
  LeafReader indexReader = readers.get(random().nextInt(readers.size())).reader();
  Terms terms = indexReader.terms(field);
  if (terms == null) {
    return null;
  }
  TermsEnum iterator = terms.iterator();
  if (iterator.seekExact(term)) {
    return iterator.postings(
        bits, null, random().nextBoolean() ? PostingsEnum.FREQS : PostingsEnum.NONE);
  }
  return null;
}
public void testBasics() throws Exception {
  // sanity check of norms writer
  // TODO: generalize
  LeafReader slow = SlowCompositeReaderWrapper.wrap(reader);
  NumericDocValues fooNorms = slow.getNormValues("foo");
  NumericDocValues barNorms = slow.getNormValues("bar");
  for (int i = 0; i < slow.maxDoc(); i++) {
    assertFalse(fooNorms.get(i) == barNorms.get(i));
  }

  // sanity check of searching
  TopDocs foodocs = searcher.search(new TermQuery(new Term("foo", "brown")), 10);
  assertTrue(foodocs.totalHits > 0);
  TopDocs bardocs = searcher.search(new TermQuery(new Term("bar", "brown")), 10);
  assertTrue(bardocs.totalHits > 0);
  assertTrue(foodocs.scoreDocs[0].score < bardocs.scoreDocs[0].score);
}
public void testDocValues() throws IOException {
  assertU(adoc("id", "1"));
  assertU(commit());
  try (SolrCore core = h.getCoreInc()) {
    final RefCounted<SolrIndexSearcher> searcherRef = core.openNewSearcher(true, true);
    final SolrIndexSearcher searcher = searcherRef.get();
    try {
      final LeafReader reader = searcher.getLeafReader();
      assertEquals(1, reader.numDocs());
      final FieldInfos infos = reader.getFieldInfos();
      assertEquals(DocValuesType.NUMERIC, infos.fieldInfo("floatdv").getDocValuesType());
      assertEquals(DocValuesType.NUMERIC, infos.fieldInfo("intdv").getDocValuesType());
      assertEquals(DocValuesType.NUMERIC, infos.fieldInfo("doubledv").getDocValuesType());
      assertEquals(DocValuesType.NUMERIC, infos.fieldInfo("longdv").getDocValuesType());
      assertEquals(DocValuesType.SORTED, infos.fieldInfo("stringdv").getDocValuesType());

      assertEquals((long) Float.floatToIntBits(1), reader.getNumericDocValues("floatdv").get(0));
      assertEquals(2L, reader.getNumericDocValues("intdv").get(0));
      assertEquals(Double.doubleToLongBits(3), reader.getNumericDocValues("doubledv").get(0));
      assertEquals(4L, reader.getNumericDocValues("longdv").get(0));

      final IndexSchema schema = core.getLatestSchema();
      final SchemaField floatDv = schema.getField("floatdv");
      final SchemaField intDv = schema.getField("intdv");
      final SchemaField doubleDv = schema.getField("doubledv");
      final SchemaField longDv = schema.getField("longdv");

      FunctionValues values =
          floatDv
              .getType()
              .getValueSource(floatDv, null)
              .getValues(null, searcher.getLeafReader().leaves().get(0));
      assertEquals(1f, values.floatVal(0), 0f);
      assertEquals(1f, values.objectVal(0));
      values =
          intDv
              .getType()
              .getValueSource(intDv, null)
              .getValues(null, searcher.getLeafReader().leaves().get(0));
      assertEquals(2, values.intVal(0));
      assertEquals(2, values.objectVal(0));
      values =
          doubleDv
              .getType()
              .getValueSource(doubleDv, null)
              .getValues(null, searcher.getLeafReader().leaves().get(0));
      assertEquals(3d, values.doubleVal(0), 0d);
      assertEquals(3d, values.objectVal(0));
      values =
          longDv
              .getType()
              .getValueSource(longDv, null)
              .getValues(null, searcher.getLeafReader().leaves().get(0));
      assertEquals(4L, values.longVal(0));
      assertEquals(4L, values.objectVal(0));
    } finally {
      searcherRef.decref();
    }
  }
}
public void testSorted() throws Exception {
  Directory dir = newDirectory();
  Document doc = new Document();
  Field field = new SortedDocValuesField("bytes", new BytesRef());
  doc.add(field);

  IndexWriterConfig iwc = newIndexWriterConfig(random(), null);
  iwc.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);

  int numDocs = TEST_NIGHTLY ? atLeast(500) : atLeast(50);
  for (int i = 0; i < numDocs; i++) {
    BytesRef ref = new BytesRef(TestUtil.randomUnicodeString(random()));
    field.setBytesValue(ref);
    if (random().nextInt(7) == 0) {
      iw.addDocument(new Document());
    }
    iw.addDocument(doc);
    if (random().nextInt(17) == 0) {
      iw.commit();
    }
  }
  DirectoryReader ir = iw.getReader();
  iw.forceMerge(1);
  DirectoryReader ir2 = iw.getReader();
  LeafReader merged = getOnlyLeafReader(ir2);
  iw.close();

  SortedDocValues multi = MultiDocValues.getSortedValues(ir, "bytes");
  SortedDocValues single = merged.getSortedDocValues("bytes");
  assertEquals(single.getValueCount(), multi.getValueCount());
  for (int i = 0; i < numDocs; i++) {
    // check ord
    assertEquals(single.getOrd(i), multi.getOrd(i));
    // check value
    final BytesRef expected = BytesRef.deepCopyOf(single.get(i));
    final BytesRef actual = multi.get(i);
    assertEquals(expected, actual);
  }
  ir.close();
  ir2.close();
  dir.close();
}
@Override
public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException {
  final LeafReader reader = readerContext.reader();
  final BinaryDocValues docValues = reader.getBinaryDocValues(fieldName);
  if (docValues == null) return null;

  return new DoubleDocValues(this) {
    public BytesRef scratch = null;
    // TODO why is the value being requested twice per doc?!
    int lastDoc = -1;
    double lastVal;

    @Override
    public double doubleVal(int doc) {
      if (doc == lastDoc) return lastVal;
      // sanity check; shouldn't be necessary
      if (doc < 0 || doc >= reader.maxDoc())
        throw new IllegalStateException("Bad doc " + doc + " for reader " + reader);
      BytesRef bytes = null;
      try { // shouldn't be necessary
        scratch = docValues.get(doc);
        bytes = scratch;
      } catch (ArrayIndexOutOfBoundsException e) {
        if (log.isErrorEnabled())
          log.error(
              "DocValues index corruption for docid " + doc + " reader " + reader); // don't log 'e'
      }
      if (bytes != null) lastVal = MultiPointEncoding.calcDistance(point, bytes, ctx);
      else lastVal = 1; // 1 degree away, 111.2km
      lastDoc = doc;
      return lastVal;
    }
  };
}
public void testSameFieldAddedMultipleTimes() throws IOException {
  MemoryIndex mindex = randomMemoryIndex();
  MockAnalyzer mockAnalyzer = new MockAnalyzer(random());
  mindex.addField("field", "the quick brown fox", mockAnalyzer);
  mindex.addField("field", "jumps over the", mockAnalyzer);
  LeafReader reader = (LeafReader) mindex.createSearcher().getIndexReader();
  TestUtil.checkReader(reader);
  assertEquals(7, reader.terms("field").getSumTotalTermFreq());
  PhraseQuery query = new PhraseQuery("field", "fox", "jumps");
  assertTrue(mindex.search(query) > 0.1);
  mindex.reset();
  mockAnalyzer.setPositionIncrementGap(1 + random().nextInt(10));
  mindex.addField("field", "the quick brown fox", mockAnalyzer);
  mindex.addField("field", "jumps over the", mockAnalyzer);
  assertEquals(0, mindex.search(query), 0.00001f);
  query = new PhraseQuery(10, "field", "fox", "jumps");
  assertTrue(
      "posGap" + mockAnalyzer.getPositionIncrementGap("field"), mindex.search(query) > 0.0001);
  TestUtil.checkReader(mindex.createSearcher().getIndexReader());
}
public void testRefCounts2() throws IOException {
  Directory dir1 = getDir1(random());
  Directory dir2 = getDir2(random());
  LeafReader ir1 = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(dir1));
  LeafReader ir2 = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(dir2));
  // don't close subreaders, so ParallelReader will increment refcounts
  ParallelLeafReader pr = new ParallelLeafReader(false, ir1, ir2);
  // check RefCounts
  assertEquals(2, ir1.getRefCount());
  assertEquals(2, ir2.getRefCount());
  pr.close();
  assertEquals(1, ir1.getRefCount());
  assertEquals(1, ir2.getRefCount());
  ir1.close();
  ir2.close();
  assertEquals(0, ir1.getRefCount());
  assertEquals(0, ir2.getRefCount());
  dir1.close();
  dir2.close();
}
/**
 * Returns total in-heap bytes used by all suggesters. This method has CPU cost <code>
 * O(numIndexedFields)</code>.
 *
 * @param fieldNamePatterns if non-null, any completion field name matching any of these patterns
 *     will break out its in-heap bytes separately in the returned {@link CompletionStats}
 */
public CompletionStats completionStats(IndexReader indexReader, String... fieldNamePatterns) {
  CompletionStats completionStats = new CompletionStats();
  for (LeafReaderContext atomicReaderContext : indexReader.leaves()) {
    LeafReader atomicReader = atomicReaderContext.reader();
    try {
      Fields fields = atomicReader.fields();
      for (String fieldName : fields) {
        Terms terms = fields.terms(fieldName);
        if (terms instanceof CompletionTerms) {
          CompletionTerms completionTerms = (CompletionTerms) terms;
          completionStats.add(completionTerms.stats(fieldNamePatterns));
        }
      }
    } catch (IOException ioe) {
      logger.error("Could not get completion stats", ioe);
    }
  }
  return completionStats;
}
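// Hedged usage sketch: aggregate suggester heap usage across all segments, breaking out
// completion fields that match "suggest*"; the pattern is an assumption for illustration.
CompletionStats statsForSuggestFields(IndexReader indexReader) {
  return completionStats(indexReader, "suggest*");
}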
public void testCloseInnerReader() throws Exception {
  Directory dir1 = getDir1(random());
  LeafReader ir1 = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(dir1));

  // with overlapping
  ParallelLeafReader pr =
      new ParallelLeafReader(true, new LeafReader[] {ir1}, new LeafReader[] {ir1});

  ir1.close();

  try {
    pr.document(0);
    fail("ParallelLeafReader should be already closed because inner reader was closed!");
  } catch (AlreadyClosedException e) {
    // pass
  }

  // noop:
  pr.close();
  dir1.close();
}
public AssertingLeafReader(LeafReader in) {
  super(in);
  // check some basic reader sanity
  assert in.maxDoc() >= 0;
  assert in.numDocs() <= in.maxDoc();
  assert in.numDeletedDocs() + in.numDocs() == in.maxDoc();
  assert !in.hasDeletions() || in.numDeletedDocs() > 0 && in.numDocs() < in.maxDoc();

  addCoreClosedListener(
      new CoreClosedListener() {
        @Override
        public void onClose(Object ownerCoreCacheKey) throws IOException {
          final Object expectedKey = getCoreCacheKey();
          assert expectedKey == ownerCoreCacheKey
              : "Core closed listener called on a different key "
                  + expectedKey
                  + " <> "
                  + ownerCoreCacheKey;
        }
      });
}
public static DocSet createDocSet(SolrIndexSearcher searcher, Term term) throws IOException {
  DirectoryReader reader = searcher.getRawReader(); // raw reader to avoid extra wrapping overhead
  int maxDoc = searcher.getIndexReader().maxDoc();
  int smallSetSize = smallSetSize(maxDoc);

  String field = term.field();
  BytesRef termVal = term.bytes();

  int maxCount = 0;
  int firstReader = -1;
  List<LeafReaderContext> leaves = reader.leaves();
  // use an array for slightly higher scanning cost, but fewer memory allocations
  PostingsEnum[] postList = new PostingsEnum[leaves.size()];

  for (LeafReaderContext ctx : leaves) {
    assert leaves.get(ctx.ord) == ctx;
    LeafReader r = ctx.reader();
    Fields f = r.fields();
    Terms t = f.terms(field);
    if (t == null) continue; // field is missing
    TermsEnum te = t.iterator();
    if (te.seekExact(termVal)) {
      maxCount += te.docFreq();
      postList[ctx.ord] = te.postings(null, PostingsEnum.NONE);
      if (firstReader < 0) firstReader = ctx.ord;
    }
  }

  if (maxCount == 0) {
    return DocSet.EMPTY;
  }

  if (maxCount <= smallSetSize) {
    return createSmallSet(leaves, postList, maxCount, firstReader);
  }

  return createBigSet(leaves, postList, maxDoc, firstReader);
}
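// Hedged usage sketch: build the set of documents containing one exact term; 'searcher' is an
// open SolrIndexSearcher, and the field/value pair is an assumption for illustration.
static int countDocsWithTerm(SolrIndexSearcher searcher) throws IOException {
  DocSet matching = createDocSet(searcher, new Term("id", "42"));
  return matching.size(); // DocSet#size() reports the cardinality of the set
}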
public void testDocsEnumStart() throws Exception {
  Analyzer analyzer = new MockAnalyzer(random());
  MemoryIndex memory =
      new MemoryIndex(random().nextBoolean(), false, random().nextInt(50) * 1024 * 1024);
  memory.addField("foo", "bar", analyzer);
  LeafReader reader = (LeafReader) memory.createSearcher().getIndexReader();
  TestUtil.checkReader(reader);
  PostingsEnum disi =
      TestUtil.docs(random(), reader, "foo", new BytesRef("bar"), null, PostingsEnum.NONE);
  int docid = disi.docID();
  assertEquals(-1, docid);
  assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);

  // now reuse and check again
  TermsEnum te = reader.terms("foo").iterator();
  assertTrue(te.seekExact(new BytesRef("bar")));
  disi = te.postings(disi, PostingsEnum.NONE);
  docid = disi.docID();
  assertEquals(-1, docid);
  assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
  reader.close();
}