public void test() throws Exception {
  BaseDirectoryWrapper d = newDirectory();
  d.setCheckIndexOnClose(false); // we nuke files, but verify the reader still works
  RandomIndexWriter w = new RandomIndexWriter(random(), d);
  int numDocs = atLeast(100);
  for (int i = 0; i < numDocs; i++) {
    Document doc = new Document();
    doc.add(newField("foo", "bar", TextField.TYPE_NOT_STORED));
    w.addDocument(doc);
  }
  IndexReader r = w.getReader();
  w.commit();
  w.close();
  for (String fileName : d.listAll()) {
    try {
      d.deleteFile(fileName);
      // may succeed, e.g. if the file is completely read into RAM.
    } catch (IOException ioe) {
      // ignore: this means codec (correctly) is holding the file open
    }
  }
  for (LeafReaderContext cxt : r.leaves()) {
    TestUtil.checkReader(cxt.reader());
  }
  r.close();
  d.close();
}
Query createCandidateQuery(IndexReader indexReader) throws IOException {
  List<Term> extractedTerms = new ArrayList<>();
  // include extractionResultField:failed, because docs with this term have no extractedTermsField
  // and otherwise we would fail to return these docs. Docs that failed query term extraction
  // always need to be verified by MemoryIndex:
  extractedTerms.add(new Term(extractionResultField.name(), EXTRACTION_FAILED));
  LeafReader reader = indexReader.leaves().get(0).reader();
  Fields fields = reader.fields();
  for (String field : fields) {
    Terms terms = fields.terms(field);
    if (terms == null) {
      continue;
    }
    BytesRef fieldBr = new BytesRef(field);
    TermsEnum tenum = terms.iterator();
    for (BytesRef term = tenum.next(); term != null; term = tenum.next()) {
      BytesRefBuilder builder = new BytesRefBuilder();
      builder.append(fieldBr);
      builder.append(FIELD_VALUE_SEPARATOR);
      builder.append(term);
      extractedTerms.add(new Term(queryTermsField.name(), builder.toBytesRef()));
    }
  }
  return new TermsQuery(extractedTerms);
}
public static Map<String, Integer> termFrequencies(
    IndexSearcher indexSearcher,
    Query documentFilterQuery,
    String fieldName,
    String propName,
    String altName) {
  try {
    String luceneField = ComplexFieldUtil.propertyField(fieldName, propName, altName);
    Weight weight = indexSearcher.createNormalizedWeight(documentFilterQuery, false);
    Map<String, Integer> freq = new HashMap<>();
    IndexReader indexReader = indexSearcher.getIndexReader();
    for (LeafReaderContext arc : indexReader.leaves()) {
      if (weight == null) throw new RuntimeException("weight == null");
      if (arc == null) throw new RuntimeException("arc == null");
      if (arc.reader() == null) throw new RuntimeException("arc.reader() == null");
      Scorer scorer = weight.scorer(arc, arc.reader().getLiveDocs());
      if (scorer != null) {
        while (scorer.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
          getFrequenciesFromTermVector(
              indexReader, scorer.docID() + arc.docBase, luceneField, freq);
        }
      }
    }
    return freq;
  } catch (IOException e) {
    throw ExUtil.wrapRuntimeException(e);
  }
}
private void createIndex(
    IndexWriterConfig config,
    Directory target,
    IndexReader reader,
    Filter preserveFilter,
    boolean negateFilter)
    throws IOException {
  boolean success = false;
  final IndexWriter w = new IndexWriter(target, config);
  try {
    final List<LeafReaderContext> leaves = reader.leaves();
    final IndexReader[] subReaders = new IndexReader[leaves.size()];
    int i = 0;
    for (final LeafReaderContext ctx : leaves) {
      subReaders[i++] = new DocumentFilteredLeafIndexReader(ctx, preserveFilter, negateFilter);
    }
    w.addIndexes(subReaders);
    success = true;
  } finally {
    if (success) {
      w.close();
    } else {
      IOUtils.closeWhileHandlingException(w);
    }
  }
}
public void testNormsWithDocValues() throws Exception {
  MemoryIndex mi = new MemoryIndex(true, true);
  MockAnalyzer mockAnalyzer = new MockAnalyzer(random());
  mi.addField(
      new BinaryDocValuesField("text", new BytesRef("quick brown fox")), mockAnalyzer, 5f);
  mi.addField(new TextField("text", "quick brown fox", Field.Store.NO), mockAnalyzer, 5f);
  LeafReader leafReader = mi.createSearcher().getIndexReader().leaves().get(0).reader();

  Document doc = new Document();
  doc.add(new BinaryDocValuesField("text", new BytesRef("quick brown fox")));
  Field field = new TextField("text", "quick brown fox", Field.Store.NO);
  field.setBoost(5f);
  doc.add(field);
  Directory dir = newDirectory();
  IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(random(), mockAnalyzer));
  writer.addDocument(doc);
  writer.close();

  IndexReader controlIndexReader = DirectoryReader.open(dir);
  LeafReader controlLeafReader = controlIndexReader.leaves().get(0).reader();

  assertEquals(
      controlLeafReader.getNormValues("text").get(0), leafReader.getNormValues("text").get(0));

  controlIndexReader.close();
  dir.close();
}
private void getPrefixTerms(ObjectHashSet<Term> terms, final Term prefix, final IndexReader reader)
    throws IOException {
  // SlowCompositeReaderWrapper could be used... but this would merge all terms from each segment
  // into one terms instance, which is very expensive. Therefore I think it is better to iterate
  // over each leaf individually.
  List<LeafReaderContext> leaves = reader.leaves();
  for (LeafReaderContext leaf : leaves) {
    Terms _terms = leaf.reader().terms(field);
    if (_terms == null) {
      continue;
    }
    TermsEnum termsEnum = _terms.iterator();
    TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(prefix.bytes());
    if (TermsEnum.SeekStatus.END == seekStatus) {
      continue;
    }
    for (BytesRef term = termsEnum.term(); term != null; term = termsEnum.next()) {
      if (!StringHelper.startsWith(term, prefix.bytes())) {
        break;
      }
      terms.add(new Term(field, BytesRef.deepCopyOf(term)));
      if (terms.size() >= maxExpansions) {
        return;
      }
    }
  }
}
String buildDocumentTitle(AtomicReader inSegmentReader, final DocumentDescriptor inDescriptor) {
  AtomicReader segmentReader = _indexReader.leaves().get(inDescriptor.segmentNumber).reader();
  String author = Attributes.getDocAttribute(segmentReader, inDescriptor.localId, "author"),
      header = Attributes.getDocAttribute(segmentReader, inDescriptor.localId, "header"),
      medium = Attributes.getDocAttribute(segmentReader, inDescriptor.localId, "medium"),
      created = Attributes.getDocAttribute(inSegmentReader, inDescriptor.localId, "grcreated"),
      publ_year = Attributes.getDocAttribute(inSegmentReader, inDescriptor.localId, "publ_year"),
      res = author + ". " + header;
  if (
      // gazeta
      medium.equals("\u0433\u0430\u0437\u0435\u0442\u0430")
      // zhurnal
      || medium.equals("\u0436\u0443\u0440\u043d\u0430\u043b")
      // elektronnoe izdanie
      || medium.equals(
          "\u044d\u043b\u0435\u043a\u0442\u0440\u043e\u043d\u043d\u043e\u0435 \u0438\u0437\u0434\u0430\u043d\u0438\u0435")) {
    String publication =
        Attributes.getDocAttribute(inSegmentReader, inDescriptor.localId, "publication");
    if (created.equals(publ_year)) {
      res += " // " + publication + ", " + publ_year;
    } else {
      res += " (" + created + ") // " + publication + ", " + publ_year;
    }
  } else {
    res += " (" + created + ")";
  }
  return res;
}
public static void main(String[] args) throws IOException, InterruptedException {
  Path indexPath = Paths.get(args[0]);
  String sparseOrNot = args[1];
  boolean sparse;
  if (sparseOrNot.equals("sparse")) {
    sparse = true;
  } else if (sparseOrNot.equals("nonsparse")) {
    sparse = false;
  } else {
    throw new IllegalArgumentException("expected sparse or nonsparse but got: " + sparseOrNot);
  }
  Directory dir = FSDirectory.open(indexPath);
  IndexReader reader = DirectoryReader.open(dir);
  System.out.println("READER: " + reader);
  long bytes = 0;
  for (LeafReaderContext ctx : reader.leaves()) {
    CodecReader cr = (CodecReader) ctx.reader();
    System.out.println("\nREADER: " + cr);
    for (Accountable acc : cr.getChildResources()) {
      System.out.println("  " + Accountables.toString(acc));
    }
    bytes += cr.ramBytesUsed();
  }
  System.out.println("HEAP: " + bytes);
  IndexSearcher searcher = new IndexSearcher(reader);
  Random random = new Random(17);
  SearchThread[] threads = new SearchThread[2];
  for (int i = 0; i < threads.length; i++) {
    threads[i] = new SearchThread(i, sparse, searcher, 500, new Random(random.nextLong()));
    threads[i].start();
  }
  for (SearchThread thread : threads) {
    thread.join();
  }
  /*
  SearchThread[] threads = new SearchThread[] {
      new SearchThread(0, sparse, searcher, 1000, new Random(random.nextLong()))};
  threads[0].run();
  */
  for (SearchThread thread : threads) {
    for (String line : thread.results) {
      System.out.println(line);
    }
  }
  IOUtils.close(reader, dir);
}
@Test
public void testNestedChildrenFilter() throws Exception {
  int numParentDocs = scaledRandomIntBetween(0, 32);
  int maxChildDocsPerParent = scaledRandomIntBetween(8, 16);

  Directory dir = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
  for (int i = 0; i < numParentDocs; i++) {
    int numChildDocs = scaledRandomIntBetween(0, maxChildDocsPerParent);
    List<Document> docs = new ArrayList<>(numChildDocs + 1);
    for (int j = 0; j < numChildDocs; j++) {
      Document childDoc = new Document();
      childDoc.add(new StringField("type", "child", Field.Store.NO));
      docs.add(childDoc);
    }
    Document parenDoc = new Document();
    parenDoc.add(new StringField("type", "parent", Field.Store.NO));
    parenDoc.add(new IntField("num_child_docs", numChildDocs, Field.Store.YES));
    docs.add(parenDoc);
    writer.addDocuments(docs);
  }
  IndexReader reader = writer.getReader();
  writer.close();

  IndexSearcher searcher = new IndexSearcher(reader);
  FetchSubPhase.HitContext hitContext = new FetchSubPhase.HitContext();
  BitDocIdSetFilter parentFilter =
      new BitDocIdSetCachingWrapperFilter(
          new QueryWrapperFilter(new TermQuery(new Term("type", "parent"))));
  Filter childFilter = new QueryWrapperFilter(new TermQuery(new Term("type", "child")));
  int checkedParents = 0;
  for (LeafReaderContext leaf : reader.leaves()) {
    DocIdSetIterator parents = parentFilter.getDocIdSet(leaf).iterator();
    for (int parentDoc = parents.nextDoc();
        parentDoc != DocIdSetIterator.NO_MORE_DOCS;
        parentDoc = parents.nextDoc()) {
      int expectedChildDocs =
          leaf.reader().document(parentDoc).getField("num_child_docs").numericValue().intValue();
      hitContext.reset(null, leaf, parentDoc, searcher);
      NestedChildrenFilter nestedChildrenFilter =
          new NestedChildrenFilter(parentFilter, childFilter, hitContext);
      TotalHitCountCollector totalHitCountCollector = new TotalHitCountCollector();
      searcher.search(new ConstantScoreQuery(nestedChildrenFilter), totalHitCountCollector);
      assertThat(totalHitCountCollector.getTotalHits(), equalTo(expectedChildDocs));
      checkedParents++;
    }
  }
  assertThat(checkedParents, equalTo(numParentDocs));
  reader.close();
  dir.close();
}
public void testCollector() throws IOException {
  TotalHitCountCollector collector = new TotalHitCountCollector();
  ProfileCollector profileCollector = new ProfileCollector(collector);
  assertEquals(0, profileCollector.getTime());
  final LeafCollector leafCollector = profileCollector.getLeafCollector(reader.leaves().get(0));
  assertThat(profileCollector.getTime(), greaterThan(0L));
  long time = profileCollector.getTime();
  leafCollector.setScorer(null);
  assertThat(profileCollector.getTime(), greaterThan(time));
  time = profileCollector.getTime();
  leafCollector.collect(0);
  assertThat(profileCollector.getTime(), greaterThan(time));
}
@Override
public void setContext(TransformContext context) {
  try {
    IndexReader reader = qparser.getReq().getSearcher().getIndexReader();
    readerContexts = reader.leaves();
    docValuesArr = new FunctionValues[readerContexts.size()];
    searcher = qparser.getReq().getSearcher();
    fcontext = ValueSource.newContext(searcher);
    this.valueSource.createWeight(fcontext, searcher);
  } catch (IOException e) {
    throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
  }
}
@Override
public IndexOrdinalsFieldData loadGlobal(IndexReader indexReader) {
  if (indexReader.leaves().size() <= 1) {
    // ordinals are already global
    return this;
  }
  try {
    return cache.load(indexReader, this);
  } catch (Throwable e) {
    if (e instanceof ElasticsearchException) {
      throw (ElasticsearchException) e;
    } else {
      throw new ElasticsearchException(e.getMessage(), e);
    }
  }
}
@Override
public Query rewrite(IndexReader reader) throws IOException {
  if (getBoost() != 1f) {
    return super.rewrite(reader);
  }
  if (this.terms.isEmpty()) {
    return new MatchNoDocsQuery();
  } else if (this.terms.size() == 1) {
    return newTermQuery(this.terms.get(0), null);
  }
  final List<LeafReaderContext> leaves = reader.leaves();
  final int maxDoc = reader.maxDoc();
  final TermContext[] contextArray = new TermContext[terms.size()];
  final Term[] queryTerms = this.terms.toArray(new Term[0]);
  collectTermContext(reader, leaves, contextArray, queryTerms);
  return buildQuery(maxDoc, contextArray, queryTerms);
}
void createDocumentNode(final DocumentDescriptor inDescriptor) throws IOException {
  try {
    _document = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
    _rootNode = _document.createElement("document");
  } catch (ParserConfigurationException e) {
    e.printStackTrace();
    System.exit(1);
  }

  AtomicReader segmentReader = _indexReader.leaves().get(inDescriptor.segmentNumber).reader();
  _rootNode.setAttribute("id", DocumentIdOperations.documentDescriptorToId(inDescriptor));
  // TODO: implement the proper way of building a title from the production report
  _rootNode.setAttribute("title", buildDocumentTitle(segmentReader, inDescriptor));
  _rootNode.setAttribute("path", "ruscorpora.ru");
  _rootNode.setAttribute(
      "tagging", Attributes.getDocAttribute(segmentReader, inDescriptor.localId, "tagging"));
  _rootNode.setAttribute("snippets", "0");

  Element attributesNode = _document.createElement("attributes");
  _rootNode.appendChild(attributesNode);
  FieldInfos fields = segmentReader.getFieldInfos();
  for (int fieldIndex = 0; fieldIndex != fields.size(); ++fieldIndex) {
    FieldInfo field = fields.fieldInfo(fieldIndex);
    // TODO: understand why field may turn into null
    if (field == null) {
      continue;
    }
    String name = field.name;
    if (Attributes.ATTRIBUTES.contains(name)
        || Attributes.ATTRIBUTES_FOR_REPORT.contains(name)
        || Attributes.ATTRIBUTES_FOR_WORD_INFO.contains(name)
        || !field.hasDocValues()) {
      // it's a word attribute
      continue;
    }
    Element attrNode = _document.createElement("attr");
    attrNode.setAttribute("name", name);
    attrNode.setAttribute(
        "value", Attributes.getDocAttribute(segmentReader, inDescriptor.localId, name));
    attributesNode.appendChild(attrNode);
  }
}
private Query newTermQuery(IndexReader reader, Term term) throws IOException {
  if (ignoreTF) {
    return new ConstantScoreQuery(new TermQuery(term));
  } else {
    // we build an artificial TermContext that will give an overall df and ttf equal to 1
    TermContext context = new TermContext(reader.getContext());
    for (LeafReaderContext leafContext : reader.leaves()) {
      Terms terms = leafContext.reader().terms(term.field());
      if (terms != null) {
        TermsEnum termsEnum = terms.iterator();
        if (termsEnum.seekExact(term.bytes())) {
          int freq = 1 - context.docFreq(); // we want the total df and ttf to be 1
          context.register(termsEnum.termState(), leafContext.ord, freq, freq);
        }
      }
    }
    return new TermQuery(term, context);
  }
}
public void initialize(Engine.Searcher docSearcher, ParsedDocument parsedDocument) {
  this.docSearcher = docSearcher;
  IndexReader indexReader = docSearcher.reader();
  LeafReaderContext atomicReaderContext = indexReader.leaves().get(0);
  LeafSearchLookup leafLookup = lookup().getLeafSearchLookup(atomicReaderContext);
  leafLookup.setDocument(0);
  leafLookup.source().setSource(parsedDocument.source());
  Map<String, SearchHitField> fields = new HashMap<>();
  for (IndexableField field : parsedDocument.rootDoc().getFields()) {
    fields.put(field.name(), new InternalSearchHitField(field.name(), Collections.emptyList()));
  }
  hitContext()
      .reset(
          new InternalSearchHit(0, "unknown", new StringText(parsedDocument.type()), fields),
          atomicReaderContext,
          0,
          docSearcher.searcher());
}
/**
 * Returns total in-heap bytes used by all suggesters. This method has CPU cost
 * <code>O(numIndexedFields)</code>.
 *
 * @param fieldNamePatterns if non-null, any completion field name matching any of these patterns
 *     will break out its in-heap bytes separately in the returned {@link CompletionStats}
 */
public CompletionStats completionStats(IndexReader indexReader, String... fieldNamePatterns) {
  CompletionStats completionStats = new CompletionStats();
  for (LeafReaderContext atomicReaderContext : indexReader.leaves()) {
    LeafReader atomicReader = atomicReaderContext.reader();
    try {
      Fields fields = atomicReader.fields();
      for (String fieldName : fields) {
        Terms terms = fields.terms(fieldName);
        if (terms instanceof CompletionTerms) {
          CompletionTerms completionTerms = (CompletionTerms) terms;
          completionStats.add(completionTerms.stats(fieldNamePatterns));
        }
      }
    } catch (IOException ioe) {
      logger.error("Could not get completion stats", ioe);
    }
  }
  return completionStats;
}
public static long getTotalTermFreq(IndexReader reader, Term term) throws Exception {
  long totalTF = 0L;
  for (final AtomicReaderContext ctx : reader.leaves()) {
    AtomicReader r = ctx.reader();
    if (!r.hasDeletions()) {
      // TODO: we could do this up front, during the scan (next()), instead of
      // after-the-fact here w/ seek, if the codec supports it and there are no del docs...
      final long totTF = r.totalTermFreq(term);
      if (totTF != -1) {
        totalTF += totTF;
        continue;
      }
      // otherwise we fall-through
    }
    // note: what should we do if field omits freqs? currently it counts as 1...
    DocsEnum de = r.termDocsEnum(term);
    if (de != null) {
      while (de.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
        totalTF += de.freq();
      }
    }
  }
  return totalTF;
}
// LUCENE-5644: for first segment, two threads each indexed one doc (likely concurrently), but for
// second segment, each thread indexed the doc NOT at the same time, and should have shared the
// same thread state / segment
public void testSegmentCountOnFlushBasic() throws Exception {
  Directory dir = newDirectory();
  final IndexWriter w =
      new IndexWriter(
          dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
  final CountDownLatch startingGun = new CountDownLatch(1);
  final CountDownLatch startDone = new CountDownLatch(2);
  final CountDownLatch middleGun = new CountDownLatch(1);
  final CountDownLatch finalGun = new CountDownLatch(1);
  Thread[] threads = new Thread[2];
  for (int i = 0; i < threads.length; i++) {
    final int threadID = i;
    threads[i] =
        new Thread() {
          @Override
          public void run() {
            try {
              startingGun.await();
              Document doc = new Document();
              doc.add(newTextField("field", "here is some text", Field.Store.NO));
              w.addDocument(doc);
              startDone.countDown();

              middleGun.await();
              if (threadID == 0) {
                w.addDocument(doc);
              } else {
                finalGun.await();
                w.addDocument(doc);
              }
            } catch (Exception e) {
              throw new RuntimeException(e);
            }
          }
        };
    threads[i].start();
  }

  startingGun.countDown();
  startDone.await();

  IndexReader r = DirectoryReader.open(w, true);
  assertEquals(2, r.numDocs());
  int numSegments = r.leaves().size();
  // 1 segment if the threads ran sequentially, else 2:
  assertTrue(numSegments <= 2);
  r.close();

  middleGun.countDown();
  threads[0].join();

  finalGun.countDown();
  threads[1].join();

  r = DirectoryReader.open(w, true);
  assertEquals(4, r.numDocs());
  // Both threads should have shared a single thread state since they did not try to index
  // concurrently:
  assertEquals(1 + numSegments, r.leaves().size());
  r.close();

  w.close();
  dir.close();
}
public void testDocValuesMemoryIndexVsNormalIndex() throws Exception {
  Document doc = new Document();
  long randomLong = random().nextLong();
  doc.add(new NumericDocValuesField("numeric", randomLong));
  if (random().nextBoolean()) {
    doc.add(new LegacyLongField("numeric", randomLong, Field.Store.NO));
  }
  int numValues = atLeast(5);
  for (int i = 0; i < numValues; i++) {
    randomLong = random().nextLong();
    doc.add(new SortedNumericDocValuesField("sorted_numeric", randomLong));
    if (random().nextBoolean()) {
      // randomly duplicate field/value
      doc.add(new SortedNumericDocValuesField("sorted_numeric", randomLong));
    }
    if (random().nextBoolean()) {
      doc.add(new LegacyLongField("numeric", randomLong, Field.Store.NO));
    }
  }
  BytesRef randomTerm = new BytesRef(randomTerm());
  doc.add(new BinaryDocValuesField("binary", randomTerm));
  if (random().nextBoolean()) {
    doc.add(new StringField("binary", randomTerm, Field.Store.NO));
  }
  randomTerm = new BytesRef(randomTerm());
  doc.add(new SortedDocValuesField("sorted", randomTerm));
  if (random().nextBoolean()) {
    doc.add(new StringField("sorted", randomTerm, Field.Store.NO));
  }
  numValues = atLeast(5);
  for (int i = 0; i < numValues; i++) {
    randomTerm = new BytesRef(randomTerm());
    doc.add(new SortedSetDocValuesField("sorted_set", randomTerm));
    if (random().nextBoolean()) {
      // randomly duplicate field/value
      doc.add(new SortedSetDocValuesField("sorted_set", randomTerm));
    }
    if (random().nextBoolean()) {
      // randomly just add a normal string field
      doc.add(new StringField("sorted_set", randomTerm, Field.Store.NO));
    }
  }
  MockAnalyzer mockAnalyzer = new MockAnalyzer(random());
  MemoryIndex memoryIndex = MemoryIndex.fromDocument(doc, mockAnalyzer);
  IndexReader indexReader = memoryIndex.createSearcher().getIndexReader();
  LeafReader leafReader = indexReader.leaves().get(0).reader();

  Directory dir = newDirectory();
  IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(random(), mockAnalyzer));
  writer.addDocument(doc);
  writer.close();
  IndexReader controlIndexReader = DirectoryReader.open(dir);
  LeafReader controlLeafReader = controlIndexReader.leaves().get(0).reader();

  NumericDocValues numericDocValues = leafReader.getNumericDocValues("numeric");
  NumericDocValues controlNumericDocValues = controlLeafReader.getNumericDocValues("numeric");
  assertEquals(controlNumericDocValues.get(0), numericDocValues.get(0));

  SortedNumericDocValues sortedNumericDocValues =
      leafReader.getSortedNumericDocValues("sorted_numeric");
  sortedNumericDocValues.setDocument(0);
  SortedNumericDocValues controlSortedNumericDocValues =
      controlLeafReader.getSortedNumericDocValues("sorted_numeric");
  controlSortedNumericDocValues.setDocument(0);
  assertEquals(controlSortedNumericDocValues.count(), sortedNumericDocValues.count());
  for (int i = 0; i < controlSortedNumericDocValues.count(); i++) {
    assertEquals(controlSortedNumericDocValues.valueAt(i), sortedNumericDocValues.valueAt(i));
  }

  BinaryDocValues binaryDocValues = leafReader.getBinaryDocValues("binary");
  BinaryDocValues controlBinaryDocValues = controlLeafReader.getBinaryDocValues("binary");
  assertEquals(controlBinaryDocValues.get(0), binaryDocValues.get(0));

  SortedDocValues sortedDocValues = leafReader.getSortedDocValues("sorted");
  SortedDocValues controlSortedDocValues = controlLeafReader.getSortedDocValues("sorted");
  assertEquals(controlSortedDocValues.getValueCount(), sortedDocValues.getValueCount());
  assertEquals(controlSortedDocValues.get(0), sortedDocValues.get(0));
  assertEquals(controlSortedDocValues.getOrd(0), sortedDocValues.getOrd(0));
  assertEquals(controlSortedDocValues.lookupOrd(0), sortedDocValues.lookupOrd(0));

  SortedSetDocValues sortedSetDocValues = leafReader.getSortedSetDocValues("sorted_set");
  sortedSetDocValues.setDocument(0);
  SortedSetDocValues controlSortedSetDocValues =
      controlLeafReader.getSortedSetDocValues("sorted_set");
  controlSortedSetDocValues.setDocument(0);
  assertEquals(controlSortedSetDocValues.getValueCount(), sortedSetDocValues.getValueCount());
  for (long controlOrd = controlSortedSetDocValues.nextOrd();
      controlOrd != SortedSetDocValues.NO_MORE_ORDS;
      controlOrd = controlSortedSetDocValues.nextOrd()) {
    assertEquals(controlOrd, sortedSetDocValues.nextOrd());
    assertEquals(
        controlSortedSetDocValues.lookupOrd(controlOrd), sortedSetDocValues.lookupOrd(controlOrd));
  }
  assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSetDocValues.nextOrd());

  indexReader.close();
  controlIndexReader.close();
  dir.close();
}
@Override
protected Suggest.Suggestion<? extends Suggest.Suggestion.Entry<? extends Suggest.Suggestion.Entry.Option>>
    innerExecute(
        String name,
        CompletionSuggestionContext suggestionContext,
        IndexReader indexReader,
        CharsRef spare)
        throws IOException {
  if (suggestionContext.mapper() == null
      || !(suggestionContext.mapper() instanceof CompletionFieldMapper)) {
    throw new ElasticsearchException(
        "Field [" + suggestionContext.getField() + "] is not a completion suggest field");
  }

  CompletionSuggestion completionSuggestion =
      new CompletionSuggestion(name, suggestionContext.getSize());
  UnicodeUtil.UTF8toUTF16(suggestionContext.getText(), spare);

  CompletionSuggestion.Entry completionSuggestEntry =
      new CompletionSuggestion.Entry(new StringText(spare.toString()), 0, spare.length());
  completionSuggestion.addTerm(completionSuggestEntry);

  String fieldName = suggestionContext.getField();
  Map<String, CompletionSuggestion.Entry.Option> results =
      Maps.newHashMapWithExpectedSize(indexReader.leaves().size() * suggestionContext.getSize());
  for (AtomicReaderContext atomicReaderContext : indexReader.leaves()) {
    AtomicReader atomicReader = atomicReaderContext.reader();
    Terms terms = atomicReader.fields().terms(fieldName);
    if (terms instanceof Completion090PostingsFormat.CompletionTerms) {
      final Completion090PostingsFormat.CompletionTerms lookupTerms =
          (Completion090PostingsFormat.CompletionTerms) terms;
      final Lookup lookup = lookupTerms.getLookup(suggestionContext.mapper(), suggestionContext);
      if (lookup == null) {
        // we don't have a lookup for this segment.. this might be possible if a merge dropped all
        // docs from the segment that had a value in this segment.
        continue;
      }
      List<Lookup.LookupResult> lookupResults =
          lookup.lookup(spare, false, suggestionContext.getSize());
      for (Lookup.LookupResult res : lookupResults) {
        final String key = res.key.toString();
        final float score = res.value;
        final Option value = results.get(key);
        if (value == null) {
          final Option option =
              new CompletionSuggestion.Entry.Option(
                  new StringText(key),
                  score,
                  res.payload == null ? null : new BytesArray(res.payload));
          results.put(key, option);
        } else if (value.getScore() < score) {
          value.setScore(score);
          value.setPayload(res.payload == null ? null : new BytesArray(res.payload));
        }
      }
    }
  }
  final List<CompletionSuggestion.Entry.Option> options =
      new ArrayList<CompletionSuggestion.Entry.Option>(results.values());
  CollectionUtil.introSort(options, scoreComparator);

  int optionCount = Math.min(suggestionContext.getSize(), options.size());
  for (int i = 0; i < optionCount; i++) {
    completionSuggestEntry.addOption(options.get(i));
  }

  return completionSuggestion;
}
@Override
public IndexFieldData.WithOrdinals build(
    final IndexReader indexReader,
    IndexFieldData.WithOrdinals indexFieldData,
    Settings settings,
    CircuitBreakerService breakerService)
    throws IOException {
  assert indexReader.leaves().size() > 1;
  long startTime = System.currentTimeMillis();

  // It makes sense to make the overhead ratio configurable for the mapping from segment ords to
  // global ords. However, other mappings are never the bottleneck and only used to get the
  // original value from an ord, so it makes sense to force COMPACT for them.
  final float acceptableOverheadRatio =
      settings.getAsFloat("acceptable_overhead_ratio", PackedInts.FAST);
  final AppendingPackedLongBuffer globalOrdToFirstSegment =
      new AppendingPackedLongBuffer(PackedInts.COMPACT);
  final MonotonicAppendingLongBuffer globalOrdToFirstSegmentDelta =
      new MonotonicAppendingLongBuffer(PackedInts.COMPACT);

  FieldDataType fieldDataType = indexFieldData.getFieldDataType();
  int defaultThreshold =
      settings.getAsInt(
          ORDINAL_MAPPING_THRESHOLD_INDEX_SETTING_KEY, ORDINAL_MAPPING_THRESHOLD_DEFAULT);
  int threshold =
      fieldDataType.getSettings().getAsInt(ORDINAL_MAPPING_THRESHOLD_KEY, defaultThreshold);
  OrdinalMappingSourceBuilder ordinalMappingBuilder =
      new OrdinalMappingSourceBuilder(
          indexReader.leaves().size(), acceptableOverheadRatio, threshold);

  long currentGlobalOrdinal = 0;
  final AtomicFieldData.WithOrdinals[] withOrdinals =
      new AtomicFieldData.WithOrdinals[indexReader.leaves().size()];
  TermIterator termIterator = new TermIterator(indexFieldData, indexReader.leaves(), withOrdinals);
  for (BytesRef term = termIterator.next(); term != null; term = termIterator.next()) {
    globalOrdToFirstSegment.add(termIterator.firstReaderIndex());
    long globalOrdinalDelta = currentGlobalOrdinal - termIterator.firstLocalOrdinal();
    globalOrdToFirstSegmentDelta.add(globalOrdinalDelta);
    for (TermIterator.LeafSource leafSource : termIterator.competitiveLeafs()) {
      ordinalMappingBuilder.onOrdinal(
          leafSource.context.ord, leafSource.tenum.ord(), currentGlobalOrdinal);
    }
    currentGlobalOrdinal++;
  }

  // ram used for the globalOrd to segmentOrd and segmentOrd to firstReaderIndex lookups
  long memorySizeInBytesCounter = 0;
  globalOrdToFirstSegment.freeze();
  memorySizeInBytesCounter += globalOrdToFirstSegment.ramBytesUsed();
  globalOrdToFirstSegmentDelta.freeze();
  memorySizeInBytesCounter += globalOrdToFirstSegmentDelta.ramBytesUsed();

  final long maxOrd = currentGlobalOrdinal;
  OrdinalMappingSource[] segmentOrdToGlobalOrdLookups = ordinalMappingBuilder.build(maxOrd);
  // add ram used for the main segmentOrd to globalOrd lookups
  memorySizeInBytesCounter += ordinalMappingBuilder.getMemorySizeInBytes();
  final long memorySizeInBytes = memorySizeInBytesCounter;
  breakerService.getBreaker().addWithoutBreaking(memorySizeInBytes);

  if (logger.isDebugEnabled()) {
    // this does include the [] from the array in the impl name
    String implName = segmentOrdToGlobalOrdLookups.getClass().getSimpleName();
    logger.debug(
        "Global-ordinals[{}][{}][{}] took {} ms",
        implName,
        indexFieldData.getFieldNames().fullName(),
        maxOrd,
        (System.currentTimeMillis() - startTime));
  }
  return new InternalGlobalOrdinalsIndexFieldData(
      indexFieldData.index(),
      settings,
      indexFieldData.getFieldNames(),
      fieldDataType,
      withOrdinals,
      globalOrdToFirstSegment,
      globalOrdToFirstSegmentDelta,
      segmentOrdToGlobalOrdLookups,
      memorySizeInBytes);
}
@Override
public void execute(String[] args, PrintStream out) throws Exception {
  String field = null;
  String termVal = null;
  try {
    field = args[0];
  } catch (Exception e) {
    field = null;
  }

  if (field != null) {
    String[] parts = field.split(":");
    if (parts.length > 1) {
      field = parts[0];
      termVal = parts[1];
    }
  }

  if (field == null || termVal == null) {
    out.println("usage: field:term");
    out.flush();
    return;
  }

  IndexReader reader = ctx.getIndexReader();
  List<AtomicReaderContext> leaves = reader.leaves();
  int docBase = 0;
  int numPerPage = 20;

  for (AtomicReaderContext leaf : leaves) {
    AtomicReader atomicReader = leaf.reader();
    Terms terms = atomicReader.terms(field);
    if (terms == null) {
      continue;
    }
    boolean hasPositions = terms.hasPositions();

    if (terms != null && termVal != null) {
      TermsEnum te = terms.iterator(null);
      int count = 0;
      if (te.seekExact(new BytesRef(termVal), true)) {
        if (hasPositions) {
          DocsAndPositionsEnum iter = te.docsAndPositions(atomicReader.getLiveDocs(), null);
          int docid;
          while ((docid = iter.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            count++;
            out.print("docid: " + (docid + docBase) + ", freq: " + iter.freq() + ", ");
            for (int i = 0; i < iter.freq(); ++i) {
              out.print("pos " + i + ": " + iter.nextPosition());
              BytesRef payload = iter.getPayload();
              if (payload != null) {
                out.print(",payload: " + payload);
              }
              out.print(";");
            }
            out.println();
            if (ctx.isInteractiveMode()) {
              if (count % numPerPage == 0) {
                out.println("Ctrl-D to break");
                int ch = System.in.read();
                if (ch == -1) {
                  out.flush();
                  return;
                }
              }
            }
          }
        } else {
          DocsEnum iter = te.docs(atomicReader.getLiveDocs(), null);
          int docid;
          while ((docid = iter.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            count++;
            out.println("docid: " + (docid + docBase));
            if (ctx.isInteractiveMode()) {
              if (count % numPerPage == 0) {
                out.println("Ctrl-D to break");
                int ch = System.in.read();
                if (ch == -1) {
                  out.flush();
                  return;
                }
              }
            }
          }
        }
      }
    }
    docBase += atomicReader.maxDoc();
  }
}