public TermInfo collect(String term) throws IOException { TermInfo info = new TermInfo(); BytesRef luceneTerm = new BytesRef(term.getBytes()); // this gives documents in which the term is found, but no offset information can be retrieved PostingsEnum postings = MultiFields.getTermDocsEnum(indexReader, ngramInfoFieldname, luceneTerm); // now go through each document int docId = postings.nextDoc(); while (docId != PostingsEnum.NO_MORE_DOCS) { // get the term vector for that document. TermsEnum it = indexReader.getTermVector(docId, ngramInfoFieldname).iterator(); // find the term of interest it.seekExact(luceneTerm); // get its posting info. this will contain offset info PostingsEnum postingsInDoc = it.postings(null, PostingsEnum.OFFSETS); postingsInDoc.nextDoc(); Document doc = indexReader.document(docId); String id = doc.get(idFieldname); JATEDocument jd = new JATEDocument(id); Set<int[]> offsets = new HashSet<>(); int totalFreq = postingsInDoc.freq(); for (int i = 0; i < totalFreq; i++) { postingsInDoc.nextPosition(); offsets.add(new int[] {postingsInDoc.startOffset(), postingsInDoc.endOffset()}); } info.getOffsets().put(jd, offsets); docId = postings.nextDoc(); } return info; }
/**
 * Returns the {@code FieldLookup} for {@code name}, creating and caching it on first use
 * and loading its values for the document identified by {@code docId} when they have not
 * been loaded yet.
 *
 * @param name the field name to resolve through the mapper service
 * @return the lookup for the field, with its fields populated
 * @throws IllegalArgumentException if no field type is found for {@code name} in the mapping
 * @throws ElasticsearchParseException if reading the document fails with an {@code IOException}
 */
private FieldLookup loadFieldData(String name) {
    FieldLookup lookup = cachedFieldData.get(name);
    if (lookup == null) {
        MappedFieldType resolvedType = mapperService.smartNameFieldType(name, types);
        if (resolvedType == null) {
            throw new IllegalArgumentException(
                    "No field found for [" + name + "] in mapping with types " + Arrays.toString(types));
        }
        lookup = new FieldLookup(resolvedType);
        cachedFieldData.put(name, lookup);
    }

    // Values already loaded: nothing more to do.
    if (lookup.fields() != null) {
        return lookup;
    }

    // The visitor is keyed by the index name of the field, not the requested name.
    String indexName = lookup.fieldType().names().indexName();
    fieldVisitor.reset(indexName);
    try {
        reader.document(docId, fieldVisitor);
        fieldVisitor.postProcess(lookup.fieldType());
        // NOTE(review): ImmutableMap.of rejects null values - confirm the visitor always
        // yields an entry for the index name here.
        lookup.fields(ImmutableMap.of(name, fieldVisitor.fields().get(indexName)));
    } catch (IOException e) {
        throw new ElasticsearchParseException("failed to load field [{}]", e, name);
    }
    return lookup;
}
/**
 * Gathers the {@code Uid} of every live document in the given shard.
 *
 * <p>The shard is refreshed first, then each leaf reader is scanned document by document;
 * only the uid field of each document is loaded.
 *
 * @param shard the shard to scan
 * @return the uids of all documents that are not marked deleted
 * @throws IOException if reading a document fails
 */
private Set<Uid> getShardDocUIDs(final IndexShard shard) throws IOException {
    shard.refresh("get_uids");
    try (Engine.Searcher searcher = shard.acquireSearcher("test")) {
        final Set<String> uidFieldOnly = Collections.singleton(UidFieldMapper.NAME);
        final Set<Uid> collected = new HashSet<>();
        for (LeafReaderContext context : searcher.reader().leaves()) {
            final LeafReader leaf = context.reader();
            // A null Bits means the leaf has no deletions, so every document is live.
            final Bits live = leaf.getLiveDocs();
            final int docCount = leaf.maxDoc();
            for (int docId = 0; docId < docCount; docId++) {
                if (live != null && !live.get(docId)) {
                    continue; // deleted document
                }
                Document stored = leaf.document(docId, uidFieldOnly);
                collected.add(Uid.createUid(stored.get(UidFieldMapper.NAME)));
            }
        }
        return collected;
    }
}