예제 #1
0
  /**
   * Collects, per document, the offsets of every occurrence of {@code term} in the field
   * {@code ngramInfoFieldname}.
   *
   * <p>The postings of the field are used to find the documents containing the term; each
   * document's term vector is then consulted for the start/end offsets. Documents are keyed by a
   * {@code JATEDocument} built from the stored {@code idFieldname} value.
   *
   * <p>NOTE(review): assumes term vectors are stored for {@code ngramInfoFieldname} in every
   * matching document; a missing term vector is not handled here - confirm against the schema.
   *
   * @param term the term text to look up
   * @return a {@code TermInfo} whose offsets map has one entry per document in which the term was
   *     located; it has no entries when the field or term is absent from the index
   * @throws IOException if reading the index fails
   */
  public TermInfo collect(String term) throws IOException {
    TermInfo info = new TermInfo();
    // BytesRef(CharSequence) encodes as UTF-8, which is how Lucene stores terms. The no-arg
    // String.getBytes() depends on the platform charset and can miss non-ASCII terms.
    BytesRef luceneTerm = new BytesRef(term);
    // Postings tell us which documents contain the term, but carry no offset information.
    PostingsEnum postings =
        MultiFields.getTermDocsEnum(indexReader, ngramInfoFieldname, luceneTerm);
    if (postings == null) {
      // Field or term does not exist in the index: nothing to collect.
      return info;
    }

    for (int docId = postings.nextDoc();
        docId != PostingsEnum.NO_MORE_DOCS;
        docId = postings.nextDoc()) {
      // The per-document term vector is where the offsets live.
      TermsEnum it = indexReader.getTermVector(docId, ngramInfoFieldname).iterator();
      if (!it.seekExact(luceneTerm)) {
        // Term is not in this document's term vector; the enum is unpositioned, so skip.
        continue;
      }
      PostingsEnum postingsInDoc = it.postings(null, PostingsEnum.OFFSETS);
      // Position the enum on its document before reading freq()/positions.
      postingsInDoc.nextDoc();

      Document doc = indexReader.document(docId);
      String id = doc.get(idFieldname);
      JATEDocument jd = new JATEDocument(id);
      // int[] uses identity equality, so every {start, end} pair is kept as its own element.
      Set<int[]> offsets = new HashSet<>();
      int totalFreq = postingsInDoc.freq();
      for (int i = 0; i < totalFreq; i++) {
        // Offsets are only valid after advancing to the next position.
        postingsInDoc.nextPosition();
        offsets.add(new int[] {postingsInDoc.startOffset(), postingsInDoc.endOffset()});
      }
      info.getOffsets().put(jd, offsets);
    }

    return info;
  }
 /**
  * Returns the {@code FieldLookup} for {@code name}, creating and caching it on first use and
  * loading its values for the current {@code docId} if they have not been loaded yet.
  *
  * <p>NOTE(review): if {@code ImmutableMap} is Guava's, {@code ImmutableMap.of} rejects a null
  * value, which is what the visitor's map yields when it holds nothing for the field - confirm.
  *
  * @param name the field name as requested by the caller
  * @return the cached or newly created lookup with its fields populated
  * @throws IllegalArgumentException if no field type is mapped for {@code name}
  * @throws ElasticsearchParseException if reading the document fails with an {@code IOException}
  */
 private FieldLookup loadFieldData(String name) {
   FieldLookup lookup = cachedFieldData.get(name);
   if (lookup == null) {
     // First request for this field: resolve its mapping and remember the lookup.
     MappedFieldType mappedType = mapperService.smartNameFieldType(name, types);
     if (mappedType == null) {
       String message =
           "No field found for [" + name + "] in mapping with types " + Arrays.toString(types);
       throw new IllegalArgumentException(message);
     }
     lookup = new FieldLookup(mappedType);
     cachedFieldData.put(name, lookup);
   }

   if (lookup.fields() != null) {
     // Values were already loaded for this lookup.
     return lookup;
   }

   String indexName = lookup.fieldType().names().indexName();
   fieldVisitor.reset(indexName);
   try {
     reader.document(docId, fieldVisitor);
     fieldVisitor.postProcess(lookup.fieldType());
     // Expose the loaded values under the name the caller asked for.
     lookup.fields(ImmutableMap.of(name, fieldVisitor.fields().get(indexName)));
   } catch (IOException e) {
     throw new ElasticsearchParseException("failed to load field [{}]", e, name);
   }
   return lookup;
 }
 /**
  * Refreshes {@code shard} and gathers the {@code Uid} of every document that is not marked as
  * deleted, across all leaf readers of a freshly acquired searcher.
  *
  * @param shard the shard to scan
  * @return the set of UIDs read from the {@code UidFieldMapper.NAME} stored field
  * @throws IOException if reading a stored document fails
  */
 private Set<Uid> getShardDocUIDs(final IndexShard shard) throws IOException {
   shard.refresh("get_uids");
   try (Engine.Searcher searcher = shard.acquireSearcher("test")) {
     final Set<Uid> uids = new HashSet<>();
     for (LeafReaderContext ctx : searcher.reader().leaves()) {
       final LeafReader leaf = ctx.reader();
       final Bits live = leaf.getLiveDocs();
       for (int doc = 0; doc < leaf.maxDoc(); doc++) {
         // A null liveDocs is treated as "every document is live".
         if (live != null && !live.get(doc)) {
           continue;
         }
         // Load only the uid stored field for this document.
         Document stored = leaf.document(doc, Collections.singleton(UidFieldMapper.NAME));
         uids.add(Uid.createUid(stored.get(UidFieldMapper.NAME)));
       }
     }
     return uids;
   }
 }