/**
   * Find words for a more-like-this query former.
   *
   * @param docNum the id of the lucene document from which to find terms
   */
  private PriorityQueue<ScoreTerm> retrieveTerms(int docNum) throws IOException {
    Map<String, Int> termFreqMap = new HashMap<>();
    for (String fieldName : fieldNames) {
      final Fields vectors = ir.getTermVectors(docNum);
      final Terms vector;
      if (vectors != null) {
        vector = vectors.terms(fieldName);
      } else {
        vector = null;
      }

      // field does not store term vector info
      if (vector == null) {
        Document d = ir.document(docNum);
        IndexableField fields[] = d.getFields(fieldName);
        for (IndexableField field : fields) {
          final String stringValue = field.stringValue();
          if (stringValue != null) {
            addTermFrequencies(new FastStringReader(stringValue), termFreqMap, fieldName);
          }
        }
      } else {
        addTermFrequencies(termFreqMap, vector, fieldName);
      }
    }

    return createQueue(termFreqMap);
  }
示例#2
0
 @BeforeClass
 public static void beforeClass() throws Exception {
   fieldInfos = new FieldInfos.Builder();
   DocHelper.setupDoc(testDoc);
   for (IndexableField field : testDoc) {
     fieldInfos.addOrUpdate(field.name(), field.fieldType());
   }
   dir = newDirectory();
   IndexWriterConfig conf =
       newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
           .setMergePolicy(newLogMergePolicy());
   ((LogMergePolicy) conf.getMergePolicy()).setUseCompoundFile(false);
   IndexWriter writer = new IndexWriter(dir, conf);
   writer.addDocument(testDoc);
   writer.close();
   FaultyIndexInput.doFail = false;
 }
  private Fields generateTermVectorsFromDoc(TermVectorRequest request, boolean doAllFields)
      throws IOException {
    // parse the document, at the moment we do update the mapping, just like percolate
    ParsedDocument parsedDocument =
        parseDocument(indexShard.shardId().getIndex(), request.type(), request.doc());

    // select the right fields and generate term vectors
    ParseContext.Document doc = parsedDocument.rootDoc();
    Collection<String> seenFields = new HashSet<>();
    Collection<GetField> getFields = new HashSet<>();
    for (IndexableField field : doc.getFields()) {
      FieldMapper fieldMapper = indexShard.mapperService().smartNameFieldMapper(field.name());
      if (seenFields.contains(field.name())) {
        continue;
      } else {
        seenFields.add(field.name());
      }
      if (!isValidField(fieldMapper)) {
        continue;
      }
      if (request.selectedFields() == null
          && !doAllFields
          && !fieldMapper.fieldType().storeTermVectors()) {
        continue;
      }
      if (request.selectedFields() != null && !request.selectedFields().contains(field.name())) {
        continue;
      }
      String[] values = doc.getValues(field.name());
      getFields.add(new GetField(field.name(), Arrays.asList((Object[]) values)));
    }
    return generateTermVectors(getFields, request.offsets(), request.perFieldAnalyzer());
  }
 @BeforeClass
 public static void beforeClass() throws Exception {
   testDoc = new Document();
   fieldInfos = new FieldInfos.Builder();
   DocHelper.setupDoc(testDoc);
   for (IndexableField field : testDoc.getFields()) {
     FieldInfo fieldInfo = fieldInfos.getOrAdd(field.name());
     IndexableFieldType ift = field.fieldType();
     fieldInfo.setIndexOptions(ift.indexOptions());
     if (ift.omitNorms()) {
       fieldInfo.setOmitsNorms();
     }
     fieldInfo.setDocValuesType(ift.docValuesType());
   }
   dir = newDirectory();
   IndexWriterConfig conf =
       newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy());
   conf.getMergePolicy().setNoCFSRatio(0.0);
   IndexWriter writer = new IndexWriter(dir, conf);
   writer.addDocument(testDoc);
   writer.close();
 }