/** * Find words for a more-like-this query former. * * @param docNum the id of the lucene document from which to find terms */ private PriorityQueue<ScoreTerm> retrieveTerms(int docNum) throws IOException { Map<String, Int> termFreqMap = new HashMap<>(); for (String fieldName : fieldNames) { final Fields vectors = ir.getTermVectors(docNum); final Terms vector; if (vectors != null) { vector = vectors.terms(fieldName); } else { vector = null; } // field does not store term vector info if (vector == null) { Document d = ir.document(docNum); IndexableField fields[] = d.getFields(fieldName); for (IndexableField field : fields) { final String stringValue = field.stringValue(); if (stringValue != null) { addTermFrequencies(new FastStringReader(stringValue), termFreqMap, fieldName); } } } else { addTermFrequencies(termFreqMap, vector, fieldName); } } return createQueue(termFreqMap); }
@BeforeClass public static void beforeClass() throws Exception { fieldInfos = new FieldInfos.Builder(); DocHelper.setupDoc(testDoc); for (IndexableField field : testDoc) { fieldInfos.addOrUpdate(field.name(), field.fieldType()); } dir = newDirectory(); IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())) .setMergePolicy(newLogMergePolicy()); ((LogMergePolicy) conf.getMergePolicy()).setUseCompoundFile(false); IndexWriter writer = new IndexWriter(dir, conf); writer.addDocument(testDoc); writer.close(); FaultyIndexInput.doFail = false; }
private Fields generateTermVectorsFromDoc(TermVectorRequest request, boolean doAllFields) throws IOException { // parse the document, at the moment we do update the mapping, just like percolate ParsedDocument parsedDocument = parseDocument(indexShard.shardId().getIndex(), request.type(), request.doc()); // select the right fields and generate term vectors ParseContext.Document doc = parsedDocument.rootDoc(); Collection<String> seenFields = new HashSet<>(); Collection<GetField> getFields = new HashSet<>(); for (IndexableField field : doc.getFields()) { FieldMapper fieldMapper = indexShard.mapperService().smartNameFieldMapper(field.name()); if (seenFields.contains(field.name())) { continue; } else { seenFields.add(field.name()); } if (!isValidField(fieldMapper)) { continue; } if (request.selectedFields() == null && !doAllFields && !fieldMapper.fieldType().storeTermVectors()) { continue; } if (request.selectedFields() != null && !request.selectedFields().contains(field.name())) { continue; } String[] values = doc.getValues(field.name()); getFields.add(new GetField(field.name(), Arrays.asList((Object[]) values))); } return generateTermVectors(getFields, request.offsets(), request.perFieldAnalyzer()); }
@BeforeClass public static void beforeClass() throws Exception { testDoc = new Document(); fieldInfos = new FieldInfos.Builder(); DocHelper.setupDoc(testDoc); for (IndexableField field : testDoc.getFields()) { FieldInfo fieldInfo = fieldInfos.getOrAdd(field.name()); IndexableFieldType ift = field.fieldType(); fieldInfo.setIndexOptions(ift.indexOptions()); if (ift.omitNorms()) { fieldInfo.setOmitsNorms(); } fieldInfo.setDocValuesType(ift.docValuesType()); } dir = newDirectory(); IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()); conf.getMergePolicy().setNoCFSRatio(0.0); IndexWriter writer = new IndexWriter(dir, conf); writer.addDocument(testDoc); writer.close(); }