@Override public void hitExecute(SearchContext context, HitContext hitContext) { if (context.getFetchSubPhaseContext(CONTEXT_FACTORY).hitExecutionNeeded() == false) { return; } String field = context.getFetchSubPhaseContext(CONTEXT_FACTORY).getField(); if (hitContext.hit().fieldsOrNull() == null) { hitContext.hit().fields(new HashMap<>()); } SearchHitField hitField = hitContext.hit().fields().get(NAMES[0]); if (hitField == null) { hitField = new InternalSearchHitField(NAMES[0], new ArrayList<>(1)); hitContext.hit().fields().put(NAMES[0], hitField); } TermVectorsResponse termVector = TermVectorsService.getTermVectors( context.indexShard(), new TermVectorsRequest( context.indexShard().shardId().getIndex().getName(), hitContext.hit().type(), hitContext.hit().id())); try { Map<String, Integer> tv = new HashMap<>(); TermsEnum terms = termVector.getFields().terms(field).iterator(); BytesRef term; while ((term = terms.next()) != null) { tv.put(term.utf8ToString(), terms.postings(null, PostingsEnum.ALL).freq()); } hitField.values().add(tv); } catch (IOException e) { ESLoggerFactory.getLogger(FetchSubPhasePluginIT.class.getName()) .info("Swallowed exception", e); } }
public void testGetTermVector() throws IOException { createIndexWithAlias(); assertAcked( client() .admin() .indices() .preparePutMapping("test") .setType("type1") .setSource("field", "type=text,term_vector=with_positions_offsets_payloads") .get()); ensureYellow("test"); client() .prepareIndex(indexOrAlias(), "type1", "1") .setSource("field", "the quick brown fox jumps over the lazy dog") .get(); refresh(); TermVectorsResponse termVectorsResponse = client().prepareTermVectors(indexOrAlias(), "type1", "1").get(); assertThat(termVectorsResponse.getIndex(), equalTo("test")); assertThat(termVectorsResponse.isExists(), equalTo(true)); Fields fields = termVectorsResponse.getFields(); assertThat(fields.size(), equalTo(1)); assertThat(fields.terms("field").size(), equalTo(8L)); }
private void checkAllInfo( int numDocs, String[] values, int[] freq, int[][] pos, int[][] startOffset, int[][] endOffset, int i) throws IOException { TermVectorsRequestBuilder resp = client() .prepareTermVectors("test", "type1", Integer.toString(i)) .setPayloads(true) .setOffsets(true) .setPositions(true) .setFieldStatistics(true) .setTermStatistics(true) .setSelectedFields(); assertThat(resp.request().fieldStatistics(), equalTo(true)); TermVectorsResponse response = resp.execute().actionGet(); assertThat("doc id: " + i + " doesn't exists but should", response.isExists(), equalTo(true)); Fields fields = response.getFields(); assertThat(fields.size(), equalTo(1)); Terms terms = fields.terms("field"); assertThat(terms.size(), equalTo(8l)); assertThat(terms.getSumTotalTermFreq(), Matchers.equalTo((long) (9 * numDocs))); assertThat(terms.getDocCount(), Matchers.equalTo(numDocs)); assertThat(terms.getSumDocFreq(), equalTo((long) numDocs * values.length)); TermsEnum iterator = terms.iterator(); for (int j = 0; j < values.length; j++) { String string = values[j]; BytesRef next = iterator.next(); assertThat(next, Matchers.notNullValue()); assertThat("expected " + string, string, equalTo(next.utf8ToString())); assertThat(next, Matchers.notNullValue()); if (string.equals("the")) { assertThat( "expected ttf of " + string, numDocs * 2, equalTo((int) iterator.totalTermFreq())); } else { assertThat("expected ttf of " + string, numDocs, equalTo((int) iterator.totalTermFreq())); } PostingsEnum docsAndPositions = iterator.postings(null, null, PostingsEnum.ALL); assertThat(docsAndPositions.nextDoc(), equalTo(0)); assertThat(freq[j], equalTo(docsAndPositions.freq())); assertThat(iterator.docFreq(), equalTo(numDocs)); int[] termPos = pos[j]; int[] termStartOffset = startOffset[j]; int[] termEndOffset = endOffset[j]; assertThat(termPos.length, equalTo(freq[j])); assertThat(termStartOffset.length, equalTo(freq[j])); assertThat(termEndOffset.length, equalTo(freq[j])); for (int k = 0; k < freq[j]; k++) { int nextPosition = docsAndPositions.nextPosition(); assertThat("term: " + string, nextPosition, equalTo(termPos[k])); assertThat("term: " + string, docsAndPositions.startOffset(), equalTo(termStartOffset[k])); assertThat("term: " + string, docsAndPositions.endOffset(), equalTo(termEndOffset[k])); assertThat("term: " + string, docsAndPositions.getPayload(), equalTo(new BytesRef("word"))); } } assertThat(iterator.next(), Matchers.nullValue()); XContentBuilder xBuilder = XContentFactory.jsonBuilder(); xBuilder.startObject(); response.toXContent(xBuilder, ToXContent.EMPTY_PARAMS); xBuilder.endObject(); BytesStream bytesStream = xBuilder.bytesStream(); String utf8 = bytesStream.bytes().toUtf8().replaceFirst("\"took\":\\d+,", ""); ; String expectedString = "{\"_index\":\"test\",\"_type\":\"type1\",\"_id\":\"" + i + "\",\"_version\":1,\"found\":true,\"term_vectors\":{\"field\":{\"field_statistics\":{\"sum_doc_freq\":120,\"doc_count\":15,\"sum_ttf\":135},\"terms\":{\"brown\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":2,\"start_offset\":10,\"end_offset\":15,\"payload\":\"d29yZA==\"}]},\"dog\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":8,\"start_offset\":40,\"end_offset\":43,\"payload\":\"d29yZA==\"}]},\"fox\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":3,\"start_offset\":16,\"end_offset\":19,\"payload\":\"d29yZA==\"}]},\"jumps\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":4,\"start_offset\":20,\"end_offset\":25,\"payload\":\"d29yZA==\"}]},\"lazy\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":7,\"start_offset\":35,\"end_offset\":39,\"payload\":\"d29yZA==\"}]},\"over\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":5,\"start_offset\":26,\"end_offset\":30,\"payload\":\"d29yZA==\"}]},\"quick\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":1,\"start_offset\":4,\"end_offset\":9,\"payload\":\"d29yZA==\"}]},\"the\":{\"doc_freq\":15,\"ttf\":30,\"term_freq\":2,\"tokens\":[{\"position\":0,\"start_offset\":0,\"end_offset\":3,\"payload\":\"d29yZA==\"},{\"position\":6,\"start_offset\":31,\"end_offset\":34,\"payload\":\"d29yZA==\"}]}}}}}"; assertThat(utf8, equalTo(expectedString)); }