private Fields generateTermVectorsFromDoc(TermVectorRequest request, boolean doAllFields) throws IOException { // parse the document, at the moment we do update the mapping, just like percolate ParsedDocument parsedDocument = parseDocument(indexShard.shardId().getIndex(), request.type(), request.doc()); // select the right fields and generate term vectors ParseContext.Document doc = parsedDocument.rootDoc(); Collection<String> seenFields = new HashSet<>(); Collection<GetField> getFields = new HashSet<>(); for (IndexableField field : doc.getFields()) { FieldMapper fieldMapper = indexShard.mapperService().smartNameFieldMapper(field.name()); if (seenFields.contains(field.name())) { continue; } else { seenFields.add(field.name()); } if (!isValidField(fieldMapper)) { continue; } if (request.selectedFields() == null && !doAllFields && !fieldMapper.fieldType().storeTermVectors()) { continue; } if (request.selectedFields() != null && !request.selectedFields().contains(field.name())) { continue; } String[] values = doc.getValues(field.name()); getFields.add(new GetField(field.name(), Arrays.asList((Object[]) values))); } return generateTermVectors(getFields, request.offsets(), request.perFieldAnalyzer()); }
/** populates a request object (pre-populated with defaults) based on a parser. */ public static void parseRequest(TermVectorRequest termVectorRequest, XContentParser parser) throws IOException { XContentParser.Token token; String currentFieldName = null; List<String> fields = new ArrayList<>(); while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { if (token == XContentParser.Token.FIELD_NAME) { currentFieldName = parser.currentName(); } else if (currentFieldName != null) { if (currentFieldName.equals("fields")) { if (token == XContentParser.Token.START_ARRAY) { while (parser.nextToken() != XContentParser.Token.END_ARRAY) { fields.add(parser.text()); } } else { throw new ElasticsearchParseException( "The parameter fields must be given as an array! Use syntax : \"fields\" : [\"field1\", \"field2\",...]"); } } else if (currentFieldName.equals("offsets")) { termVectorRequest.offsets(parser.booleanValue()); } else if (currentFieldName.equals("positions")) { termVectorRequest.positions(parser.booleanValue()); } else if (currentFieldName.equals("payloads")) { termVectorRequest.payloads(parser.booleanValue()); } else if (currentFieldName.equals("term_statistics") || currentFieldName.equals("termStatistics")) { termVectorRequest.termStatistics(parser.booleanValue()); } else if (currentFieldName.equals("field_statistics") || currentFieldName.equals("fieldStatistics")) { termVectorRequest.fieldStatistics(parser.booleanValue()); } else if ("_index" .equals(currentFieldName)) { // the following is important for multi request parsing. termVectorRequest.index = parser.text(); } else if ("_type".equals(currentFieldName)) { termVectorRequest.type = parser.text(); } else if ("_id".equals(currentFieldName)) { if (termVectorRequest.doc != null) { throw new ElasticsearchParseException( "Either \"id\" or \"doc\" can be specified, but not both!"); } termVectorRequest.id = parser.text(); } else if ("doc".equals(currentFieldName)) { if (termVectorRequest.id != null) { throw new ElasticsearchParseException( "Either \"id\" or \"doc\" can be specified, but not both!"); } termVectorRequest.doc(jsonBuilder().copyCurrentStructure(parser)); } else if ("_routing".equals(currentFieldName) || "routing".equals(currentFieldName)) { termVectorRequest.routing = parser.text(); } else { throw new ElasticsearchParseException( "The parameter " + currentFieldName + " is not valid for term vector request!"); } } } if (fields.size() > 0) { String[] fieldsAsArray = new String[fields.size()]; termVectorRequest.selectedFields(fields.toArray(fieldsAsArray)); } }
private Fields addGeneratedTermVectors( Engine.GetResult get, Fields termVectorsByField, TermVectorRequest request, Set<String> selectedFields) throws IOException { /* only keep valid fields */ Set<String> validFields = new HashSet<>(); for (String field : selectedFields) { FieldMapper fieldMapper = indexShard.mapperService().smartNameFieldMapper(field); if (!isValidField(fieldMapper)) { continue; } // already retrieved, only if the analyzer hasn't been overridden at the field if (fieldMapper.fieldType().storeTermVectors() && (request.perFieldAnalyzer() == null || !request.perFieldAnalyzer().containsKey(field))) { continue; } validFields.add(field); } if (validFields.isEmpty()) { return termVectorsByField; } /* generate term vectors from fetched document fields */ GetResult getResult = indexShard .getService() .get( get, request.id(), request.type(), validFields.toArray(Strings.EMPTY_ARRAY), null, false); Fields generatedTermVectors = generateTermVectors( getResult.getFields().values(), request.offsets(), request.perFieldAnalyzer()); /* merge with existing Fields */ if (termVectorsByField == null) { return generatedTermVectors; } else { return mergeFields(termVectorsByField, generatedTermVectors); } }