private void handleFieldWildcards(TermVectorRequest request) {
    Set<String> fieldNames = new HashSet<>();
    for (String pattern : request.selectedFields()) {
        fieldNames.addAll(indexShard.mapperService().simpleMatchToIndexNames(pattern));
    }
    request.selectedFields(fieldNames.toArray(Strings.EMPTY_ARRAY));
}
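// Illustrative sketch (the pattern and resolved field names are hypothetical): a
// wildcard pattern in the selected fields is rewritten in place to the concrete
// index names the mapper service resolves for it, e.g.:
//
//   request.selectedFields("user.*");
//   handleFieldWildcards(request);
//   // request.selectedFields() now holds e.g. {"user.name", "user.email"}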
private AggregatedDfs getAggregatedDfs(Fields termVectorFields, TermVectorRequest request)
        throws IOException {
    DfsOnlyRequest dfsOnlyRequest = new DfsOnlyRequest(
            termVectorFields,
            new String[] { request.index() },
            new String[] { request.type() },
            request.selectedFields());
    // run the dfs-only action to aggregate frequencies for these terms across all shards
    DfsOnlyResponse response = dfsAction.execute(dfsOnlyRequest).actionGet();
    return response.getDfs();
}
private Fields generateTermVectorsFromDoc(TermVectorRequest request, boolean doAllFields)
        throws IOException {
    // parse the document; at the moment we do update the mapping, just like percolate does
    ParsedDocument parsedDocument =
            parseDocument(indexShard.shardId().getIndex(), request.type(), request.doc());

    // select the right fields and generate term vectors
    ParseContext.Document doc = parsedDocument.rootDoc();
    Collection<String> seenFields = new HashSet<>();
    Collection<GetField> getFields = new HashSet<>();
    for (IndexableField field : doc.getFields()) {
        FieldMapper fieldMapper = indexShard.mapperService().smartNameFieldMapper(field.name());
        if (seenFields.contains(field.name())) {
            continue;
        }
        seenFields.add(field.name());
        if (!isValidField(fieldMapper)) {
            continue;
        }
        if (request.selectedFields() == null && !doAllFields && !fieldMapper.fieldType().storeTermVectors()) {
            continue;
        }
        if (request.selectedFields() != null && !request.selectedFields().contains(field.name())) {
            continue;
        }
        String[] values = doc.getValues(field.name());
        getFields.add(new GetField(field.name(), Arrays.asList((Object[]) values)));
    }
    return generateTermVectors(getFields, request.offsets(), request.perFieldAnalyzer());
}
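// Illustrative sketch (type and field values hypothetical): term vectors for an
// artificial document are requested by attaching a document instead of an id, using
// the no-arg constructor and the doc(XContentBuilder) setter seen elsewhere in this
// section; doAllFields then controls whether fields without stored term vectors are
// still included:
//
//   TermVectorRequest request = new TermVectorRequest();
//   request.type("tweet");   // hypothetical type
//   request.doc(jsonBuilder().startObject().field("text", "the quick brown fox").endObject());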
@Override
protected MultiTermVectorsShardResponse shardOperation(MultiTermVectorsShardRequest request, int shardId)
        throws ElasticsearchException {
    MultiTermVectorsShardResponse response = new MultiTermVectorsShardResponse();
    for (int i = 0; i < request.locations.size(); i++) {
        TermVectorRequest termVectorRequest = request.requests.get(i);
        try {
            IndexService indexService = indicesService.indexServiceSafe(request.index());
            IndexShard indexShard = indexService.shardSafe(shardId);
            TermVectorResponse termVectorResponse =
                    indexShard.termVectorService().getTermVector(termVectorRequest, request.index());
            response.add(request.locations.get(i), termVectorResponse);
        } catch (Throwable t) {
            if (TransportActions.isShardNotAvailableException(t)) {
                throw (ElasticsearchException) t;
            } else {
                logger.debug("[{}][{}] failed to execute multi term vectors for [{}]/[{}]",
                        t, request.index(), shardId, termVectorRequest.type(), termVectorRequest.id());
                response.add(request.locations.get(i),
                        new MultiTermVectorsResponse.Failure(request.index(),
                                termVectorRequest.type(), termVectorRequest.id(),
                                ExceptionsHelper.detailedMessage(t)));
            }
        }
    }
    return response;
}
/**
 * Constructs a new term vector request as a copy of the given request, copying over
 * its index, type, id, flags, preference, routing, and selected fields.
 */
public TermVectorRequest(TermVectorRequest other) {
    super(other.index());
    this.id = other.id();
    this.type = other.type();
    this.flagsEnum = other.getFlags().clone();
    this.preference = other.preference();
    this.routing = other.routing();
    if (other.selectedFields != null) {
        this.selectedFields = new HashSet<>(other.selectedFields);
    }
}
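// A plausible use of this copy constructor (suggested by the "_index"/"_type"
// handling in parseRequest below, not confirmed by this excerpt): multi term vector
// parsing can clone a template request carrying the defaults, then let each parsed
// sub-request override them:
//
//   TermVectorRequest subRequest = new TermVectorRequest(template);  // start from defaults
//   TermVectorRequest.parseRequest(subRequest, parser);              // per-doc overrides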
private Fields addGeneratedTermVectors(Engine.GetResult get, Fields termVectorsByField,
        TermVectorRequest request, Set<String> selectedFields) throws IOException {
    /* only keep valid fields */
    Set<String> validFields = new HashSet<>();
    for (String field : selectedFields) {
        FieldMapper fieldMapper = indexShard.mapperService().smartNameFieldMapper(field);
        if (!isValidField(fieldMapper)) {
            continue;
        }
        // skip fields whose term vectors are already stored, unless the analyzer
        // has been overridden for this field
        if (fieldMapper.fieldType().storeTermVectors()
                && (request.perFieldAnalyzer() == null || !request.perFieldAnalyzer().containsKey(field))) {
            continue;
        }
        validFields.add(field);
    }
    if (validFields.isEmpty()) {
        return termVectorsByField;
    }

    /* generate term vectors from fetched document fields */
    GetResult getResult = indexShard.getService().get(
            get, request.id(), request.type(), validFields.toArray(Strings.EMPTY_ARRAY), null, false);
    Fields generatedTermVectors = generateTermVectors(
            getResult.getFields().values(), request.offsets(), request.perFieldAnalyzer());

    /* merge with existing Fields */
    if (termVectorsByField == null) {
        return generatedTermVectors;
    } else {
        return mergeFields(termVectorsByField, generatedTermVectors);
    }
}
/**
 * Populates a request object (pre-populated with defaults) based on a parser.
 */
public static void parseRequest(TermVectorRequest termVectorRequest, XContentParser parser)
        throws IOException {
    XContentParser.Token token;
    String currentFieldName = null;
    List<String> fields = new ArrayList<>();
    while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
        if (token == XContentParser.Token.FIELD_NAME) {
            currentFieldName = parser.currentName();
        } else if (currentFieldName != null) {
            if (currentFieldName.equals("fields")) {
                if (token == XContentParser.Token.START_ARRAY) {
                    while (parser.nextToken() != XContentParser.Token.END_ARRAY) {
                        fields.add(parser.text());
                    }
                } else {
                    throw new ElasticsearchParseException(
                            "The parameter fields must be given as an array! Use syntax: \"fields\" : [\"field1\", \"field2\",...]");
                }
            } else if (currentFieldName.equals("offsets")) {
                termVectorRequest.offsets(parser.booleanValue());
            } else if (currentFieldName.equals("positions")) {
                termVectorRequest.positions(parser.booleanValue());
            } else if (currentFieldName.equals("payloads")) {
                termVectorRequest.payloads(parser.booleanValue());
            } else if (currentFieldName.equals("term_statistics") || currentFieldName.equals("termStatistics")) {
                termVectorRequest.termStatistics(parser.booleanValue());
            } else if (currentFieldName.equals("field_statistics") || currentFieldName.equals("fieldStatistics")) {
                termVectorRequest.fieldStatistics(parser.booleanValue());
            } else if ("_index".equals(currentFieldName)) {
                // the following is important for multi request parsing.
                termVectorRequest.index = parser.text();
            } else if ("_type".equals(currentFieldName)) {
                termVectorRequest.type = parser.text();
            } else if ("_id".equals(currentFieldName)) {
                if (termVectorRequest.doc != null) {
                    throw new ElasticsearchParseException("Either \"id\" or \"doc\" can be specified, but not both!");
                }
                termVectorRequest.id = parser.text();
            } else if ("doc".equals(currentFieldName)) {
                if (termVectorRequest.id != null) {
                    throw new ElasticsearchParseException("Either \"id\" or \"doc\" can be specified, but not both!");
                }
                termVectorRequest.doc(jsonBuilder().copyCurrentStructure(parser));
            } else if ("_routing".equals(currentFieldName) || "routing".equals(currentFieldName)) {
                termVectorRequest.routing = parser.text();
            } else {
                throw new ElasticsearchParseException(
                        "The parameter " + currentFieldName + " is not valid for term vector request!");
            }
        }
    }
    if (!fields.isEmpty()) {
        String[] fieldsAsArray = new String[fields.size()];
        termVectorRequest.selectedFields(fields.toArray(fieldsAsArray));
    }
}
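// Example of a request body this parser accepts (field names and values are
// illustrative); "_index", "_type", and "_id" are mainly supplied when parsing
// multi term vector requests:
//
//   {
//     "fields" : ["text", "title"],
//     "offsets" : true,
//     "positions" : true,
//     "payloads" : false,
//     "term_statistics" : true,
//     "field_statistics" : true
//   }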
public static TermVectorRequest readTermVectorRequest(StreamInput in) throws IOException {
    TermVectorRequest termVectorRequest = new TermVectorRequest();
    termVectorRequest.readFrom(in);
    return termVectorRequest;
}
public TermVectorResponse getTermVector(TermVectorRequest request, String concreteIndex) {
    final Engine.Searcher searcher = indexShard.acquireSearcher("term_vector");
    IndexReader topLevelReader = searcher.reader();
    final TermVectorResponse termVectorResponse =
            new TermVectorResponse(concreteIndex, request.type(), request.id());
    final Term uidTerm = new Term(UidFieldMapper.NAME, Uid.createUidAsBytes(request.type(), request.id()));

    Engine.GetResult get = indexShard.get(new Engine.Get(request.realtime(), uidTerm));
    boolean docFromTranslog = get.source() != null;
    AggregatedDfs dfs = null;

    /* a document fetched from the translog is treated as an artificial document */
    if (docFromTranslog) {
        request.doc(get.source().source, false);
        termVectorResponse.setDocVersion(get.version());
    }

    /* handle potential wildcards in fields */
    if (request.selectedFields() != null) {
        handleFieldWildcards(request);
    }

    try {
        Fields topLevelFields = MultiFields.getFields(topLevelReader);
        Versions.DocIdAndVersion docIdAndVersion = get.docIdAndVersion();
        /* from an artificial document */
        if (request.doc() != null) {
            Fields termVectorsByField = generateTermVectorsFromDoc(request, !docFromTranslog);
            // if no document is indexed on this shard, use the queried document itself for stats
            if (topLevelFields == null) {
                topLevelFields = termVectorsByField;
            }
            if (termVectorsByField != null && useDfs(request)) {
                dfs = getAggregatedDfs(termVectorsByField, request);
            }
            termVectorResponse.setFields(termVectorsByField, request.selectedFields(),
                    request.getFlags(), topLevelFields, dfs);
            termVectorResponse.setExists(true);
            termVectorResponse.setArtificial(!docFromTranslog);
        }
        /* or from an existing document */
        else if (docIdAndVersion != null) {
            // fields with stored term vectors
            Fields termVectorsByField =
                    docIdAndVersion.context.reader().getTermVectors(docIdAndVersion.docId);
            Set<String> selectedFields = request.selectedFields();
            // generate term vectors for fields where the analyzer is overridden
            if (selectedFields == null && request.perFieldAnalyzer() != null) {
                selectedFields = getFieldsToGenerate(request.perFieldAnalyzer(), termVectorsByField);
            }
            // fields without term vectors
            if (selectedFields != null) {
                termVectorsByField = addGeneratedTermVectors(get, termVectorsByField, request, selectedFields);
            }
            if (termVectorsByField != null && useDfs(request)) {
                dfs = getAggregatedDfs(termVectorsByField, request);
            }
            termVectorResponse.setFields(termVectorsByField, request.selectedFields(),
                    request.getFlags(), topLevelFields, dfs);
            termVectorResponse.setDocVersion(docIdAndVersion.version);
            termVectorResponse.setExists(true);
        } else {
            termVectorResponse.setExists(false);
        }
    } catch (Throwable ex) {
        throw new ElasticsearchException("failed to execute term vector request", ex);
    } finally {
        searcher.close();
        get.release();
    }
    return termVectorResponse;
}
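// In summary, the method above resolves term vectors from one of three sources,
// checked in this order (a sketch of the decision as implemented):
//
//   request.doc() != null     -> generated on the fly from the artificial or
//                                translog-fetched document
//   docIdAndVersion != null   -> read from stored term vectors, topped up with
//                                generated vectors for fields that lack them
//   otherwise                 -> setExists(false): no such document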
private boolean useDfs(TermVectorRequest request) {
    return request.dfs() && (request.fieldStatistics() || request.termStatistics());
}
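// Illustrative sketch (assuming setter counterparts of the getters used above, as
// parseRequest suggests for termStatistics/fieldStatistics): distributed frequencies
// are only aggregated when dfs is requested and some statistics are actually wanted:
//
//   request.dfs(true);
//   request.termStatistics(true);   // useDfs(request) == true
//
//   request.dfs(true);
//   request.termStatistics(false);
//   request.fieldStatistics(false); // useDfs(request) == false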