@Override public Query parse(QueryParseContext parseContext) throws IOException, QueryParsingException { XContentParser parser = parseContext.parser(); MoreLikeThisQuery mltQuery = new MoreLikeThisQuery(); mltQuery.setSimilarity(parseContext.searchSimilarity()); Analyzer analyzer = null; List<String> moreLikeFields = null; boolean failOnUnsupportedField = true; String queryName = null; boolean include = false; XContentParser.Token token; String currentFieldName = null; List<String> likeTexts = new ArrayList<>(); MultiTermVectorsRequest likeItems = new MultiTermVectorsRequest(); List<String> unlikeTexts = new ArrayList<>(); MultiTermVectorsRequest unlikeItems = new MultiTermVectorsRequest(); while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { if (token == XContentParser.Token.FIELD_NAME) { currentFieldName = parser.currentName(); } else if (token.isValue()) { if (parseContext.parseFieldMatcher().match(currentFieldName, Fields.LIKE_TEXT)) { likeTexts.add(parser.text()); } else if (parseContext.parseFieldMatcher().match(currentFieldName, Fields.LIKE)) { parseLikeField(parser, likeTexts, likeItems); } else if (parseContext.parseFieldMatcher().match(currentFieldName, Fields.UNLIKE)) { parseLikeField(parser, unlikeTexts, unlikeItems); } else if (parseContext.parseFieldMatcher().match(currentFieldName, Fields.MIN_TERM_FREQ)) { mltQuery.setMinTermFrequency(parser.intValue()); } else if (parseContext .parseFieldMatcher() .match(currentFieldName, Fields.MAX_QUERY_TERMS)) { mltQuery.setMaxQueryTerms(parser.intValue()); } else if (parseContext.parseFieldMatcher().match(currentFieldName, Fields.MIN_DOC_FREQ)) { mltQuery.setMinDocFreq(parser.intValue()); } else if (parseContext.parseFieldMatcher().match(currentFieldName, Fields.MAX_DOC_FREQ)) { mltQuery.setMaxDocFreq(parser.intValue()); } else if (parseContext .parseFieldMatcher() .match(currentFieldName, Fields.MIN_WORD_LENGTH)) { mltQuery.setMinWordLen(parser.intValue()); } else if (parseContext .parseFieldMatcher() .match(currentFieldName, Fields.MAX_WORD_LENGTH)) { mltQuery.setMaxWordLen(parser.intValue()); } else if (parseContext.parseFieldMatcher().match(currentFieldName, Fields.BOOST_TERMS)) { float boostFactor = parser.floatValue(); if (boostFactor != 0) { mltQuery.setBoostTerms(true); mltQuery.setBoostTermsFactor(boostFactor); } } else if (parseContext .parseFieldMatcher() .match(currentFieldName, Fields.MINIMUM_SHOULD_MATCH)) { mltQuery.setMinimumShouldMatch(parser.text()); } else if ("analyzer".equals(currentFieldName)) { analyzer = parseContext.analysisService().analyzer(parser.text()); } else if ("boost".equals(currentFieldName)) { mltQuery.setBoost(parser.floatValue()); } else if (parseContext .parseFieldMatcher() .match(currentFieldName, Fields.FAIL_ON_UNSUPPORTED_FIELD)) { failOnUnsupportedField = parser.booleanValue(); } else if ("_name".equals(currentFieldName)) { queryName = parser.text(); } else if (parseContext.parseFieldMatcher().match(currentFieldName, Fields.INCLUDE)) { include = parser.booleanValue(); } else { throw new QueryParsingException( parseContext, "[mlt] query does not support [" + currentFieldName + "]"); } } else if (token == XContentParser.Token.START_ARRAY) { if (parseContext.parseFieldMatcher().match(currentFieldName, Fields.STOP_WORDS)) { Set<String> stopWords = Sets.newHashSet(); while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { stopWords.add(parser.text()); } mltQuery.setStopWords(stopWords); } else if ("fields".equals(currentFieldName)) { moreLikeFields = new LinkedList<>(); while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { String field = parser.text(); MappedFieldType fieldType = parseContext.fieldMapper(field); moreLikeFields.add(fieldType == null ? field : fieldType.names().indexName()); } } else if (parseContext.parseFieldMatcher().match(currentFieldName, Fields.DOCUMENT_IDS)) { while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { if (!token.isValue()) { throw new IllegalArgumentException("ids array element should only contain ids"); } likeItems.add(newTermVectorsRequest().id(parser.text())); } } else if (parseContext.parseFieldMatcher().match(currentFieldName, Fields.DOCUMENTS)) { while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { if (token != XContentParser.Token.START_OBJECT) { throw new IllegalArgumentException("docs array element should include an object"); } likeItems.add(parseDocument(parser)); } } else if (parseContext.parseFieldMatcher().match(currentFieldName, Fields.LIKE)) { while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { parseLikeField(parser, likeTexts, likeItems); } } else if (parseContext.parseFieldMatcher().match(currentFieldName, Fields.UNLIKE)) { while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { parseLikeField(parser, unlikeTexts, unlikeItems); } } else { throw new QueryParsingException( parseContext, "[mlt] query does not support [" + currentFieldName + "]"); } } else if (token == XContentParser.Token.START_OBJECT) { if (parseContext.parseFieldMatcher().match(currentFieldName, Fields.LIKE)) { parseLikeField(parser, likeTexts, likeItems); } else if (parseContext.parseFieldMatcher().match(currentFieldName, Fields.UNLIKE)) { parseLikeField(parser, unlikeTexts, unlikeItems); } else { throw new QueryParsingException( parseContext, "[mlt] query does not support [" + currentFieldName + "]"); } } } if (likeTexts.isEmpty() && likeItems.isEmpty()) { throw new QueryParsingException( parseContext, "more_like_this requires 'like' to be specified"); } if (moreLikeFields != null && moreLikeFields.isEmpty()) { throw new QueryParsingException( parseContext, "more_like_this requires 'fields' to be non-empty"); } // set analyzer if (analyzer == null) { analyzer = parseContext.mapperService().searchAnalyzer(); } mltQuery.setAnalyzer(analyzer); // set like text fields boolean useDefaultField = (moreLikeFields == null); if (useDefaultField) { moreLikeFields = Collections.singletonList(parseContext.defaultField()); } // possibly remove unsupported fields removeUnsupportedFields(moreLikeFields, analyzer, failOnUnsupportedField); if (moreLikeFields.isEmpty()) { return null; } mltQuery.setMoreLikeFields(moreLikeFields.toArray(Strings.EMPTY_ARRAY)); // support for named query if (queryName != null) { parseContext.addNamedQuery(queryName, mltQuery); } // handle like texts if (!likeTexts.isEmpty()) { mltQuery.setLikeText(likeTexts); } if (!unlikeTexts.isEmpty()) { mltQuery.setIgnoreText(unlikeTexts); } // handle items if (!likeItems.isEmpty()) { // set default index, type and fields if not specified MultiTermVectorsRequest items = likeItems; for (TermVectorsRequest item : unlikeItems) { items.add(item); } for (TermVectorsRequest item : items) { if (item.index() == null) { item.index(parseContext.index().name()); } if (item.type() == null) { if (parseContext.queryTypes().size() > 1) { throw new QueryParsingException( parseContext, "ambiguous type for item with id: " + item.id() + " and index: " + item.index()); } else { item.type(parseContext.queryTypes().iterator().next()); } } // default fields if not present but don't override for artificial docs if (item.selectedFields() == null && item.doc() == null) { if (useDefaultField) { item.selectedFields("*"); } else { item.selectedFields(moreLikeFields.toArray(new String[moreLikeFields.size()])); } } } // fetching the items with multi-termvectors API items.copyContextAndHeadersFrom(SearchContext.current()); MultiTermVectorsResponse responses = fetchService.fetchResponse(items); // getting the Fields for liked items mltQuery.setLikeText(MoreLikeThisFetchService.getFields(responses, likeItems)); // getting the Fields for ignored items if (!unlikeItems.isEmpty()) { org.apache.lucene.index.Fields[] ignoreFields = MoreLikeThisFetchService.getFields(responses, unlikeItems); if (ignoreFields.length > 0) { mltQuery.setUnlikeText(ignoreFields); } } BooleanQuery.Builder boolQuery = new BooleanQuery.Builder(); boolQuery.add(mltQuery, BooleanClause.Occur.SHOULD); // exclude the items from the search if (!include) { handleExclude(boolQuery, likeItems); } return boolQuery.build(); } return mltQuery; }