/**
 * Creates the single and parallel searchers, passes a series of term queries and one boolean
 * conjunction to {@code queryTest}, then closes both index readers and all three directories.
 */
public void testQueries() throws Exception {
  single = single(random());
  parallel = parallel(random());

  // One term query per field/value combination, field-major (f1:v1, f1:v2, f2:v1, ...).
  final String[] fieldNames = {"f1", "f2", "f3", "f4"};
  final String[] fieldValues = {"v1", "v2"};
  for (String fieldName : fieldNames) {
    for (String fieldValue : fieldValues) {
      queryTest(new TermQuery(new Term(fieldName, fieldValue)));
    }
  }

  // Conjunction over the first and the last field.
  BooleanQuery.Builder conjunction = new BooleanQuery.Builder();
  conjunction.add(new TermQuery(new Term("f1", "v1")), Occur.MUST);
  conjunction.add(new TermQuery(new Term("f4", "v1")), Occur.MUST);
  queryTest(conjunction.build());

  // Release readers first, then the directories; null the fields afterwards.
  single.getIndexReader().close();
  single = null;
  parallel.getIndexReader().close();
  parallel = null;
  dir.close();
  dir = null;
  dir1.close();
  dir1 = null;
  dir2.close();
  dir2 = null;
}
/** * Prepare the queries for this test. Extending classes can override this method for preparing * different queries. * * @return prepared queries. * @throws Exception if cannot prepare the queries. */ @Override protected Query[] prepareQueries() throws Exception { // analyzer (default is standard analyzer) Analyzer anlzr = NewAnalyzerTask.createAnalyzer( config.get("analyzer", "org.apache.lucene.analysis.standard.StandardAnalyzer")); QueryParser qp = new QueryParser(DocMaker.BODY_FIELD, anlzr); ArrayList<Query> qq = new ArrayList<>(); Query q1 = new TermQuery(new Term(DocMaker.ID_FIELD, "doc2")); qq.add(q1); Query q2 = new TermQuery(new Term(DocMaker.BODY_FIELD, "simple")); qq.add(q2); BooleanQuery.Builder bq = new BooleanQuery.Builder(); bq.add(q1, Occur.MUST); bq.add(q2, Occur.MUST); qq.add(bq.build()); qq.add(qp.parse("synthetic body")); qq.add(qp.parse("\"synthetic body\"")); qq.add(qp.parse("synthetic text")); qq.add(qp.parse("\"synthetic text\"")); qq.add(qp.parse("\"synthetic text\"~3")); qq.add(qp.parse("zoom*")); qq.add(qp.parse("synth*")); return qq.toArray(new Query[0]); }
@Override protected Query newFuzzyQuery(String text, int fuzziness) { BooleanQuery.Builder bq = new BooleanQuery.Builder(); bq.setDisableCoord(true); for (Map.Entry<String, Float> entry : weights.entrySet()) { String field = entry.getKey(); FieldType type = schema.getFieldType(field); Query fuzzy; if (type instanceof TextField) { // If the field type is a TextField then use the multi term analyzer. Analyzer analyzer = ((TextField) type).getMultiTermAnalyzer(); String term = TextField.analyzeMultiTerm(field, text, analyzer).utf8ToString(); fuzzy = new FuzzyQuery(new Term(entry.getKey(), term), fuzziness); } else { // If the type is *not* a TextField don't do any analysis. fuzzy = new FuzzyQuery(new Term(entry.getKey(), text), fuzziness); } float boost = entry.getValue(); if (boost != 1f) { fuzzy = new BoostQuery(fuzzy, boost); } bq.add(fuzzy, BooleanClause.Occur.SHOULD); } return simplify(bq.build()); }
/**
 * Creates a boolean query from this factory's clauses, delegating to each clause's own query
 * factory. When {@code normalizeBoost} is set and there is at least one clause, the result is
 * wrapped in a {@link BoostQuery} with boost {@code 1 / numberOfClauses}.
 *
 * @throws IOException if a clause's query factory throws it.
 */
@Override
public Query createQuery(
    FieldBoost boost,
    float dmqTieBreakerMultiplier,
    DocumentFrequencyAndTermContextProvider dftcp)
    throws IOException {
  BooleanQuery.Builder clauseBuilder = new BooleanQuery.Builder();
  clauseBuilder.setDisableCoord(disableCoord);
  for (Clause current : clauses) {
    clauseBuilder.add(
        current.queryFactory.createQuery(boost, dmqTieBreakerMultiplier, dftcp), current.occur);
  }
  final Query combined = clauseBuilder.build();

  if (!normalizeBoost) {
    return combined;
  }

  // Guard against dividing by zero when there are no clauses.
  final int clauseCount = getNumberOfClauses();
  if (clauseCount <= 0) {
    return combined;
  }
  return new BoostQuery(combined, 1f / (float) clauseCount);
}
@Override public ConstantScoreQuery makeQuery(SpatialArgs args) { if (!SpatialOperation.is( args.getOperation(), SpatialOperation.Intersects, SpatialOperation.IsWithin)) throw new UnsupportedSpatialOperation(args.getOperation()); Shape shape = args.getShape(); if (shape instanceof Rectangle) { Rectangle bbox = (Rectangle) shape; return new ConstantScoreQuery(makeWithin(bbox)); } else if (shape instanceof Circle) { Circle circle = (Circle) shape; Rectangle bbox = circle.getBoundingBox(); Query approxQuery = makeWithin(bbox); BooleanQuery.Builder bqBuilder = new BooleanQuery.Builder(); FunctionRangeQuery vsRangeQuery = new FunctionRangeQuery( makeDistanceValueSource(circle.getCenter()), 0.0, circle.getRadius(), true, true); bqBuilder.add( approxQuery, BooleanClause.Occur.FILTER); // should have lowest "cost" value; will drive iteration bqBuilder.add(vsRangeQuery, BooleanClause.Occur.FILTER); return new ConstantScoreQuery(bqBuilder.build()); } else { throw new UnsupportedOperationException( "Only Rectangles and Circles are currently supported, " + "found [" + shape.getClass() + "]"); // TODO } }
@Override protected Query doToQuery(QueryShardContext context) throws IOException { DocumentMapper childDocMapper = context.getMapperService().documentMapper(type); if (childDocMapper == null) { if (ignoreUnmapped) { return new MatchNoDocsQuery(); } else { throw new QueryShardException( context, "[" + NAME + "] no mapping found for type [" + type + "]"); } } ParentFieldMapper parentFieldMapper = childDocMapper.parentFieldMapper(); if (parentFieldMapper.active() == false) { throw new QueryShardException( context, "[" + NAME + "] _parent field has no parent type configured"); } String fieldName = ParentFieldMapper.joinField(parentFieldMapper.type()); BooleanQuery.Builder query = new BooleanQuery.Builder(); query.add(new DocValuesTermsQuery(fieldName, id), BooleanClause.Occur.MUST); // Need to take child type into account, otherwise a child doc of different type with the same // id could match query.add(new TermQuery(new Term(TypeFieldMapper.NAME, type)), BooleanClause.Occur.FILTER); return query.build(); }
// TODO: Move to QueryParserTestBase once standard flexible parser gets this capability public void testMultiWordSynonyms() throws Exception { QueryParser dumb = new QueryParser("field", new Analyzer1()); dumb.setSplitOnWhitespace(false); // A multi-word synonym source will form a synonym query for the same-starting-position tokens BooleanQuery.Builder multiWordExpandedBqBuilder = new BooleanQuery.Builder(); Query multiWordSynonymQuery = new SynonymQuery(new Term("field", "guinea"), new Term("field", "cavy")); multiWordExpandedBqBuilder.add(multiWordSynonymQuery, BooleanClause.Occur.SHOULD); multiWordExpandedBqBuilder.add( new TermQuery(new Term("field", "pig")), BooleanClause.Occur.SHOULD); Query multiWordExpandedBq = multiWordExpandedBqBuilder.build(); assertEquals(multiWordExpandedBq, dumb.parse("guinea pig")); // With the phrase operator, a multi-word synonym source will form a multiphrase query. // When the number of expanded term(s) is different from that of the original term(s), this is // not good. MultiPhraseQuery.Builder multiWordExpandedMpqBuilder = new MultiPhraseQuery.Builder(); multiWordExpandedMpqBuilder.add( new Term[] {new Term("field", "guinea"), new Term("field", "cavy")}); multiWordExpandedMpqBuilder.add(new Term("field", "pig")); Query multiWordExpandedMPQ = multiWordExpandedMpqBuilder.build(); assertEquals(multiWordExpandedMPQ, dumb.parse("\"guinea pig\"")); // custom behavior, the synonyms are expanded, unless you use quote operator QueryParser smart = new SmartQueryParser(); smart.setSplitOnWhitespace(false); assertEquals(multiWordExpandedBq, smart.parse("guinea pig")); PhraseQuery.Builder multiWordUnexpandedPqBuilder = new PhraseQuery.Builder(); multiWordUnexpandedPqBuilder.add(new Term("field", "guinea")); multiWordUnexpandedPqBuilder.add(new Term("field", "pig")); Query multiWordUnexpandedPq = multiWordUnexpandedPqBuilder.build(); assertEquals(multiWordUnexpandedPq, smart.parse("\"guinea pig\"")); }
/** Two whitespace-separated words should produce a boolean query of two optional term clauses. */
public void testBoolean() {
  QueryBuilder queryBuilder = new QueryBuilder(new MockAnalyzer(random()));

  BooleanQuery.Builder expectedClauses = new BooleanQuery.Builder();
  for (String word : new String[] {"foo", "bar"}) {
    expectedClauses.add(new TermQuery(new Term("field", word)), BooleanClause.Occur.SHOULD);
  }

  assertEquals(expectedClauses.build(), queryBuilder.createBooleanQuery("field", "foo bar"));
}
@Override protected Query newPrefixQuery(String text) { BooleanQuery.Builder bq = new BooleanQuery.Builder(); bq.setDisableCoord(true); for (Map.Entry<String, Float> entry : weights.entrySet()) { String field = entry.getKey(); FieldType type = schema.getFieldType(field); Query prefix; if (type instanceof TextField) { // If the field type is a TextField then use the multi term analyzer. Analyzer analyzer = ((TextField) type).getMultiTermAnalyzer(); String term = TextField.analyzeMultiTerm(field, text, analyzer).utf8ToString(); SchemaField sf = schema.getField(field); prefix = sf.getType().getPrefixQuery(qParser, sf, term); } else { // If the type is *not* a TextField don't do any analysis. SchemaField sf = schema.getField(field); prefix = type.getPrefixQuery(qParser, sf, text); } float boost = entry.getValue(); if (boost != 1f) { prefix = new BoostQuery(prefix, boost); } bq.add(prefix, BooleanClause.Occur.SHOULD); } return simplify(bq.build()); }
/** Synonyms with the default OR operator, built through {@link QueryBuilder}. */
public void testCJKSynonymsOR() throws Exception {
  QueryBuilder queryBuilder = new QueryBuilder(new MockCJKSynonymAnalyzer());

  TermQuery firstChar = new TermQuery(new Term("field", "中"));
  SynonymQuery secondCharSynonyms =
      new SynonymQuery(new Term("field", "国"), new Term("field", "國"));

  BooleanQuery.Builder expectedClauses = new BooleanQuery.Builder();
  expectedClauses.add(firstChar, BooleanClause.Occur.SHOULD);
  expectedClauses.add(secondCharSynonyms, BooleanClause.Occur.SHOULD);

  assertEquals(expectedClauses.build(), queryBuilder.createBooleanQuery("field", "中国"));
}
/**
 * Builds a boolean query whose single clause prohibits (MUST_NOT) the field type's fully open
 * range query over {@code fieldName}.
 *
 * @param searcher supplies the schema used to look up the field.
 * @param fieldName name of the field to test.
 */
static Query getFieldMissingQuery(SolrIndexSearcher searcher, String fieldName)
    throws IOException {
  final SchemaField schemaField = searcher.getSchema().getField(fieldName);

  // Open range (both bounds null, both exclusive) as produced by the field's type.
  final Query anyValue =
      schemaField.getType().getRangeQuery(null, schemaField, null, null, false, false);

  return new BooleanQuery.Builder().add(anyValue, BooleanClause.Occur.MUST_NOT).build();
}
/** Synonyms with the default OR operator, parsed with and without a boost. */
public void testCJKSynonymsOR() throws Exception {
  QueryParser parser = new QueryParser("field", new MockCJKSynonymAnalyzer());

  Query secondCharSynonyms = new SynonymQuery(new Term("field", "国"), new Term("field", "國"));
  BooleanQuery.Builder expectedClauses = new BooleanQuery.Builder();
  expectedClauses.add(new TermQuery(new Term("field", "中")), BooleanClause.Occur.SHOULD);
  expectedClauses.add(secondCharSynonyms, BooleanClause.Occur.SHOULD);

  Query unboosted = expectedClauses.build();
  assertEquals(unboosted, parser.parse("中国"));

  // The same query with a ^2 suffix is expected to come back wrapped in a BoostQuery.
  Query boosted = new BoostQuery(unboosted, 2f);
  assertEquals(boosted, parser.parse("中国^2"));
}
public void testCJKTerm() throws Exception { // individual CJK chars as terms SimpleCJKAnalyzer analyzer = new SimpleCJKAnalyzer(); BooleanQuery.Builder expected = new BooleanQuery.Builder(); expected.add(new TermQuery(new Term("field", "中")), BooleanClause.Occur.SHOULD); expected.add(new TermQuery(new Term("field", "国")), BooleanClause.Occur.SHOULD); QueryBuilder builder = new QueryBuilder(analyzer); assertEquals(expected.build(), builder.createBooleanQuery("field", "中国")); }
/** Synonyms with the default AND operator, parsed with and without a boost. */
public void testCJKSynonymsAND() throws Exception {
  QueryParser parser = new QueryParser(FIELD, new MockCJKSynonymAnalyzer());
  parser.setDefaultOperator(Operator.AND);

  Query secondCharSynonyms = new SynonymQuery(new Term(FIELD, "国"), new Term(FIELD, "國"));
  BooleanQuery.Builder expectedClauses = new BooleanQuery.Builder();
  expectedClauses.add(new TermQuery(new Term(FIELD, "中")), BooleanClause.Occur.MUST);
  expectedClauses.add(secondCharSynonyms, BooleanClause.Occur.MUST);

  Query unboosted = expectedClauses.build();
  assertEquals(unboosted, parser.parse("中国"));

  // The same query with a ^2 suffix is expected to come back wrapped in a BoostQuery.
  Query boosted = new BoostQuery(unboosted, 2f);
  assertEquals(boosted, parser.parse("中国^2"));
}
/**
 * Creates the final query via the query customizer. With exactly one field context its query is
 * wrapped directly; otherwise the per-field queries become optional clauses of a boolean query
 * that is wrapped instead.
 */
@Override
public Query createQuery() {
  if (fieldContexts.size() != 1) {
    BooleanQuery.Builder perFieldClauses = new BooleanQuery.Builder();
    for (FieldContext context : fieldContexts) {
      perFieldClauses.add(createQuery(context), BooleanClause.Occur.SHOULD);
    }
    BooleanQuery allFields = perFieldClauses.build();
    return queryCustomizer.setWrappedQuery(allFields).createQuery();
  }

  // Single field: no boolean wrapper needed.
  return queryCustomizer.setWrappedQuery(createQuery(fieldContexts.get(0))).createQuery();
}
/**
 * Combines the mapper service's search filter for the given types with {@code aliasFilter}.
 *
 * @return {@code null} when neither filter exists; otherwise a constant-score query requiring
 *     every filter that is present.
 */
@Override
public Query searchFilter(String[] types) {
  final Query typeFilter = mapperService().searchFilter(types);
  final boolean hasTypeFilter = typeFilter != null;
  final boolean hasAliasFilter = aliasFilter != null;

  if (!hasTypeFilter && !hasAliasFilter) {
    return null;
  }

  BooleanQuery.Builder required = new BooleanQuery.Builder();
  if (hasTypeFilter) {
    required.add(typeFilter, Occur.MUST);
  }
  if (hasAliasFilter) {
    required.add(aliasFilter, Occur.MUST);
  }
  return new ConstantScoreQuery(required.build());
}
public void testDefaultSplitOnWhitespace() throws Exception { QueryParser parser = new QueryParser("field", new Analyzer1()); assertFalse(parser.getSplitOnWhitespace()); // default is false // A multi-word synonym source will form a synonym query for the same-starting-position tokens BooleanQuery.Builder bqBuilder = new BooleanQuery.Builder(); bqBuilder.add( new SynonymQuery(new Term("field", "guinea"), new Term("field", "cavy")), BooleanClause.Occur.SHOULD); bqBuilder.add(new TermQuery(new Term("field", "pig")), BooleanClause.Occur.SHOULD); assertEquals(bqBuilder.build(), parser.parse("guinea pig")); boolean oldSplitOnWhitespace = splitOnWhitespace; splitOnWhitespace = QueryParser.DEFAULT_SPLIT_ON_WHITESPACE; assertQueryEquals("guinea pig", new MockSynonymAnalyzer(), "Synonym(cavy guinea) pig"); splitOnWhitespace = oldSplitOnWhitespace; }
@Override public Query rewrite(IndexReader reader) throws IOException { ScoreTermQueue q = new ScoreTermQueue(maxNumTerms); // load up the list of possible terms for (FieldVals f : fieldVals) { addTerms(reader, f, q); } BooleanQuery.Builder bq = new BooleanQuery.Builder(); // create BooleanQueries to hold the variants for each token/field pair and ensure it // has no coord factor // Step 1: sort the termqueries by term/field HashMap<Term, ArrayList<ScoreTerm>> variantQueries = new HashMap<>(); int size = q.size(); for (int i = 0; i < size; i++) { ScoreTerm st = q.pop(); ArrayList<ScoreTerm> l = variantQueries.get(st.fuzziedSourceTerm); if (l == null) { l = new ArrayList<>(); variantQueries.put(st.fuzziedSourceTerm, l); } l.add(st); } // Step 2: Organize the sorted termqueries into zero-coord scoring boolean queries for (Iterator<ArrayList<ScoreTerm>> iter = variantQueries.values().iterator(); iter.hasNext(); ) { ArrayList<ScoreTerm> variants = iter.next(); if (variants.size() == 1) { // optimize where only one selected variant ScoreTerm st = variants.get(0); Query tq = newTermQuery(reader, st.term); // set the boost to a mix of IDF and score bq.add(new BoostQuery(tq, st.score), BooleanClause.Occur.SHOULD); } else { BooleanQuery.Builder termVariants = new BooleanQuery.Builder(); termVariants.setDisableCoord(true); // disable coord and IDF for these term variants for (Iterator<ScoreTerm> iterator2 = variants.iterator(); iterator2.hasNext(); ) { ScoreTerm st = iterator2.next(); // found a match Query tq = newTermQuery(reader, st.term); // set the boost using the ScoreTerm's score termVariants.add( new BoostQuery(tq, st.score), BooleanClause.Occur.SHOULD); // add to query } bq.add(termVariants.build(), BooleanClause.Occur.SHOULD); // add to query } } // TODO possible alternative step 3 - organize above booleans into a new layer of field-based // booleans with a minimum-should-match of NumFields-1? return bq.build(); }
@Override public void performWork(LuceneWork work, IndexWriterDelegate delegate, IndexingMonitor monitor) { final Serializable id = work.getId(); final String tenantId = work.getTenantId(); final Class<?> managedType = work.getEntityClass(); DocumentBuilderIndexedEntity builder = workspace.getDocumentBuilder(managedType); try { if (DeleteWorkExecutor.isIdNumeric(builder)) { log.tracef( "Deleting %s#%s by query using an IndexWriter#updateDocument as id is Numeric", managedType, id); Query exactMatchQuery = NumericFieldUtils.createExactMatchQuery(builder.getIdKeywordName(), id); BooleanQuery.Builder deleteDocumentsQueryBuilder = new BooleanQuery.Builder(); deleteDocumentsQueryBuilder.add(exactMatchQuery, Occur.FILTER); if (tenantId != null) { TermQuery tenantTermQuery = new TermQuery(new Term(DocumentBuilderIndexedEntity.TENANT_ID_FIELDNAME, tenantId)); deleteDocumentsQueryBuilder.add(tenantTermQuery, Occur.FILTER); } delegate.deleteDocuments(deleteDocumentsQueryBuilder.build()); // no need to log the Add operation as we'll log in the delegate this.addDelegate.performWork(work, delegate, monitor); } else { log.tracef("Updating %s#%s by id using an IndexWriter#updateDocument.", managedType, id); Term idTerm = new Term(builder.getIdKeywordName(), work.getIdInString()); Map<String, String> fieldToAnalyzerMap = work.getFieldToAnalyzerMap(); ScopedAnalyzerReference analyzerReference = builder.getAnalyzerReference(); analyzerReference = AddWorkExecutor.updateAnalyzerMappings( workspace, analyzerReference, fieldToAnalyzerMap); delegate.updateDocument(idTerm, work.getDocument(), analyzerReference); } workspace.notifyWorkApplied(work); } catch (Exception e) { String message = "Unable to update " + managedType + "#" + id + " in index."; throw new SearchException(message, e); } if (monitor != null) { monitor.documentsAdded(1l); } }
/**
 * Builds a regexp query. When this field is indexed, or no parse context is available, the
 * superclass implementation is used. Otherwise the regexp is run against the uid field, once per
 * query type of the context: a single type yields a plain {@link RegexpQuery}, several types
 * yield a boolean query of optional regexp clauses.
 */
@Override
public Query regexpQuery(
    String value,
    int flags,
    int maxDeterminizedStates,
    @Nullable MultiTermQuery.RewriteMethod method,
    @Nullable QueryParseContext context) {
  final boolean delegateToSuper = indexOptions() != IndexOptions.NONE || context == null;
  if (delegateToSuper) {
    return super.regexpQuery(value, flags, maxDeterminizedStates, method, context);
  }

  final Collection<String> types = context.queryTypes();

  if (types.size() == 1) {
    // Exactly one type: return the regexp query itself, without a boolean wrapper.
    Term uidTerm =
        new Term(
            UidFieldMapper.NAME,
            Uid.createUidAsBytes(Iterables.getFirst(types, null), BytesRefs.toBytesRef(value)));
    RegexpQuery single = new RegexpQuery(uidTerm, flags, maxDeterminizedStates);
    if (method != null) {
      single.setRewriteMethod(method);
    }
    return single;
  }

  BooleanQuery.Builder anyType = new BooleanQuery.Builder();
  for (String type : types) {
    Term uidTerm =
        new Term(UidFieldMapper.NAME, Uid.createUidAsBytes(type, BytesRefs.toBytesRef(value)));
    RegexpQuery perType = new RegexpQuery(uidTerm, flags, maxDeterminizedStates);
    if (method != null) {
      perType.setRewriteMethod(method);
    }
    anyType.add(perType, BooleanClause.Occur.SHOULD);
  }
  return anyType.build();
}
/**
 * Combines a group of queries into one.
 *
 * @param groupQuery the queries to combine; may be {@code null} or empty.
 * @return {@code null} for a {@code null} or empty group, the sole element for a group of one,
 *     otherwise a {@link DisjunctionMaxQuery} (when {@code groupDismax} is set) or a boolean
 *     query of optional clauses.
 */
public Query combineGrouped(List<? extends Query> groupQuery) {
  if (groupQuery == null || groupQuery.isEmpty()) {
    return null;
  }
  if (groupQuery.size() == 1) {
    return groupQuery.get(0);
  }

  if (!groupDismax) {
    final BooleanQuery.Builder anyOf = new BooleanQuery.Builder();
    for (Query member : groupQuery) {
      anyOf.add(member, BooleanClause.Occur.SHOULD);
    }
    return anyOf.build();
  }

  // Dismax: copy the group into a fresh list of the element type the constructor expects.
  List<Query> disjuncts = new ArrayList<Query>(groupQuery);
  return new DisjunctionMaxQuery(disjuncts, tieBreaker);
}
private void handleExclude(BooleanQuery.Builder boolQuery, MultiTermVectorsRequest likeItems) { // artificial docs get assigned a random id and should be disregarded List<BytesRef> uids = new ArrayList<>(); for (TermVectorsRequest item : likeItems) { if (item.doc() != null) { continue; } uids.add(createUidAsBytes(item.type(), item.id())); } if (!uids.isEmpty()) { TermsQuery query = new TermsQuery(UidFieldMapper.NAME, uids.toArray(new BytesRef[0])); boolQuery.add(query, BooleanClause.Occur.MUST_NOT); } }
/**
 * Builds a prefix query. When this field is indexed, or no parse context is available, the
 * superclass implementation is used. Otherwise one prefix query against the uid field is created
 * per query type of the context and all of them are combined as optional clauses.
 */
@Override
public Query prefixQuery(
    String value,
    @Nullable MultiTermQuery.RewriteMethod method,
    @Nullable QueryParseContext context) {
  final boolean delegateToSuper = indexOptions() != IndexOptions.NONE || context == null;
  if (delegateToSuper) {
    return super.prefixQuery(value, method, context);
  }

  BooleanQuery.Builder anyType = new BooleanQuery.Builder();
  final Collection<String> types = context.queryTypes();
  for (String type : types) {
    Term uidPrefix =
        new Term(UidFieldMapper.NAME, Uid.createUidAsBytes(type, BytesRefs.toBytesRef(value)));
    PrefixQuery perType = new PrefixQuery(uidPrefix);
    if (method != null) {
      perType.setRewriteMethod(method);
    }
    anyType.add(perType, BooleanClause.Occur.SHOULD);
  }
  return anyType.build();
}
/** * Returns the filter associated with listed filtering aliases. * * <p>The list of filtering aliases should be obtained by calling MetaData.filteringAliases. * Returns <tt>null</tt> if no filtering is required. */ public Query aliasFilter(QueryShardContext context, String... aliasNames) { if (aliasNames == null || aliasNames.length == 0) { return null; } final ImmutableOpenMap<String, AliasMetaData> aliases = indexSettings.getIndexMetaData().getAliases(); if (aliasNames.length == 1) { AliasMetaData alias = aliases.get(aliasNames[0]); if (alias == null) { // This shouldn't happen unless alias disappeared after filteringAliases was called. throw new InvalidAliasNameException( index(), aliasNames[0], "Unknown alias name was passed to alias Filter"); } return parse(alias, context); } else { // we need to bench here a bit, to see maybe it makes sense to use OrFilter BooleanQuery.Builder combined = new BooleanQuery.Builder(); for (String aliasName : aliasNames) { AliasMetaData alias = aliases.get(aliasName); if (alias == null) { // This shouldn't happen unless alias disappeared after filteringAliases was called. throw new InvalidAliasNameException( indexSettings.getIndex(), aliasNames[0], "Unknown alias name was passed to alias Filter"); } Query parsedFilter = parse(alias, context); if (parsedFilter != null) { combined.add(parsedFilter, BooleanClause.Occur.SHOULD); } else { // The filter might be null only if filter was removed after filteringAliases was called return null; } } return combined.build(); } }
/**
 * Creates the "more like this" query from a priority queue of scored terms. Each popped term
 * becomes an optional term clause; when boosting is on, the clause is boosted by
 * {@code boostFactor * score / firstPoppedScore}. Terms stop being added as soon as the boolean
 * query reports too many clauses.
 */
private Query createQuery(PriorityQueue<ScoreTerm> q) {
  BooleanQuery.Builder likeThis = new BooleanQuery.Builder();
  // -1 marks "no term seen yet"; the first popped term's score becomes the reference score.
  float referenceScore = -1;

  for (ScoreTerm current = q.pop(); current != null; current = q.pop()) {
    Query clause = new TermQuery(new Term(current.topField, current.word));

    if (boost) {
      if (referenceScore == -1) {
        referenceScore = (current.score);
      }
      float currentScore = (current.score);
      clause = new BoostQuery(clause, boostFactor * currentScore / referenceScore);
    }

    try {
      likeThis.add(clause, BooleanClause.Occur.SHOULD);
    } catch (BooleanQuery.TooManyClauses ignore) {
      // Clause limit reached: keep what has been added so far.
      break;
    }
  }

  return likeThis.build();
}
/** Constructs a query to retrieve documents that fully contain the input envelope. */ private Query makeWithin(Rectangle bbox) { BooleanQuery.Builder bq = new BooleanQuery.Builder(); BooleanClause.Occur MUST = BooleanClause.Occur.MUST; if (bbox.getCrossesDateLine()) { // use null as performance trick since no data will be beyond the world bounds bq.add(rangeQuery(fieldNameX, null /*-180*/, bbox.getMaxX()), BooleanClause.Occur.SHOULD); bq.add(rangeQuery(fieldNameX, bbox.getMinX(), null /*+180*/), BooleanClause.Occur.SHOULD); bq.setMinimumNumberShouldMatch(1); // must match at least one of the SHOULD } else { bq.add(rangeQuery(fieldNameX, bbox.getMinX(), bbox.getMaxX()), MUST); } bq.add(rangeQuery(fieldNameY, bbox.getMinY(), bbox.getMaxY()), MUST); return bq.build(); }
/**
 * Splits the query terms into a low-frequency and a high-frequency group and builds the final
 * boolean query from them: the low-frequency group is added as a required, boosted clause and
 * the high-frequency group as an optional, boosted clause.
 *
 * @param maxDoc document count used to scale a fractional {@code maxTermFrequency}.
 * @param contextArray per-term contexts, parallel to {@code queryTerms}; a {@code null} entry
 *     puts the term into the low-frequency group.
 * @param queryTerms the terms to query for.
 */
protected Query buildQuery(
    final int maxDoc, final TermContext[] contextArray, final Term[] queryTerms) {
  List<Query> lowFreqQueries = new ArrayList<>();
  List<Query> highFreqQueries = new ArrayList<>();

  for (int termIndex = 0; termIndex < queryTerms.length; termIndex++) {
    final TermContext termContext = contextArray[termIndex];
    if (termContext == null) {
      lowFreqQueries.add(newTermQuery(queryTerms[termIndex], null));
      continue;
    }
    // High frequency when docFreq exceeds the threshold, taken either as an absolute count
    // (threshold >= 1) or as a fraction of maxDoc.
    final boolean highFrequency =
        (maxTermFrequency >= 1f && termContext.docFreq() > maxTermFrequency)
            || (termContext.docFreq() > (int) Math.ceil(maxTermFrequency * (float) maxDoc));
    final List<Query> target = highFrequency ? highFreqQueries : lowFreqQueries;
    target.add(newTermQuery(queryTerms[termIndex], termContext));
  }

  final int numLowFreqClauses = lowFreqQueries.size();
  final int numHighFreqClauses = highFreqQueries.size();
  final Occur lowOccur = this.lowFreqOccur;
  Occur highOccur = this.highFreqOccur;

  // Minimum-should-match only applies to groups whose clauses are optional.
  int lowMinShouldMatch = 0;
  if (lowOccur == Occur.SHOULD && numLowFreqClauses > 0) {
    lowMinShouldMatch = calcLowFreqMinimumNumberShouldMatch(numLowFreqClauses);
  }
  int highMinShouldMatch = 0;
  if (highOccur == Occur.SHOULD && numHighFreqClauses > 0) {
    highMinShouldMatch = calcHighFreqMinimumNumberShouldMatch(numHighFreqClauses);
  }

  /*
   * if lowFreq is empty we rewrite the high freq terms in a conjunction to
   * prevent slow queries.
   */
  if (lowFreqQueries.isEmpty() && highMinShouldMatch == 0 && highOccur != Occur.MUST) {
    highOccur = Occur.MUST;
  }

  BooleanQuery.Builder result = new BooleanQuery.Builder();
  result.setDisableCoord(true);

  if (!lowFreqQueries.isEmpty()) {
    BooleanQuery.Builder lowGroup = new BooleanQuery.Builder();
    lowGroup.setDisableCoord(disableCoord);
    for (Query lowQuery : lowFreqQueries) {
      lowGroup.add(lowQuery, lowOccur);
    }
    lowGroup.setMinimumNumberShouldMatch(lowMinShouldMatch);
    Query lowGroupQuery = lowGroup.build();
    result.add(new BoostQuery(lowGroupQuery, lowFreqBoost), Occur.MUST);
  }

  if (!highFreqQueries.isEmpty()) {
    BooleanQuery.Builder highGroup = new BooleanQuery.Builder();
    highGroup.setDisableCoord(disableCoord);
    for (Query highQuery : highFreqQueries) {
      highGroup.add(highQuery, highOccur);
    }
    highGroup.setMinimumNumberShouldMatch(highMinShouldMatch);
    Query highGroupQuery = highGroup.build();
    result.add(new BoostQuery(highGroupQuery, highFreqBoost), Occur.SHOULD);
  }

  return result.build();
}
/**
 * Given a latitude and longitude (in degrees) and the maximum great circle (surface of the earth)
 * distance, returns a simple bounding box query to "fast match" candidates.
 *
 * @param originLat latitude of the origin, in degrees.
 * @param originLng longitude of the origin, in degrees.
 * @param maxDistanceKM maximum distance from the origin, in kilometers.
 * @return a boolean query over the {@code "latitude"} and {@code "longitude"} point fields.
 */
public static Query getBoundingBoxQuery(
    double originLat, double originLng, double maxDistanceKM) {

  // Basic bounding box geo math from
  // http://JanMatuschek.de/LatitudeLongitudeBoundingCoordinates,
  // licensed under creative commons 3.0:
  // http://creativecommons.org/licenses/by/3.0

  // TODO: maybe switch to recursive prefix tree instead
  // (in lucene/spatial)? It should be more efficient
  // since it's a 2D trie...

  // Degrees -> Radians:
  final double latRadians = SloppyMath.toRadians(originLat);
  final double lngRadians = SloppyMath.toRadians(originLng);

  final double angularDistance = maxDistanceKM / EARTH_RADIUS_KM;
  double southLat = latRadians - angularDistance;
  double northLat = latRadians + angularDistance;

  double westLng;
  double eastLng;
  if (!(southLat > SloppyMath.toRadians(-90) && northLat < SloppyMath.toRadians(90))) {
    // The query includes a pole! Clamp the latitudes and cover every longitude.
    southLat = Math.max(southLat, SloppyMath.toRadians(-90));
    northLat = Math.min(northLat, SloppyMath.toRadians(90));
    westLng = SloppyMath.toRadians(-180);
    eastLng = SloppyMath.toRadians(180);
  } else {
    final double lngDelta = Math.asin(Math.sin(angularDistance) / Math.cos(latRadians));
    // Wrap each longitude bound back into [-180, 180] degrees when it overshoots.
    westLng = lngRadians - lngDelta;
    if (westLng < SloppyMath.toRadians(-180)) {
      westLng += 2 * Math.PI;
    }
    eastLng = lngRadians + lngDelta;
    if (eastLng > SloppyMath.toRadians(180)) {
      eastLng -= 2 * Math.PI;
    }
  }

  BooleanQuery.Builder box = new BooleanQuery.Builder();

  // Add latitude range filter:
  box.add(
      DoublePoint.newRangeQuery(
          "latitude", SloppyMath.toDegrees(southLat), SloppyMath.toDegrees(northLat)),
      BooleanClause.Occur.FILTER);

  // Add longitude range filter:
  if (westLng > eastLng) {
    // The bounding box crosses the international date line: match either side of it.
    BooleanQuery.Builder eitherSide = new BooleanQuery.Builder();
    eitherSide.add(
        DoublePoint.newRangeQuery(
            "longitude", SloppyMath.toDegrees(westLng), Double.POSITIVE_INFINITY),
        BooleanClause.Occur.SHOULD);
    eitherSide.add(
        DoublePoint.newRangeQuery(
            "longitude", Double.NEGATIVE_INFINITY, SloppyMath.toDegrees(eastLng)),
        BooleanClause.Occur.SHOULD);
    // NOTE(review): this clause is added as MUST while the other clauses of this method use
    // FILTER - confirm whether that difference is intended.
    box.add(eitherSide.build(), BooleanClause.Occur.MUST);
  } else {
    box.add(
        DoublePoint.newRangeQuery(
            "longitude", SloppyMath.toDegrees(westLng), SloppyMath.toDegrees(eastLng)),
        BooleanClause.Occur.FILTER);
  }

  return box.build();
}
private void _run() throws IOException { for (int i = 0; i < iters; i++) { String color; String sortField; switch (random.nextInt(4)) { case 0: // TermQuery on yellow cabs color = "y"; if (sparse) { sortField = "yellow_pickup_longitude"; } else { sortField = "pickup_longitude"; } break; case 1: // TermQuery on green cabs color = "g"; if (sparse) { sortField = "green_pickup_longitude"; } else { sortField = "pickup_longitude"; } break; case 2: // BooleanQuery on both cabs (all docs) color = "both"; sortField = null; break; case 3: // Point range query color = "neither"; sortField = null; break; default: throw new AssertionError(); } Query query; if (color.equals("both")) { BooleanQuery.Builder builder = new BooleanQuery.Builder(); builder.add(new TermQuery(new Term("cab_color", "y")), BooleanClause.Occur.SHOULD); builder.add(new TermQuery(new Term("cab_color", "g")), BooleanClause.Occur.SHOULD); query = builder.build(); } else if (color.equals("neither")) { if (sparse) { BooleanQuery.Builder builder = new BooleanQuery.Builder(); builder.add( DoublePoint.newRangeQuery("green_pickup_latitude", 40.75, 40.9), BooleanClause.Occur.SHOULD); builder.add( DoublePoint.newRangeQuery("yellow_pickup_latitude", 40.75, 40.9), BooleanClause.Occur.SHOULD); query = builder.build(); } else { query = DoublePoint.newRangeQuery("pickup_latitude", 40.75, 40.9); } } else { query = new TermQuery(new Term("cab_color", color)); } Sort sort; if (sortField != null && random.nextBoolean()) { sort = new Sort(new SortField(sortField, SortField.Type.DOUBLE)); } else { sort = null; } long t0 = System.nanoTime(); TopDocs hits; if (sort == null) { hits = searcher.search(query, 10); } else { hits = searcher.search(query, 10, sort); } long t1 = System.nanoTime(); results.add( "T" + threadID + " " + query + " sort=" + sort + ": " + hits.totalHits + " hits in " + ((t1 - t0) / 1000000.) 
+ " msec"); for (ScoreDoc hit : hits.scoreDocs) { Document doc = searcher.doc(hit.doc); results.add(" " + hit.doc + " " + hit.score + ": " + doc.getFields().size() + " fields"); } /* synchronized(printLock) { System.out.println("T" + threadID + " " + query + " sort=" + sort + ": " + hits.totalHits + " hits in " + ((t1-t0)/1000000.) + " msec"); for(ScoreDoc hit : hits.scoreDocs) { Document doc = searcher.doc(hit.doc); System.out.println(" " + hit.doc + " " + hit.score + ": " + doc.getFields().size() + " fields"); } } */ } }
public void testResetRootDocId() throws Exception { Directory directory = newDirectory(); IndexWriterConfig iwc = new IndexWriterConfig(null); iwc.setMergePolicy(NoMergePolicy.INSTANCE); RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory, iwc); List<Document> documents = new ArrayList<>(); // 1 segment with, 1 root document, with 3 nested sub docs Document document = new Document(); document.add( new Field(UidFieldMapper.NAME, "type#1", UidFieldMapper.Defaults.NESTED_FIELD_TYPE)); document.add( new Field(TypeFieldMapper.NAME, "__nested_field", TypeFieldMapper.Defaults.FIELD_TYPE)); documents.add(document); document = new Document(); document.add( new Field(UidFieldMapper.NAME, "type#1", UidFieldMapper.Defaults.NESTED_FIELD_TYPE)); document.add( new Field(TypeFieldMapper.NAME, "__nested_field", TypeFieldMapper.Defaults.FIELD_TYPE)); documents.add(document); document = new Document(); document.add( new Field(UidFieldMapper.NAME, "type#1", UidFieldMapper.Defaults.NESTED_FIELD_TYPE)); document.add( new Field(TypeFieldMapper.NAME, "__nested_field", TypeFieldMapper.Defaults.FIELD_TYPE)); documents.add(document); document = new Document(); document.add(new Field(UidFieldMapper.NAME, "type#1", UidFieldMapper.Defaults.FIELD_TYPE)); document.add(new Field(TypeFieldMapper.NAME, "test", TypeFieldMapper.Defaults.FIELD_TYPE)); documents.add(document); indexWriter.addDocuments(documents); indexWriter.commit(); documents.clear(); // 1 segment with: // 1 document, with 1 nested subdoc document = new Document(); document.add( new Field(UidFieldMapper.NAME, "type#2", UidFieldMapper.Defaults.NESTED_FIELD_TYPE)); document.add( new Field(TypeFieldMapper.NAME, "__nested_field", TypeFieldMapper.Defaults.FIELD_TYPE)); documents.add(document); document = new Document(); document.add(new Field(UidFieldMapper.NAME, "type#2", UidFieldMapper.Defaults.FIELD_TYPE)); document.add(new Field(TypeFieldMapper.NAME, "test", TypeFieldMapper.Defaults.FIELD_TYPE)); 
documents.add(document); indexWriter.addDocuments(documents); documents.clear(); // and 1 document, with 1 nested subdoc document = new Document(); document.add( new Field(UidFieldMapper.NAME, "type#3", UidFieldMapper.Defaults.NESTED_FIELD_TYPE)); document.add( new Field(TypeFieldMapper.NAME, "__nested_field", TypeFieldMapper.Defaults.FIELD_TYPE)); documents.add(document); document = new Document(); document.add(new Field(UidFieldMapper.NAME, "type#3", UidFieldMapper.Defaults.FIELD_TYPE)); document.add(new Field(TypeFieldMapper.NAME, "test", TypeFieldMapper.Defaults.FIELD_TYPE)); documents.add(document); indexWriter.addDocuments(documents); indexWriter.commit(); indexWriter.close(); IndexService indexService = createIndex("test"); DirectoryReader directoryReader = DirectoryReader.open(directory); directoryReader = ElasticsearchDirectoryReader.wrap(directoryReader, new ShardId(indexService.index(), 0)); IndexSearcher searcher = new IndexSearcher(directoryReader); indexService .mapperService() .merge( "test", new CompressedXContent( PutMappingRequest.buildFromSimplifiedDef("test", "nested_field", "type=nested") .string()), MapperService.MergeReason.MAPPING_UPDATE, false); SearchContext searchContext = createSearchContext(indexService); AggregationContext context = new AggregationContext(searchContext); AggregatorFactories.Builder builder = AggregatorFactories.builder(); NestedAggregatorBuilder factory = new NestedAggregatorBuilder("test", "nested_field"); builder.addAggregator(factory); AggregatorFactories factories = builder.build(context, null); searchContext.aggregations(new SearchContextAggregations(factories)); Aggregator[] aggs = factories.createTopLevelAggregators(); BucketCollector collector = BucketCollector.wrap(Arrays.asList(aggs)); collector.preCollection(); // A regular search always exclude nested docs, so we use NonNestedDocsFilter.INSTANCE here // (otherwise MatchAllDocsQuery would be sufficient) // We exclude root doc with uid type#2, this will 
trigger the bug if we don't reset the root doc // when we process a new segment, because // root doc type#3 and root doc type#1 have the same segment docid BooleanQuery.Builder bq = new BooleanQuery.Builder(); bq.add(Queries.newNonNestedFilter(), Occur.MUST); bq.add(new TermQuery(new Term(UidFieldMapper.NAME, "type#2")), Occur.MUST_NOT); searcher.search(new ConstantScoreQuery(bq.build()), collector); collector.postCollection(); Nested nested = (Nested) aggs[0].buildAggregation(0); // The bug manifests if 6 docs are returned, because currentRootDoc isn't reset the previous // child docs from the first segment are emitted as hits. assertThat(nested.getDocCount(), equalTo(4L)); directoryReader.close(); directory.close(); }