/**
 * Converts one Lucene sub-query into an {@code HTerm} and appends it to {@code hQuery.terms}.
 *
 * <p>Supported sub-query types are {@code TermQuery}, {@code FuzzyQuery} and {@code
 * TermRangeQuery}. The term is appended only after it has been populated completely, so an
 * unsupported sub-query does not leave a half-initialised term behind in {@code hQuery.terms}.
 *
 * @param hQuery query that receives the new term
 * @param isShould value copied to the term's {@code isShould} flag
 * @param isMust value copied to the term's {@code isMust} flag
 * @param subQueryL Lucene query to convert; its boost is copied to the term
 * @throws FederatedSearchException if {@code subQueryL} is none of the supported query types
 */
private final void populateTerm(
    final HQuery hQuery, final boolean isShould, final boolean isMust, final Query subQueryL)
    throws FederatedSearchException {
  HTerm hTerm = new HTerm();
  hTerm.isShould = isShould;
  hTerm.isMust = isMust;
  hTerm.boost = subQueryL.getBoost();

  if (subQueryL instanceof TermQuery) {
    TermQuery lTerm = (TermQuery) subQueryL;
    hTerm.type = lTerm.getTerm().field();
    hTerm.text = lTerm.getTerm().text();
  } else if (subQueryL instanceof FuzzyQuery) {
    FuzzyQuery lTerm = (FuzzyQuery) subQueryL;
    hTerm.isFuzzy = true;
    hTerm.type = lTerm.getTerm().field();
    hTerm.text = lTerm.getTerm().text();
  } else if (subQueryL instanceof TermRangeQuery) {
    TermRangeQuery lTerm = (TermRangeQuery) subQueryL;
    hTerm.isFuzzy = false;
    hTerm.type = lTerm.getField();
    hTerm.minRange = lTerm.getLowerTerm();
    hTerm.maxRange = lTerm.getUpperTerm();
  } else {
    throw new FederatedSearchException(
        "HQueryParser: Not Implemented Query :" + subQueryL.getClass().toString());
  }

  // Register the term last: a rejected sub-query must not leave an empty term in the query.
  hQuery.terms.add(hTerm);
}
/**
 * Searches the index for documents whose {@code "text"} field contains every analyzed token of
 * the given text.
 *
 * <p>Each token becomes a {@code MUST} term clause with a boost of 2. At most 100000 hits are
 * read. This is a best-effort lookup: if analysis or searching fails, the stack trace is printed
 * and whatever was collected so far is returned.
 *
 * @param text free text to analyze and search for
 * @return map from the stored {@code "id"} field of each hit to its score; never {@code null}
 */
public Map<String, Double> search(String text) {
  Map<String, Double> similar = new HashMap<String, Double>();
  try {
    BooleanQuery bQuery = new BooleanQuery();
    TokenStream tokenStream = analyzer.tokenStream("text", text);
    try {
      CharTermAttribute charTermAtt = tokenStream.addAttribute(CharTermAttribute.class);
      tokenStream.reset();
      while (tokenStream.incrementToken()) {
        String token = charTermAtt.toString();
        TermQuery tq = new TermQuery(new Term("text", token));
        tq.setBoost(2f);
        bQuery.add(tq, Occur.MUST);
      }
      // Complete the consume workflow before closing (reset -> incrementToken -> end -> close).
      tokenStream.end();
    } finally {
      // Always release the stream, even when analysis fails half-way.
      tokenStream.close();
    }
    TopDocs results = searcher.search(bQuery, 100000);
    for (ScoreDoc hit : results.scoreDocs) {
      Document doc = searcher.doc(hit.doc);
      similar.put(doc.get("id"), Double.valueOf(hit.score));
    }
  } catch (Exception e) {
    // Deliberately best effort: report the failure and return the partial result.
    e.printStackTrace();
  }
  return similar;
}
/**
 * Searches the index for documents whose {@code "text"} field contains every analyzed token of
 * the given text and returns the matching documents.
 *
 * <p>Each token becomes a {@code MUST} term clause with a boost of 2. At most 100000 hits are
 * read. Every returned document gets an additional stored {@code "score"} float field holding
 * the hit score. This is a best-effort lookup: if analysis or searching fails, the stack trace
 * is printed and whatever was collected so far is returned.
 *
 * @param text free text to analyze and search for
 * @return the matching documents in hit order; never {@code null}
 */
public List<Document> searchDocuments(String text) {
  List<Document> documents = new ArrayList<Document>();
  try {
    BooleanQuery bQuery = new BooleanQuery();
    TokenStream tokenStream = analyzer.tokenStream("text", text);
    try {
      CharTermAttribute charTermAtt = tokenStream.addAttribute(CharTermAttribute.class);
      tokenStream.reset();
      while (tokenStream.incrementToken()) {
        String token = charTermAtt.toString();
        TermQuery tq = new TermQuery(new Term("text", token));
        tq.setBoost(2f);
        bQuery.add(tq, Occur.MUST);
      }
      // Complete the consume workflow before closing (reset -> incrementToken -> end -> close).
      tokenStream.end();
    } finally {
      // Always release the stream, even when analysis fails half-way.
      tokenStream.close();
    }
    TopDocs results = searcher.search(bQuery, 100000);
    for (ScoreDoc hit : results.scoreDocs) {
      Document doc = searcher.doc(hit.doc);
      doc.add(new FloatField("score", hit.score, FloatField.TYPE_STORED));
      documents.add(doc);
    }
  } catch (Exception e) {
    // Deliberately best effort: report the failure and return the partial result.
    e.printStackTrace();
  }
  return documents;
}
/**
 * Verifies that a one-word query string with {@code STRING_FIELD_NAME} as default field is
 * turned into a {@code TermQuery} on that field.
 */
public void testToQueryTermQuery() throws IOException {
  assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0);

  Query parsed =
      queryStringQuery("test").defaultField(STRING_FIELD_NAME).toQuery(createShardContext());

  assertThat(parsed, instanceOf(TermQuery.class));
  Term expectedTerm = new Term(STRING_FIELD_NAME, "test");
  assertThat(((TermQuery) parsed).getTerm(), equalTo(expectedTerm));
}
/** * combine weights according to expansion formula * * @param queryTerms * @param relevantDocsTerms * @param irrelevantDocsTerms * @return */ public List<TermQuery> combine( Map<String, TermQuery> queryTerms, Map<String, TermQuery> relevantDocsTerms, Map<String, TermQuery> irrelevantDocsTerms) { // Add Terms of the relevant documents for (Map.Entry<String, TermQuery> e : queryTerms.entrySet()) { if (relevantDocsTerms.containsKey(e.getKey())) { TermQuery tq = relevantDocsTerms.get(e.getKey()); tq.setBoost(tq.getBoost() + e.getValue().getBoost()); } else { relevantDocsTerms.put(e.getKey(), e.getValue()); } } // Substract terms of irrelevant documents for (Map.Entry<String, TermQuery> e : irrelevantDocsTerms.entrySet()) { if (relevantDocsTerms.containsKey(e.getKey())) { TermQuery tq = relevantDocsTerms.get(e.getKey()); tq.setBoost(tq.getBoost() - e.getValue().getBoost()); } else { TermQuery tq = e.getValue(); tq.setBoost(-tq.getBoost()); relevantDocsTerms.put(e.getKey(), tq); } } return new ArrayList<>(relevantDocsTerms.values()); }
/**
 * Builds the span query for a single term.
 *
 * <p>The result is a {@code SpanBetweenQuery} over three span term queries on field {@code
 * scope}: the begin tag and the end tag derived from the term's field, followed by the term text.
 *
 * @param scope index field the span terms are created on
 * @param q query to convert; must be a {@code TermQuery}
 * @return the span query for the term
 */
private SpanQuery buildTermQuery(String scope, Query q) {
  Term term = ((TermQuery) q).getTerm();
  SpanQuery[] parts = {
    new SpanTermQuery(new Term(scope, getTag(scope, term.field(), TAG_TYPE.BEG_TAG))),
    new SpanTermQuery(new Term(scope, getTag(scope, term.field(), TAG_TYPE.END_TAG))),
    new SpanTermQuery(new Term(scope, term.text()))
  };
  return new SpanBetweenQuery(parts);
}
/** * Adjust termClaimsDescriptionAbstractTitle features of the docs with alpha * query; and beta; * and assign weights/boost to termClaimsDescriptionAbstractTitles (tf*idf). * * @param query * @param currentField * @param alpha * @param beta - factor of the equation * @param gamma * @param decay * @param maxExpandedQueryTerms - maximum number of termClaimsDescriptionAbstractTitles in * expanded query * @return expandedQuery with boost factors adjusted using Rocchio's algorithm * @throws IOException * @throws ParseException */ public Query adjust( Query query, String currentField, float alpha, float beta, float gamma, float decay, int maxExpandedQueryTerms) throws IOException, ParseException { Query expandedQuery; // setBoost of docs terms Map<String, TermQuery> relevantDocsTerms = setBoost(docsTermVectorReldocs, currentField, beta, decay); Map<String, TermQuery> irrrelevantDocsTerms = setBoost(docsTermVectorIrreldocs, currentField, gamma, decay); // Map<String, TermQuery> relevantDocsTerms = new HashMap<>(); // Map<String, TermQuery> irrrelevantDocsTerms = new HashMap<>(); // setBoost of query terms // Get queryTerms from the query // combine weights according to expansion formula List<TermQuery> expandedQueryTerms = combine(new HashMap<String, TermQuery>(), relevantDocsTerms, irrrelevantDocsTerms); // Sort by boost=weight Comparator comparator = new QueryBoostComparator(); Collections.sort(expandedQueryTerms, comparator); relevantDocsTerms.clear(); int termCount = Math.min(expandedQueryTerms.size(), maxExpandedQueryTerms); for (int i = 0; i < termCount; i++) { TermQuery tq = expandedQueryTerms.get(i); relevantDocsTerms.put(tq.getTerm().text(), tq); System.out.print(tq.getTerm().text() + ", "); } TermFreqVector queryTermsVector = new TermFreqVector(query); Map<String, TermQuery> queryTerms; queryTerms = setBoost(queryTermsVector, currentField, alpha); // List<TermQuery> queryTermsList=new ArrayList(queryTerms.values()); // Collections.sort(queryTermsList, 
comparator); // queryTerms.clear(); // for(TermQuery tq:queryTermsList){ // queryTerms.put(tq.getTerm().text(), tq); // } expandedQueryTerms = combine(queryTerms, relevantDocsTerms, new HashMap<String, TermQuery>()); Collections.sort(expandedQueryTerms, comparator); // Create Expanded Query expandedQuery = mergeQueries(expandedQueryTerms, Integer.MAX_VALUE); return expandedQuery; }
/**
 * Converts a Lucene term query into a JSON {@code term} clause of the form {@code {"term":
 * {<field>: {"value": <text>, "boost": <boost>}}}} and passes it, together with the field name,
 * to {@code wrapQueryForNestedIfRequired}.
 *
 * @param termQuery term query to convert
 * @return the result of {@code wrapQueryForNestedIfRequired} for the generated clause
 */
private static JsonObject convertTermQuery(TermQuery termQuery) {
  String field = termQuery.getTerm().field();
  String value = termQuery.getTerm().text();

  JsonObject termClause =
      JsonBuilder.object()
          .add(
              "term",
              JsonBuilder.object()
                  .add(
                      field,
                      JsonBuilder.object()
                          .addProperty("value", value)
                          .addProperty("boost", termQuery.getBoost())))
          .build();

  return wrapQueryForNestedIfRequired(field, termClause);
}
/**
 * Returns the boosts of the relevant-document terms as a map from term text to boost.
 *
 * <p>Beta, gamma and decay are read from {@code parameters}.
 *
 * @param currentField field the term queries are created on
 * @return map from term text to boost
 * @throws IOException passed on from the boost computation
 */
public Map<String, Float> getRocchioVector(String currentField) throws IOException {
  float beta = parameters.get(RocchioQueryExpansion.ROCCHIO_BETA_FLD);
  float gamma = parameters.get(RocchioQueryExpansion.ROCCHIO_GAMMA_FLD);
  float decay = parameters.get(RocchioQueryExpansion.DECAY_FLD);

  Map<String, TermQuery> relevantTerms =
      setBoost(docsTermVectorReldocs, currentField, beta, decay);
  // NOTE(review): the irrelevant-document terms are computed here but an empty map is handed to
  // combine() below, so gamma has no influence on the result - confirm this is intended.
  Map<String, TermQuery> irrelevantTerms =
      setBoost(docsTermVectorIrreldocs, currentField, gamma, decay);

  List<TermQuery> combined =
      combine(
          new HashMap<String, TermQuery>(), relevantTerms, new HashMap<String, TermQuery>());

  Map<String, Float> vector = new HashMap<>();
  for (TermQuery termQuery : combined) {
    vector.put(termQuery.getTerm().text(), termQuery.getBoost());
  }
  return vector;
}
public void testBoostFactor() throws Throwable { Map<String, Float> originalValues = getOriginalValues(); MoreLikeThis mlt = new MoreLikeThis(reader); Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false); mlt.setAnalyzer(analyzer); mlt.setMinDocFreq(1); mlt.setMinTermFreq(1); mlt.setMinWordLen(1); mlt.setFieldNames(new String[] {"text"}); mlt.setBoost(true); // this mean that every term boost factor will be multiplied by this // number float boostFactor = 5; mlt.setBoostFactor(boostFactor); BooleanQuery query = (BooleanQuery) mlt.like("text", new StringReader("lucene release")); Collection<BooleanClause> clauses = query.clauses(); assertEquals( "Expected " + originalValues.size() + " clauses.", originalValues.size(), clauses.size()); for (BooleanClause clause : clauses) { BoostQuery bq = (BoostQuery) clause.getQuery(); TermQuery tq = (TermQuery) bq.getQuery(); Float termBoost = originalValues.get(tq.getTerm().text()); assertNotNull("Expected term " + tq.getTerm().text(), termBoost); float totalBoost = termBoost * boostFactor; assertEquals( "Expected boost of " + totalBoost + " for term '" + tq.getTerm().text() + "' got " + bq.getBoost(), totalBoost, bq.getBoost(), 0.0001); } analyzer.close(); }
private Query computeQuery(String patternString) { String upperPatternString = patternString.toUpperCase(); boolean hasBooleanSpecifiers = upperPatternString.contains(" OR ") || upperPatternString.contains(" AND ") // $NON-NLS-1$ //$NON-NLS-2$ || upperPatternString.contains(" NOT "); // $NON-NLS-1$ if (!hasBooleanSpecifiers && defaultField.equals(FIELD_SUMMARY) && !containsSpecialCharacters(patternString)) { return new PrefixQuery(new Term(defaultField.getIndexKey(), patternString)); } QueryParser qp = new QueryParser( Version.LUCENE_CURRENT, defaultField.getIndexKey(), TaskAnalyzer.instance()); Query q; try { q = qp.parse(patternString); } catch (ParseException e) { return new PrefixQuery(new Term(defaultField.getIndexKey(), patternString)); } // relax term clauses to be prefix clauses so that we get results close // to what we're expecting // from previous task list search if (q instanceof BooleanQuery) { BooleanQuery query = (BooleanQuery) q; for (BooleanClause clause : query.getClauses()) { if (clause.getQuery() instanceof TermQuery) { TermQuery termQuery = (TermQuery) clause.getQuery(); clause.setQuery(new PrefixQuery(termQuery.getTerm())); } if (!hasBooleanSpecifiers) { clause.setOccur(Occur.MUST); } } } else if (q instanceof TermQuery) { return new PrefixQuery(((TermQuery) q).getTerm()); } return q; }
/**
 * Runs {@code MoreLikeThis} with boosting enabled (and no explicit boost factor) on the text
 * {@code "lucene release"} and collects the boost of every generated term.
 *
 * @return map from term text to the boost of its clause
 * @throws IOException if the query cannot be generated
 */
private Map<String, Float> getOriginalValues() throws IOException {
  MoreLikeThis mlt = new MoreLikeThis(reader);
  Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
  mlt.setAnalyzer(analyzer);
  mlt.setMinDocFreq(1);
  mlt.setMinTermFreq(1);
  mlt.setMinWordLen(1);
  mlt.setFieldNames(new String[] {"text"});
  mlt.setBoost(true);

  BooleanQuery likeQuery = (BooleanQuery) mlt.like("text", new StringReader("lucene release"));

  Map<String, Float> boostsByTerm = new HashMap<>();
  for (BooleanClause clause : likeQuery.clauses()) {
    BoostQuery boosted = (BoostQuery) clause.getQuery();
    TermQuery termQuery = (TermQuery) boosted.getQuery();
    boostsByTerm.put(termQuery.getTerm().text(), boosted.getBoost());
  }
  analyzer.close();
  return boostsByTerm;
}
/** * Sets boost of termClaimsDescriptionAbstractTitles. boost = weight = factor(tf*idf) * * @param vecsTerms * @param currentField * @param factor - adjustment factor ( ex. alpha or beta ) * @param decayFactor * @return * @throws java.io.IOException */ public Map<String, TermQuery> setBoost( Map<TermFreqVector, String> vecsTerms, String currentField, float factor, float decayFactor) throws IOException { Map<String, TermQuery> terms = new HashMap<>(); // setBoost for each of the terms of each of the docs int i = 0; float norm = (float) 1 / vecsTerms.size(); // System.out.println("--------------------------"); for (Map.Entry<TermFreqVector, String> e : vecsTerms.entrySet()) { // Increase decay String field = e.getValue(); TermFreqVector docTerms = e.getKey(); float decay = decayFactor * i; // Populate terms: with TermQuries and set boost for (String termTxt : docTerms.getTerms()) { // Create Term Term term = new Term(currentField, termTxt); // Calculate weight float tf = docTerms.getFreq(termTxt); // float idf = ir.docFreq(termTitle); int docs; float idf; if (sourceField.equals(PatentQuery.all)) { docs = ir.getDocCount(field); idf = (float) Math.log10((double) docs / (ir.docFreq(new Term(field, termTxt)) + 1)); } else { docs = ir.getDocCount(sourceField); idf = (float) Math.log10((double) docs / (ir.docFreq(new Term(sourceField, termTxt)) + 1)); } float weight = tf * idf; // System.out.println(term.text() + " -> tf= " + tf + " idf= " + idf + " // tfidf= " + weight); // Adjust weight by decay factor weight = weight - (weight * decay); // Create TermQuery and add it to the collection TermQuery termQuery = new TermQuery(term); // Calculate and set boost float boost; if (vecsTerms.size() == 1) { boost = factor * tf; } else { boost = factor; } if (boost != 0) { termQuery.setBoost(boost * norm); if (terms.containsKey(termTxt)) { TermQuery tq = terms.get(termTxt); tq.setBoost(tq.getBoost() + termQuery.getBoost()); } else { terms.put(termTxt, termQuery); } } } i++; } return 
terms; }
/**
 * Drains the given priority queue and adds every term as an optional ({@code SHOULD}) clause to
 * the boolean query.
 *
 * <p>With boosting enabled each clause is boosted by {@code boostFactor * score / bestScore},
 * where {@code bestScore} is the score of the first term popped. Adding stops silently as soon
 * as the query refuses further clauses.
 */
private void addToQuery(PriorityQueue<ScoreTerm> q, BooleanQuery query) {
  float bestScore = -1;
  for (ScoreTerm current = q.pop(); current != null; current = q.pop()) {
    TermQuery termQuery = new TermQuery(new Term(current.topField, current.word));

    if (boost) {
      if (bestScore == -1) {
        // The first term popped provides the reference score.
        bestScore = current.score;
      }
      termQuery.setBoost(boostFactor * current.score / bestScore);
    }

    try {
      query.add(termQuery, BooleanClause.Occur.SHOULD);
    } catch (BooleanQuery.TooManyClauses ignore) {
      // The query is full; the remaining terms are dropped.
      break;
    }
  }
}
static void assertLateParsingQuery(Query query, String type, String id) throws IOException { assertThat(query, instanceOf(HasChildQueryBuilder.LateParsingQuery.class)); HasChildQueryBuilder.LateParsingQuery lateParsingQuery = (HasChildQueryBuilder.LateParsingQuery) query; assertThat(lateParsingQuery.getInnerQuery(), instanceOf(BooleanQuery.class)); BooleanQuery booleanQuery = (BooleanQuery) lateParsingQuery.getInnerQuery(); assertThat(booleanQuery.clauses().size(), equalTo(2)); // check the inner ids query, we have to call rewrite to get to check the type it's executed // against assertThat(booleanQuery.clauses().get(0).getOccur(), equalTo(BooleanClause.Occur.MUST)); assertThat(booleanQuery.clauses().get(0).getQuery(), instanceOf(TermsQuery.class)); TermsQuery termsQuery = (TermsQuery) booleanQuery.clauses().get(0).getQuery(); Query rewrittenTermsQuery = termsQuery.rewrite(null); assertThat(rewrittenTermsQuery, instanceOf(ConstantScoreQuery.class)); ConstantScoreQuery constantScoreQuery = (ConstantScoreQuery) rewrittenTermsQuery; assertThat(constantScoreQuery.getQuery(), instanceOf(BooleanQuery.class)); BooleanQuery booleanTermsQuery = (BooleanQuery) constantScoreQuery.getQuery(); assertThat(booleanTermsQuery.clauses().size(), equalTo(1)); assertThat(booleanTermsQuery.clauses().get(0).getOccur(), equalTo(BooleanClause.Occur.SHOULD)); assertThat(booleanTermsQuery.clauses().get(0).getQuery(), instanceOf(TermQuery.class)); TermQuery termQuery = (TermQuery) booleanTermsQuery.clauses().get(0).getQuery(); assertThat(termQuery.getTerm().field(), equalTo(UidFieldMapper.NAME)); // we want to make sure that the inner ids query gets executed against the child type rather // than the main type we initially set to the context BytesRef[] ids = Uid.createUidsForTypesAndIds( Collections.singletonList(type), Collections.singletonList(id)); assertThat(termQuery.getTerm().bytes(), equalTo(ids[0])); // check the type filter assertThat(booleanQuery.clauses().get(1).getOccur(), 
equalTo(BooleanClause.Occur.FILTER)); assertThat(booleanQuery.clauses().get(1).getQuery(), instanceOf(ConstantScoreQuery.class)); ConstantScoreQuery typeConstantScoreQuery = (ConstantScoreQuery) booleanQuery.clauses().get(1).getQuery(); assertThat(typeConstantScoreQuery.getQuery(), instanceOf(TermQuery.class)); TermQuery typeTermQuery = (TermQuery) typeConstantScoreQuery.getQuery(); assertThat(typeTermQuery.getTerm().field(), equalTo(TypeFieldMapper.NAME)); assertThat(typeTermQuery.getTerm().text(), equalTo(type)); }
@Override public void testStarParsing() throws Exception { final int[] type = new int[1]; QueryParser qp = new QueryParser("field", new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)) { @Override protected Query getWildcardQuery(String field, String termStr) { // override error checking of superclass type[0] = 1; return new TermQuery(new Term(field, termStr)); } @Override protected Query getPrefixQuery(String field, String termStr) { // override error checking of superclass type[0] = 2; return new TermQuery(new Term(field, termStr)); } @Override protected Query getFieldQuery(String field, String queryText, boolean quoted) throws ParseException { type[0] = 3; return super.getFieldQuery(field, queryText, quoted); } }; TermQuery tq; tq = (TermQuery) qp.parse("foo:zoo*"); assertEquals("zoo", tq.getTerm().text()); assertEquals(2, type[0]); BoostQuery bq = (BoostQuery) qp.parse("foo:zoo*^2"); tq = (TermQuery) bq.getQuery(); assertEquals("zoo", tq.getTerm().text()); assertEquals(2, type[0]); assertEquals(bq.getBoost(), 2, 0); tq = (TermQuery) qp.parse("foo:*"); assertEquals("*", tq.getTerm().text()); assertEquals(1, type[0]); // could be a valid prefix query in the future too bq = (BoostQuery) qp.parse("foo:*^2"); tq = (TermQuery) bq.getQuery(); assertEquals("*", tq.getTerm().text()); assertEquals(1, type[0]); assertEquals(bq.getBoost(), 2, 0); tq = (TermQuery) qp.parse("*:foo"); assertEquals("*", tq.getTerm().field()); assertEquals("foo", tq.getTerm().text()); assertEquals(3, type[0]); tq = (TermQuery) qp.parse("*:*"); assertEquals("*", tq.getTerm().field()); assertEquals("*", tq.getTerm().text()); assertEquals(1, type[0]); // could be handled as a prefix query in the // future tq = (TermQuery) qp.parse("(*:*)"); assertEquals("*", tq.getTerm().field()); assertEquals("*", tq.getTerm().text()); assertEquals(1, type[0]); }
/**
 * Appends the string form of the query's term to the plan being built.
 *
 * @param builder receives the text
 * @param query term query to describe
 */
private static void planTermQuery(final StringBuilder builder, final TermQuery query) {
  final String termText = String.valueOf(query.getTerm());
  builder.append(termText);
}
/** * Construct the query (using spans). This method will be called recursively. * * @param q Query * @param scope Author, affiliation, or Reference * @param level Used only for formatting (indentation) the level of recursion * @param andSpans ArrayList of Spans that should be 'and' * @param orSpans ArrayList of Spans that should be 'or' * @param notSpans ArrayList of Spans that should be 'not' * @return SpanQuery */ private SpanQuery buildQuery_recursive( Query q, String scope, int level, ArrayList<SpanQuery> andSpans, ArrayList<SpanQuery> orSpans, ArrayList<SpanQuery> notSpans) { BooleanQuery castQuery = (BooleanQuery) q; String subscope = null; for (BooleanClause clause : castQuery.getClauses()) { Class queryclazz = clause.getQuery().getClass(); System.out.println( repeat(' ', level) + "[" + queryclazz + "][" + clause.getOccur() + "] " + clause.toString()); if (queryclazz == BooleanQuery.class) { System.out.println("Number of Clauses is " + castQuery.clauses().size()); System.out.println("Minimum Number to Match is " + castQuery.getMinimumNumberShouldMatch()); if (subscope == null) { if (clause.getOccur() == BooleanClause.Occur.MUST) { andSpans.add( buildQuery_recursive( clause.getQuery(), scope, level + 1, new ArrayList<SpanQuery>(), new ArrayList<SpanQuery>(), new ArrayList<SpanQuery>())); } else if (clause.getOccur() == BooleanClause.Occur.SHOULD) { orSpans.add( buildQuery_recursive( clause.getQuery(), scope, level + 1, new ArrayList<SpanQuery>(), new ArrayList<SpanQuery>(), new ArrayList<SpanQuery>())); } else if (clause.getOccur() == BooleanClause.Occur.MUST_NOT) { // FIX notSpans.add( buildQuery_recursive( clause.getQuery(), scope, level + 1, new ArrayList<SpanQuery>(), new ArrayList<SpanQuery>(), new ArrayList<SpanQuery>())); } } else { ArrayList<SpanQuery> subscopeQuery = new ArrayList<SpanQuery>(); subscopeQuery.add( new SpanTermQuery(new Term(scope, getTag(scope, subscope, TAG_TYPE.BEG_TAG)))); subscopeQuery.add( new SpanTermQuery(new Term(scope, 
getTag(scope, subscope, TAG_TYPE.END_TAG)))); subscopeQuery.add( buildQuery_recursive( clause.getQuery(), scope, level + 1, new ArrayList<SpanQuery>(), new ArrayList<SpanQuery>(), new ArrayList<SpanQuery>())); if (clause.getOccur() == BooleanClause.Occur.MUST) { andSpans.add( new SpanBetweenQuery(subscopeQuery.toArray(new SpanQuery[subscopeQuery.size()]))); } else if (clause.getOccur() == BooleanClause.Occur.SHOULD) { orSpans.add( new SpanBetweenQuery(subscopeQuery.toArray(new SpanQuery[subscopeQuery.size()]))); } else if (clause.getOccur() == BooleanClause.Occur.MUST_NOT) { // FIX notSpans.add( new SpanBetweenQuery(subscopeQuery.toArray(new SpanQuery[subscopeQuery.size()]))); } } } else if (queryclazz == TermQuery.class) { TermQuery tq = (TermQuery) clause.getQuery(); if (tq.getTerm().field().compareTo(SUBSCOPE_FIELD) == 0) { // Set the subscope subscope = tq.getTerm().text(); // Need to add a term here (otherwise we have problems) WildcardQuery wildcard = new WildcardQuery(new Term(scope, "*")); if (clause.getOccur() == BooleanClause.Occur.MUST) { andSpans.add(new SpanMultiTermQueryWrapper<WildcardQuery>(wildcard)); } else if (clause.getOccur() == BooleanClause.Occur.SHOULD) { orSpans.add(new SpanMultiTermQueryWrapper<WildcardQuery>(wildcard)); } else if (clause.getOccur() == BooleanClause.Occur.MUST_NOT) { notSpans.add(new SpanMultiTermQueryWrapper<WildcardQuery>(wildcard)); } } else if (subscope == null) { if (clause.getOccur() == BooleanClause.Occur.MUST) { andSpans.add(buildTermQuery(scope, tq)); } else if (clause.getOccur() == BooleanClause.Occur.SHOULD) { orSpans.add(buildTermQuery(scope, tq)); } else if (clause.getOccur() == BooleanClause.Occur.MUST_NOT) { notSpans.add(buildTermQuery(scope, tq)); } } else { ArrayList<SpanQuery> subscopeQuery = new ArrayList<SpanQuery>(); subscopeQuery.add( new SpanTermQuery(new Term(scope, getTag(scope, subscope, TAG_TYPE.BEG_TAG)))); subscopeQuery.add( new SpanTermQuery(new Term(scope, getTag(scope, subscope, 
TAG_TYPE.END_TAG)))); subscopeQuery.add(buildTermQuery(scope, tq)); if (clause.getOccur() == BooleanClause.Occur.MUST) { andSpans.add( new SpanBetweenQuery(subscopeQuery.toArray(new SpanQuery[subscopeQuery.size()]))); } else if (clause.getOccur() == BooleanClause.Occur.SHOULD) { orSpans.add( new SpanBetweenQuery(subscopeQuery.toArray(new SpanQuery[subscopeQuery.size()]))); } else if (clause.getOccur() == BooleanClause.Occur.MUST_NOT) { // FIX notSpans.add( new SpanBetweenQuery(subscopeQuery.toArray(new SpanQuery[subscopeQuery.size()]))); } } } else if (queryclazz == WildcardQuery.class) { if (subscope == null) { if (clause.getOccur() == BooleanClause.Occur.MUST) { andSpans.add(buildWildcardQuery(scope, clause.getQuery())); } else if (clause.getOccur() == BooleanClause.Occur.SHOULD) { orSpans.add(buildWildcardQuery(scope, clause.getQuery())); } else if (clause.getOccur() == BooleanClause.Occur.MUST_NOT) { // FIX notSpans.add(buildWildcardQuery(scope, clause.getQuery())); } } else { ArrayList<SpanQuery> subscopeQuery = new ArrayList<SpanQuery>(); subscopeQuery.add( new SpanTermQuery(new Term(scope, getTag(scope, subscope, TAG_TYPE.BEG_TAG)))); subscopeQuery.add( new SpanTermQuery(new Term(scope, getTag(scope, subscope, TAG_TYPE.END_TAG)))); subscopeQuery.add(buildWildcardQuery(scope, clause.getQuery())); if (clause.getOccur() == BooleanClause.Occur.MUST) { andSpans.add( new SpanBetweenQuery(subscopeQuery.toArray(new SpanQuery[subscopeQuery.size()]))); } else if (clause.getOccur() == BooleanClause.Occur.SHOULD) { orSpans.add( new SpanBetweenQuery(subscopeQuery.toArray(new SpanQuery[subscopeQuery.size()]))); } else if (clause.getOccur() == BooleanClause.Occur.MUST_NOT) { // FIX notSpans.add( new SpanBetweenQuery(subscopeQuery.toArray(new SpanQuery[subscopeQuery.size()]))); } } } else if (queryclazz == PrefixQuery.class) { if (subscope == null) { if (clause.getOccur() == BooleanClause.Occur.MUST) { andSpans.add(buildPrefixQuery(scope, clause.getQuery())); } else if 
(clause.getOccur() == BooleanClause.Occur.SHOULD) { orSpans.add(buildPrefixQuery(scope, clause.getQuery())); } else if (clause.getOccur() == BooleanClause.Occur.MUST_NOT) { // FIX notSpans.add(buildPrefixQuery(scope, clause.getQuery())); } } else { ArrayList<SpanQuery> subscopeQuery = new ArrayList<SpanQuery>(); subscopeQuery.add( new SpanTermQuery(new Term(scope, getTag(scope, subscope, TAG_TYPE.BEG_TAG)))); subscopeQuery.add( new SpanTermQuery(new Term(scope, getTag(scope, subscope, TAG_TYPE.END_TAG)))); subscopeQuery.add(buildPrefixQuery(scope, clause.getQuery())); if (clause.getOccur() == BooleanClause.Occur.MUST) { andSpans.add( new SpanBetweenQuery(subscopeQuery.toArray(new SpanQuery[subscopeQuery.size()]))); } else if (clause.getOccur() == BooleanClause.Occur.SHOULD) { orSpans.add( new SpanBetweenQuery(subscopeQuery.toArray(new SpanQuery[subscopeQuery.size()]))); } else if (clause.getOccur() == BooleanClause.Occur.MUST_NOT) { // FIX notSpans.add( new SpanBetweenQuery(subscopeQuery.toArray(new SpanQuery[subscopeQuery.size()]))); } } } else if (queryclazz == PhraseQuery.class) { if (subscope == null) { if (clause.getOccur() == BooleanClause.Occur.MUST) { andSpans.add(buildPhraseQuery(scope, clause.getQuery())); } else if (clause.getOccur() == BooleanClause.Occur.SHOULD) { orSpans.add(buildPhraseQuery(scope, clause.getQuery())); } else if (clause.getOccur() == BooleanClause.Occur.MUST_NOT) { // FIX notSpans.add(buildPhraseQuery(scope, clause.getQuery())); } } else { ArrayList<SpanQuery> subscopeQuery = new ArrayList<SpanQuery>(); subscopeQuery.add( new SpanTermQuery(new Term(scope, getTag(scope, subscope, TAG_TYPE.BEG_TAG)))); subscopeQuery.add( new SpanTermQuery(new Term(scope, getTag(scope, subscope, TAG_TYPE.END_TAG)))); subscopeQuery.add(buildPhraseQuery(scope, clause.getQuery())); if (clause.getOccur() == BooleanClause.Occur.MUST) { andSpans.add( new SpanBetweenQuery(subscopeQuery.toArray(new SpanQuery[subscopeQuery.size()]))); } else if (clause.getOccur() 
== BooleanClause.Occur.SHOULD) { orSpans.add( new SpanBetweenQuery(subscopeQuery.toArray(new SpanQuery[subscopeQuery.size()]))); } else if (clause.getOccur() == BooleanClause.Occur.MUST_NOT) { // FIX notSpans.add( new SpanBetweenQuery(subscopeQuery.toArray(new SpanQuery[subscopeQuery.size()]))); } } } else { System.out.println("[" + q.getClass() + "]"); } } ArrayList<SpanQuery> includeSpans = new ArrayList<SpanQuery>(); ; // Add the 'and' queries to the includeSpans (if there were any) if (!andSpans.isEmpty()) { if (andSpans.size() > 1) { includeSpans.add(new SpanAndQuery(andSpans.toArray(new SpanQuery[andSpans.size()]))); } else { includeSpans.add(andSpans.get(0)); } } // Add the 'or' queries to the includeSpans (if there were any) if (!orSpans.isEmpty()) { includeSpans.add(new SpanOrQuery(orSpans.toArray(new SpanQuery[orSpans.size()]))); } // Exclude the 'not' queries from the includeSpans (if there were any) if (!notSpans.isEmpty()) { if (includeSpans.size() > 1) { if (notSpans.size() > 1) { return new SpanNotQuery( new SpanAndQuery(includeSpans.toArray(new SpanQuery[includeSpans.size()])), new SpanAndQuery(notSpans.toArray(new SpanQuery[notSpans.size()]))); } else { return new SpanNotQuery( new SpanAndQuery(includeSpans.toArray(new SpanQuery[includeSpans.size()])), notSpans.get(0)); } } else { if (notSpans.size() > 1) { return new SpanNotQuery( includeSpans.get(0), new SpanAndQuery(notSpans.toArray(new SpanQuery[notSpans.size()]))); } else { return new SpanNotQuery(includeSpans.get(0), notSpans.get(0)); } } } else { if (includeSpans.size() > 1) { return new SpanAndQuery(includeSpans.toArray(new SpanQuery[includeSpans.size()])); } else { return includeSpans.get(0); } } }
/**
 * Adds {@code query} to {@code booleanQuery} unless an equal clause query is already present.
 *
 * <p>Term queries are rebuilt from their extracted terms, keeping the boost; with {@code like}
 * set they become wildcard queries on the lower-cased term text enclosed in stars. Boolean
 * queries are processed clause by clause into a fresh container, which is added only if it
 * ended up with at least one clause. Any other query is added as it is.
 *
 * @param booleanQuery query that receives the clauses
 * @param like whether term queries are turned into wildcard queries
 * @param queryParser supplies the locale used for lower-casing
 * @param query query to include
 * @param occur occur value for the added clause
 */
private void _includeIfUnique(
    BooleanQuery booleanQuery,
    boolean like,
    QueryParser queryParser,
    Query query,
    BooleanClause.Occur occur) {
  if (query instanceof BooleanQuery) {
    BooleanQuery nestedQuery = (BooleanQuery) query;
    BooleanQuery containerQuery = new BooleanQuery();
    for (BooleanClause nestedClause : nestedQuery.getClauses()) {
      _includeIfUnique(
          containerQuery,
          like,
          queryParser,
          nestedClause.getQuery(),
          nestedClause.getOccur());
    }
    if (containerQuery.getClauses().length > 0) {
      booleanQuery.add(containerQuery, occur);
    }
    return;
  }

  if (query instanceof TermQuery) {
    TermQuery termQuery = (TermQuery) query;
    Set<Term> terms = new HashSet<Term>();
    termQuery.extractTerms(terms);
    float boost = termQuery.getBoost();

    for (Term term : terms) {
      String termValue = term.text();
      Query candidate;
      if (like) {
        String lowerCased = termValue.toLowerCase(queryParser.getLocale());
        String pattern = StringPool.STAR.concat(lowerCased).concat(StringPool.STAR);
        candidate = new WildcardQuery(term.createTerm(pattern));
      } else {
        candidate = new TermQuery(term);
      }
      candidate.setBoost(boost);

      BooleanClause[] existing = booleanQuery.getClauses();
      boolean alreadyIncluded = false;
      for (int i = 0; i < existing.length && !alreadyIncluded; i++) {
        alreadyIncluded = candidate.equals(existing[i].getQuery());
      }
      if (!alreadyIncluded) {
        booleanQuery.add(candidate, occur);
      }
    }
    return;
  }

  BooleanClause[] existing = booleanQuery.getClauses();
  boolean alreadyIncluded = false;
  for (int i = 0; i < existing.length && !alreadyIncluded; i++) {
    alreadyIncluded = query.equals(existing[i].getQuery());
  }
  if (!alreadyIncluded) {
    booleanQuery.add(query, occur);
  }
}
@Override public void prepare(ResponseBuilder rb) throws IOException { SolrQueryRequest req = rb.req; SolrParams params = req.getParams(); // A runtime param can skip if (!params.getBool(QueryElevationParams.ENABLE, true)) { return; } boolean exclusive = params.getBool(QueryElevationParams.EXCLUSIVE, false); // A runtime parameter can alter the config value for forceElevation boolean force = params.getBool(QueryElevationParams.FORCE_ELEVATION, forceElevation); boolean markExcludes = params.getBool(QueryElevationParams.MARK_EXCLUDES, false); String boostStr = params.get(QueryElevationParams.IDS); String exStr = params.get(QueryElevationParams.EXCLUDE); Query query = rb.getQuery(); String qstr = rb.getQueryString(); if (query == null || qstr == null) { return; } ElevationObj booster = null; try { if (boostStr != null || exStr != null) { List<String> boosts = (boostStr != null) ? StrUtils.splitSmart(boostStr, ",", true) : new ArrayList<String>(0); List<String> excludes = (exStr != null) ? StrUtils.splitSmart(exStr, ",", true) : new ArrayList<String>(0); booster = new ElevationObj(qstr, boosts, excludes); } else { IndexReader reader = req.getSearcher().getIndexReader(); qstr = getAnalyzedQuery(qstr); booster = getElevationMap(reader, req.getCore()).get(qstr); } } catch (Exception ex) { throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error loading elevation", ex); } if (booster != null) { rb.req.getContext().put(BOOSTED, booster.ids); // Change the query to insert forced documents if (exclusive == true) { // we only want these results rb.setQuery(booster.include); } else { BooleanQuery newq = new BooleanQuery(true); newq.add(query, BooleanClause.Occur.SHOULD); newq.add(booster.include, BooleanClause.Occur.SHOULD); if (booster.exclude != null) { if (markExcludes == false) { for (TermQuery tq : booster.exclude) { newq.add(new BooleanClause(tq, BooleanClause.Occur.MUST_NOT)); } } else { // we are only going to mark items as excluded, not actually exclude 
them. This works // with the EditorialMarkerFactory rb.req.getContext().put(EXCLUDED, booster.excludeIds); } } rb.setQuery(newq); } ElevationComparatorSource comparator = new ElevationComparatorSource(booster); // if the sort is 'score desc' use a custom sorting method to // insert documents in their proper place SortSpec sortSpec = rb.getSortSpec(); if (sortSpec.getSort() == null) { sortSpec.setSortAndFields( new Sort( new SortField[] { new SortField("_elevate_", comparator, true), new SortField(null, SortField.Type.SCORE, false) }), Arrays.asList(new SchemaField[2])); } else { // Check if the sort is based on score SortSpec modSortSpec = this.modifySortSpec(sortSpec, force, comparator); if (null != modSortSpec) { rb.setSortSpec(modSortSpec); } } // alter the sorting in the grouping specification if there is one GroupingSpecification groupingSpec = rb.getGroupingSpec(); if (groupingSpec != null) { SortField[] groupSort = groupingSpec.getGroupSort().getSort(); Sort modGroupSort = this.modifySort(groupSort, force, comparator); if (modGroupSort != null) { groupingSpec.setGroupSort(modGroupSort); } SortField[] withinGroupSort = groupingSpec.getSortWithinGroup().getSort(); Sort modWithinGroupSort = this.modifySort(withinGroupSort, force, comparator); if (modWithinGroupSort != null) { groupingSpec.setSortWithinGroup(modWithinGroupSort); } } } // Add debugging information if (rb.isDebug()) { List<String> match = null; if (booster != null) { // Extract the elevated terms into a list match = new ArrayList<String>(booster.priority.size()); for (Object o : booster.include.clauses()) { TermQuery tq = (TermQuery) ((BooleanClause) o).getQuery(); match.add(tq.getTerm().text()); } } SimpleOrderedMap<Object> dbg = new SimpleOrderedMap<Object>(); dbg.add("q", qstr); dbg.add("match", match); if (rb.isDebugQuery()) { rb.addDebugInfo("queryBoosting", dbg); } } }
/** @see #toString(Query,IndexSchema) */ public static void toString(Query query, IndexSchema schema, Appendable out, int flags) throws IOException { boolean writeBoost = true; if (query instanceof TermQuery) { TermQuery q = (TermQuery) query; Term t = q.getTerm(); FieldType ft = writeFieldName(t.field(), schema, out, flags); writeFieldVal(t.bytes(), ft, out, flags); } else if (query instanceof TermRangeQuery) { TermRangeQuery q = (TermRangeQuery) query; String fname = q.getField(); FieldType ft = writeFieldName(fname, schema, out, flags); out.append(q.includesLower() ? '[' : '{'); BytesRef lt = q.getLowerTerm(); BytesRef ut = q.getUpperTerm(); if (lt == null) { out.append('*'); } else { writeFieldVal(lt, ft, out, flags); } out.append(" TO "); if (ut == null) { out.append('*'); } else { writeFieldVal(ut, ft, out, flags); } out.append(q.includesUpper() ? ']' : '}'); } else if (query instanceof NumericRangeQuery) { NumericRangeQuery q = (NumericRangeQuery) query; String fname = q.getField(); FieldType ft = writeFieldName(fname, schema, out, flags); out.append(q.includesMin() ? '[' : '{'); Number lt = q.getMin(); Number ut = q.getMax(); if (lt == null) { out.append('*'); } else { out.append(lt.toString()); } out.append(" TO "); if (ut == null) { out.append('*'); } else { out.append(ut.toString()); } out.append(q.includesMax() ? ']' : '}'); } else if (query instanceof BooleanQuery) { BooleanQuery q = (BooleanQuery) query; boolean needParens = false; if (q.getBoost() != 1.0 || q.getMinimumNumberShouldMatch() != 0 || q.isCoordDisabled()) { needParens = true; } if (needParens) { out.append('('); } boolean first = true; for (BooleanClause c : q.clauses()) { if (!first) { out.append(' '); } else { first = false; } if (c.isProhibited()) { out.append('-'); } else if (c.isRequired()) { out.append('+'); } Query subQuery = c.getQuery(); boolean wrapQuery = false; // TODO: may need to put parens around other types // of queries too, depending on future syntax. 
if (subQuery instanceof BooleanQuery) { wrapQuery = true; } if (wrapQuery) { out.append('('); } toString(subQuery, schema, out, flags); if (wrapQuery) { out.append(')'); } } if (needParens) { out.append(')'); } if (q.getMinimumNumberShouldMatch() > 0) { out.append('~'); out.append(Integer.toString(q.getMinimumNumberShouldMatch())); } if (q.isCoordDisabled()) { out.append("/no_coord"); } } else if (query instanceof PrefixQuery) { PrefixQuery q = (PrefixQuery) query; Term prefix = q.getPrefix(); FieldType ft = writeFieldName(prefix.field(), schema, out, flags); out.append(prefix.text()); out.append('*'); } else if (query instanceof WildcardQuery) { out.append(query.toString()); writeBoost = false; } else if (query instanceof FuzzyQuery) { out.append(query.toString()); writeBoost = false; } else if (query instanceof ConstantScoreQuery) { out.append(query.toString()); writeBoost = false; } else if (query instanceof WrappedQuery) { WrappedQuery q = (WrappedQuery) query; out.append(q.getOptions()); toString(q.getWrappedQuery(), schema, out, flags); writeBoost = false; // we don't use the boost on wrapped queries } else { out.append(query.getClass().getSimpleName() + '(' + query.toString() + ')'); writeBoost = false; } if (writeBoost && query.getBoost() != 1.0f) { out.append("^"); out.append(Float.toString(query.getBoost())); } }