/**
 * Wraps a term query on "xx" (boost 1000) in a {@link FilteredQuery} whose filter is built from
 * {@code matchTheseItems({1, 3})} and passes it to {@code qtest} together with {@code {3}}
 * (presumably the expected matching doc ids).
 */
public void testFQ4() throws Exception {
  TermQuery boostedTerm = new TermQuery(new Term(FIELD, "xx"));
  boostedTerm.setBoost(1000);

  QueryWrapperFilter itemFilter = new QueryWrapperFilter(matchTheseItems(new int[] {1, 3}));
  FilteredQuery filtered = new FilteredQuery(boostedTerm, itemFilter);

  int[] expected = {3};
  qtest(filtered, expected);
}
/**
 * Builds a query-string query for "test" with {@code STRING_FIELD_NAME} as default field and
 * asserts that it is turned into a {@link TermQuery} on exactly that field and text. Skipped
 * unless at least one type is registered.
 */
public void testToQueryTermQuery() throws IOException {
  boolean hasTypes = getCurrentTypes().length > 0;
  assumeTrue("test runs only when at least a type is registered", hasTypes);

  Query parsed =
      queryStringQuery("test").defaultField(STRING_FIELD_NAME).toQuery(createShardContext());
  assertThat(parsed, instanceOf(TermQuery.class));

  Term expectedTerm = new Term(STRING_FIELD_NAME, "test");
  assertThat(((TermQuery) parsed).getTerm(), equalTo(expectedTerm));
}
/**
 * Combines two SHOULD clauses on the same term "w1" (one plain, one with boost 1000) in a
 * {@link BooleanQuery} and passes it to {@code qtest} with {@code {0, 1, 2, 3}}.
 */
public void testBQ11() throws Exception {
  TermQuery plainClause = new TermQuery(new Term(FIELD, "w1"));

  TermQuery boostedClause = new TermQuery(new Term(FIELD, "w1"));
  boostedClause.setBoost(1000);

  BooleanQuery disjunction = new BooleanQuery();
  disjunction.add(plainClause, BooleanClause.Occur.SHOULD);
  disjunction.add(boostedClause, BooleanClause.Occur.SHOULD);

  qtest(disjunction, new int[] {0, 1, 2, 3});
}
/** * combine weights according to expansion formula * * @param queryTerms * @param relevantDocsTerms * @param irrelevantDocsTerms * @return */ public List<TermQuery> combine( Map<String, TermQuery> queryTerms, Map<String, TermQuery> relevantDocsTerms, Map<String, TermQuery> irrelevantDocsTerms) { // Add Terms of the relevant documents for (Map.Entry<String, TermQuery> e : queryTerms.entrySet()) { if (relevantDocsTerms.containsKey(e.getKey())) { TermQuery tq = relevantDocsTerms.get(e.getKey()); tq.setBoost(tq.getBoost() + e.getValue().getBoost()); } else { relevantDocsTerms.put(e.getKey(), e.getValue()); } } // Substract terms of irrelevant documents for (Map.Entry<String, TermQuery> e : irrelevantDocsTerms.entrySet()) { if (relevantDocsTerms.containsKey(e.getKey())) { TermQuery tq = relevantDocsTerms.get(e.getKey()); tq.setBoost(tq.getBoost() - e.getValue().getBoost()); } else { TermQuery tq = e.getValue(); tq.setBoost(-tq.getBoost()); relevantDocsTerms.put(e.getKey(), tq); } } return new ArrayList<>(relevantDocsTerms.values()); }
/** * Adjust termClaimsDescriptionAbstractTitle features of the docs with alpha * query; and beta; * and assign weights/boost to termClaimsDescriptionAbstractTitles (tf*idf). * * @param query * @param currentField * @param alpha * @param beta - factor of the equation * @param gamma * @param decay * @param maxExpandedQueryTerms - maximum number of termClaimsDescriptionAbstractTitles in * expanded query * @return expandedQuery with boost factors adjusted using Rocchio's algorithm * @throws IOException * @throws ParseException */ public Query adjust( Query query, String currentField, float alpha, float beta, float gamma, float decay, int maxExpandedQueryTerms) throws IOException, ParseException { Query expandedQuery; // setBoost of docs terms Map<String, TermQuery> relevantDocsTerms = setBoost(docsTermVectorReldocs, currentField, beta, decay); Map<String, TermQuery> irrrelevantDocsTerms = setBoost(docsTermVectorIrreldocs, currentField, gamma, decay); // Map<String, TermQuery> relevantDocsTerms = new HashMap<>(); // Map<String, TermQuery> irrrelevantDocsTerms = new HashMap<>(); // setBoost of query terms // Get queryTerms from the query // combine weights according to expansion formula List<TermQuery> expandedQueryTerms = combine(new HashMap<String, TermQuery>(), relevantDocsTerms, irrrelevantDocsTerms); // Sort by boost=weight Comparator comparator = new QueryBoostComparator(); Collections.sort(expandedQueryTerms, comparator); relevantDocsTerms.clear(); int termCount = Math.min(expandedQueryTerms.size(), maxExpandedQueryTerms); for (int i = 0; i < termCount; i++) { TermQuery tq = expandedQueryTerms.get(i); relevantDocsTerms.put(tq.getTerm().text(), tq); System.out.print(tq.getTerm().text() + ", "); } TermFreqVector queryTermsVector = new TermFreqVector(query); Map<String, TermQuery> queryTerms; queryTerms = setBoost(queryTermsVector, currentField, alpha); // List<TermQuery> queryTermsList=new ArrayList(queryTerms.values()); // Collections.sort(queryTermsList, 
comparator); // queryTerms.clear(); // for(TermQuery tq:queryTermsList){ // queryTerms.put(tq.getTerm().text(), tq); // } expandedQueryTerms = combine(queryTerms, relevantDocsTerms, new HashMap<String, TermQuery>()); Collections.sort(expandedQueryTerms, comparator); // Create Expanded Query expandedQuery = mergeQueries(expandedQueryTerms, Integer.MAX_VALUE); return expandedQuery; }
/**
 * Returns the boost of each weighted relevant-document term, keyed by term text.
 *
 * <p>Beta, gamma and decay are read from {@code parameters}. The relevant-document terms are
 * weighted with beta and run through {@code combine} with empty query and irrelevant term maps.
 *
 * <p>NOTE(review): the irrelevant-document terms are weighted with gamma but are not handed to
 * {@code combine}, so they do not influence the result - confirm whether this is intended.
 *
 * @param currentField field name passed on to {@code setBoost}
 * @return map from term text to boost
 * @throws IOException declared by {@code setBoost}
 */
public Map<String, Float> getRocchioVector(String currentField) throws IOException {
  float beta = parameters.get(RocchioQueryExpansion.ROCCHIO_BETA_FLD);
  float gamma = parameters.get(RocchioQueryExpansion.ROCCHIO_GAMMA_FLD);
  float decay = parameters.get(RocchioQueryExpansion.DECAY_FLD);

  Map<String, TermQuery> relevantTerms =
      setBoost(docsTermVectorReldocs, currentField, beta, decay);
  // Computed for parity with the original code; currently unused (see NOTE above).
  Map<String, TermQuery> irrelevantTerms =
      setBoost(docsTermVectorIrreldocs, currentField, gamma, decay);

  List<TermQuery> weightedTerms =
      combine(
          new HashMap<String, TermQuery>(), relevantTerms, new HashMap<String, TermQuery>());

  Map<String, Float> boostByText = new HashMap<>();
  for (TermQuery weighted : weightedTerms) {
    boostByText.put(weighted.getTerm().text(), weighted.getBoost());
  }
  return boostByText;
}
/**
 * Builds a {@link DisjunctionMaxQuery} (constructor argument 0.5f) over two sub-queries - a
 * boolean query with SHOULD clauses "yy" and "w5" (boost 100), and a term query "xx" with boost
 * 0 - and passes it to {@code qtest} with {@code {0, 2, 3}}.
 */
public void testDMQ9() throws Exception {
  // First disjunct: yy OR w5^100
  TermQuery w5Boosted = new TermQuery(new Term(FIELD, "w5"));
  w5Boosted.setBoost(100);
  BooleanQuery yyOrW5 = new BooleanQuery();
  yyOrW5.add(new TermQuery(new Term(FIELD, "yy")), BooleanClause.Occur.SHOULD);
  yyOrW5.add(w5Boosted, BooleanClause.Occur.SHOULD);

  // Second disjunct: xx^0
  TermQuery xxZeroBoost = new TermQuery(new Term(FIELD, "xx"));
  xxZeroBoost.setBoost(0);

  DisjunctionMaxQuery disMax = new DisjunctionMaxQuery(0.5f);
  disMax.add(yyOrW5);
  disMax.add(xxZeroBoost);

  qtest(disMax, new int[] {0, 2, 3});
}
/** * Sets boost of termClaimsDescriptionAbstractTitles. boost = weight = factor(tf*idf) * * @param vecsTerms * @param currentField * @param factor - adjustment factor ( ex. alpha or beta ) * @param decayFactor * @return * @throws java.io.IOException */ public Map<String, TermQuery> setBoost( Map<TermFreqVector, String> vecsTerms, String currentField, float factor, float decayFactor) throws IOException { Map<String, TermQuery> terms = new HashMap<>(); // setBoost for each of the terms of each of the docs int i = 0; float norm = (float) 1 / vecsTerms.size(); // System.out.println("--------------------------"); for (Map.Entry<TermFreqVector, String> e : vecsTerms.entrySet()) { // Increase decay String field = e.getValue(); TermFreqVector docTerms = e.getKey(); float decay = decayFactor * i; // Populate terms: with TermQuries and set boost for (String termTxt : docTerms.getTerms()) { // Create Term Term term = new Term(currentField, termTxt); // Calculate weight float tf = docTerms.getFreq(termTxt); // float idf = ir.docFreq(termTitle); int docs; float idf; if (sourceField.equals(PatentQuery.all)) { docs = ir.getDocCount(field); idf = (float) Math.log10((double) docs / (ir.docFreq(new Term(field, termTxt)) + 1)); } else { docs = ir.getDocCount(sourceField); idf = (float) Math.log10((double) docs / (ir.docFreq(new Term(sourceField, termTxt)) + 1)); } float weight = tf * idf; // System.out.println(term.text() + " -> tf= " + tf + " idf= " + idf + " // tfidf= " + weight); // Adjust weight by decay factor weight = weight - (weight * decay); // Create TermQuery and add it to the collection TermQuery termQuery = new TermQuery(term); // Calculate and set boost float boost; if (vecsTerms.size() == 1) { boost = factor * tf; } else { boost = factor; } if (boost != 0) { termQuery.setBoost(boost * norm); if (terms.containsKey(termTxt)) { TermQuery tq = terms.get(termTxt); tq.setBoost(tq.getBoost() + termQuery.getBoost()); } else { terms.put(termTxt, termQuery); } } } i++; } return 
terms; }
/**
 * Adds the More Like This terms held by the given queue to an existing boolean query.
 *
 * <p>Terms are popped until the queue returns {@code null}; each becomes a SHOULD clause. When
 * boosting is enabled, the score of the first popped term is taken as the reference and every
 * clause gets the boost {@code boostFactor * score / referenceScore}. Adding stops silently as
 * soon as the boolean query rejects a clause with {@code TooManyClauses}.
 *
 * @param q queue of scored terms; emptied by this call unless the clause limit is hit
 * @param query boolean query that receives the clauses
 */
private void addToQuery(PriorityQueue<ScoreTerm> q, BooleanQuery query) {
  // -1 marks "no reference score seen yet".
  float bestScore = -1;
  for (ScoreTerm next = q.pop(); next != null; next = q.pop()) {
    TermQuery clause = new TermQuery(new Term(next.topField, next.word));

    if (boost) {
      float score = next.score;
      if (bestScore == -1) {
        bestScore = score;
      }
      clause.setBoost(boostFactor * score / bestScore);
    }

    try {
      query.add(clause, BooleanClause.Occur.SHOULD);
    } catch (BooleanQuery.TooManyClauses ignore) {
      // The query is full; the remaining terms are dropped.
      break;
    }
  }
}
static void assertLateParsingQuery(Query query, String type, String id) throws IOException { assertThat(query, instanceOf(HasChildQueryBuilder.LateParsingQuery.class)); HasChildQueryBuilder.LateParsingQuery lateParsingQuery = (HasChildQueryBuilder.LateParsingQuery) query; assertThat(lateParsingQuery.getInnerQuery(), instanceOf(BooleanQuery.class)); BooleanQuery booleanQuery = (BooleanQuery) lateParsingQuery.getInnerQuery(); assertThat(booleanQuery.clauses().size(), equalTo(2)); // check the inner ids query, we have to call rewrite to get to check the type it's executed // against assertThat(booleanQuery.clauses().get(0).getOccur(), equalTo(BooleanClause.Occur.MUST)); assertThat(booleanQuery.clauses().get(0).getQuery(), instanceOf(TermsQuery.class)); TermsQuery termsQuery = (TermsQuery) booleanQuery.clauses().get(0).getQuery(); Query rewrittenTermsQuery = termsQuery.rewrite(null); assertThat(rewrittenTermsQuery, instanceOf(ConstantScoreQuery.class)); ConstantScoreQuery constantScoreQuery = (ConstantScoreQuery) rewrittenTermsQuery; assertThat(constantScoreQuery.getQuery(), instanceOf(BooleanQuery.class)); BooleanQuery booleanTermsQuery = (BooleanQuery) constantScoreQuery.getQuery(); assertThat(booleanTermsQuery.clauses().size(), equalTo(1)); assertThat(booleanTermsQuery.clauses().get(0).getOccur(), equalTo(BooleanClause.Occur.SHOULD)); assertThat(booleanTermsQuery.clauses().get(0).getQuery(), instanceOf(TermQuery.class)); TermQuery termQuery = (TermQuery) booleanTermsQuery.clauses().get(0).getQuery(); assertThat(termQuery.getTerm().field(), equalTo(UidFieldMapper.NAME)); // we want to make sure that the inner ids query gets executed against the child type rather // than the main type we initially set to the context BytesRef[] ids = Uid.createUidsForTypesAndIds( Collections.singletonList(type), Collections.singletonList(id)); assertThat(termQuery.getTerm().bytes(), equalTo(ids[0])); // check the type filter assertThat(booleanQuery.clauses().get(1).getOccur(), 
equalTo(BooleanClause.Occur.FILTER)); assertThat(booleanQuery.clauses().get(1).getQuery(), instanceOf(ConstantScoreQuery.class)); ConstantScoreQuery typeConstantScoreQuery = (ConstantScoreQuery) booleanQuery.clauses().get(1).getQuery(); assertThat(typeConstantScoreQuery.getQuery(), instanceOf(TermQuery.class)); TermQuery typeTermQuery = (TermQuery) typeConstantScoreQuery.getQuery(); assertThat(typeTermQuery.getTerm().field(), equalTo(TypeFieldMapper.NAME)); assertThat(typeTermQuery.getTerm().text(), equalTo(type)); }
/**
 * Passes a term query on "w1" with boost 100 to {@code qtest} together with
 * {@code {0, 1, 2, 3}}.
 */
public void testT2() throws Exception {
  Term w1 = new Term(FIELD, "w1");
  TermQuery boosted = new TermQuery(w1);
  boosted.setBoost(100);

  int[] expected = {0, 1, 2, 3};
  qtest(boosted, expected);
}
/**
 * Wraps a term query on "xx" (boost 1000) in a {@link FilteredQuery} using an
 * {@code ItemizedFilter} built from {@code {1, 3}} and passes it to {@code qtest} together with
 * {@code {3}} (presumably the expected matching doc ids).
 */
public void testFQ4() throws Exception {
  TermQuery boostedTerm = new TermQuery(new Term(FIELD, "xx"));
  boostedTerm.setBoost(1000);

  FilteredQuery filtered = new FilteredQuery(boostedTerm, new ItemizedFilter(new int[] {1, 3}));

  int[] expected = {3};
  qtest(filtered, expected);
}