/** * combine weights according to expansion formula * * @param queryTerms * @param relevantDocsTerms * @param irrelevantDocsTerms * @return */ public List<TermQuery> combine( Map<String, TermQuery> queryTerms, Map<String, TermQuery> relevantDocsTerms, Map<String, TermQuery> irrelevantDocsTerms) { // Add Terms of the relevant documents for (Map.Entry<String, TermQuery> e : queryTerms.entrySet()) { if (relevantDocsTerms.containsKey(e.getKey())) { TermQuery tq = relevantDocsTerms.get(e.getKey()); tq.setBoost(tq.getBoost() + e.getValue().getBoost()); } else { relevantDocsTerms.put(e.getKey(), e.getValue()); } } // Substract terms of irrelevant documents for (Map.Entry<String, TermQuery> e : irrelevantDocsTerms.entrySet()) { if (relevantDocsTerms.containsKey(e.getKey())) { TermQuery tq = relevantDocsTerms.get(e.getKey()); tq.setBoost(tq.getBoost() - e.getValue().getBoost()); } else { TermQuery tq = e.getValue(); tq.setBoost(-tq.getBoost()); relevantDocsTerms.put(e.getKey(), tq); } } return new ArrayList<>(relevantDocsTerms.values()); }
/** * Sets boost of termClaimsDescriptionAbstractTitles. boost = weight = factor(tf*idf) * * @param vecsTerms * @param currentField * @param factor - adjustment factor ( ex. alpha or beta ) * @param decayFactor * @return * @throws java.io.IOException */ public Map<String, TermQuery> setBoost( Map<TermFreqVector, String> vecsTerms, String currentField, float factor, float decayFactor) throws IOException { Map<String, TermQuery> terms = new HashMap<>(); // setBoost for each of the terms of each of the docs int i = 0; float norm = (float) 1 / vecsTerms.size(); // System.out.println("--------------------------"); for (Map.Entry<TermFreqVector, String> e : vecsTerms.entrySet()) { // Increase decay String field = e.getValue(); TermFreqVector docTerms = e.getKey(); float decay = decayFactor * i; // Populate terms: with TermQuries and set boost for (String termTxt : docTerms.getTerms()) { // Create Term Term term = new Term(currentField, termTxt); // Calculate weight float tf = docTerms.getFreq(termTxt); // float idf = ir.docFreq(termTitle); int docs; float idf; if (sourceField.equals(PatentQuery.all)) { docs = ir.getDocCount(field); idf = (float) Math.log10((double) docs / (ir.docFreq(new Term(field, termTxt)) + 1)); } else { docs = ir.getDocCount(sourceField); idf = (float) Math.log10((double) docs / (ir.docFreq(new Term(sourceField, termTxt)) + 1)); } float weight = tf * idf; // System.out.println(term.text() + " -> tf= " + tf + " idf= " + idf + " // tfidf= " + weight); // Adjust weight by decay factor weight = weight - (weight * decay); // Create TermQuery and add it to the collection TermQuery termQuery = new TermQuery(term); // Calculate and set boost float boost; if (vecsTerms.size() == 1) { boost = factor * tf; } else { boost = factor; } if (boost != 0) { termQuery.setBoost(boost * norm); if (terms.containsKey(termTxt)) { TermQuery tq = terms.get(termTxt); tq.setBoost(tq.getBoost() + termQuery.getBoost()); } else { terms.put(termTxt, termQuery); } } } i++; } return 
terms; }
/**
 * Runs a {@link FilteredQuery} over a term query on "xx" boosted to 1000, filtered by a
 * {@link QueryWrapperFilter} built from {@code matchTheseItems} for items 1 and 3, and expects
 * document 3 as the only hit.
 */
public void testFQ4() throws Exception {
  TermQuery boostedXx = new TermQuery(new Term(FIELD, "xx"));
  boostedXx.setBoost(1000);

  QueryWrapperFilter itemFilter = new QueryWrapperFilter(matchTheseItems(new int[] {1, 3}));
  FilteredQuery filtered = new FilteredQuery(boostedXx, itemFilter);

  int[] expectedDocs = {3};
  qtest(filtered, expectedDocs);
}
/**
 * Runs a {@link DisjunctionMaxQuery} created with 0.5f that combines a boolean query ("yy" and a
 * "w5" term boosted to 100, both SHOULD) with an "xx" term whose boost is 0, and expects
 * documents 0, 2 and 3.
 */
public void testDMQ9() throws Exception {
  DisjunctionMaxQuery dmq = new DisjunctionMaxQuery(0.5f);

  // First disjunct: yy OR w5^100
  BooleanQuery yyOrW5 = new BooleanQuery();
  TermQuery yy = new TermQuery(new Term(FIELD, "yy"));
  yyOrW5.add(yy, BooleanClause.Occur.SHOULD);
  TermQuery w5Boosted = new TermQuery(new Term(FIELD, "w5"));
  w5Boosted.setBoost(100);
  yyOrW5.add(w5Boosted, BooleanClause.Occur.SHOULD);
  dmq.add(yyOrW5);

  // Second disjunct: xx^0
  TermQuery xxZeroBoost = new TermQuery(new Term(FIELD, "xx"));
  xxZeroBoost.setBoost(0);
  dmq.add(xxZeroBoost);

  int[] expectedDocs = {0, 2, 3};
  qtest(dmq, expectedDocs);
}
/**
 * Runs a {@link BooleanQuery} with two SHOULD clauses on the same term "w1", the second one
 * boosted to 1000, and expects documents 0, 1, 2 and 3.
 */
public void testBQ11() throws Exception {
  TermQuery plainW1 = new TermQuery(new Term(FIELD, "w1"));
  TermQuery boostedW1 = new TermQuery(new Term(FIELD, "w1"));
  boostedW1.setBoost(1000);

  BooleanQuery bothClauses = new BooleanQuery();
  bothClauses.add(plainW1, BooleanClause.Occur.SHOULD);
  bothClauses.add(boostedW1, BooleanClause.Occur.SHOULD);

  int[] expectedDocs = {0, 1, 2, 3};
  qtest(bothClauses, expectedDocs);
}
/**
 * Appends the More Like This terms held in the given queue to an existing boolean query.
 *
 * <p>Terms are popped one by one and added as SHOULD clauses. When boosting is enabled, each
 * clause is boosted by {@code boostFactor * score / firstScore}, where {@code firstScore} is the
 * score of the first term popped. Adding stops quietly as soon as the query refuses further
 * clauses.
 *
 * @param q queue of scored terms; it is drained by this method (unless adding stops early)
 * @param query boolean query that receives the term clauses
 */
private void addToQuery(PriorityQueue<ScoreTerm> q, BooleanQuery query) {
  float firstScore = -1; // -1 marks "no term popped yet"
  for (ScoreTerm current = q.pop(); current != null; current = q.pop()) {
    TermQuery clause = new TermQuery(new Term(current.topField, current.word));

    if (boost) {
      if (firstScore == -1) {
        firstScore = current.score;
      }
      float termScore = current.score;
      clause.setBoost(boostFactor * termScore / firstScore);
    }

    try {
      query.add(clause, BooleanClause.Occur.SHOULD);
    } catch (BooleanQuery.TooManyClauses ignore) {
      // The query is full: keep what was added so far and stop.
      break;
    }
  }
}
/** Runs a single term query on "w1" boosted to 100 and expects documents 0, 1, 2 and 3. */
public void testT2() throws Exception {
  TermQuery boostedW1 = new TermQuery(new Term(FIELD, "w1"));
  boostedW1.setBoost(100);

  int[] expectedDocs = {0, 1, 2, 3};
  qtest(boostedW1, expectedDocs);
}
/**
 * Runs a {@link FilteredQuery} over a term query on "xx" boosted to 1000, filtered by an
 * {@code ItemizedFilter} constructed with items 1 and 3, and expects document 3 as the only hit.
 */
public void testFQ4() throws Exception {
  TermQuery boostedXx = new TermQuery(new Term(FIELD, "xx"));
  boostedXx.setBoost(1000);

  ItemizedFilter itemFilter = new ItemizedFilter(new int[] {1, 3});
  FilteredQuery filtered = new FilteredQuery(boostedXx, itemFilter);

  int[] expectedDocs = {3};
  qtest(filtered, expectedDocs);
}