/** * Adjust termClaimsDescriptionAbstractTitle features of the docs with alpha * query; and beta; * and assign weights/boost to termClaimsDescriptionAbstractTitles (tf*idf). * * @param query * @param currentField * @param alpha * @param beta - factor of the equation * @param gamma * @param decay * @param maxExpandedQueryTerms - maximum number of termClaimsDescriptionAbstractTitles in * expanded query * @return expandedQuery with boost factors adjusted using Rocchio's algorithm * @throws IOException * @throws ParseException */ public Query adjust( Query query, String currentField, float alpha, float beta, float gamma, float decay, int maxExpandedQueryTerms) throws IOException, ParseException { Query expandedQuery; // setBoost of docs terms Map<String, TermQuery> relevantDocsTerms = setBoost(docsTermVectorReldocs, currentField, beta, decay); Map<String, TermQuery> irrrelevantDocsTerms = setBoost(docsTermVectorIrreldocs, currentField, gamma, decay); // Map<String, TermQuery> relevantDocsTerms = new HashMap<>(); // Map<String, TermQuery> irrrelevantDocsTerms = new HashMap<>(); // setBoost of query terms // Get queryTerms from the query // combine weights according to expansion formula List<TermQuery> expandedQueryTerms = combine(new HashMap<String, TermQuery>(), relevantDocsTerms, irrrelevantDocsTerms); // Sort by boost=weight Comparator comparator = new QueryBoostComparator(); Collections.sort(expandedQueryTerms, comparator); relevantDocsTerms.clear(); int termCount = Math.min(expandedQueryTerms.size(), maxExpandedQueryTerms); for (int i = 0; i < termCount; i++) { TermQuery tq = expandedQueryTerms.get(i); relevantDocsTerms.put(tq.getTerm().text(), tq); System.out.print(tq.getTerm().text() + ", "); } TermFreqVector queryTermsVector = new TermFreqVector(query); Map<String, TermQuery> queryTerms; queryTerms = setBoost(queryTermsVector, currentField, alpha); // List<TermQuery> queryTermsList=new ArrayList(queryTerms.values()); // Collections.sort(queryTermsList, 
comparator); // queryTerms.clear(); // for(TermQuery tq:queryTermsList){ // queryTerms.put(tq.getTerm().text(), tq); // } expandedQueryTerms = combine(queryTerms, relevantDocsTerms, new HashMap<String, TermQuery>()); Collections.sort(expandedQueryTerms, comparator); // Create Expanded Query expandedQuery = mergeQueries(expandedQueryTerms, Integer.MAX_VALUE); return expandedQuery; }
/**
 * Returns the feedback term weights for one field as a map from term text to boost.
 *
 * <p>The factors are read from {@code parameters} under the
 * {@code RocchioQueryExpansion.ROCCHIO_BETA_FLD}, {@code ROCCHIO_GAMMA_FLD} and
 * {@code DECAY_FLD} keys. The relevant-document terms are weighted via {@code setBoost} and
 * passed through {@code combine} with an empty query-term map and an empty irrelevant-term map.
 *
 * @param currentField field name handed to every {@code setBoost} call
 * @return map of term text to the boost of the corresponding combined term query
 * @throws IOException declared by this method; presumably raised by the term-weighting helpers
 *     (confirm against {@code setBoost})
 */
public Map<String, Float> getRocchioVector(String currentField) throws IOException {
  float beta = parameters.get(RocchioQueryExpansion.ROCCHIO_BETA_FLD);
  float gamma = parameters.get(RocchioQueryExpansion.ROCCHIO_GAMMA_FLD);
  float decay = parameters.get(RocchioQueryExpansion.DECAY_FLD);

  Map<String, TermQuery> positiveTerms =
      setBoost(docsTermVectorReldocs, currentField, beta, decay);

  // NOTE(review): the irrelevant-document terms are weighted here but the result is not
  // handed to combine() below (an empty map is used instead), unlike in adjust(). The call is
  // kept so behaviour is unchanged - confirm whether the omission is intentional.
  setBoost(docsTermVectorIrreldocs, currentField, gamma, decay);

  Map<String, TermQuery> noQueryTerms = new HashMap<String, TermQuery>();
  Map<String, TermQuery> noNegativeTerms = new HashMap<String, TermQuery>();
  List<TermQuery> combinedTerms = combine(noQueryTerms, positiveTerms, noNegativeTerms);

  // Flatten the combined term queries into term text -> boost.
  Map<String, Float> weightsByTerm = new HashMap<>();
  for (TermQuery weighted : combinedTerms) {
    String termText = weighted.getTerm().text();
    weightsByTerm.put(termText, weighted.getBoost());
  }
  return weightsByTerm;
}