/**
 * Records the terms of the original query.
 *
 * <p>The term-id to term mapping is emptied first. Then every query term for which the query
 * can supply statistics is stored under its term id, and its query weight is handed to
 * {@code originalTermFreqs.adjustOrPutValue} as both the adjustment amount and the initial
 * value. Terms without statistics are skipped.
 *
 * @param query The original query.
 */
public void setOriginalQueryTerms(MatchingQueryTerms query) {
  final String[] queryTerms = query.getTerms();
  // NOTE(review): only originalTermids is reset here; originalTermFreqs keeps whatever it
  // already holds. Confirm this is intended if the method can be called more than once.
  this.originalTermids.clear();
  for (String queryTerm : queryTerms) {
    EntryStatistics stats = query.getStatistics(queryTerm);
    if (stats == null) {
      // No statistics for this term: nothing to register.
      continue;
    }
    this.originalTermids.put(stats.getTermId(), queryTerm);
    this.originalTermFreqs.adjustOrPutValue(
        stats.getTermId(), query.getTermWeight(queryTerm), query.getTermWeight(queryTerm));
  }
}
/** * This method implements the functionality of expanding a query. * * @param query MatchingQueryTerms the query terms of the original query. * @param rq the Request thus far, giving access to the query and the result set */ public void expandQuery(MatchingQueryTerms query, Request rq) throws IOException { // the number of term to re-weight (i.e. to do relevance feedback) is // the maximum between the system setting and the actual query length. // if the query length is larger than the system setting, it does not // make sense to do relevance feedback for a portion of the query. Therefore, // we re-weight the number of query length of terms. int numberOfTermsToReweight = Math.max(ApplicationSetup.EXPANSION_TERMS, query.length()); if (ApplicationSetup.EXPANSION_TERMS == 0) numberOfTermsToReweight = 0; if (selector == null) selector = this.getFeedbackSelector(rq); if (selector == null) return; FeedbackDocument[] feedback = selector.getFeedbackDocuments(rq); if (feedback == null || feedback.length == 0) return; // double totalDocumentLength = 0; // for(FeedbackDocument doc : feedback) // { // totalDocumentLength += documentIndex.getDocumentLength(doc.docid); // if(logger.isDebugEnabled()){ // logger.debug(doc.rank +": " + metaIndex.getItem("docno", doc.docid)+ // " ("+doc.docid+") with "+doc.score); // } // } ExpansionTerms expansionTerms = getExpansionTerms(); expansionTerms.setModel(QEModel); for (FeedbackDocument doc : feedback) { expansionTerms.insertDocument(doc); } logger.debug( "Selecting " + numberOfTermsToReweight + " from " + expansionTerms.getNumberOfUniqueTerms()); expansionTerms.setOriginalQueryTerms(query); SingleTermQuery[] expandedTerms = expansionTerms.getExpandedTerms(numberOfTermsToReweight); for (int i = 0; i < expandedTerms.length; i++) { SingleTermQuery expandedTerm = expandedTerms[i]; query.addTermPropertyWeight(expandedTerm.getTerm(), expandedTerm.getWeight()); if (logger.isDebugEnabled()) { logger.debug( "term " + expandedTerms[i].getTerm() 
+ " appears in expanded query with normalised weight: " + Rounding.toString(query.getTermWeight(expandedTerms[i].getTerm()), 4)); } } }
/*
 * (non-Javadoc)
 *
 * Computes a single feature value: the product, over all query terms found in the lexicon, of
 * (term frequency / totalNumberOfTokens), taken as a base-2 logarithm and divided by
 * terms.length(). Terms missing from the lexicon do not contribute to the product. The output
 * collection is emptied before the value is added.
 *
 * @see org.irlib.features.Feature#computeValue()
 */
@Override
public Collection<? extends Double> computeValue() {
  outputFeatures.removeAllElements();

  double product = 1;
  for (String term : terms.getTerms()) {
    if (index.getLexicon().getLexiconEntry(term) == null) {
      // Term is not in the lexicon: leave the running product untouched.
      continue;
    }
    final double frequency = index.getLexicon().getLexiconEntry(term).getFrequency();
    product *= frequency / (double) totalNumberOfTokens;
  }

  // log2(x) expressed as ln(x) / ln(2).
  final double log2Product = Math.log(product) / Math.log(2);
  outputFeatures.add(log2Product / terms.length());
  return outputFeatures;
}
/** * Runs the actual query expansion * * @see * org.terrier.querying.PostProcess#process(org.terrier.querying.Manager,org.terrier.querying.SearchRequest) */ public void process(Manager manager, SearchRequest q) { Index index = getIndex(manager); lastIndex = index; documentIndex = index.getDocumentIndex(); invertedIndex = index.getInvertedIndex(); lexicon = index.getLexicon(); collStats = index.getCollectionStatistics(); directIndex = index.getDirectIndex(); metaIndex = index.getMetaIndex(); if (directIndex == null) { logger.error("This index does not have a direct index. Query expansion disabled!!"); return; } logger.debug("Starting query expansion post-processing."); // get the query expansion model to use String qeModel = q.getControl("qemodel"); if (qeModel == null || qeModel.length() == 0) { logger.warn( "qemodel control not set for QueryExpansion" + " post process. Using default model Bo1"); qeModel = "Bo1"; } setQueryExpansionModel(getQueryExpansionModel(qeModel)); if (logger.isDebugEnabled()) { logger.info("query expansion model: " + QEModel.getInfo()); } MatchingQueryTerms queryTerms = ((Request) q).getMatchingQueryTerms(); if (queryTerms == null) { logger.warn("No query terms for this query. 
Skipping QE"); return; } // get the expanded query terms try { expandQuery(queryTerms, (Request) q); } catch (IOException ioe) { logger.error("IOException while expanding query, skipping QE", ioe); return; } if (logger.isDebugEnabled()) { logger.info("query length after expansion: " + queryTerms.length()); logger.info("Expanded query: "); } final String[] newQueryTerms = queryTerms.getTerms(); StringBuilder newQuery = new StringBuilder(); for (int i = 0; i < newQueryTerms.length; i++) { try { if (logger.isDebugEnabled()) { logger.info( (i + 1) + ": " + newQueryTerms[i] + ", normalisedFrequency: " + Rounding.toString(queryTerms.getTermWeight(newQueryTerms[i]), 4)); } newQuery.append(newQueryTerms[i]); newQuery.append('^'); newQuery.append(Rounding.toString(queryTerms.getTermWeight(newQueryTerms[i]), 9)); newQuery.append(' '); } catch (NullPointerException npe) { logger.error("Nullpointer exception occured in Query Expansion dumping of new Query", npe); } } logger.debug("NEWQUERY " + q.getQueryID() + " " + newQuery.toString()); lastExpandedQuery = newQuery.toString(); q.setControl("QE.ExpandedQuery", newQuery.toString()); final boolean no2ndPass = Boolean.parseBoolean(ApplicationSetup.getProperty("qe.no.2nd.matching", "false")); if (no2ndPass) { return; } // run retrieval process again for the expanded query logger.info("Accessing inverted file for expanded query " + q.getQueryID()); manager.runMatching(q); }