/**
  * Records the terms of the original (unexpanded) query.
  *
  * <p>The term-id to term mapping held in {@code originalTermids} is emptied and then refilled
  * from {@code query}. For each query term that has statistics, the term's query weight is handed
  * to {@code originalTermFreqs.adjustOrPutValue} as both the adjustment and the initial value.
  * Terms for which {@code query.getStatistics} returns {@code null} are skipped.
  *
  * @param query The original query.
  */
 public void setOriginalQueryTerms(MatchingQueryTerms query) {
   final String[] queryTerms = query.getTerms();
   this.originalTermids.clear();
   // NOTE(review): originalTermFreqs is not cleared here, unlike originalTermids, so repeated
   // calls on the same instance keep adjusting the stored weights -- confirm this is intended.
   for (String term : queryTerms) {
     final EntryStatistics stats = query.getStatistics(term);
     if (stats == null) {
       // No statistics for this term; nothing to record.
       continue;
     }
     this.originalTermids.put(stats.getTermId(), term);
     this.originalTermFreqs.adjustOrPutValue(
         stats.getTermId(), query.getTermWeight(term), query.getTermWeight(term));
   }
 }
  /**
   * Expands the given query using terms drawn from the feedback documents.
   *
   * <p>A feedback selector is obtained (and kept in {@code selector}) if none is set yet. The
   * method returns without touching the query when no selector is available or when the selector
   * yields no feedback documents. Otherwise every feedback document is inserted into an
   * {@link ExpansionTerms} instance, the expanded terms are requested from it, and each one is
   * added to {@code query} via {@code addTermPropertyWeight}.
   *
   * @param query MatchingQueryTerms the query terms of the original query.
   * @param rq the Request thus far, giving access to the query and the result set
   * @throws IOException declared by this method; which of the calls made here can raise it is not
   *     visible from this code
   */
  public void expandQuery(MatchingQueryTerms query, Request rq) throws IOException {
    // The number of terms to re-weight is the larger of the configured expansion size and the
    // query length: re-weighting only part of a long query makes no sense. A configured size of
    // zero switches re-weighting off altogether.
    final int queryLength = query.length();
    final int configuredTerms = ApplicationSetup.EXPANSION_TERMS;
    final int numberOfTermsToReweight =
        (configuredTerms == 0) ? 0 : Math.max(configuredTerms, queryLength);

    if (selector == null) {
      selector = this.getFeedbackSelector(rq);
      if (selector == null) {
        return;
      }
    }

    final FeedbackDocument[] feedback = selector.getFeedbackDocuments(rq);
    if (feedback == null || feedback.length == 0) {
      return;
    }

    final ExpansionTerms expansionTerms = getExpansionTerms();
    expansionTerms.setModel(QEModel);
    for (int d = 0; d < feedback.length; d++) {
      expansionTerms.insertDocument(feedback[d]);
    }

    logger.debug(
        "Selecting "
            + numberOfTermsToReweight
            + " from "
            + expansionTerms.getNumberOfUniqueTerms());

    expansionTerms.setOriginalQueryTerms(query);
    final SingleTermQuery[] expandedTerms =
        expansionTerms.getExpandedTerms(numberOfTermsToReweight);
    for (SingleTermQuery expandedTerm : expandedTerms) {
      query.addTermPropertyWeight(expandedTerm.getTerm(), expandedTerm.getWeight());
      if (!logger.isDebugEnabled()) {
        continue;
      }
      // Report the weight the query now holds for this term.
      logger.debug(
          "term "
              + expandedTerm.getTerm()
              + " appears in expanded query with normalised weight: "
              + Rounding.toString(query.getTermWeight(expandedTerm.getTerm()), 4));
    }
  }
示例#3
0
  /**
   * Computes a single feature value from the collection frequencies of the query terms.
   *
   * <p>For every term that has a lexicon entry, its frequency divided by
   * {@code totalNumberOfTokens} is multiplied into a running product (terms without an entry are
   * ignored). The base-2 logarithm of that product, divided by {@code terms.length()}, is stored
   * as the only element of {@code outputFeatures}, which is cleared first.
   *
   * @return {@code outputFeatures}, holding the one computed value
   * @see org.irlib.features.Feature#computeValue()
   */
  @Override
  public Collection<? extends Double> computeValue() {
    outputFeatures.removeAllElements();

    double probabilityProduct = 1;
    for (String term : terms.getTerms()) {
      if (index.getLexicon().getLexiconEntry(term) == null) {
        // Term unknown to the lexicon: it contributes nothing to the product.
        continue;
      }
      final double collectionFrequency =
          (double) index.getLexicon().getLexiconEntry(term).getFrequency();
      probabilityProduct *= collectionFrequency / (double) totalNumberOfTokens;
    }

    // log2(x) expressed through natural logarithms.
    final double log2OfProduct = Math.log(probabilityProduct) / Math.log(2);
    outputFeatures.add(log2OfProduct / terms.length());
    return outputFeatures;
  }
  /**
   * Runs the actual query expansion.
   *
   * <p>Steps, in order:
   *
   * <ol>
   *   <li>Obtain the index through {@code getIndex(manager)} and store its structures in the
   *       corresponding fields. If the index has no direct index, log an error and stop.
   *   <li>Read the {@code qemodel} control of the request, falling back to {@code Bo1} with a
   *       warning when it is unset or empty, and install that model.
   *   <li>Fetch the matching query terms of the request; stop with a warning if there are none.
   *   <li>Call {@link #expandQuery}; an {@link IOException} is logged and ends processing.
   *   <li>Build a string of {@code term^weight} entries, each followed by a space, store it in
   *       {@code lastExpandedQuery} and in the {@code QE.ExpandedQuery} control.
   *   <li>Unless the property {@code qe.no.2nd.matching} parses to {@code true}, run matching
   *       again through {@code manager.runMatching(q)}.
   * </ol>
   *
   * @param manager used to obtain the index and to re-run matching
   * @param q the search request; it is cast to {@code Request} inside this method
   * @see
   *     org.terrier.querying.PostProcess#process(org.terrier.querying.Manager,org.terrier.querying.SearchRequest)
   */
  public void process(Manager manager, SearchRequest q) {
    final Index index = getIndex(manager);
    lastIndex = index;
    documentIndex = index.getDocumentIndex();
    invertedIndex = index.getInvertedIndex();
    lexicon = index.getLexicon();
    collStats = index.getCollectionStatistics();
    directIndex = index.getDirectIndex();
    metaIndex = index.getMetaIndex();
    if (directIndex == null) {
      logger.error("This index does not have a direct index. Query expansion disabled!!");
      return;
    }
    logger.debug("Starting query expansion post-processing.");

    // Pick the query expansion model, defaulting to Bo1 when the request names none.
    String qeModel = q.getControl("qemodel");
    final boolean modelUnset = qeModel == null || qeModel.length() == 0;
    if (modelUnset) {
      logger.warn(
          "qemodel control not set for QueryExpansion" + " post process. Using default model Bo1");
      qeModel = "Bo1";
    }
    setQueryExpansionModel(getQueryExpansionModel(qeModel));
    // NOTE(review): here and below, info-level messages are emitted only when debug logging is
    // enabled -- confirm whether the guard or the level is the intended one.
    if (logger.isDebugEnabled()) {
      logger.info("query expansion model: " + QEModel.getInfo());
    }

    final Request request = (Request) q;
    final MatchingQueryTerms queryTerms = request.getMatchingQueryTerms();
    if (queryTerms == null) {
      logger.warn("No query terms for this query. Skipping QE");
      return;
    }

    // Expand the query in place.
    try {
      expandQuery(queryTerms, request);
    } catch (IOException ioe) {
      logger.error("IOException while expanding query, skipping QE", ioe);
      return;
    }
    if (logger.isDebugEnabled()) {
      logger.info("query length after expansion: " + queryTerms.length());
      logger.info("Expanded query: ");
    }

    // Serialise the expanded query as "term^weight " entries.
    final StringBuilder newQuery = new StringBuilder();
    int position = 0;
    for (String term : queryTerms.getTerms()) {
      position++;
      try {
        if (logger.isDebugEnabled()) {
          logger.info(
              position
                  + ": "
                  + term
                  + ", normalisedFrequency: "
                  + Rounding.toString(queryTerms.getTermWeight(term), 4));
        }
        newQuery
            .append(term)
            .append('^')
            .append(Rounding.toString(queryTerms.getTermWeight(term), 9))
            .append(' ');
      } catch (NullPointerException npe) {
        // Best effort: a failing term is reported and the remaining terms are still dumped.
        logger.error("Nullpointer exception occured in Query Expansion dumping of new Query", npe);
      }
    }

    final String expandedQuery = newQuery.toString();
    logger.debug("NEWQUERY " + q.getQueryID() + " " + expandedQuery);
    lastExpandedQuery = expandedQuery;
    q.setControl("QE.ExpandedQuery", expandedQuery);

    if (Boolean.parseBoolean(ApplicationSetup.getProperty("qe.no.2nd.matching", "false"))) {
      // Second retrieval pass switched off by configuration.
      return;
    }

    // run retrieval process again for the expanded query
    logger.info("Accessing inverted file for expanded query " + q.getQueryID());
    manager.runMatching(q);
  }