/**
   * Sets boost of termClaimsDescriptionAbstractTitles. boost = weight = factor(tf*idf)
   *
   * @param vecsTerms
   * @param currentField
   * @param factor - adjustment factor ( ex. alpha or beta )
   * @param decayFactor
   * @return
   * @throws java.io.IOException
   */
  public Map<String, TermQuery> setBoost(
      Map<TermFreqVector, String> vecsTerms, String currentField, float factor, float decayFactor)
      throws IOException {
    Map<String, TermQuery> terms = new HashMap<>();
    // setBoost for each of the terms of each of the docs
    int i = 0;
    float norm = (float) 1 / vecsTerms.size();
    //        System.out.println("--------------------------");
    for (Map.Entry<TermFreqVector, String> e : vecsTerms.entrySet()) {
      // Increase decay
      String field = e.getValue();
      TermFreqVector docTerms = e.getKey();
      float decay = decayFactor * i;
      // Populate terms: with TermQuries and set boost
      for (String termTxt : docTerms.getTerms()) {
        // Create Term
        Term term = new Term(currentField, termTxt);
        // Calculate weight
        float tf = docTerms.getFreq(termTxt);
        //                float idf = ir.docFreq(termTitle);
        int docs;
        float idf;
        if (sourceField.equals(PatentQuery.all)) {
          docs = ir.getDocCount(field);
          idf = (float) Math.log10((double) docs / (ir.docFreq(new Term(field, termTxt)) + 1));
        } else {
          docs = ir.getDocCount(sourceField);
          idf =
              (float) Math.log10((double) docs / (ir.docFreq(new Term(sourceField, termTxt)) + 1));
        }
        float weight = tf * idf;

        //                System.out.println(term.text() + " -> tf= " + tf + " idf= " + idf + "
        // tfidf= " + weight);
        // Adjust weight by decay factor
        weight = weight - (weight * decay);
        // Create TermQuery and add it to the collection
        TermQuery termQuery = new TermQuery(term);
        // Calculate and set boost
        float boost;
        if (vecsTerms.size() == 1) {
          boost = factor * tf;
        } else {
          boost = factor;
        }

        if (boost != 0) {
          termQuery.setBoost(boost * norm);
          if (terms.containsKey(termTxt)) {
            TermQuery tq = terms.get(termTxt);
            tq.setBoost(tq.getBoost() + termQuery.getBoost());
          } else {
            terms.put(termTxt, termQuery);
          }
        }
      }
      i++;
    }
    return terms;
  }
 /**
  * combine weights according to expansion formula
  *
  * @param queryTerms
  * @param relevantDocsTerms
  * @param irrelevantDocsTerms
  * @return
  */
 public List<TermQuery> combine(
     Map<String, TermQuery> queryTerms,
     Map<String, TermQuery> relevantDocsTerms,
     Map<String, TermQuery> irrelevantDocsTerms) {
   // Add Terms of the relevant documents
   for (Map.Entry<String, TermQuery> e : queryTerms.entrySet()) {
     if (relevantDocsTerms.containsKey(e.getKey())) {
       TermQuery tq = relevantDocsTerms.get(e.getKey());
       tq.setBoost(tq.getBoost() + e.getValue().getBoost());
     } else {
       relevantDocsTerms.put(e.getKey(), e.getValue());
     }
   }
   // Substract terms of irrelevant documents
   for (Map.Entry<String, TermQuery> e : irrelevantDocsTerms.entrySet()) {
     if (relevantDocsTerms.containsKey(e.getKey())) {
       TermQuery tq = relevantDocsTerms.get(e.getKey());
       tq.setBoost(tq.getBoost() - e.getValue().getBoost());
     } else {
       TermQuery tq = e.getValue();
       tq.setBoost(-tq.getBoost());
       relevantDocsTerms.put(e.getKey(), tq);
     }
   }
   return new ArrayList<>(relevantDocsTerms.values());
 }
Пример #3
0
  /** Add to an existing boolean query the More Like This query from this PriorityQueue */
  private void addToQuery(PriorityQueue<ScoreTerm> q, BooleanQuery query) {
    ScoreTerm scoreTerm;
    float bestScore = -1;

    while ((scoreTerm = q.pop()) != null) {
      TermQuery tq = new TermQuery(new Term(scoreTerm.topField, scoreTerm.word));

      if (boost) {
        if (bestScore == -1) {
          bestScore = (scoreTerm.score);
        }
        float myScore = (scoreTerm.score);
        tq.setBoost(boostFactor * myScore / bestScore);
      }

      try {
        query.add(tq, BooleanClause.Occur.SHOULD);
      } catch (BooleanQuery.TooManyClauses ignore) {
        break;
      }
    }
  }