/**
  * Combines three sets of weighted terms into a single list of term queries.
  *
  * <p>The boost of every entry in {@code queryTerms} is added to the entry with the same key in
  * {@code relevantDocsTerms}; afterwards the boost of every entry in {@code irrelevantDocsTerms}
  * is subtracted from it. A key that is missing from {@code relevantDocsTerms} is inserted: query
  * terms keep their boost, irrelevant terms get their boost negated.
  *
  * <p>This method works in place. {@code relevantDocsTerms} receives the merged entries, and the
  * {@link TermQuery} instances stored in the three maps are re-boosted and shared, not copied.
  *
  * @param queryTerms weighted terms of the query
  * @param relevantDocsTerms weighted terms of the relevant documents; modified by this call
  * @param irrelevantDocsTerms weighted terms of the irrelevant documents
  * @return a new list containing the values of {@code relevantDocsTerms} after merging
  */
 public List<TermQuery> combine(
     Map<String, TermQuery> queryTerms,
     Map<String, TermQuery> relevantDocsTerms,
     Map<String, TermQuery> irrelevantDocsTerms) {
   // Add the query terms to the terms of the relevant documents.
   for (Map.Entry<String, TermQuery> queryEntry : queryTerms.entrySet()) {
     String key = queryEntry.getKey();
     if (!relevantDocsTerms.containsKey(key)) {
       relevantDocsTerms.put(key, queryEntry.getValue());
       continue;
     }
     TermQuery merged = relevantDocsTerms.get(key);
     merged.setBoost(merged.getBoost() + queryEntry.getValue().getBoost());
   }

   // Subtract the terms of the irrelevant documents.
   for (Map.Entry<String, TermQuery> irrelevantEntry : irrelevantDocsTerms.entrySet()) {
     String key = irrelevantEntry.getKey();
     if (!relevantDocsTerms.containsKey(key)) {
       // Term occurs only in irrelevant documents: it enters the result with a negated boost.
       TermQuery negated = irrelevantEntry.getValue();
       negated.setBoost(-negated.getBoost());
       relevantDocsTerms.put(key, negated);
       continue;
     }
     TermQuery merged = relevantDocsTerms.get(key);
     merged.setBoost(merged.getBoost() - irrelevantEntry.getValue().getBoost());
   }

   List<TermQuery> combined = new ArrayList<>(relevantDocsTerms.values());
   return combined;
 }
  /**
   * Builds one boosted {@link TermQuery} per distinct term text found in the given term vectors.
   *
   * <p>Every occurrence of a term in a vector contributes {@code factor / vecsTerms.size()} to
   * the boost of that term; when exactly one vector is supplied the contribution is additionally
   * multiplied by the term frequency. Contributions for the same term text are summed. A term
   * whose un-normalized contribution is zero is left out.
   *
   * <p>NOTE(review): a tf*idf weight, reduced by {@code decayFactor} times the position of the
   * vector in iteration order, is computed for every term but never used for the boost. The
   * index lookups it needs are still performed. Confirm whether the boost was meant to include
   * that weight.
   *
   * @param vecsTerms term vectors, each mapped to a field name that is used for the statistics
   *     lookup when {@code sourceField} equals {@code PatentQuery.all}
   * @param currentField field of the terms that are created
   * @param factor adjustment factor (e.g. alpha or beta)
   * @param decayFactor decay step applied per vector to the (currently unused) weight
   * @return the boosted term queries keyed by term text
   * @throws IOException if a document-count or document-frequency lookup on {@code ir} fails
   */
  public Map<String, TermQuery> setBoost(
      Map<TermFreqVector, String> vecsTerms, String currentField, float factor, float decayFactor)
      throws IOException {
    Map<String, TermQuery> boostedTerms = new HashMap<>();
    // Every vector carries the same share of the final boost.
    float norm = 1f / vecsTerms.size();
    int vectorIndex = 0;
    for (Map.Entry<TermFreqVector, String> vectorEntry : vecsTerms.entrySet()) {
      TermFreqVector vector = vectorEntry.getKey();
      String vectorField = vectorEntry.getValue();
      // The decay grows with the position of the vector.
      float decay = decayFactor * vectorIndex;
      for (String text : vector.getTerms()) {
        Term term = new Term(currentField, text);
        float tf = vector.getFreq(text);

        // Statistics come from the vector's own field when all fields are searched,
        // otherwise from the configured source field.
        String statsField = sourceField.equals(PatentQuery.all) ? vectorField : sourceField;
        int docCount = ir.getDocCount(statsField);
        float idf =
            (float) Math.log10((double) docCount / (ir.docFreq(new Term(statsField, text)) + 1));

        // tf*idf adjusted by the decay; see the NOTE(review) above, the value is not used below.
        float weight = tf * idf;
        weight = weight - (weight * decay);

        float boost = (vecsTerms.size() == 1) ? factor * tf : factor;
        if (boost == 0) {
          continue;
        }

        TermQuery candidate = new TermQuery(term);
        candidate.setBoost(boost * norm);
        TermQuery existing = boostedTerms.get(text);
        if (existing == null) {
          boostedTerms.put(text, candidate);
        } else {
          // Same term text seen before: accumulate the boosts.
          existing.setBoost(existing.getBoost() + candidate.getBoost());
        }
      }
      vectorIndex++;
    }
    return boostedTerms;
  }
Exemplo n.º 3
0
  /**
   * Converts a {@link TermQuery} into its JSON representation.
   *
   * <p>The result has a top-level {@code "term"} member that maps the field name of the term to
   * an object with the members {@code "value"} (the term text) and {@code "boost"} (the boost of
   * the query). The object is passed through {@code wrapQueryForNestedIfRequired} together with
   * the field name before it is returned.
   *
   * @param termQuery the query to convert
   * @return the value returned by {@code wrapQueryForNestedIfRequired} for the converted query
   */
  private static JsonObject convertTermQuery(TermQuery termQuery) {
    String field = termQuery.getTerm().field();
    String text = termQuery.getTerm().text();
    float boost = termQuery.getBoost();

    JsonObject termJson =
        JsonBuilder.object()
            .add(
                "term",
                JsonBuilder.object()
                    .add(
                        field,
                        JsonBuilder.object()
                            .addProperty("value", text)
                            .addProperty("boost", boost)))
            .build();

    return wrapQueryForNestedIfRequired(field, termJson);
  }
 /**
  * Computes the weight of every term that results from the feedback documents.
  *
  * <p>The beta, gamma and decay values are read from {@code parameters}. The terms of {@code
  * docsTermVectorReldocs} are boosted with beta and the terms of {@code docsTermVectorIrreldocs}
  * with gamma via {@code setBoost}. {@code combine} is then called with an empty query-term map,
  * the relevant terms and an empty irrelevant-term map.
  *
  * <p>NOTE(review): the gamma-weighted irrelevant terms are computed but not handed to {@code
  * combine}, so they do not influence the result. Confirm whether that is intended.
  *
  * @param currentField field passed on to {@code setBoost} for the created terms
  * @return the boost of each resulting term query keyed by its term text
  * @throws IOException if {@code setBoost} fails
  */
 public Map<String, Float> getRocchioVector(String currentField) throws IOException {
   float beta = parameters.get(RocchioQueryExpansion.ROCCHIO_BETA_FLD);
   float gamma = parameters.get(RocchioQueryExpansion.ROCCHIO_GAMMA_FLD);
   float decay = parameters.get(RocchioQueryExpansion.DECAY_FLD);

   Map<String, TermQuery> relevantTerms =
       setBoost(docsTermVectorReldocs, currentField, beta, decay);
   // Computed but not used below; see the NOTE(review) in the method documentation.
   Map<String, TermQuery> irrelevantTerms =
       setBoost(docsTermVectorIrreldocs, currentField, gamma, decay);

   Map<String, TermQuery> noQueryTerms = new HashMap<String, TermQuery>();
   Map<String, TermQuery> noIrrelevantTerms = new HashMap<String, TermQuery>();

   Map<String, Float> vector = new HashMap<>();
   for (TermQuery weighted : combine(noQueryTerms, relevantTerms, noIrrelevantTerms)) {
     vector.put(weighted.getTerm().text(), weighted.getBoost());
   }
   return vector;
 }
Exemplo n.º 5
0
  /**
   * Adds {@code query} to {@code booleanQuery} unless an equal query is already one of its
   * clauses.
   *
   * <ul>
   *   <li>A {@link TermQuery} is rebuilt from its extracted terms. With {@code like} set, each
   *       term text is lower-cased using the locale of {@code queryParser}, surrounded with
   *       {@code StringPool.STAR} and turned into a {@link WildcardQuery}; otherwise a new
   *       {@link TermQuery} is created. The original boost is carried over.
   *   <li>A {@link BooleanQuery} is rebuilt clause by clause into a fresh {@link BooleanQuery},
   *       applying the same rules recursively; the rebuilt query is added only when it ended up
   *       with at least one clause.
   *   <li>Any other query is added as is.
   * </ul>
   *
   * @param booleanQuery the query that receives the clauses
   * @param like whether term queries are converted to wildcard queries
   * @param queryParser supplies the locale used for lower-casing when {@code like} is set
   * @param query the query to include
   * @param occur the occurrence used when a clause is added
   */
  private void _includeIfUnique(
      BooleanQuery booleanQuery,
      boolean like,
      QueryParser queryParser,
      Query query,
      BooleanClause.Occur occur) {

    if (query instanceof TermQuery) {
      TermQuery termQuery = (TermQuery) query;

      Set<Term> terms = new HashSet<Term>();

      termQuery.extractTerms(terms);

      float boost = termQuery.getBoost();

      for (Term term : terms) {
        Query termClause;

        if (like) {
          String termValue = term.text().toLowerCase(queryParser.getLocale());

          Term wildcardTerm =
              term.createTerm(StringPool.STAR.concat(termValue).concat(StringPool.STAR));

          termClause = new WildcardQuery(wildcardTerm);
        } else {
          termClause = new TermQuery(term);
        }

        termClause.setBoost(boost);

        _addIfAbsent(booleanQuery, termClause, occur);
      }
    } else if (query instanceof BooleanQuery) {
      BooleanQuery curBooleanQuery = (BooleanQuery) query;

      // Uniqueness inside a nested boolean query is checked against its own rebuilt container.
      BooleanQuery containerBooleanQuery = new BooleanQuery();

      for (BooleanClause booleanClause : curBooleanQuery.getClauses()) {
        _includeIfUnique(
            containerBooleanQuery,
            like,
            queryParser,
            booleanClause.getQuery(),
            booleanClause.getOccur());
      }

      if (containerBooleanQuery.getClauses().length > 0) {
        booleanQuery.add(containerBooleanQuery, occur);
      }
    } else {
      _addIfAbsent(booleanQuery, query, occur);
    }
  }

  /**
   * Adds {@code query} to {@code booleanQuery} with the given occurrence unless one of the
   * existing clauses already holds an equal query. The scan stops at the first match.
   */
  private void _addIfAbsent(BooleanQuery booleanQuery, Query query, BooleanClause.Occur occur) {
    for (BooleanClause booleanClause : booleanQuery.getClauses()) {
      if (query.equals(booleanClause.getQuery())) {
        return;
      }
    }

    booleanQuery.add(query, occur);
  }
  /**
   * Checks which factory method the query parser uses for queries that contain {@code *} and
   * that boosts are still applied to the queries those methods return.
   *
   * <p>The parser under test overrides the wildcard, prefix and field query factories so that
   * each records a marker in {@code type[0]}: 1 for wildcard, 2 for prefix, 3 for field query.
   * The wildcard and prefix overrides return a plain {@link TermQuery} instead of delegating to
   * the superclass.
   */
  public void testStarParsing() throws Exception {
    // Single-element array so the anonymous subclass can write to it.
    final int[] type = new int[1];
    QueryParser qp =
        new QueryParser(
            TEST_VERSION_CURRENT,
            "field",
            new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)) {
          @Override
          protected Query getWildcardQuery(String field, String termStr) throws ParseException {
            // override error checking of superclass
            type[0] = 1;
            return new TermQuery(new Term(field, termStr));
          }

          @Override
          protected Query getPrefixQuery(String field, String termStr) throws ParseException {
            // override error checking of superclass
            type[0] = 2;
            return new TermQuery(new Term(field, termStr));
          }

          @Override
          protected Query getFieldQuery(String field, String queryText, boolean quoted)
              throws ParseException {
            type[0] = 3;
            return super.getFieldQuery(field, queryText, quoted);
          }
        };

    TermQuery tq;

    // Trailing star: handled as a prefix query, the star is not part of the term.
    tq = (TermQuery) qp.parse("foo:zoo*");
    assertEquals("zoo", tq.getTerm().text());
    assertEquals(2, type[0]);

    tq = (TermQuery) qp.parse("foo:zoo*^2");
    assertEquals("zoo", tq.getTerm().text());
    assertEquals(2, type[0]);
    assertEquals(2, tq.getBoost(), 0);

    // Lone star as term: handled as a wildcard query.
    tq = (TermQuery) qp.parse("foo:*");
    assertEquals("*", tq.getTerm().text());
    assertEquals(1, type[0]); // could be a valid prefix query in the future too

    tq = (TermQuery) qp.parse("foo:*^2");
    assertEquals("*", tq.getTerm().text());
    assertEquals(1, type[0]);
    assertEquals(2, tq.getBoost(), 0);

    // Star as field name with a regular term: handled as a field query.
    tq = (TermQuery) qp.parse("*:foo");
    assertEquals("*", tq.getTerm().field());
    assertEquals("foo", tq.getTerm().text());
    assertEquals(3, type[0]);

    // Star as both field and term.
    tq = (TermQuery) qp.parse("*:*");
    assertEquals("*", tq.getTerm().field());
    assertEquals("*", tq.getTerm().text());
    assertEquals(1, type[0]); // could be handled as a prefix query in the future

    tq = (TermQuery) qp.parse("(*:*)");
    assertEquals("*", tq.getTerm().field());
    assertEquals("*", tq.getTerm().text());
    assertEquals(1, type[0]);
  }