예제 #1
0
 /**
  * Wraps a term query on "xx" (boost 1000) in a {@code FilteredQuery} whose filter is built from
  * {@code matchTheseItems({1, 3})}, and hands it to {@code qtest} together with {@code {3}}
  * (presumably the expected matching document ids).
  */
 public void testFQ4() throws Exception {
   TermQuery boostedXx = new TermQuery(new Term(FIELD, "xx"));
   boostedXx.setBoost(1000);

   QueryWrapperFilter itemFilter = new QueryWrapperFilter(matchTheseItems(new int[] {1, 3}));
   FilteredQuery filtered = new FilteredQuery(boostedXx, itemFilter);

   qtest(filtered, new int[] {3});
 }
 /**
  * Checks that a query-string query for "test" with {@code STRING_FIELD_NAME} as default field is
  * converted into a {@code TermQuery} on exactly that field and term.
  *
  * <p>The test is skipped (via {@code assumeTrue}) when no types are registered.
  */
 public void testToQueryTermQuery() throws IOException {
   assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0);

   Query parsed =
       queryStringQuery("test").defaultField(STRING_FIELD_NAME).toQuery(createShardContext());
   assertThat(parsed, instanceOf(TermQuery.class));

   TermQuery parsedTermQuery = (TermQuery) parsed;
   Term expectedTerm = new Term(STRING_FIELD_NAME, "test");
   assertThat(parsedTermQuery.getTerm(), equalTo(expectedTerm));
 }
  /**
   * Builds a boolean query with two SHOULD clauses on the term "w1" (the second one boosted by
   * 1000) and passes it to {@code qtest} with {@code {0, 1, 2, 3}} (presumably the expected
   * matching document ids).
   */
  public void testBQ11() throws Exception {
    TermQuery plainW1 = new TermQuery(new Term(FIELD, "w1"));

    TermQuery boostedW1 = new TermQuery(new Term(FIELD, "w1"));
    boostedW1.setBoost(1000);

    BooleanQuery either = new BooleanQuery();
    either.add(plainW1, BooleanClause.Occur.SHOULD);
    either.add(boostedW1, BooleanClause.Occur.SHOULD);

    qtest(either, new int[] {0, 1, 2, 3});
  }
 /**
  * Merges query-term boosts and irrelevant-document term boosts into the relevant-document terms.
  *
  * <p>First, every entry of {@code queryTerms} is folded in: if {@code relevantDocsTerms} already
  * has the key, the query term's boost is added to the existing entry's boost; otherwise the query
  * term is inserted as is. Then every entry of {@code irrelevantDocsTerms} is folded in: if the key
  * is present, its boost is subtracted from the existing entry's boost; otherwise the term is
  * inserted with its boost negated.
  *
  * <p>This method works in place: {@code relevantDocsTerms} is modified, and boosts are changed
  * directly on the {@code TermQuery} instances held by the maps (including instances taken from
  * {@code irrelevantDocsTerms}).
  *
  * @param queryTerms terms of the query, by key (term text in the callers visible here)
  * @param relevantDocsTerms terms of the relevant documents; updated in place
  * @param irrelevantDocsTerms terms of the irrelevant documents
  * @return a new list containing the values of {@code relevantDocsTerms} after merging
  */
 public List<TermQuery> combine(
     Map<String, TermQuery> queryTerms,
     Map<String, TermQuery> relevantDocsTerms,
     Map<String, TermQuery> irrelevantDocsTerms) {
   // Positive contribution: add the query terms to the relevant-document terms.
   for (Map.Entry<String, TermQuery> queryEntry : queryTerms.entrySet()) {
     String key = queryEntry.getKey();
     if (!relevantDocsTerms.containsKey(key)) {
       relevantDocsTerms.put(key, queryEntry.getValue());
       continue;
     }
     TermQuery merged = relevantDocsTerms.get(key);
     merged.setBoost(merged.getBoost() + queryEntry.getValue().getBoost());
   }

   // Negative contribution: subtract the irrelevant-document terms.
   for (Map.Entry<String, TermQuery> irrelevantEntry : irrelevantDocsTerms.entrySet()) {
     String key = irrelevantEntry.getKey();
     if (!relevantDocsTerms.containsKey(key)) {
       // Term only seen in irrelevant documents: keep it, but with a negated boost.
       TermQuery negated = irrelevantEntry.getValue();
       negated.setBoost(-negated.getBoost());
       relevantDocsTerms.put(key, negated);
       continue;
     }
     TermQuery merged = relevantDocsTerms.get(key);
     merged.setBoost(merged.getBoost() - irrelevantEntry.getValue().getBoost());
   }

   return new ArrayList<>(relevantDocsTerms.values());
 }
  /**
   * Expands {@code query} with terms taken from the relevant and irrelevant feedback documents.
   *
   * <p>Steps, in order:
   *
   * <ol>
   *   <li>Boost the terms of the relevant documents with {@code beta} and those of the irrelevant
   *       documents with {@code gamma} (both via {@code setBoost}, with {@code decay}).
   *   <li>Combine both sets (irrelevant boosts are subtracted) and sort the result with
   *       {@code QueryBoostComparator}.
   *   <li>Keep at most {@code maxExpandedQueryTerms} terms from the head of the sorted list; the
   *       text of each kept term is printed to {@code System.out} followed by {@code ", "}.
   *   <li>Boost the terms of the original query with {@code alpha}, combine them with the kept
   *       terms, sort again and merge everything into one query via {@code mergeQueries}.
   * </ol>
   *
   * @param query the original query whose terms are extracted through {@code TermFreqVector}
   * @param currentField field passed on to {@code setBoost} for the created terms
   * @param alpha factor applied to the query terms
   * @param beta factor applied to the relevant-document terms
   * @param gamma factor applied to the irrelevant-document terms
   * @param decay decay factor passed on to {@code setBoost} for the document terms
   * @param maxExpandedQueryTerms maximum number of document terms kept before the query terms are
   *     added; a value of zero or less keeps none
   * @return the query produced by {@code mergeQueries} from the combined, sorted terms
   * @throws IOException declared by the helpers called here
   * @throws ParseException declared by the helpers called here
   */
  public Query adjust(
      Query query,
      String currentField,
      float alpha,
      float beta,
      float gamma,
      float decay,
      int maxExpandedQueryTerms)
      throws IOException, ParseException {
    // Weight the feedback documents' terms.
    Map<String, TermQuery> relevantTerms =
        setBoost(docsTermVectorReldocs, currentField, beta, decay);
    Map<String, TermQuery> irrelevantTerms =
        setBoost(docsTermVectorIrreldocs, currentField, gamma, decay);

    // Merge the document terms only (no query terms yet) and rank them.
    List<TermQuery> rankedDocTerms =
        combine(new HashMap<String, TermQuery>(), relevantTerms, irrelevantTerms);
    Comparator comparator = new QueryBoostComparator();
    Collections.sort(rankedDocTerms, comparator);

    // Reuse the relevant-terms map to hold just the leading terms of the ranking.
    relevantTerms.clear();
    int kept = 0;
    for (TermQuery candidate : rankedDocTerms) {
      if (kept >= maxExpandedQueryTerms) {
        break;
      }
      String text = candidate.getTerm().text();
      relevantTerms.put(text, candidate);
      System.out.print(text + ", ");
      kept++;
    }

    // Weight the original query's terms and fold them into the kept document terms.
    TermFreqVector queryVector = new TermFreqVector(query);
    Map<String, TermQuery> queryTerms = setBoost(queryVector, currentField, alpha);
    List<TermQuery> expandedTerms =
        combine(queryTerms, relevantTerms, new HashMap<String, TermQuery>());
    Collections.sort(expandedTerms, comparator);

    // Build the expanded query from every combined term.
    return mergeQueries(expandedTerms, Integer.MAX_VALUE);
  }
 /**
  * Returns the boosts of the relevant-document terms as a map from term text to boost.
  *
  * <p>The beta, gamma and decay values are read from {@code parameters}. The relevant-document
  * terms are boosted with beta and decay via {@code setBoost}, passed through {@code combine}
  * with empty query and irrelevant maps, and then flattened into the returned map.
  *
  * @param currentField field passed on to {@code setBoost} for the created terms
  * @return term text mapped to the term's boost
  * @throws IOException declared by {@code setBoost}
  */
 public Map<String, Float> getRocchioVector(String currentField) throws IOException {
   float beta = parameters.get(RocchioQueryExpansion.ROCCHIO_BETA_FLD);
   float gamma = parameters.get(RocchioQueryExpansion.ROCCHIO_GAMMA_FLD);
   float decay = parameters.get(RocchioQueryExpansion.DECAY_FLD);

   Map<String, TermQuery> relevantTerms =
       setBoost(docsTermVectorReldocs, currentField, beta, decay);
   // NOTE(review): the irrelevant-document terms are computed here but never handed to combine
   // below (an empty map is passed instead), so gamma has no effect on the result. Confirm
   // whether that is intentional.
   Map<String, TermQuery> irrelevantTerms =
       setBoost(docsTermVectorIrreldocs, currentField, gamma, decay);

   Map<String, TermQuery> noQueryTerms = new HashMap<String, TermQuery>();
   Map<String, TermQuery> noIrrelevantTerms = new HashMap<String, TermQuery>();
   List<TermQuery> merged = combine(noQueryTerms, relevantTerms, noIrrelevantTerms);

   Map<String, Float> vector = new HashMap<>();
   for (TermQuery termQuery : merged) {
     vector.put(termQuery.getTerm().text(), termQuery.getBoost());
   }
   return vector;
 }
  /**
   * Builds a {@code DisjunctionMaxQuery} (constructed with 0.5f) over two disjuncts: a boolean
   * query of SHOULD clauses "yy" and "w5" (the latter boosted by 100), and a term query on "xx"
   * with boost 0. The query is passed to {@code qtest} with {@code {0, 2, 3}} (presumably the
   * expected matching document ids).
   */
  public void testDMQ9() throws Exception {
    // First disjunct: yy OR w5^100.
    TermQuery boostedW5 = new TermQuery(new Term(FIELD, "w5"));
    boostedW5.setBoost(100);
    BooleanQuery yyOrW5 = new BooleanQuery();
    yyOrW5.add(new TermQuery(new Term(FIELD, "yy")), BooleanClause.Occur.SHOULD);
    yyOrW5.add(boostedW5, BooleanClause.Occur.SHOULD);

    // Second disjunct: xx with a zero boost.
    TermQuery zeroBoostXx = new TermQuery(new Term(FIELD, "xx"));
    zeroBoostXx.setBoost(0);

    DisjunctionMaxQuery disMax = new DisjunctionMaxQuery(0.5f);
    disMax.add(yyOrW5);
    disMax.add(zeroBoostXx);

    qtest(disMax, new int[] {0, 2, 3});
  }
  /**
   * Builds one boosted {@code TermQuery} per distinct term text found in the given term vectors.
   *
   * <p>For every term of every vector, the contribution is {@code factor * tf} when exactly one
   * vector is supplied and plain {@code factor} otherwise; the contribution is then scaled by
   * {@code 1 / vecsTerms.size()}. Contributions for the same term text are summed onto a single
   * {@code TermQuery}. Terms whose unscaled contribution equals zero are skipped.
   *
   * <p>NOTE(review): a decayed tf*idf weight is also computed for each term (using document
   * statistics read from {@code ir}), but that weight is never applied to the boost. Confirm
   * whether the boost was meant to use it.
   *
   * @param vecsTerms term vectors, each mapped to a field name; that field is used for the
   *     statistics lookup when {@code sourceField} equals {@code PatentQuery.all}
   * @param currentField field of the terms created for the returned queries
   * @param factor adjustment factor (e.g. alpha or beta)
   * @param decayFactor multiplied by the vector's iteration position to obtain the decay applied
   *     to the (currently unused) weight
   * @return map from term text to its boosted {@code TermQuery}
   * @throws IOException declared for the statistics lookups on {@code ir}
   */
  public Map<String, TermQuery> setBoost(
      Map<TermFreqVector, String> vecsTerms, String currentField, float factor, float decayFactor)
      throws IOException {
    Map<String, TermQuery> boostedByText = new HashMap<>();
    int vectorCount = vecsTerms.size();
    boolean singleVector = vectorCount == 1;
    // Every vector contributes an equal share of the final boost.
    float share = 1f / vectorCount;

    int position = 0;
    for (Map.Entry<TermFreqVector, String> vectorEntry : vecsTerms.entrySet()) {
      TermFreqVector vector = vectorEntry.getKey();
      String vectorField = vectorEntry.getValue();
      // Decay grows with the vector's position in the iteration order.
      float decay = decayFactor * position;

      for (String termTxt : vector.getTerms()) {
        Term term = new Term(currentField, termTxt);
        float tf = vector.getFreq(termTxt);

        // Statistics come from the vector's own field when all fields are searched, otherwise
        // from the configured source field.
        String statsField = sourceField.equals(PatentQuery.all) ? vectorField : sourceField;
        int docs = ir.getDocCount(statsField);
        float idf =
            (float) Math.log10((double) docs / (ir.docFreq(new Term(statsField, termTxt)) + 1));

        // NOTE(review): this decayed tf*idf weight is computed but not used below.
        float weight = tf * idf;
        weight = weight - (weight * decay);

        TermQuery candidate = new TermQuery(term);
        float boost = singleVector ? factor * tf : factor;
        if (boost == 0) {
          continue;
        }

        candidate.setBoost(boost * share);
        TermQuery existing = boostedByText.get(termTxt);
        if (existing == null) {
          boostedByText.put(termTxt, candidate);
        } else {
          existing.setBoost(existing.getBoost() + candidate.getBoost());
        }
      }
      position++;
    }
    return boostedByText;
  }
예제 #9
0
  /**
   * Drains {@code q} and appends one SHOULD term clause per popped {@code ScoreTerm} to
   * {@code query}.
   *
   * <p>When {@code boost} is enabled, each clause's boost is {@code boostFactor} times the term's
   * score divided by the score of the first popped term. Adding stops as soon as the query
   * rejects a clause with {@code TooManyClauses}; clauses added before that are kept.
   *
   * @param q queue of scored terms; entries are removed as they are processed
   * @param query boolean query that receives the clauses
   */
  private void addToQuery(PriorityQueue<ScoreTerm> q, BooleanQuery query) {
    // -1 marks "no score seen yet"; the first popped term's score replaces it.
    float bestScore = -1;

    for (ScoreTerm current = q.pop(); current != null; current = q.pop()) {
      TermQuery clause = new TermQuery(new Term(current.topField, current.word));

      if (boost) {
        float myScore = current.score;
        if (bestScore == -1) {
          bestScore = myScore;
        }
        clause.setBoost(boostFactor * myScore / bestScore);
      }

      try {
        query.add(clause, BooleanClause.Occur.SHOULD);
      } catch (BooleanQuery.TooManyClauses ignore) {
        // The query cannot take more clauses; stop and keep what was added so far.
        break;
      }
    }
  }
 /**
  * Asserts the structure of a {@code HasChildQueryBuilder.LateParsingQuery}.
  *
  * <p>The inner query must be a {@code BooleanQuery} with exactly two clauses: a MUST
  * {@code TermsQuery} that rewrites to a constant-score boolean query holding a single SHOULD
  * {@code TermQuery} on the uid field for the given type and id, and a FILTER constant-score
  * {@code TermQuery} on the type field for the given type.
  *
  * @param query the query to inspect
  * @param type the type expected in both the uid term and the type filter
  * @param id the id expected in the uid term
  * @throws IOException declared for the {@code rewrite} call on the terms query
  */
 static void assertLateParsingQuery(Query query, String type, String id) throws IOException {
   assertThat(query, instanceOf(HasChildQueryBuilder.LateParsingQuery.class));
   Query innerQuery = ((HasChildQueryBuilder.LateParsingQuery) query).getInnerQuery();
   assertThat(innerQuery, instanceOf(BooleanQuery.class));
   BooleanQuery outerBool = (BooleanQuery) innerQuery;
   assertThat(outerBool.clauses().size(), equalTo(2));

   // First clause: the inner ids query. It has to be rewritten before the type it is executed
   // against can be checked.
   BooleanClause idsClause = outerBool.clauses().get(0);
   assertThat(idsClause.getOccur(), equalTo(BooleanClause.Occur.MUST));
   assertThat(idsClause.getQuery(), instanceOf(TermsQuery.class));
   Query rewrittenIds = ((TermsQuery) idsClause.getQuery()).rewrite(null);
   assertThat(rewrittenIds, instanceOf(ConstantScoreQuery.class));
   Query rewrittenInner = ((ConstantScoreQuery) rewrittenIds).getQuery();
   assertThat(rewrittenInner, instanceOf(BooleanQuery.class));
   BooleanQuery idsBool = (BooleanQuery) rewrittenInner;
   assertThat(idsBool.clauses().size(), equalTo(1));

   BooleanClause uidClause = idsBool.clauses().get(0);
   assertThat(uidClause.getOccur(), equalTo(BooleanClause.Occur.SHOULD));
   assertThat(uidClause.getQuery(), instanceOf(TermQuery.class));
   TermQuery uidTermQuery = (TermQuery) uidClause.getQuery();
   assertThat(uidTermQuery.getTerm().field(), equalTo(UidFieldMapper.NAME));

   // The ids query must target the child type, not the main type initially set on the context.
   BytesRef[] expectedUids =
       Uid.createUidsForTypesAndIds(
           Collections.singletonList(type), Collections.singletonList(id));
   assertThat(uidTermQuery.getTerm().bytes(), equalTo(expectedUids[0]));

   // Second clause: the type filter.
   BooleanClause typeClause = outerBool.clauses().get(1);
   assertThat(typeClause.getOccur(), equalTo(BooleanClause.Occur.FILTER));
   assertThat(typeClause.getQuery(), instanceOf(ConstantScoreQuery.class));
   Query typeInner = ((ConstantScoreQuery) typeClause.getQuery()).getQuery();
   assertThat(typeInner, instanceOf(TermQuery.class));
   TermQuery typeTermQuery = (TermQuery) typeInner;
   assertThat(typeTermQuery.getTerm().field(), equalTo(TypeFieldMapper.NAME));
   assertThat(typeTermQuery.getTerm().text(), equalTo(type));
 }
 /**
  * Passes a term query on "w1" with boost 100 to {@code qtest} together with
  * {@code {0, 1, 2, 3}} (presumably the expected matching document ids).
  */
 public void testT2() throws Exception {
   TermQuery boostedW1 = new TermQuery(new Term(FIELD, "w1"));
   boostedW1.setBoost(100);

   int[] docIds = {0, 1, 2, 3};
   qtest(boostedW1, docIds);
 }
 /**
  * Wraps a term query on "xx" (boost 1000) in a {@code FilteredQuery} using an
  * {@code ItemizedFilter} built from {@code {1, 3}}, and hands it to {@code qtest} together with
  * {@code {3}} (presumably the expected matching document ids).
  */
 public void testFQ4() throws Exception {
   TermQuery boostedXx = new TermQuery(new Term(FIELD, "xx"));
   boostedXx.setBoost(1000);

   ItemizedFilter itemFilter = new ItemizedFilter(new int[] {1, 3});
   FilteredQuery filtered = new FilteredQuery(boostedXx, itemFilter);

   qtest(filtered, new int[] {3});
 }