예제 #1
0
  private final void populateTerm(
      final HQuery hQuery, final boolean isShould, final boolean isMust, final Query subQueryL)
      throws FederatedSearchException {

    HTerm hTerm = new HTerm();
    hTerm.isShould = isShould;
    hTerm.isMust = isMust;
    hTerm.boost = subQueryL.getBoost();
    hQuery.terms.add(hTerm);

    if (subQueryL instanceof TermQuery) {
      TermQuery lTerm = (TermQuery) subQueryL;
      hTerm.type = lTerm.getTerm().field();
      hTerm.text = lTerm.getTerm().text();

    } else if (subQueryL instanceof FuzzyQuery) {
      FuzzyQuery lTerm = (FuzzyQuery) subQueryL;
      hTerm.isFuzzy = true;
      hTerm.type = lTerm.getTerm().field();
      hTerm.text = lTerm.getTerm().text();

    } else if (subQueryL instanceof TermRangeQuery) {
      TermRangeQuery lTerm = (TermRangeQuery) subQueryL;
      hTerm.isFuzzy = false;
      hTerm.type = lTerm.getField();
      hTerm.minRange = lTerm.getLowerTerm();
      hTerm.maxRange = lTerm.getUpperTerm();

    } else {
      throw new FederatedSearchException(
          "HQueryParser: Not Implemented Query :" + subQueryL.getClass().toString());
    }
  }
  public Map<String, Double> search(String text) {
    Map<String, Double> similar = new HashMap<String, Double>();
    try {
      TokenStream tokenStream = analyzer.tokenStream("text", text);
      CharTermAttribute charTermAtt = tokenStream.addAttribute(CharTermAttribute.class);
      tokenStream.reset();
      BooleanQuery bQuery = new BooleanQuery();
      while (tokenStream.incrementToken()) {
        String token = charTermAtt.toString();
        TermQuery tq = new TermQuery(new Term("text", token));
        tq.setBoost(2f);

        bQuery.add(tq, Occur.MUST);
      }
      tokenStream.close();

      TopDocs results = searcher.search(bQuery, 100000);
      ScoreDoc[] hits = results.scoreDocs;
      for (ScoreDoc hit : hits) {
        Document doc = searcher.doc(hit.doc);
        similar.put(doc.get("id"), new Double(hit.score));
      }
    } catch (Exception e) {
      e.printStackTrace();
    }
    return similar;
  }
  public List<Document> searchDocuments(String text) {
    List<Document> documents = new ArrayList<Document>();
    try {
      TokenStream tokenStream = analyzer.tokenStream("text", text);
      CharTermAttribute charTermAtt = tokenStream.addAttribute(CharTermAttribute.class);
      tokenStream.reset();
      BooleanQuery bQuery = new BooleanQuery();
      while (tokenStream.incrementToken()) {
        String token = charTermAtt.toString();
        TermQuery tq = new TermQuery(new Term("text", token));
        tq.setBoost(2f);

        bQuery.add(tq, Occur.MUST);
      }
      tokenStream.close();

      TopDocs results = searcher.search(bQuery, 100000);
      ScoreDoc[] hits = results.scoreDocs;
      for (ScoreDoc hit : hits) {
        Document doc = searcher.doc(hit.doc);
        doc.add(new FloatField("score", hit.score, FloatField.TYPE_STORED));
        documents.add(doc);
      }
    } catch (Exception e) {
      e.printStackTrace();
    }
    return documents;
  }
 public void testToQueryTermQuery() throws IOException {
   assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0);
   Query query =
       queryStringQuery("test").defaultField(STRING_FIELD_NAME).toQuery(createShardContext());
   assertThat(query, instanceOf(TermQuery.class));
   TermQuery termQuery = (TermQuery) query;
   assertThat(termQuery.getTerm(), equalTo(new Term(STRING_FIELD_NAME, "test")));
 }
 /**
  * combine weights according to expansion formula
  *
  * @param queryTerms
  * @param relevantDocsTerms
  * @param irrelevantDocsTerms
  * @return
  */
 public List<TermQuery> combine(
     Map<String, TermQuery> queryTerms,
     Map<String, TermQuery> relevantDocsTerms,
     Map<String, TermQuery> irrelevantDocsTerms) {
   // Add Terms of the relevant documents
   for (Map.Entry<String, TermQuery> e : queryTerms.entrySet()) {
     if (relevantDocsTerms.containsKey(e.getKey())) {
       TermQuery tq = relevantDocsTerms.get(e.getKey());
       tq.setBoost(tq.getBoost() + e.getValue().getBoost());
     } else {
       relevantDocsTerms.put(e.getKey(), e.getValue());
     }
   }
   // Substract terms of irrelevant documents
   for (Map.Entry<String, TermQuery> e : irrelevantDocsTerms.entrySet()) {
     if (relevantDocsTerms.containsKey(e.getKey())) {
       TermQuery tq = relevantDocsTerms.get(e.getKey());
       tq.setBoost(tq.getBoost() - e.getValue().getBoost());
     } else {
       TermQuery tq = e.getValue();
       tq.setBoost(-tq.getBoost());
       relevantDocsTerms.put(e.getKey(), tq);
     }
   }
   return new ArrayList<>(relevantDocsTerms.values());
 }
  /**
   * Construct a span term query
   *
   * @param scope
   * @param q
   * @return
   */
  private SpanQuery buildTermQuery(String scope, Query q) {

    TermQuery tq = (TermQuery) q;
    ArrayList<SpanQuery> spans = new ArrayList<SpanQuery>();
    spans.add(
        new SpanTermQuery(new Term(scope, getTag(scope, tq.getTerm().field(), TAG_TYPE.BEG_TAG))));
    spans.add(
        new SpanTermQuery(new Term(scope, getTag(scope, tq.getTerm().field(), TAG_TYPE.END_TAG))));
    spans.add(new SpanTermQuery(new Term(scope, tq.getTerm().text())));
    return new SpanBetweenQuery(spans.toArray(new SpanQuery[spans.size()]));
  }
  /**
   * Adjust termClaimsDescriptionAbstractTitle features of the docs with alpha * query; and beta;
   * and assign weights/boost to termClaimsDescriptionAbstractTitles (tf*idf).
   *
   * @param query
   * @param currentField
   * @param alpha
   * @param beta - factor of the equation
   * @param gamma
   * @param decay
   * @param maxExpandedQueryTerms - maximum number of termClaimsDescriptionAbstractTitles in
   *     expanded query
   * @return expandedQuery with boost factors adjusted using Rocchio's algorithm
   * @throws IOException
   * @throws ParseException
   */
  public Query adjust(
      Query query,
      String currentField,
      float alpha,
      float beta,
      float gamma,
      float decay,
      int maxExpandedQueryTerms)
      throws IOException, ParseException {
    Query expandedQuery;
    // setBoost of docs terms
    Map<String, TermQuery> relevantDocsTerms =
        setBoost(docsTermVectorReldocs, currentField, beta, decay);
    Map<String, TermQuery> irrrelevantDocsTerms =
        setBoost(docsTermVectorIrreldocs, currentField, gamma, decay);
    //        Map<String, TermQuery> relevantDocsTerms = new HashMap<>();
    //        Map<String, TermQuery> irrrelevantDocsTerms = new HashMap<>();
    // setBoost of query terms
    // Get queryTerms from the query

    // combine weights according to expansion formula
    List<TermQuery> expandedQueryTerms =
        combine(new HashMap<String, TermQuery>(), relevantDocsTerms, irrrelevantDocsTerms);
    // Sort by boost=weight
    Comparator comparator = new QueryBoostComparator();
    Collections.sort(expandedQueryTerms, comparator);
    relevantDocsTerms.clear();
    int termCount = Math.min(expandedQueryTerms.size(), maxExpandedQueryTerms);
    for (int i = 0; i < termCount; i++) {
      TermQuery tq = expandedQueryTerms.get(i);
      relevantDocsTerms.put(tq.getTerm().text(), tq);
      System.out.print(tq.getTerm().text() + ", ");
    }
    TermFreqVector queryTermsVector = new TermFreqVector(query);
    Map<String, TermQuery> queryTerms;

    queryTerms = setBoost(queryTermsVector, currentField, alpha);

    //        List<TermQuery> queryTermsList=new ArrayList(queryTerms.values());
    //        Collections.sort(queryTermsList, comparator);
    //        queryTerms.clear();
    //        for(TermQuery tq:queryTermsList){
    //            queryTerms.put(tq.getTerm().text(), tq);
    //        }
    expandedQueryTerms = combine(queryTerms, relevantDocsTerms, new HashMap<String, TermQuery>());
    Collections.sort(expandedQueryTerms, comparator);
    // Create Expanded Query
    expandedQuery = mergeQueries(expandedQueryTerms, Integer.MAX_VALUE);

    return expandedQuery;
  }
예제 #8
0
  private static JsonObject convertTermQuery(TermQuery termQuery) {
    String field = termQuery.getTerm().field();

    JsonObject matchQuery =
        JsonBuilder.object()
            .add(
                "term",
                JsonBuilder.object()
                    .add(
                        field,
                        JsonBuilder.object()
                            .addProperty("value", termQuery.getTerm().text())
                            .addProperty("boost", termQuery.getBoost())))
            .build();

    return wrapQueryForNestedIfRequired(field, matchQuery);
  }
 public Map<String, Float> getRocchioVector(String currentField) throws IOException {
   Map<String, Float> out = new HashMap<>();
   float beta = parameters.get(RocchioQueryExpansion.ROCCHIO_BETA_FLD);
   float gamma = parameters.get(RocchioQueryExpansion.ROCCHIO_GAMMA_FLD);
   float decay = parameters.get(RocchioQueryExpansion.DECAY_FLD);
   Map<String, TermQuery> relevantDocsTerms =
       setBoost(docsTermVectorReldocs, currentField, beta, decay);
   Map<String, TermQuery> irrrelevantDocsTerms =
       setBoost(docsTermVectorIrreldocs, currentField, gamma, decay);
   List<TermQuery> expandedQueryTerms =
       combine(
           new HashMap<String, TermQuery>(), relevantDocsTerms, new HashMap<String, TermQuery>());
   for (TermQuery tq : expandedQueryTerms) {
     out.put(tq.getTerm().text(), tq.getBoost());
   }
   return out;
 }
예제 #10
0
  public void testBoostFactor() throws Throwable {
    Map<String, Float> originalValues = getOriginalValues();

    MoreLikeThis mlt = new MoreLikeThis(reader);
    Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
    mlt.setAnalyzer(analyzer);
    mlt.setMinDocFreq(1);
    mlt.setMinTermFreq(1);
    mlt.setMinWordLen(1);
    mlt.setFieldNames(new String[] {"text"});
    mlt.setBoost(true);

    // this mean that every term boost factor will be multiplied by this
    // number
    float boostFactor = 5;
    mlt.setBoostFactor(boostFactor);

    BooleanQuery query = (BooleanQuery) mlt.like("text", new StringReader("lucene release"));
    Collection<BooleanClause> clauses = query.clauses();

    assertEquals(
        "Expected " + originalValues.size() + " clauses.", originalValues.size(), clauses.size());

    for (BooleanClause clause : clauses) {
      BoostQuery bq = (BoostQuery) clause.getQuery();
      TermQuery tq = (TermQuery) bq.getQuery();
      Float termBoost = originalValues.get(tq.getTerm().text());
      assertNotNull("Expected term " + tq.getTerm().text(), termBoost);

      float totalBoost = termBoost * boostFactor;
      assertEquals(
          "Expected boost of "
              + totalBoost
              + " for term '"
              + tq.getTerm().text()
              + "' got "
              + bq.getBoost(),
          totalBoost,
          bq.getBoost(),
          0.0001);
    }
    analyzer.close();
  }
예제 #11
0
  private Query computeQuery(String patternString) {
    String upperPatternString = patternString.toUpperCase();

    boolean hasBooleanSpecifiers =
        upperPatternString.contains(" OR ")
            || upperPatternString.contains(" AND ") // $NON-NLS-1$ //$NON-NLS-2$
            || upperPatternString.contains(" NOT "); // $NON-NLS-1$

    if (!hasBooleanSpecifiers
        && defaultField.equals(FIELD_SUMMARY)
        && !containsSpecialCharacters(patternString)) {
      return new PrefixQuery(new Term(defaultField.getIndexKey(), patternString));
    }
    QueryParser qp =
        new QueryParser(
            Version.LUCENE_CURRENT, defaultField.getIndexKey(), TaskAnalyzer.instance());
    Query q;
    try {
      q = qp.parse(patternString);
    } catch (ParseException e) {
      return new PrefixQuery(new Term(defaultField.getIndexKey(), patternString));
    }

    // relax term clauses to be prefix clauses so that we get results close
    // to what we're expecting
    // from previous task list search
    if (q instanceof BooleanQuery) {
      BooleanQuery query = (BooleanQuery) q;
      for (BooleanClause clause : query.getClauses()) {
        if (clause.getQuery() instanceof TermQuery) {
          TermQuery termQuery = (TermQuery) clause.getQuery();
          clause.setQuery(new PrefixQuery(termQuery.getTerm()));
        }
        if (!hasBooleanSpecifiers) {
          clause.setOccur(Occur.MUST);
        }
      }
    } else if (q instanceof TermQuery) {
      return new PrefixQuery(((TermQuery) q).getTerm());
    }
    return q;
  }
예제 #12
0
  private Map<String, Float> getOriginalValues() throws IOException {
    Map<String, Float> originalValues = new HashMap<>();
    MoreLikeThis mlt = new MoreLikeThis(reader);
    Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
    mlt.setAnalyzer(analyzer);
    mlt.setMinDocFreq(1);
    mlt.setMinTermFreq(1);
    mlt.setMinWordLen(1);
    mlt.setFieldNames(new String[] {"text"});
    mlt.setBoost(true);
    BooleanQuery query = (BooleanQuery) mlt.like("text", new StringReader("lucene release"));
    Collection<BooleanClause> clauses = query.clauses();

    for (BooleanClause clause : clauses) {
      BoostQuery bq = (BoostQuery) clause.getQuery();
      TermQuery tq = (TermQuery) bq.getQuery();
      originalValues.put(tq.getTerm().text(), bq.getBoost());
    }
    analyzer.close();
    return originalValues;
  }
  /**
   * Sets boost of termClaimsDescriptionAbstractTitles. boost = weight = factor(tf*idf)
   *
   * @param vecsTerms
   * @param currentField
   * @param factor - adjustment factor ( ex. alpha or beta )
   * @param decayFactor
   * @return
   * @throws java.io.IOException
   */
  public Map<String, TermQuery> setBoost(
      Map<TermFreqVector, String> vecsTerms, String currentField, float factor, float decayFactor)
      throws IOException {
    Map<String, TermQuery> terms = new HashMap<>();
    // setBoost for each of the terms of each of the docs
    int i = 0;
    float norm = (float) 1 / vecsTerms.size();
    //        System.out.println("--------------------------");
    for (Map.Entry<TermFreqVector, String> e : vecsTerms.entrySet()) {
      // Increase decay
      String field = e.getValue();
      TermFreqVector docTerms = e.getKey();
      float decay = decayFactor * i;
      // Populate terms: with TermQuries and set boost
      for (String termTxt : docTerms.getTerms()) {
        // Create Term
        Term term = new Term(currentField, termTxt);
        // Calculate weight
        float tf = docTerms.getFreq(termTxt);
        //                float idf = ir.docFreq(termTitle);
        int docs;
        float idf;
        if (sourceField.equals(PatentQuery.all)) {
          docs = ir.getDocCount(field);
          idf = (float) Math.log10((double) docs / (ir.docFreq(new Term(field, termTxt)) + 1));
        } else {
          docs = ir.getDocCount(sourceField);
          idf =
              (float) Math.log10((double) docs / (ir.docFreq(new Term(sourceField, termTxt)) + 1));
        }
        float weight = tf * idf;

        //                System.out.println(term.text() + " -> tf= " + tf + " idf= " + idf + "
        // tfidf= " + weight);
        // Adjust weight by decay factor
        weight = weight - (weight * decay);
        // Create TermQuery and add it to the collection
        TermQuery termQuery = new TermQuery(term);
        // Calculate and set boost
        float boost;
        if (vecsTerms.size() == 1) {
          boost = factor * tf;
        } else {
          boost = factor;
        }

        if (boost != 0) {
          termQuery.setBoost(boost * norm);
          if (terms.containsKey(termTxt)) {
            TermQuery tq = terms.get(termTxt);
            tq.setBoost(tq.getBoost() + termQuery.getBoost());
          } else {
            terms.put(termTxt, termQuery);
          }
        }
      }
      i++;
    }
    return terms;
  }
예제 #14
0
  /** Add to an existing boolean query the More Like This query from this PriorityQueue */
  private void addToQuery(PriorityQueue<ScoreTerm> q, BooleanQuery query) {
    ScoreTerm scoreTerm;
    float bestScore = -1;

    while ((scoreTerm = q.pop()) != null) {
      TermQuery tq = new TermQuery(new Term(scoreTerm.topField, scoreTerm.word));

      if (boost) {
        if (bestScore == -1) {
          bestScore = (scoreTerm.score);
        }
        float myScore = (scoreTerm.score);
        tq.setBoost(boostFactor * myScore / bestScore);
      }

      try {
        query.add(tq, BooleanClause.Occur.SHOULD);
      } catch (BooleanQuery.TooManyClauses ignore) {
        break;
      }
    }
  }
 static void assertLateParsingQuery(Query query, String type, String id) throws IOException {
   assertThat(query, instanceOf(HasChildQueryBuilder.LateParsingQuery.class));
   HasChildQueryBuilder.LateParsingQuery lateParsingQuery =
       (HasChildQueryBuilder.LateParsingQuery) query;
   assertThat(lateParsingQuery.getInnerQuery(), instanceOf(BooleanQuery.class));
   BooleanQuery booleanQuery = (BooleanQuery) lateParsingQuery.getInnerQuery();
   assertThat(booleanQuery.clauses().size(), equalTo(2));
   // check the inner ids query, we have to call rewrite to get to check the type it's executed
   // against
   assertThat(booleanQuery.clauses().get(0).getOccur(), equalTo(BooleanClause.Occur.MUST));
   assertThat(booleanQuery.clauses().get(0).getQuery(), instanceOf(TermsQuery.class));
   TermsQuery termsQuery = (TermsQuery) booleanQuery.clauses().get(0).getQuery();
   Query rewrittenTermsQuery = termsQuery.rewrite(null);
   assertThat(rewrittenTermsQuery, instanceOf(ConstantScoreQuery.class));
   ConstantScoreQuery constantScoreQuery = (ConstantScoreQuery) rewrittenTermsQuery;
   assertThat(constantScoreQuery.getQuery(), instanceOf(BooleanQuery.class));
   BooleanQuery booleanTermsQuery = (BooleanQuery) constantScoreQuery.getQuery();
   assertThat(booleanTermsQuery.clauses().size(), equalTo(1));
   assertThat(booleanTermsQuery.clauses().get(0).getOccur(), equalTo(BooleanClause.Occur.SHOULD));
   assertThat(booleanTermsQuery.clauses().get(0).getQuery(), instanceOf(TermQuery.class));
   TermQuery termQuery = (TermQuery) booleanTermsQuery.clauses().get(0).getQuery();
   assertThat(termQuery.getTerm().field(), equalTo(UidFieldMapper.NAME));
   // we want to make sure that the inner ids query gets executed against the child type rather
   // than the main type we initially set to the context
   BytesRef[] ids =
       Uid.createUidsForTypesAndIds(
           Collections.singletonList(type), Collections.singletonList(id));
   assertThat(termQuery.getTerm().bytes(), equalTo(ids[0]));
   // check the type filter
   assertThat(booleanQuery.clauses().get(1).getOccur(), equalTo(BooleanClause.Occur.FILTER));
   assertThat(booleanQuery.clauses().get(1).getQuery(), instanceOf(ConstantScoreQuery.class));
   ConstantScoreQuery typeConstantScoreQuery =
       (ConstantScoreQuery) booleanQuery.clauses().get(1).getQuery();
   assertThat(typeConstantScoreQuery.getQuery(), instanceOf(TermQuery.class));
   TermQuery typeTermQuery = (TermQuery) typeConstantScoreQuery.getQuery();
   assertThat(typeTermQuery.getTerm().field(), equalTo(TypeFieldMapper.NAME));
   assertThat(typeTermQuery.getTerm().text(), equalTo(type));
 }
  @Override
  public void testStarParsing() throws Exception {
    final int[] type = new int[1];
    QueryParser qp =
        new QueryParser("field", new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)) {
          @Override
          protected Query getWildcardQuery(String field, String termStr) {
            // override error checking of superclass
            type[0] = 1;
            return new TermQuery(new Term(field, termStr));
          }

          @Override
          protected Query getPrefixQuery(String field, String termStr) {
            // override error checking of superclass
            type[0] = 2;
            return new TermQuery(new Term(field, termStr));
          }

          @Override
          protected Query getFieldQuery(String field, String queryText, boolean quoted)
              throws ParseException {
            type[0] = 3;
            return super.getFieldQuery(field, queryText, quoted);
          }
        };

    TermQuery tq;

    tq = (TermQuery) qp.parse("foo:zoo*");
    assertEquals("zoo", tq.getTerm().text());
    assertEquals(2, type[0]);

    BoostQuery bq = (BoostQuery) qp.parse("foo:zoo*^2");
    tq = (TermQuery) bq.getQuery();
    assertEquals("zoo", tq.getTerm().text());
    assertEquals(2, type[0]);
    assertEquals(bq.getBoost(), 2, 0);

    tq = (TermQuery) qp.parse("foo:*");
    assertEquals("*", tq.getTerm().text());
    assertEquals(1, type[0]); // could be a valid prefix query in the future too

    bq = (BoostQuery) qp.parse("foo:*^2");
    tq = (TermQuery) bq.getQuery();
    assertEquals("*", tq.getTerm().text());
    assertEquals(1, type[0]);
    assertEquals(bq.getBoost(), 2, 0);

    tq = (TermQuery) qp.parse("*:foo");
    assertEquals("*", tq.getTerm().field());
    assertEquals("foo", tq.getTerm().text());
    assertEquals(3, type[0]);

    tq = (TermQuery) qp.parse("*:*");
    assertEquals("*", tq.getTerm().field());
    assertEquals("*", tq.getTerm().text());
    assertEquals(1, type[0]); // could be handled as a prefix query in the
    // future

    tq = (TermQuery) qp.parse("(*:*)");
    assertEquals("*", tq.getTerm().field());
    assertEquals("*", tq.getTerm().text());
    assertEquals(1, type[0]);
  }
예제 #17
0
 private static void planTermQuery(final StringBuilder builder, final TermQuery query) {
   builder.append(query.getTerm());
 }
  /**
   * Construct the query (using spans). This method will be called recursively.
   *
   * @param q Query
   * @param scope Author, affiliation, or Reference
   * @param level Used only for formatting (indentation) the level of recursion
   * @param andSpans ArrayList of Spans that should be 'and'
   * @param orSpans ArrayList of Spans that should be 'or'
   * @param notSpans ArrayList of Spans that should be 'not'
   * @return SpanQuery
   */
  private SpanQuery buildQuery_recursive(
      Query q,
      String scope,
      int level,
      ArrayList<SpanQuery> andSpans,
      ArrayList<SpanQuery> orSpans,
      ArrayList<SpanQuery> notSpans) {

    BooleanQuery castQuery = (BooleanQuery) q;
    String subscope = null;

    for (BooleanClause clause : castQuery.getClauses()) {

      Class queryclazz = clause.getQuery().getClass();

      System.out.println(
          repeat(' ', level)
              + "["
              + queryclazz
              + "]["
              + clause.getOccur()
              + "] "
              + clause.toString());

      if (queryclazz == BooleanQuery.class) {

        System.out.println("Number of Clauses is " + castQuery.clauses().size());
        System.out.println("Minimum Number to Match is " + castQuery.getMinimumNumberShouldMatch());

        if (subscope == null) {

          if (clause.getOccur() == BooleanClause.Occur.MUST) {
            andSpans.add(
                buildQuery_recursive(
                    clause.getQuery(),
                    scope,
                    level + 1,
                    new ArrayList<SpanQuery>(),
                    new ArrayList<SpanQuery>(),
                    new ArrayList<SpanQuery>()));
          } else if (clause.getOccur() == BooleanClause.Occur.SHOULD) {
            orSpans.add(
                buildQuery_recursive(
                    clause.getQuery(),
                    scope,
                    level + 1,
                    new ArrayList<SpanQuery>(),
                    new ArrayList<SpanQuery>(),
                    new ArrayList<SpanQuery>()));
          } else if (clause.getOccur() == BooleanClause.Occur.MUST_NOT) {
            // FIX
            notSpans.add(
                buildQuery_recursive(
                    clause.getQuery(),
                    scope,
                    level + 1,
                    new ArrayList<SpanQuery>(),
                    new ArrayList<SpanQuery>(),
                    new ArrayList<SpanQuery>()));
          }

        } else {

          ArrayList<SpanQuery> subscopeQuery = new ArrayList<SpanQuery>();
          subscopeQuery.add(
              new SpanTermQuery(new Term(scope, getTag(scope, subscope, TAG_TYPE.BEG_TAG))));
          subscopeQuery.add(
              new SpanTermQuery(new Term(scope, getTag(scope, subscope, TAG_TYPE.END_TAG))));
          subscopeQuery.add(
              buildQuery_recursive(
                  clause.getQuery(),
                  scope,
                  level + 1,
                  new ArrayList<SpanQuery>(),
                  new ArrayList<SpanQuery>(),
                  new ArrayList<SpanQuery>()));
          if (clause.getOccur() == BooleanClause.Occur.MUST) {
            andSpans.add(
                new SpanBetweenQuery(subscopeQuery.toArray(new SpanQuery[subscopeQuery.size()])));
          } else if (clause.getOccur() == BooleanClause.Occur.SHOULD) {
            orSpans.add(
                new SpanBetweenQuery(subscopeQuery.toArray(new SpanQuery[subscopeQuery.size()])));
          } else if (clause.getOccur() == BooleanClause.Occur.MUST_NOT) {
            // FIX
            notSpans.add(
                new SpanBetweenQuery(subscopeQuery.toArray(new SpanQuery[subscopeQuery.size()])));
          }
        }

      } else if (queryclazz == TermQuery.class) {

        TermQuery tq = (TermQuery) clause.getQuery();

        if (tq.getTerm().field().compareTo(SUBSCOPE_FIELD) == 0) {

          // Set the subscope
          subscope = tq.getTerm().text();

          // Need to add a term here (otherwise we have problems)
          WildcardQuery wildcard = new WildcardQuery(new Term(scope, "*"));
          if (clause.getOccur() == BooleanClause.Occur.MUST) {
            andSpans.add(new SpanMultiTermQueryWrapper<WildcardQuery>(wildcard));
          } else if (clause.getOccur() == BooleanClause.Occur.SHOULD) {
            orSpans.add(new SpanMultiTermQueryWrapper<WildcardQuery>(wildcard));
          } else if (clause.getOccur() == BooleanClause.Occur.MUST_NOT) {
            notSpans.add(new SpanMultiTermQueryWrapper<WildcardQuery>(wildcard));
          }

        } else if (subscope == null) {

          if (clause.getOccur() == BooleanClause.Occur.MUST) {
            andSpans.add(buildTermQuery(scope, tq));
          } else if (clause.getOccur() == BooleanClause.Occur.SHOULD) {
            orSpans.add(buildTermQuery(scope, tq));
          } else if (clause.getOccur() == BooleanClause.Occur.MUST_NOT) {
            notSpans.add(buildTermQuery(scope, tq));
          }

        } else {

          ArrayList<SpanQuery> subscopeQuery = new ArrayList<SpanQuery>();
          subscopeQuery.add(
              new SpanTermQuery(new Term(scope, getTag(scope, subscope, TAG_TYPE.BEG_TAG))));
          subscopeQuery.add(
              new SpanTermQuery(new Term(scope, getTag(scope, subscope, TAG_TYPE.END_TAG))));
          subscopeQuery.add(buildTermQuery(scope, tq));
          if (clause.getOccur() == BooleanClause.Occur.MUST) {
            andSpans.add(
                new SpanBetweenQuery(subscopeQuery.toArray(new SpanQuery[subscopeQuery.size()])));
          } else if (clause.getOccur() == BooleanClause.Occur.SHOULD) {
            orSpans.add(
                new SpanBetweenQuery(subscopeQuery.toArray(new SpanQuery[subscopeQuery.size()])));
          } else if (clause.getOccur() == BooleanClause.Occur.MUST_NOT) {
            // FIX
            notSpans.add(
                new SpanBetweenQuery(subscopeQuery.toArray(new SpanQuery[subscopeQuery.size()])));
          }
        }

      } else if (queryclazz == WildcardQuery.class) {

        if (subscope == null) {

          if (clause.getOccur() == BooleanClause.Occur.MUST) {
            andSpans.add(buildWildcardQuery(scope, clause.getQuery()));
          } else if (clause.getOccur() == BooleanClause.Occur.SHOULD) {
            orSpans.add(buildWildcardQuery(scope, clause.getQuery()));
          } else if (clause.getOccur() == BooleanClause.Occur.MUST_NOT) {
            // FIX
            notSpans.add(buildWildcardQuery(scope, clause.getQuery()));
          }

        } else {

          ArrayList<SpanQuery> subscopeQuery = new ArrayList<SpanQuery>();
          subscopeQuery.add(
              new SpanTermQuery(new Term(scope, getTag(scope, subscope, TAG_TYPE.BEG_TAG))));
          subscopeQuery.add(
              new SpanTermQuery(new Term(scope, getTag(scope, subscope, TAG_TYPE.END_TAG))));
          subscopeQuery.add(buildWildcardQuery(scope, clause.getQuery()));
          if (clause.getOccur() == BooleanClause.Occur.MUST) {
            andSpans.add(
                new SpanBetweenQuery(subscopeQuery.toArray(new SpanQuery[subscopeQuery.size()])));
          } else if (clause.getOccur() == BooleanClause.Occur.SHOULD) {
            orSpans.add(
                new SpanBetweenQuery(subscopeQuery.toArray(new SpanQuery[subscopeQuery.size()])));
          } else if (clause.getOccur() == BooleanClause.Occur.MUST_NOT) {
            // FIX
            notSpans.add(
                new SpanBetweenQuery(subscopeQuery.toArray(new SpanQuery[subscopeQuery.size()])));
          }
        }

      } else if (queryclazz == PrefixQuery.class) {

        if (subscope == null) {

          if (clause.getOccur() == BooleanClause.Occur.MUST) {
            andSpans.add(buildPrefixQuery(scope, clause.getQuery()));
          } else if (clause.getOccur() == BooleanClause.Occur.SHOULD) {
            orSpans.add(buildPrefixQuery(scope, clause.getQuery()));
          } else if (clause.getOccur() == BooleanClause.Occur.MUST_NOT) {
            // FIX
            notSpans.add(buildPrefixQuery(scope, clause.getQuery()));
          }

        } else {

          ArrayList<SpanQuery> subscopeQuery = new ArrayList<SpanQuery>();
          subscopeQuery.add(
              new SpanTermQuery(new Term(scope, getTag(scope, subscope, TAG_TYPE.BEG_TAG))));
          subscopeQuery.add(
              new SpanTermQuery(new Term(scope, getTag(scope, subscope, TAG_TYPE.END_TAG))));
          subscopeQuery.add(buildPrefixQuery(scope, clause.getQuery()));
          if (clause.getOccur() == BooleanClause.Occur.MUST) {
            andSpans.add(
                new SpanBetweenQuery(subscopeQuery.toArray(new SpanQuery[subscopeQuery.size()])));
          } else if (clause.getOccur() == BooleanClause.Occur.SHOULD) {
            orSpans.add(
                new SpanBetweenQuery(subscopeQuery.toArray(new SpanQuery[subscopeQuery.size()])));
          } else if (clause.getOccur() == BooleanClause.Occur.MUST_NOT) {
            // FIX
            notSpans.add(
                new SpanBetweenQuery(subscopeQuery.toArray(new SpanQuery[subscopeQuery.size()])));
          }
        }

      } else if (queryclazz == PhraseQuery.class) {

        if (subscope == null) {

          if (clause.getOccur() == BooleanClause.Occur.MUST) {
            andSpans.add(buildPhraseQuery(scope, clause.getQuery()));
          } else if (clause.getOccur() == BooleanClause.Occur.SHOULD) {
            orSpans.add(buildPhraseQuery(scope, clause.getQuery()));
          } else if (clause.getOccur() == BooleanClause.Occur.MUST_NOT) {
            // FIX
            notSpans.add(buildPhraseQuery(scope, clause.getQuery()));
          }

        } else {

          ArrayList<SpanQuery> subscopeQuery = new ArrayList<SpanQuery>();
          subscopeQuery.add(
              new SpanTermQuery(new Term(scope, getTag(scope, subscope, TAG_TYPE.BEG_TAG))));
          subscopeQuery.add(
              new SpanTermQuery(new Term(scope, getTag(scope, subscope, TAG_TYPE.END_TAG))));
          subscopeQuery.add(buildPhraseQuery(scope, clause.getQuery()));
          if (clause.getOccur() == BooleanClause.Occur.MUST) {
            andSpans.add(
                new SpanBetweenQuery(subscopeQuery.toArray(new SpanQuery[subscopeQuery.size()])));
          } else if (clause.getOccur() == BooleanClause.Occur.SHOULD) {
            orSpans.add(
                new SpanBetweenQuery(subscopeQuery.toArray(new SpanQuery[subscopeQuery.size()])));
          } else if (clause.getOccur() == BooleanClause.Occur.MUST_NOT) {
            // FIX
            notSpans.add(
                new SpanBetweenQuery(subscopeQuery.toArray(new SpanQuery[subscopeQuery.size()])));
          }
        }

      } else {

        System.out.println("[" + q.getClass() + "]");
      }
    }

    ArrayList<SpanQuery> includeSpans = new ArrayList<SpanQuery>();
    ;

    // Add the 'and' queries to the includeSpans (if there were any)
    if (!andSpans.isEmpty()) {
      if (andSpans.size() > 1) {
        includeSpans.add(new SpanAndQuery(andSpans.toArray(new SpanQuery[andSpans.size()])));
      } else {
        includeSpans.add(andSpans.get(0));
      }
    }

    // Add the 'or' queries to the includeSpans (if there were any)
    if (!orSpans.isEmpty()) {
      includeSpans.add(new SpanOrQuery(orSpans.toArray(new SpanQuery[orSpans.size()])));
    }

    // Exclude the 'not' queries from the includeSpans (if there were any)
    if (!notSpans.isEmpty()) {
      if (includeSpans.size() > 1) {
        if (notSpans.size() > 1) {
          return new SpanNotQuery(
              new SpanAndQuery(includeSpans.toArray(new SpanQuery[includeSpans.size()])),
              new SpanAndQuery(notSpans.toArray(new SpanQuery[notSpans.size()])));
        } else {
          return new SpanNotQuery(
              new SpanAndQuery(includeSpans.toArray(new SpanQuery[includeSpans.size()])),
              notSpans.get(0));
        }
      } else {
        if (notSpans.size() > 1) {
          return new SpanNotQuery(
              includeSpans.get(0),
              new SpanAndQuery(notSpans.toArray(new SpanQuery[notSpans.size()])));
        } else {
          return new SpanNotQuery(includeSpans.get(0), notSpans.get(0));
        }
      }
    } else {
      if (includeSpans.size() > 1) {
        return new SpanAndQuery(includeSpans.toArray(new SpanQuery[includeSpans.size()]));
      } else {
        return includeSpans.get(0);
      }
    }
  }
예제 #19
0
  private void _includeIfUnique(
      BooleanQuery booleanQuery,
      boolean like,
      QueryParser queryParser,
      Query query,
      BooleanClause.Occur occur) {

    if (query instanceof TermQuery) {
      Set<Term> terms = new HashSet<Term>();

      TermQuery termQuery = (TermQuery) query;

      termQuery.extractTerms(terms);

      float boost = termQuery.getBoost();

      for (Term term : terms) {
        String termValue = term.text();

        if (like) {
          termValue = termValue.toLowerCase(queryParser.getLocale());

          term = term.createTerm(StringPool.STAR.concat(termValue).concat(StringPool.STAR));

          query = new WildcardQuery(term);
        } else {
          query = new TermQuery(term);
        }

        query.setBoost(boost);

        boolean included = false;

        for (BooleanClause booleanClause : booleanQuery.getClauses()) {
          if (query.equals(booleanClause.getQuery())) {
            included = true;
          }
        }

        if (!included) {
          booleanQuery.add(query, occur);
        }
      }
    } else if (query instanceof BooleanQuery) {
      BooleanQuery curBooleanQuery = (BooleanQuery) query;

      BooleanQuery containerBooleanQuery = new BooleanQuery();

      for (BooleanClause booleanClause : curBooleanQuery.getClauses()) {
        _includeIfUnique(
            containerBooleanQuery,
            like,
            queryParser,
            booleanClause.getQuery(),
            booleanClause.getOccur());
      }

      if (containerBooleanQuery.getClauses().length > 0) {
        booleanQuery.add(containerBooleanQuery, occur);
      }
    } else {
      boolean included = false;

      for (BooleanClause booleanClause : booleanQuery.getClauses()) {
        if (query.equals(booleanClause.getQuery())) {
          included = true;
        }
      }

      if (!included) {
        booleanQuery.add(query, occur);
      }
    }
  }
  @Override
  public void prepare(ResponseBuilder rb) throws IOException {
    SolrQueryRequest req = rb.req;
    SolrParams params = req.getParams();
    // A runtime param can skip
    if (!params.getBool(QueryElevationParams.ENABLE, true)) {
      return;
    }

    boolean exclusive = params.getBool(QueryElevationParams.EXCLUSIVE, false);
    // A runtime parameter can alter the config value for forceElevation
    boolean force = params.getBool(QueryElevationParams.FORCE_ELEVATION, forceElevation);
    boolean markExcludes = params.getBool(QueryElevationParams.MARK_EXCLUDES, false);
    String boostStr = params.get(QueryElevationParams.IDS);
    String exStr = params.get(QueryElevationParams.EXCLUDE);

    Query query = rb.getQuery();
    String qstr = rb.getQueryString();
    if (query == null || qstr == null) {
      return;
    }

    ElevationObj booster = null;
    try {
      if (boostStr != null || exStr != null) {
        List<String> boosts =
            (boostStr != null)
                ? StrUtils.splitSmart(boostStr, ",", true)
                : new ArrayList<String>(0);
        List<String> excludes =
            (exStr != null) ? StrUtils.splitSmart(exStr, ",", true) : new ArrayList<String>(0);
        booster = new ElevationObj(qstr, boosts, excludes);
      } else {
        IndexReader reader = req.getSearcher().getIndexReader();
        qstr = getAnalyzedQuery(qstr);
        booster = getElevationMap(reader, req.getCore()).get(qstr);
      }
    } catch (Exception ex) {
      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error loading elevation", ex);
    }

    if (booster != null) {
      rb.req.getContext().put(BOOSTED, booster.ids);

      // Change the query to insert forced documents
      if (exclusive == true) {
        // we only want these results
        rb.setQuery(booster.include);
      } else {
        BooleanQuery newq = new BooleanQuery(true);
        newq.add(query, BooleanClause.Occur.SHOULD);
        newq.add(booster.include, BooleanClause.Occur.SHOULD);
        if (booster.exclude != null) {
          if (markExcludes == false) {
            for (TermQuery tq : booster.exclude) {
              newq.add(new BooleanClause(tq, BooleanClause.Occur.MUST_NOT));
            }
          } else {
            // we are only going to mark items as excluded, not actually exclude them.  This works
            // with the EditorialMarkerFactory
            rb.req.getContext().put(EXCLUDED, booster.excludeIds);
          }
        }
        rb.setQuery(newq);
      }

      ElevationComparatorSource comparator = new ElevationComparatorSource(booster);
      // if the sort is 'score desc' use a custom sorting method to
      // insert documents in their proper place
      SortSpec sortSpec = rb.getSortSpec();
      if (sortSpec.getSort() == null) {
        sortSpec.setSortAndFields(
            new Sort(
                new SortField[] {
                  new SortField("_elevate_", comparator, true),
                  new SortField(null, SortField.Type.SCORE, false)
                }),
            Arrays.asList(new SchemaField[2]));
      } else {
        // Check if the sort is based on score
        SortSpec modSortSpec = this.modifySortSpec(sortSpec, force, comparator);
        if (null != modSortSpec) {
          rb.setSortSpec(modSortSpec);
        }
      }

      // alter the sorting in the grouping specification if there is one
      GroupingSpecification groupingSpec = rb.getGroupingSpec();
      if (groupingSpec != null) {
        SortField[] groupSort = groupingSpec.getGroupSort().getSort();
        Sort modGroupSort = this.modifySort(groupSort, force, comparator);
        if (modGroupSort != null) {
          groupingSpec.setGroupSort(modGroupSort);
        }
        SortField[] withinGroupSort = groupingSpec.getSortWithinGroup().getSort();
        Sort modWithinGroupSort = this.modifySort(withinGroupSort, force, comparator);
        if (modWithinGroupSort != null) {
          groupingSpec.setSortWithinGroup(modWithinGroupSort);
        }
      }
    }

    // Add debugging information
    if (rb.isDebug()) {
      List<String> match = null;
      if (booster != null) {
        // Extract the elevated terms into a list
        match = new ArrayList<String>(booster.priority.size());
        for (Object o : booster.include.clauses()) {
          TermQuery tq = (TermQuery) ((BooleanClause) o).getQuery();
          match.add(tq.getTerm().text());
        }
      }

      SimpleOrderedMap<Object> dbg = new SimpleOrderedMap<Object>();
      dbg.add("q", qstr);
      dbg.add("match", match);
      if (rb.isDebugQuery()) {
        rb.addDebugInfo("queryBoosting", dbg);
      }
    }
  }
예제 #21
0
  /** @see #toString(Query,IndexSchema) */
  public static void toString(Query query, IndexSchema schema, Appendable out, int flags)
      throws IOException {
    boolean writeBoost = true;

    if (query instanceof TermQuery) {
      TermQuery q = (TermQuery) query;
      Term t = q.getTerm();
      FieldType ft = writeFieldName(t.field(), schema, out, flags);
      writeFieldVal(t.bytes(), ft, out, flags);
    } else if (query instanceof TermRangeQuery) {
      TermRangeQuery q = (TermRangeQuery) query;
      String fname = q.getField();
      FieldType ft = writeFieldName(fname, schema, out, flags);
      out.append(q.includesLower() ? '[' : '{');
      BytesRef lt = q.getLowerTerm();
      BytesRef ut = q.getUpperTerm();
      if (lt == null) {
        out.append('*');
      } else {
        writeFieldVal(lt, ft, out, flags);
      }

      out.append(" TO ");

      if (ut == null) {
        out.append('*');
      } else {
        writeFieldVal(ut, ft, out, flags);
      }

      out.append(q.includesUpper() ? ']' : '}');
    } else if (query instanceof NumericRangeQuery) {
      NumericRangeQuery q = (NumericRangeQuery) query;
      String fname = q.getField();
      FieldType ft = writeFieldName(fname, schema, out, flags);
      out.append(q.includesMin() ? '[' : '{');
      Number lt = q.getMin();
      Number ut = q.getMax();
      if (lt == null) {
        out.append('*');
      } else {
        out.append(lt.toString());
      }

      out.append(" TO ");

      if (ut == null) {
        out.append('*');
      } else {
        out.append(ut.toString());
      }

      out.append(q.includesMax() ? ']' : '}');
    } else if (query instanceof BooleanQuery) {
      BooleanQuery q = (BooleanQuery) query;
      boolean needParens = false;

      if (q.getBoost() != 1.0 || q.getMinimumNumberShouldMatch() != 0 || q.isCoordDisabled()) {
        needParens = true;
      }
      if (needParens) {
        out.append('(');
      }
      boolean first = true;
      for (BooleanClause c : q.clauses()) {
        if (!first) {
          out.append(' ');
        } else {
          first = false;
        }

        if (c.isProhibited()) {
          out.append('-');
        } else if (c.isRequired()) {
          out.append('+');
        }
        Query subQuery = c.getQuery();
        boolean wrapQuery = false;

        // TODO: may need to put parens around other types
        // of queries too, depending on future syntax.
        if (subQuery instanceof BooleanQuery) {
          wrapQuery = true;
        }

        if (wrapQuery) {
          out.append('(');
        }

        toString(subQuery, schema, out, flags);

        if (wrapQuery) {
          out.append(')');
        }
      }

      if (needParens) {
        out.append(')');
      }
      if (q.getMinimumNumberShouldMatch() > 0) {
        out.append('~');
        out.append(Integer.toString(q.getMinimumNumberShouldMatch()));
      }
      if (q.isCoordDisabled()) {
        out.append("/no_coord");
      }

    } else if (query instanceof PrefixQuery) {
      PrefixQuery q = (PrefixQuery) query;
      Term prefix = q.getPrefix();
      FieldType ft = writeFieldName(prefix.field(), schema, out, flags);
      out.append(prefix.text());
      out.append('*');
    } else if (query instanceof WildcardQuery) {
      out.append(query.toString());
      writeBoost = false;
    } else if (query instanceof FuzzyQuery) {
      out.append(query.toString());
      writeBoost = false;
    } else if (query instanceof ConstantScoreQuery) {
      out.append(query.toString());
      writeBoost = false;
    } else if (query instanceof WrappedQuery) {
      WrappedQuery q = (WrappedQuery) query;
      out.append(q.getOptions());
      toString(q.getWrappedQuery(), schema, out, flags);
      writeBoost = false; // we don't use the boost on wrapped queries
    } else {
      out.append(query.getClass().getSimpleName() + '(' + query.toString() + ')');
      writeBoost = false;
    }

    if (writeBoost && query.getBoost() != 1.0f) {
      out.append("^");
      out.append(Float.toString(query.getBoost()));
    }
  }