Example #1
  /**
   * This is best effort only: the PhraseQuery may contain multiple terms at the same position
   * (think synonyms) or gaps (think stopwords), and in that case it is impossible to translate
   * it into a correct Elasticsearch query.
   */
  private static JsonObject convertPhraseQuery(PhraseQuery query) {
    Term[] terms = query.getTerms();

    if (terms.length == 0) {
      throw LOG.cannotQueryOnEmptyPhraseQuery();
    }

    String field = terms[0].field(); // phrase queries only support a single field
    StringBuilder phrase = new StringBuilder();
    for (Term term : terms) {
      phrase.append(" ").append(term.text());
    }

    JsonObject phraseQuery =
        JsonBuilder.object()
            .add(
                "match_phrase",
                JsonBuilder.object()
                    .add(
                        field,
                        JsonBuilder.object()
                            .addProperty("query", phrase.toString().trim())
                            .addProperty("slop", query.getSlop())
                            .addProperty("boost", query.getBoost())))
            .build();

    return wrapQueryForNestedIfRequired(field, phraseQuery);
  }
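For reference, the JSON produced by the builder above has a predictable shape. A minimal usage sketch, assuming the mutable PhraseQuery API used throughout these examples and a hypothetical field named "title":

  PhraseQuery lucenePhrase = new PhraseQuery();
  lucenePhrase.add(new Term("title", "hello"));
  lucenePhrase.add(new Term("title", "world"));
  lucenePhrase.setSlop(1);
  JsonObject esQuery = convertPhraseQuery(lucenePhrase);
  // esQuery is roughly: {"match_phrase":{"title":{"query":"hello world","slop":1,"boost":1.0}}}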
Example #2
  public void testPositionIncrement() throws Exception {
    boolean dflt = StopFilter.getEnablePositionIncrementsDefault();
    StopFilter.setEnablePositionIncrementsDefault(true);
    try {
      QueryParserWrapper qp =
          new QueryParserWrapper("a", new StopAnalyzer(new String[] {"the", "in", "are", "this"}));
      qp.setEnablePositionIncrements(true);
      String qtxt = "\"the words in poisitions pos02578 are stopped in this phrasequery\"";
      // the stopwords occupy positions 0, 2, 5, 7 and 8; the remaining terms land at 1, 3, 4, 6 and 9
      int expectedPositions[] = {1, 3, 4, 6, 9};
      PhraseQuery pq = (PhraseQuery) qp.parse(qtxt);
      // System.out.println("Query text: "+qtxt);
      // System.out.println("Result: "+pq);
      Term t[] = pq.getTerms();
      int pos[] = pq.getPositions();
      for (int i = 0; i < t.length; i++) {
        // System.out.println(i+". "+t[i]+"  pos: "+pos[i]);
        assertEquals(
            "term " + i + " = " + t[i] + " has wrong term-position!", expectedPositions[i], pos[i]);
      }

    } finally {
      StopFilter.setEnablePositionIncrementsDefault(dflt);
    }
  }
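The parser effectively builds a gapped phrase, which can also be constructed by hand with the two-argument add(Term, int) overload. A minimal sketch of the equivalent manual construction, assuming the classic mutable PhraseQuery API and the default field "a" used above:

  PhraseQuery gapped = new PhraseQuery();
  gapped.add(new Term("a", "words"), 1);       // position 0 is held by the stopword "the"
  gapped.add(new Term("a", "poisitions"), 3);  // position 2 is held by "in"
  gapped.add(new Term("a", "pos02578"), 4);
  gapped.add(new Term("a", "stopped"), 6);     // position 5 is held by "are"
  gapped.add(new Term("a", "phrasequery"), 9); // positions 7 and 8 are held by "in" and "this"
  // gapped.getPositions() now returns {1, 3, 4, 6, 9}, matching expectedPositions above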
Example #3
 public Query createPhrasePrefixQuery(
     String field, String queryText, int phraseSlop, int maxExpansions) {
   final Query query =
       createFieldQuery(getAnalyzer(), Occur.MUST, field, queryText, true, phraseSlop);
   final MultiPhrasePrefixQuery prefixQuery = new MultiPhrasePrefixQuery();
   prefixQuery.setMaxExpansions(maxExpansions);
   prefixQuery.setSlop(phraseSlop);
   if (query instanceof PhraseQuery) {
     PhraseQuery pq = (PhraseQuery) query;
     Term[] terms = pq.getTerms();
     int[] positions = pq.getPositions();
     for (int i = 0; i < terms.length; i++) {
       prefixQuery.add(new Term[] {terms[i]}, positions[i]);
     }
     return prefixQuery;
   } else if (query instanceof MultiPhraseQuery) {
     MultiPhraseQuery pq = (MultiPhraseQuery) query;
     Term[][] terms = pq.getTermArrays();
     int[] positions = pq.getPositions();
     for (int i = 0; i < terms.length; i++) {
       prefixQuery.add(terms[i], positions[i]);
     }
     return prefixQuery;
   } else if (query instanceof TermQuery) {
     prefixQuery.add(((TermQuery) query).getTerm());
     return prefixQuery;
   }
   return query;
 }
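A hedged usage sketch of the helper above (the field name and text are hypothetical); it illustrates the intent rather than a definitive API:

  // Analyze the text as a phrase, then allow the last position to expand as a prefix:
  Query q = createPhrasePrefixQuery("title", "quick brown fo", 0, 50);
  // If the analyzed text yields a PhraseQuery, MultiPhraseQuery or TermQuery, q is a
  // MultiPhrasePrefixQuery with slop 0 and at most 50 expansions; otherwise the analyzed
  // query is returned unchanged.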
  /**
   * Construct a span phrase query. TODO: although the code below checks for wildcard characters
   * in the query terms, they have already been stripped by this point, so that branch never
   * fires; this is one thing that would still need to be addressed.
   *
   * @param scope the field used as the surrounding scope (its begin/end tags bound the phrase)
   * @param q the query to convert; expected to be a PhraseQuery
   * @return a SpanQuery matching the phrase between the scope's begin and end tags
   */
  private SpanQuery buildPhraseQuery(String scope, Query q) {

    PhraseQuery pq = (PhraseQuery) q;
    Term[] terms = pq.getTerms();
    ArrayList<SpanQuery> spans = new ArrayList<SpanQuery>();

    spans.add(
        new SpanTermQuery(new Term(scope, getTag(scope, terms[0].field(), TAG_TYPE.BEG_TAG))));
    spans.add(
        new SpanTermQuery(new Term(scope, getTag(scope, terms[0].field(), TAG_TYPE.END_TAG))));

    ArrayList<SpanQuery> phraseSpan = new ArrayList<SpanQuery>();
    for (int i = 0; i < terms.length; i++) {
      String term = terms[i].text();
      if (term.endsWith("*")) {
        WildcardQuery wildcard = new WildcardQuery(new Term(scope, term));
        phraseSpan.add(new SpanMultiTermQueryWrapper<WildcardQuery>(wildcard));
      } else if (term.contains(singleCs) || term.contains(multiCs)) {
        WildcardQuery wildcard = new WildcardQuery(new Term(scope, term));
        phraseSpan.add(new SpanMultiTermQueryWrapper<WildcardQuery>(wildcard));
      } else {
        phraseSpan.add(new SpanTermQuery(new Term(scope, term)));
      }
    }
    spans.add(
        new SpanNearQuery(
            phraseSpan.toArray(new SpanQuery[phraseSpan.size()]), phraseSpan.size(), true));

    return new SpanBetweenQuery(spans.toArray(new SpanQuery[spans.size()]));
  }
Example #5
 protected PhraseQuery toPhraseQuery(List<BytesRef> bytesRefs, String field) {
   PhraseQuery phraseQuery = new PhraseQuery();
   for (BytesRef bytesRef : bytesRefs) {
     phraseQuery.add(new Term(field, bytesRef));
   }
   return phraseQuery;
 }
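A minimal usage sketch (field name and terms are hypothetical), assuming java.util.Arrays, java.util.List and org.apache.lucene.util.BytesRef are imported:

  List<BytesRef> refs =
      Arrays.asList(new BytesRef("quick"), new BytesRef("brown"), new BytesRef("fox"));
  PhraseQuery pq = toPhraseQuery(refs, "body");
  // pq now requires "quick", "brown" and "fox" to appear as consecutive terms in "body"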
Example #6
  public void testSpanNearQuery() throws Exception {
    SpanQuery[] quick_brown_dog = new SpanQuery[] {quick, brown, dog};
    SpanNearQuery snq = new SpanNearQuery(quick_brown_dog, 0, true); // #1
    assertNoMatches(snq);
    dumpSpans(snq);

    snq = new SpanNearQuery(quick_brown_dog, 4, true); // #2
    assertNoMatches(snq);
    dumpSpans(snq);

    snq = new SpanNearQuery(quick_brown_dog, 5, true); // #3
    assertOnlyBrownFox(snq);
    dumpSpans(snq);

    // interesting - even a sloppy phrase query would require
    // more slop to match
    snq = new SpanNearQuery(new SpanQuery[] {lazy, fox}, 3, false); // #4
    assertOnlyBrownFox(snq);
    dumpSpans(snq);

    PhraseQuery pq = new PhraseQuery(); // #5
    pq.add(new Term("f", "lazy")); // #5
    pq.add(new Term("f", "fox")); // #5
    pq.setSlop(4); // #5
    assertNoMatches(pq);

    pq.setSlop(5); // #6
    assertOnlyBrownFox(pq); // #6
  }
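For reference, the slop values in #4 to #6 follow directly from term positions. A worked sketch, assuming the classic test sentence these span examples are usually indexed over:

  // Assumed indexed text: "the quick brown fox jumps over the lazy dog"
  //            positions:   0    1     2    3    4    5    6   7    8
  // The reversed phrase "lazy fox" only lines up if "fox" (position 3) may move just past
  // "lazy" to position 8, a shift of 5 -- so setSlop(4) finds nothing and setSlop(5) is the
  // smallest slop that matches, while the unordered SpanNearQuery in #4 only has to bridge
  // the 3 terms between "fox" and "lazy".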
 protected void smokeTestSearcher(IndexSearcher s) throws Exception {
   runQuery(s, new TermQuery(new Term("body", "united")));
   runQuery(s, new TermQuery(new Term("titleTokenized", "states")));
   PhraseQuery pq = new PhraseQuery();
   pq.add(new Term("body", "united"));
   pq.add(new Term("body", "states"));
   runQuery(s, pq);
 }
 private static PhraseQuery makePhraseQuery(String terms) {
   PhraseQuery query = new PhraseQuery();
   String[] t = terms.split(" +");
   for (int i = 0; i < t.length; i++) {
     query.add(new Term("f", t[i]));
   }
   return query;
 }
Example #9
 protected Query pq(float boost, int slop, String field, String... texts) {
   PhraseQuery query = new PhraseQuery();
   for (String text : texts) {
     query.add(new Term(field, text));
   }
   query.setBoost(boost);
   query.setSlop(slop);
   return query;
 }
  public void testCJKPhrase() throws Exception {
    // individual CJK chars as terms
    StandardAnalyzer analyzer = new StandardAnalyzer(TEST_VERSION_CURRENT);

    PhraseQuery expected = new PhraseQuery();
    expected.add(new Term("field", "中"));
    expected.add(new Term("field", "国"));

    assertEquals(expected, getQuery("\"中国\"", analyzer));
  }
Example #11
 protected Query eq(String field, String[] terms, boolean ignoreCase) {
   if (terms.length > 1) {
     PhraseQuery pq = new PhraseQuery();
     for (String s : terms) {
       pq.add(new Term(field, s));
     }
     return pq;
   }
   return new TermQuery(new Term(field, terms[0]));
 }
  public void testAutoGeneratePhraseQueriesOn() throws Exception {
    // individual CJK chars as terms
    StandardAnalyzer analyzer = new StandardAnalyzer(TEST_VERSION_CURRENT);

    PhraseQuery expected = new PhraseQuery();
    expected.add(new Term("field", "中"));
    expected.add(new Term("field", "国"));
    QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, "field", analyzer);
    parser.setAutoGeneratePhraseQueries(true);
    assertEquals(expected, parser.parse("中国"));
  }
 @Override
 public Query construct(IndexEnvironment env, Map<String, String[]> querySource)
     throws ParseException {
   if ("1".equals(StringTools.arrayToString(querySource.get("queryversion"), ""))) {
     // preserving old stuff:
     // 1. all lucene special chars to be quoted
     // 2. if "wholewords" is "on" or "true" -> don't add *_*, otherwise add *_*
     BooleanQuery result = new BooleanQuery();
     String wholeWords = StringTools.arrayToString(querySource.get("wholewords"), "");
     boolean useWildcards = !(null != wholeWords && StringTools.stringToBoolean(wholeWords));
     for (Map.Entry<String, String[]> queryItem : querySource.entrySet()) {
       String field = queryItem.getKey();
       if (env.fields.containsKey(field) && queryItem.getValue().length > 0) {
         for (String value : queryItem.getValue()) {
           if (null != value) {
             value = value.trim().toLowerCase();
             if (0 != value.length()) {
               if ("keywords".equals(field) && ACCESSION_REGEX.test(value)) {
                 result.add(new TermQuery(new Term("accession", value)), BooleanClause.Occur.MUST);
               } else if ("keywords".equals(field)
                   && '"' == value.charAt(0)
                   && '"' == value.charAt(value.length() - 1)) {
                 value = value.substring(1, value.length() - 1);
                 PhraseQuery q = new PhraseQuery();
                 String[] tokens = value.split("\\s+");
                 for (String token : tokens) {
                   q.add(new Term(field, token));
                 }
                 result.add(q, BooleanClause.Occur.MUST);
               } else {
                 String[] tokens = value.split("\\s+");
                 for (String token : tokens) {
                   // we use wildcards for keywords depending on "wholewords" switch,
                   // *ALWAYS* for other fields, *NEVER* for user id and accession or boolean
                   // fields
                   Query q =
                       !"boolean".equals(env.fields.get(field).type)
                               && !" userid  accession ".contains(" " + field + " ")
                               && (useWildcards || (!" keywords ".contains(" " + field + " ")))
                           ? new WildcardQuery(new Term(field, "*" + token + "*"))
                           : new TermQuery(new Term(field, token));
                   result.add(q, BooleanClause.Occur.MUST);
                 }
               }
             }
           }
         }
       }
     }
     return result;
   } else {
     return super.construct(env, querySource);
   }
 }
  @Test
  public void testLotsOfPhrases() throws IOException {
    Directory dir = newDirectory();
    IndexWriter writer =
        new IndexWriter(
            dir,
            newIndexWriterConfig(
                TEST_VERSION_CURRENT,
                new MockAnalyzer(
                    random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET)));
    FieldType type = new FieldType(TextField.TYPE_STORED);
    type.setStoreTermVectorOffsets(true);
    type.setStoreTermVectorPositions(true);
    type.setStoreTermVectors(true);
    type.freeze();
    String[] terms = {"org", "apache", "lucene"};
    int iters = atLeast(1000);
    StringBuilder builder = new StringBuilder();
    for (int i = 0; i < iters; i++) {
      builder.append(terms[random().nextInt(terms.length)]).append(" ");
      if (random().nextInt(6) == 3) {
        builder.append("elasticsearch").append(" ");
      }
    }
    Document doc = new Document();
    Field field = new Field("field", builder.toString(), type);
    doc.add(field);
    writer.addDocument(doc);
    PhraseQuery query = new PhraseQuery();
    query.add(new Term("field", "org"));
    query.add(new Term("field", "apache"));
    query.add(new Term("field", "lucene"));

    XFastVectorHighlighter highlighter = new XFastVectorHighlighter();
    IndexReader reader = DirectoryReader.open(writer, true);
    IndexSearcher searcher = newSearcher(reader);
    TopDocs hits = searcher.search(query, 10);
    assertEquals(1, hits.totalHits);
    XFieldQuery fieldQuery = highlighter.getFieldQuery(query, reader);
    String[] bestFragments =
        highlighter.getBestFragments(fieldQuery, reader, hits.scoreDocs[0].doc, "field", 1000, 1);
    for (int i = 0; i < bestFragments.length; i++) {
      String result = bestFragments[i].replaceAll("<b>org apache lucene</b>", "FOOBAR");
      assertFalse(result.contains("org apache lucene"));
    }
    reader.close();
    writer.close();
    dir.close();
  }
Example #15
 BooleanClause partialMatch(SearchTerm s, int slop) {
   String[] phrase = getPhrase(s.getValue().toLowerCase().trim());
   PhraseQuery query = new PhraseQuery();
   BooleanClause partialMatchClause = null;
   query.setSlop(slop);
   for (int i = 0; i < phrase.length; i++) {
     query.add(new Term(s.getFieldName(), phrase[i].toLowerCase().trim()));
   }
   if (s.getOperator().equalsIgnoreCase("=")) {
     partialMatchClause = new BooleanClause(query, BooleanClause.Occur.MUST);
   } else if (s.getOperator().equalsIgnoreCase("-")) {
     partialMatchClause = new BooleanClause(query, BooleanClause.Occur.MUST_NOT);
   }
   return partialMatchClause;
 }
  private float checkPhraseQuery(Document doc, PhraseQuery query, int slop, int expectedNumResults)
      throws Exception {
    query.setSlop(slop);

    Directory ramDir = newDirectory();
    RandomIndexWriter writer =
        new RandomIndexWriter(random, ramDir, new MockAnalyzer(MockTokenizer.WHITESPACE, false));
    writer.addDocument(doc);

    IndexReader reader = writer.getReader();

    IndexSearcher searcher = newSearcher(reader);
    TopDocs td = searcher.search(query, null, 10);
    // System.out.println("slop: "+slop+"  query: "+query+"  doc: "+doc+"  Expecting number of hits:
    // "+expectedNumResults+" maxScore="+td.getMaxScore());
    assertEquals(
        "slop: " + slop + "  query: " + query + "  doc: " + doc + "  Wrong number of hits",
        expectedNumResults,
        td.totalHits);

    // QueryUtils.check(query,searcher);
    writer.close();
    searcher.close();
    reader.close();
    ramDir.close();

    return td.getMaxScore();
  }
 public void testToQueryPhraseQueryBoostAndSlop() throws IOException {
   assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0);
   QueryStringQueryBuilder queryStringQueryBuilder =
       new QueryStringQueryBuilder("\"test phrase\"~2").field(STRING_FIELD_NAME, 5f);
   Query query = queryStringQueryBuilder.toQuery(createShardContext());
   assertThat(query, instanceOf(DisjunctionMaxQuery.class));
   DisjunctionMaxQuery disjunctionMaxQuery = (DisjunctionMaxQuery) query;
   assertThat(disjunctionMaxQuery.getDisjuncts().size(), equalTo(1));
   assertThat(disjunctionMaxQuery.getDisjuncts().get(0), instanceOf(BoostQuery.class));
   BoostQuery boostQuery = (BoostQuery) disjunctionMaxQuery.getDisjuncts().get(0);
   assertThat(boostQuery.getBoost(), equalTo(5f));
   assertThat(boostQuery.getQuery(), instanceOf(PhraseQuery.class));
   PhraseQuery phraseQuery = (PhraseQuery) boostQuery.getQuery();
   assertThat(phraseQuery.getSlop(), Matchers.equalTo(2));
   assertThat(phraseQuery.getTerms().length, equalTo(2));
 }
 public void testToQueryPhraseQuery() throws IOException {
   assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0);
   Query query =
       queryStringQuery("\"term1 term2\"")
           .defaultField(STRING_FIELD_NAME)
           .phraseSlop(3)
           .toQuery(createShardContext());
   assertThat(query, instanceOf(DisjunctionMaxQuery.class));
   DisjunctionMaxQuery disjunctionMaxQuery = (DisjunctionMaxQuery) query;
   assertThat(disjunctionMaxQuery.getDisjuncts().size(), equalTo(1));
   assertThat(disjunctionMaxQuery.getDisjuncts().get(0), instanceOf(PhraseQuery.class));
   PhraseQuery phraseQuery = (PhraseQuery) disjunctionMaxQuery.getDisjuncts().get(0);
   assertThat(phraseQuery.getTerms().length, equalTo(2));
   assertThat(phraseQuery.getTerms()[0], equalTo(new Term(STRING_FIELD_NAME, "term1")));
   assertThat(phraseQuery.getTerms()[1], equalTo(new Term(STRING_FIELD_NAME, "term2")));
   assertThat(phraseQuery.getSlop(), equalTo(3));
 }
  protected Taxon[] findTaxon(String fieldName1, String fieldValue) throws IOException {
    Taxon[] terms = new TaxonImpl[0];
    if (StringUtils.isNotBlank(fieldValue) && indexSearcher != null) {
      PhraseQuery query = new PhraseQuery();
      query.add(new Term(fieldName1, fieldValue));
      int maxHits = 3;
      TopDocs docs = indexSearcher.search(query, maxHits);

      if (docs.totalHits > 0) {
        terms = new TaxonImpl[docs.totalHits];
        for (int i = 0; i < docs.totalHits && i < maxHits; i++) {
          ScoreDoc scoreDoc = docs.scoreDocs[i];
          Document foundDoc = indexSearcher.doc(scoreDoc.doc);
          Taxon term = new TaxonImpl();
          Fieldable idField = foundDoc.getFieldable(FIELD_ID);
          if (idField != null) {
            term.setExternalId(idField.stringValue());
          }
          Fieldable rankPathField = foundDoc.getFieldable(FIELD_RANK_PATH);
          if (rankPathField != null) {
            term.setPath(rankPathField.stringValue());
          }
          Fieldable rankPathIdsField = foundDoc.getFieldable(FIELD_RANK_PATH_IDS);
          if (rankPathIdsField != null) {
            term.setPathIds(rankPathIdsField.stringValue());
          }
          Fieldable rankPathNamesField = foundDoc.getFieldable(FIELD_RANK_PATH_NAMES);
          if (rankPathNamesField != null) {
            term.setPathNames(rankPathNamesField.stringValue());
          }
          Fieldable commonNamesFields = foundDoc.getFieldable(FIELD_COMMON_NAMES);
          if (commonNamesFields != null) {
            term.setCommonNames(commonNamesFields.stringValue());
          }
          Fieldable fieldName = foundDoc.getFieldable(FIELD_RECOMMENDED_NAME);
          if (fieldName != null) {
            term.setName(fieldName.stringValue());
          }
          terms[i] = term;
        }
      }
    }
    return terms;
  }
  @Test
  public void testPhraseQuery() {

    Map<String, ColumnMapper> map = new HashMap<>();
    map.put("name", new ColumnMapperBoolean());
    Schema mappers = new Schema(map, null, EnglishAnalyzer.class.getName());

    List<String> values = new ArrayList<>();
    values.add("hola");
    values.add("adios");

    PhraseCondition phraseCondition = new PhraseCondition(0.5f, "name", values, 2);
    Query query = phraseCondition.query(mappers);
    Assert.assertNotNull(query);
    Assert.assertEquals(org.apache.lucene.search.PhraseQuery.class, query.getClass());
    org.apache.lucene.search.PhraseQuery luceneQuery = (org.apache.lucene.search.PhraseQuery) query;
    Assert.assertEquals(values.size(), luceneQuery.getTerms().length);
    Assert.assertEquals(2, luceneQuery.getSlop());
    Assert.assertEquals(0.5f, query.getBoost(), 0);
  }
Example #21
  public void testWithPendingDeletes3() throws IOException {
    // main directory
    Directory dir = newDirectory();
    // auxiliary directory
    Directory aux = newDirectory();

    setUpDirs(dir, aux);
    IndexWriter writer =
        newWriter(
            dir,
            newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
                .setOpenMode(OpenMode.APPEND));

    // Adds 10 docs, then replaces them with another 10
    // docs, so 10 pending deletes:
    for (int i = 0; i < 20; i++) {
      Document doc = new Document();
      doc.add(newStringField("id", "" + (i % 10), Field.Store.NO));
      doc.add(newTextField("content", "bbb " + i, Field.Store.NO));
      writer.updateDocument(new Term("id", "" + (i % 10)), doc);
    }

    // Deletes one of the 10 added docs, leaving 9:
    PhraseQuery q = new PhraseQuery();
    q.add(new Term("content", "bbb"));
    q.add(new Term("content", "14"));
    writer.deleteDocuments(q);

    writer.addIndexes(aux);

    writer.forceMerge(1);
    writer.commit();

    verifyNumDocs(dir, 1039);
    verifyTermDocs(dir, new Term("content", "aaa"), 1030);
    verifyTermDocs(dir, new Term("content", "bbb"), 9);

    writer.close();
    dir.close();
    aux.close();
  }
  @Override
  public void execute(String indexDir) {

    ConsoleUtilities.printHeader();

    try {
      Directory indexDirectory = FSDirectory.open(new File(indexDir));
      IndexReader indexReader = DirectoryReader.open(indexDirectory);
      IndexSearcher searcher = new IndexSearcher(indexReader);

      System.out.println("[BooleanQueryOperation.execute]");

      // TODO: Implement and execute BooleanQuery
      PhraseQuery phraseQuery1 = new PhraseQuery();
      phraseQuery1.add(new Term("description", "legendary"));
      phraseQuery1.add(new Term("description", "sacred"));
      phraseQuery1.add(new Term("description", "realm"));

      // cursed land
      PhraseQuery phraseQuery2 = new PhraseQuery();
      phraseQuery2.add(new Term("description", "cursed"));
      phraseQuery2.add(new Term("description", "land"));

      BooleanQuery booleanQuery = new BooleanQuery();
      booleanQuery.add(new BooleanClause(phraseQuery1, Occur.SHOULD));
      booleanQuery.add(new BooleanClause(phraseQuery2, Occur.SHOULD));

      int topHitNum = 30;
      TopDocs topDocs = searcher.search(booleanQuery, topHitNum);
      for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
        Document doc = searcher.doc(scoreDoc.doc);

        String path = doc.get("path");

        Game game = gameBuilder.build(new File(path));
        System.out.println(ResultFormatter.format(game, scoreDoc.score));
      }

      // BooleanClause.Occur.SHOULD means that the clause is optional,
      // whereas BooleanClause.Occur.MUST means that the clause is required.

      // However, if a boolean query only has optional clauses, at least one
      // clause must match for a document to appear in the results.

      // For better control over what documents match a BooleanQuery, there is
      // also a minimumShouldMatch parameter which lets you tell Lucene that at
      // least minimumShouldMatch BooleanClause.Occur.SHOULD clauses must match
      // for a document to appear in the results.

    } catch (IOException e) {
      // TODO Auto-generated catch block
      e.printStackTrace();
    }
  }
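The comments above mention minimumShouldMatch; on the classic mutable BooleanQuery used here it is exposed as setMinimumNumberShouldMatch. A minimal sketch reusing the two phrase queries built in the method above:

  BooleanQuery strictQuery = new BooleanQuery();
  strictQuery.add(new BooleanClause(phraseQuery1, Occur.SHOULD));
  strictQuery.add(new BooleanClause(phraseQuery2, Occur.SHOULD));
  strictQuery.setMinimumNumberShouldMatch(2); // a document must now match BOTH optional phrases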
  public void testPositionIncrement() throws Exception {
    AqpQueryParser qp = getParser();
    qp.setAnalyzer(
        new StopAnalyzer(
            TEST_VERSION_CURRENT,
            StopFilter.makeStopSet(TEST_VERSION_CURRENT, "the", "in", "are", "this")));

    qp.setEnablePositionIncrements(true);

    String qtxt = "\"the words in poisitions pos02578 are stopped in this phrasequery\"";
    // the stopwords occupy positions 0, 2, 5, 7 and 8; the remaining terms land at 1, 3, 4, 6 and 9
    int expectedPositions[] = {1, 3, 4, 6, 9};
    PhraseQuery pq = (PhraseQuery) qp.parse(qtxt, "a");
    // System.out.println("Query text: "+qtxt);
    // System.out.println("Result: "+pq);
    Term t[] = pq.getTerms();
    int pos[] = pq.getPositions();
    for (int i = 0; i < t.length; i++) {
      // System.out.println(i+". "+t[i]+"  pos: "+pos[i]);
      assertEquals(
          "term " + i + " = " + t[i] + " has wrong term-position!", expectedPositions[i], pos[i]);
    }
  }
Example #24
  BooleanQuery orPhraseQuery(List<SearchTerm> orSearchTerms) {
    BooleanQuery orTerms = new BooleanQuery();
    orTerms.setMaxClauseCount(dvnMaxClauseCount);
    for (Iterator it = orSearchTerms.iterator(); it.hasNext(); ) {
      SearchTerm elem = (SearchTerm) it.next();
      String[] phrase = getPhrase(elem.getValue().toLowerCase().trim());
      if (phrase.length > 1) {
        BooleanClause partialMatchClause = null;
        PhraseQuery phraseQuery = new PhraseQuery();
        phraseQuery.setSlop(3);

        for (int i = 0; i < phrase.length; i++) {
          phraseQuery.add(new Term(elem.getFieldName(), phrase[i].toLowerCase().trim()));
        }
        orTerms.add(phraseQuery, BooleanClause.Occur.SHOULD);
      } else {
        Term t = new Term(elem.getFieldName(), elem.getValue().toLowerCase().trim());
        TermQuery orQuery = new TermQuery(t);
        orTerms.add(orQuery, BooleanClause.Occur.SHOULD);
      }
    }
    return orTerms;
  }
Example #25
 BooleanQuery andSearchTermClause(List<SearchTerm> andSearchTerms) {
   BooleanQuery andTerms = new BooleanQuery();
   andTerms.setMaxClauseCount(dvnMaxClauseCount);
   Query rQuery = null;
   for (Iterator it = andSearchTerms.iterator(); it.hasNext(); ) {
     SearchTerm elem = (SearchTerm) it.next();
     if (elem.getOperator().equals("<")) {
       Term end = new Term(elem.getFieldName(), elem.getValue().toLowerCase().trim());
       Term begin = null;
       rQuery = new RangeQuery(begin, end, true);
       andTerms.add(rQuery, BooleanClause.Occur.MUST);
     } else if (elem.getOperator().equals(">")) {
       Term end = null;
       Term begin = new Term(elem.getFieldName(), elem.getValue().toLowerCase().trim());
       rQuery = new RangeQuery(begin, end, true);
       andTerms.add(rQuery, BooleanClause.Occur.MUST);
     } else if (elem.getFieldName().equalsIgnoreCase("any")) {
       andTerms = buildAnyQuery(elem.getValue().toLowerCase().trim());
     } else {
       String[] phrase = getPhrase(elem.getValue().toLowerCase().trim());
       if (phrase.length > 1) {
          // NOTE: this PhraseQuery is created but never populated or added to andTerms;
          // the actual phrase clause comes from partialMatch(elem, 3) below.
          PhraseQuery phraseQuery = new PhraseQuery();
         phraseQuery.setSlop(0);
         andTerms.add(partialMatch(elem, 3));
       } else {
         Term t = new Term(elem.getFieldName(), elem.getValue().toLowerCase().trim());
         TermQuery andQuery = new TermQuery(t);
         if (elem.getOperator().equals("=")) {
           andTerms.add(andQuery, BooleanClause.Occur.MUST);
         } else if (elem.getOperator().equalsIgnoreCase("-")) {
           andTerms.add(andQuery, BooleanClause.Occur.MUST_NOT);
         }
       }
     }
   }
   return andTerms;
 }
Example #26
 private Query createQuery(SearchEngineFilter filter) {
   BooleanQuery fieldQuery = new BooleanQuery();
   String key = filter.getKey();
   String attachmentKey = key + IIndexerDAO.ATTACHMENT_FIELD_SUFFIX;
   Object value = filter.getValue();
   if (null != value) {
     if (value instanceof String) {
       SearchEngineFilter.TextSearchOption option = filter.getTextSearchOption();
       if (null == option) {
         option = SearchEngineFilter.TextSearchOption.AT_LEAST_ONE_WORD;
       }
       String stringValue = value.toString();
       String[] values = stringValue.split("\\s+");
       if (!option.equals(SearchEngineFilter.TextSearchOption.EXACT)) {
         BooleanClause.Occur bc = BooleanClause.Occur.SHOULD;
         if (option.equals(SearchEngineFilter.TextSearchOption.ALL_WORDS)) {
           bc = BooleanClause.Occur.MUST;
         } else if (option.equals(SearchEngineFilter.TextSearchOption.ANY_WORD)) {
           bc = BooleanClause.Occur.MUST_NOT;
         }
         for (int i = 0; i < values.length; i++) {
           TermQuery term = new TermQuery(new Term(key, values[i].toLowerCase()));
           // NOTE: search lower case....
           if (filter.isIncludeAttachments()) {
             BooleanQuery compositeQuery = new BooleanQuery();
             compositeQuery.add(term, BooleanClause.Occur.SHOULD);
             TermQuery termAttachment =
                 new TermQuery(new Term(attachmentKey, values[i].toLowerCase()));
             compositeQuery.add(termAttachment, BooleanClause.Occur.SHOULD);
             fieldQuery.add(compositeQuery, bc);
           } else {
             fieldQuery.add(term, bc);
           }
         }
       } else {
         PhraseQuery phraseQuery = new PhraseQuery();
         for (int i = 0; i < values.length; i++) {
           // NOTE: search lower case....
           phraseQuery.add(new Term(key, values[i].toLowerCase()));
         }
         if (filter.isIncludeAttachments()) {
           fieldQuery.add(phraseQuery, BooleanClause.Occur.SHOULD);
           PhraseQuery phraseQuery2 = new PhraseQuery();
           for (int i = 0; i < values.length; i++) {
             // NOTE: search lower case....
             phraseQuery2.add(new Term(attachmentKey, values[i].toLowerCase()));
           }
           fieldQuery.add(phraseQuery2, BooleanClause.Occur.SHOULD);
         } else {
           return phraseQuery;
         }
       }
     } else if (value instanceof Date) {
       String toString =
           DateTools.timeToString(((Date) value).getTime(), DateTools.Resolution.MINUTE);
       TermQuery term = new TermQuery(new Term(filter.getKey(), toString));
       fieldQuery.add(term, BooleanClause.Occur.MUST);
     } else if (value instanceof Number) {
       TermQuery term = new TermQuery(new Term(filter.getKey(), value.toString()));
       fieldQuery.add(term, BooleanClause.Occur.MUST);
     }
   } else {
     if (filter.getStart() instanceof Number || filter.getEnd() instanceof Number) {
       // .............................. TODO
     } else {
       String start = null;
       String end = null;
       if (filter.getStart() instanceof Date || filter.getEnd() instanceof Date) {
         if (null != filter.getStart()) {
           start =
               DateTools.timeToString(
                   ((Date) filter.getStart()).getTime(), DateTools.Resolution.MINUTE);
         }
         if (null != filter.getEnd()) {
           end =
               DateTools.timeToString(
                   ((Date) filter.getEnd()).getTime(), DateTools.Resolution.MINUTE);
         }
       } else {
         start = (null != filter.getStart()) ? filter.getStart().toString().toLowerCase() : null;
         end = (null != filter.getEnd()) ? filter.getEnd().toString().toLowerCase() : null;
       }
       BytesRef byteStart = (null != start) ? new BytesRef(start.getBytes()) : null;
       BytesRef byteEnd = (null != end) ? new BytesRef(end.getBytes()) : null;
       TermRangeQuery range = new TermRangeQuery(filter.getKey(), byteStart, byteEnd, true, true);
       fieldQuery.add(range, BooleanClause.Occur.MUST);
     }
   }
   return fieldQuery;
 }
  public Query createQuery(FieldContext fieldContext) {
    final Query perFieldQuery;
    final String fieldName = fieldContext.getField();

    /*
     * Store terms per position and detect if for a given position more than one term is present
     */
    TokenStream stream = null;
    boolean isMultiPhrase = false;
    Map<Integer, List<Term>> termsPerPosition = new HashMap<Integer, List<Term>>();
    final String sentence = phraseContext.getSentence();
    try {
      Reader reader = new StringReader(sentence);
      stream = queryContext.getQueryAnalyzer().tokenStream(fieldName, reader);

      CharTermAttribute termAttribute = stream.addAttribute(CharTermAttribute.class);
      PositionIncrementAttribute positionAttribute =
          stream.addAttribute(PositionIncrementAttribute.class);

      stream.reset();
      int position = -1; // start at -1 since we apply at least one increment
      List<Term> termsAtSamePosition = null;
      while (stream.incrementToken()) {
        int positionIncrement = 1;
        if (positionAttribute != null) {
          positionIncrement = positionAttribute.getPositionIncrement();
        }

        if (positionIncrement > 0) {
          position += positionIncrement;
          termsAtSamePosition = termsPerPosition.get(position);
        }

        if (termsAtSamePosition == null) {
          termsAtSamePosition = new ArrayList<Term>();
          termsPerPosition.put(position, termsAtSamePosition);
        }

        String termString = new String(termAttribute.buffer(), 0, termAttribute.length());
        termsAtSamePosition.add(new Term(fieldName, termString));
        if (termsAtSamePosition.size() > 1) {
          isMultiPhrase = true;
        }
      }
    } catch (IOException e) {
      throw new AssertionFailure("IOException while reading a string. Doh!", e);
    } finally {
      if (stream != null) {
        try {
          stream.end();
          stream.close();
        } catch (IOException e) {
          throw new AssertionFailure("IOException while reading a string. Doh!", e);
        }
      }
    }

    /*
     * Create the appropriate query depending on the conditions.
     * Note that a MultiPhraseQuery is needed if several terms share the same position,
     * because it ORs the terms at that position rather than ANDing them the way PhraseQuery does.
     */
    final int size = termsPerPosition.size();
    if (size == 0) {
      perFieldQuery = new BooleanQuery.Builder().build();
    } else if (size <= 1) {
      final List<Term> terms = termsPerPosition.values().iterator().next();
      if (terms.size() == 1) {
        perFieldQuery = new TermQuery(terms.get(0));
      } else {
        BooleanQuery.Builder booleanQueryBuilder = new BooleanQuery.Builder();
        for (Term term : terms) {
          booleanQueryBuilder.add(new TermQuery(term), BooleanClause.Occur.SHOULD);
        }
        perFieldQuery = booleanQueryBuilder.build();
      }
    } else {
      if (isMultiPhrase) {
        MultiPhraseQuery query = new MultiPhraseQuery();
        query.setSlop(phraseContext.getSlop());
        for (Map.Entry<Integer, List<Term>> entry : termsPerPosition.entrySet()) {
          final List<Term> value = entry.getValue();
          query.add(value.toArray(new Term[value.size()]), entry.getKey());
        }
        perFieldQuery = query;
      } else {
        PhraseQuery query = new PhraseQuery();
        query.setSlop(phraseContext.getSlop());
        for (Map.Entry<Integer, List<Term>> entry : termsPerPosition.entrySet()) {
          final List<Term> value = entry.getValue();
          query.add(value.get(0), entry.getKey());
        }
        perFieldQuery = query;
      }
    }
    return fieldContext.getFieldCustomizer().setWrappedQuery(perFieldQuery).createQuery();
  }
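A minimal sketch of the MultiPhraseQuery branch described in the comment above: if the analyzer emits a synonym at some position, that position ORs its terms, which a plain PhraseQuery cannot express. The field name and terms are hypothetical, and this uses the older mutable MultiPhraseQuery that these examples rely on (newer Lucene versions replace it with MultiPhraseQuery.Builder):

  MultiPhraseQuery mpq = new MultiPhraseQuery();
  mpq.add(new Term[] {new Term("body", "quick"), new Term("body", "fast")}, 0); // synonyms at position 0
  mpq.add(new Term[] {new Term("body", "fox")}, 1);
  mpq.setSlop(0);
  // matches "quick fox" as well as "fast fox"; a PhraseQuery could hold only one term per position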
Example #28
  // Test scores with one field with Term Freqs and one without, otherwise with equal content
  public void testBasic() throws Exception {
    Directory dir = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random());
    IndexWriter writer =
        new IndexWriter(
            dir,
            newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)
                .setMaxBufferedDocs(2)
                .setSimilarity(new SimpleSimilarity())
                .setMergePolicy(newLogMergePolicy(2)));

    StringBuilder sb = new StringBuilder(265);
    String term = "term";
    for (int i = 0; i < 30; i++) {
      Document d = new Document();
      sb.append(term).append(" ");
      String content = sb.toString();
      Field noTf = newField("noTf", content + (i % 2 == 0 ? "" : " notf"), omitType);
      d.add(noTf);

      Field tf = newField("tf", content + (i % 2 == 0 ? " tf" : ""), normalType);
      d.add(tf);

      writer.addDocument(d);
      // System.out.println(d);
    }

    writer.forceMerge(1);
    // flush
    writer.close();

    /*
     * Verify the index
     */
    IndexReader reader = DirectoryReader.open(dir);
    IndexSearcher searcher = newSearcher(reader);
    searcher.setSimilarity(new SimpleSimilarity());

    Term a = new Term("noTf", term);
    Term b = new Term("tf", term);
    Term c = new Term("noTf", "notf");
    Term d = new Term("tf", "tf");
    TermQuery q1 = new TermQuery(a);
    TermQuery q2 = new TermQuery(b);
    TermQuery q3 = new TermQuery(c);
    TermQuery q4 = new TermQuery(d);

    PhraseQuery pq = new PhraseQuery();
    pq.add(a);
    pq.add(c);
    try {
      searcher.search(pq, 10);
      fail("did not hit expected exception");
    } catch (Exception e) {
      Throwable cause = e;
      // If the searcher uses an executor service, the IAE is wrapped into other exceptions
      while (cause.getCause() != null) {
        cause = cause.getCause();
      }
      assertTrue("Expected an IAE, got " + cause, cause instanceof IllegalStateException);
    }

    searcher.search(
        q1,
        new CountingHitCollector() {
          private Scorer scorer;

          @Override
          public final void setScorer(Scorer scorer) {
            this.scorer = scorer;
          }

          @Override
          public final void collect(int doc) throws IOException {
            // System.out.println("Q1: Doc=" + doc + " score=" + score);
            float score = scorer.score();
            assertTrue("got score=" + score, score == 1.0f);
            super.collect(doc);
          }
        });
    // System.out.println(CountingHitCollector.getCount());

    searcher.search(
        q2,
        new CountingHitCollector() {
          private Scorer scorer;

          @Override
          public final void setScorer(Scorer scorer) {
            this.scorer = scorer;
          }

          @Override
          public final void collect(int doc) throws IOException {
            // System.out.println("Q2: Doc=" + doc + " score=" + score);
            float score = scorer.score();
            assertEquals(1.0f + doc, score, 0.00001f);
            super.collect(doc);
          }
        });
    // System.out.println(CountingHitCollector.getCount());

    searcher.search(
        q3,
        new CountingHitCollector() {
          private Scorer scorer;

          @Override
          public final void setScorer(Scorer scorer) {
            this.scorer = scorer;
          }

          @Override
          public final void collect(int doc) throws IOException {
            // System.out.println("Q1: Doc=" + doc + " score=" + score);
            float score = scorer.score();
            assertTrue(score == 1.0f);
            assertFalse(doc % 2 == 0);
            super.collect(doc);
          }
        });
    // System.out.println(CountingHitCollector.getCount());

    searcher.search(
        q4,
        new CountingHitCollector() {
          private Scorer scorer;

          @Override
          public final void setScorer(Scorer scorer) {
            this.scorer = scorer;
          }

          @Override
          public final void collect(int doc) throws IOException {
            float score = scorer.score();
            // System.out.println("Q1: Doc=" + doc + " score=" + score);
            assertTrue(score == 1.0f);
            assertTrue(doc % 2 == 0);
            super.collect(doc);
          }
        });
    // System.out.println(CountingHitCollector.getCount());

    BooleanQuery bq = new BooleanQuery();
    bq.add(q1, Occur.MUST);
    bq.add(q4, Occur.MUST);

    searcher.search(
        bq,
        new CountingHitCollector() {
          @Override
          public final void collect(int doc) throws IOException {
            // System.out.println("BQ: Doc=" + doc + " score=" + score);
            super.collect(doc);
          }
        });
    assertEquals(15, CountingHitCollector.getCount());

    reader.close();
    dir.close();
  }