예제 #1
0
 private static void planFuzzyQuery(final StringBuilder builder, final FuzzyQuery query) {
   builder.append(query.getTerm());
   builder.append(",prefixLength=");
   builder.append(query.getPrefixLength());
   builder.append(",minSimilarity=");
   builder.append(query.getMinSimilarity());
 }
예제 #2
0
  private final void populateTerm(
      final HQuery hQuery, final boolean isShould, final boolean isMust, final Query subQueryL)
      throws FederatedSearchException {

    HTerm hTerm = new HTerm();
    hTerm.isShould = isShould;
    hTerm.isMust = isMust;
    hTerm.boost = subQueryL.getBoost();
    hQuery.terms.add(hTerm);

    if (subQueryL instanceof TermQuery) {
      TermQuery lTerm = (TermQuery) subQueryL;
      hTerm.type = lTerm.getTerm().field();
      hTerm.text = lTerm.getTerm().text();

    } else if (subQueryL instanceof FuzzyQuery) {
      FuzzyQuery lTerm = (FuzzyQuery) subQueryL;
      hTerm.isFuzzy = true;
      hTerm.type = lTerm.getTerm().field();
      hTerm.text = lTerm.getTerm().text();

    } else if (subQueryL instanceof TermRangeQuery) {
      TermRangeQuery lTerm = (TermRangeQuery) subQueryL;
      hTerm.isFuzzy = false;
      hTerm.type = lTerm.getField();
      hTerm.minRange = lTerm.getLowerTerm();
      hTerm.maxRange = lTerm.getUpperTerm();

    } else {
      throw new FederatedSearchException(
          "HQueryParser: Not Implemented Query :" + subQueryL.getClass().toString());
    }
  }
  public void assertFromTestData(int codePointTable[]) throws Exception {
    InputStream stream = getClass().getResourceAsStream("fuzzyTestData.txt");
    BufferedReader reader = new BufferedReader(new InputStreamReader(stream, "UTF-8"));

    int bits = Integer.parseInt(reader.readLine());
    int terms = (int) Math.pow(2, bits);

    Directory dir = newDirectory();
    RandomIndexWriter writer =
        new RandomIndexWriter(
            random,
            dir,
            newIndexWriterConfig(
                    TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.KEYWORD, false))
                .setMergePolicy(newLogMergePolicy()));

    Document doc = new Document();
    Field field = newField("field", "", Field.Store.NO, Field.Index.ANALYZED);
    doc.add(field);

    for (int i = 0; i < terms; i++) {
      field.setValue(mapInt(codePointTable, i));
      writer.addDocument(doc);
    }

    IndexReader r = writer.getReader();
    IndexSearcher searcher = newSearcher(r);
    writer.close();
    String line;
    while ((line = reader.readLine()) != null) {
      String params[] = line.split(",");
      String query = mapInt(codePointTable, Integer.parseInt(params[0]));
      int prefix = Integer.parseInt(params[1]);
      int pqSize = Integer.parseInt(params[2]);
      float minScore = Float.parseFloat(params[3]);
      FuzzyQuery q = new FuzzyQuery(new Term("field", query), minScore, prefix);
      q.setRewriteMethod(new MultiTermQuery.TopTermsBoostOnlyBooleanQueryRewrite(pqSize));
      int expectedResults = Integer.parseInt(reader.readLine());
      TopDocs docs = searcher.search(q, expectedResults);
      assertEquals(expectedResults, docs.totalHits);
      for (int i = 0; i < expectedResults; i++) {
        String scoreDoc[] = reader.readLine().split(",");
        assertEquals(Integer.parseInt(scoreDoc[0]), docs.scoreDocs[i].doc);
        assertEquals(Float.parseFloat(scoreDoc[1]), docs.scoreDocs[i].score, epsilon);
      }
    }
    searcher.close();
    r.close();
    dir.close();
  }
예제 #4
0
  private static JsonObject convertFuzzyQuery(FuzzyQuery query) {
    String field = query.getTerm().field();

    JsonObject fuzzyQuery =
        JsonBuilder.object()
            .add(
                "fuzzy",
                JsonBuilder.object()
                    .add(
                        field,
                        JsonBuilder.object()
                            .addProperty("value", query.getTerm().text())
                            .addProperty("fuzziness", query.getMaxEdits())
                            .addProperty("prefix_length", query.getPrefixLength())
                            .addProperty("boost", query.getBoost())))
            .build();

    return wrapQueryForNestedIfRequired(field, fuzzyQuery);
  }
  public void testWildcard() throws Exception {
    assertQueryEquals("term*", null, "term*");
    assertQueryEquals("term*^2", null, "term*^2.0");
    assertQueryEquals("term~", null, "term~0.5");
    assertQueryEquals("term~0.7", null, "term~0.7");

    assertQueryEquals("term~^2", null, "term~0.5^2.0");

    assertQueryEquals("term^2~", null, "term~0.5^2.0");
    assertQueryEquals("term*germ", null, "term*germ");
    assertQueryEquals("term*germ^3", null, "term*germ^3.0");

    assertTrue(getQuery("term*", null) instanceof PrefixQuery);
    assertTrue(getQuery("term*^2", null) instanceof PrefixQuery);
    assertTrue(getQuery("term~", null) instanceof FuzzyQuery);
    assertTrue(getQuery("term~0.7", null) instanceof FuzzyQuery);
    FuzzyQuery fq = (FuzzyQuery) getQuery("term~0.7", null);
    assertEquals(0.7f, fq.getMinSimilarity(), 0.1f);
    assertEquals(FuzzyQuery.defaultPrefixLength, fq.getPrefixLength());
    fq = (FuzzyQuery) getQuery("term~", null);
    assertEquals(0.5f, fq.getMinSimilarity(), 0.1f);
    assertEquals(FuzzyQuery.defaultPrefixLength, fq.getPrefixLength());

    assertQueryNodeException("term~1.1"); // value > 1, throws exception

    assertTrue(getQuery("term*germ", null) instanceof WildcardQuery);

    /*
     * Tests to see that wild card terms are (or are not) properly lower-cased
     * with propery parser configuration
     */
    // First prefix queries:
    // by default, convert to lowercase:
    assertWildcardQueryEquals("Term*", true, "term*");
    // explicitly set lowercase:
    assertWildcardQueryEquals("term*", true, "term*");
    assertWildcardQueryEquals("Term*", true, "term*");
    assertWildcardQueryEquals("TERM*", true, "term*");
    // explicitly disable lowercase conversion:
    assertWildcardQueryEquals("term*", false, "term*");
    assertWildcardQueryEquals("Term*", false, "Term*");
    assertWildcardQueryEquals("TERM*", false, "TERM*");
    // Then 'full' wildcard queries:
    // by default, convert to lowercase:
    assertWildcardQueryEquals("Te?m", "te?m");
    // explicitly set lowercase:
    assertWildcardQueryEquals("te?m", true, "te?m");
    assertWildcardQueryEquals("Te?m", true, "te?m");
    assertWildcardQueryEquals("TE?M", true, "te?m");
    assertWildcardQueryEquals("Te?m*gerM", true, "te?m*germ");
    // explicitly disable lowercase conversion:
    assertWildcardQueryEquals("te?m", false, "te?m");
    assertWildcardQueryEquals("Te?m", false, "Te?m");
    assertWildcardQueryEquals("TE?M", false, "TE?M");
    assertWildcardQueryEquals("Te?m*gerM", false, "Te?m*gerM");
    // Fuzzy queries:
    assertWildcardQueryEquals("Term~", "term~0.5");
    assertWildcardQueryEquals("Term~", true, "term~0.5");
    assertWildcardQueryEquals("Term~", false, "Term~0.5");
    // Range queries:

    // TODO: implement this on QueryParser
    // Q0002E_INVALID_SYNTAX_CANNOT_PARSE: Syntax Error, cannot parse '[A TO
    // C]': Lexical error at line 1, column 1. Encountered: "[" (91), after
    // : ""
    assertWildcardQueryEquals("[A TO C]", "[a TO c]");
    assertWildcardQueryEquals("[A TO C]", true, "[a TO c]");
    assertWildcardQueryEquals("[A TO C]", false, "[A TO C]");
    // Test suffix queries: first disallow
    try {
      assertWildcardQueryEquals("*Term", true, "*term");
      fail();
    } catch (QueryNodeException pe) {
      // expected exception
    }
    try {
      assertWildcardQueryEquals("?Term", true, "?term");
      fail();
    } catch (QueryNodeException pe) {
      // expected exception
    }
    // Test suffix queries: then allow
    assertWildcardQueryEquals("*Term", true, "*term", true);
    assertWildcardQueryEquals("?Term", true, "?term", true);
  }
  /**
   * Extracts all MultiTermQueries for {@code field}, and returns equivalent automata that will
   * match terms.
   */
  static CharacterRunAutomaton[] extractAutomata(Query query, String field) {
    List<CharacterRunAutomaton> list = new ArrayList<>();
    if (query instanceof BooleanQuery) {
      for (BooleanClause clause : (BooleanQuery) query) {
        if (!clause.isProhibited()) {
          list.addAll(Arrays.asList(extractAutomata(clause.getQuery(), field)));
        }
      }
    } else if (query instanceof ConstantScoreQuery) {
      list.addAll(Arrays.asList(extractAutomata(((ConstantScoreQuery) query).getQuery(), field)));
    } else if (query instanceof DisjunctionMaxQuery) {
      for (Query sub : ((DisjunctionMaxQuery) query).getDisjuncts()) {
        list.addAll(Arrays.asList(extractAutomata(sub, field)));
      }
    } else if (query instanceof SpanOrQuery) {
      for (Query sub : ((SpanOrQuery) query).getClauses()) {
        list.addAll(Arrays.asList(extractAutomata(sub, field)));
      }
    } else if (query instanceof SpanNearQuery) {
      for (Query sub : ((SpanNearQuery) query).getClauses()) {
        list.addAll(Arrays.asList(extractAutomata(sub, field)));
      }
    } else if (query instanceof SpanNotQuery) {
      list.addAll(Arrays.asList(extractAutomata(((SpanNotQuery) query).getInclude(), field)));
    } else if (query instanceof SpanPositionCheckQuery) {
      list.addAll(
          Arrays.asList(extractAutomata(((SpanPositionCheckQuery) query).getMatch(), field)));
    } else if (query instanceof SpanMultiTermQueryWrapper) {
      list.addAll(
          Arrays.asList(
              extractAutomata(((SpanMultiTermQueryWrapper<?>) query).getWrappedQuery(), field)));
    } else if (query instanceof AutomatonQuery) {
      final AutomatonQuery aq = (AutomatonQuery) query;
      if (aq.getField().equals(field)) {
        list.add(
            new CharacterRunAutomaton(aq.getAutomaton()) {
              @Override
              public String toString() {
                return aq.toString();
              }
            });
      }
    } else if (query instanceof PrefixQuery) {
      final PrefixQuery pq = (PrefixQuery) query;
      Term prefix = pq.getPrefix();
      if (prefix.field().equals(field)) {
        list.add(
            new CharacterRunAutomaton(
                Operations.concatenate(
                    Automata.makeString(prefix.text()), Automata.makeAnyString())) {
              @Override
              public String toString() {
                return pq.toString();
              }
            });
      }
    } else if (query instanceof FuzzyQuery) {
      final FuzzyQuery fq = (FuzzyQuery) query;
      if (fq.getField().equals(field)) {
        String utf16 = fq.getTerm().text();
        int termText[] = new int[utf16.codePointCount(0, utf16.length())];
        for (int cp, i = 0, j = 0; i < utf16.length(); i += Character.charCount(cp)) {
          termText[j++] = cp = utf16.codePointAt(i);
        }
        int termLength = termText.length;
        int prefixLength = Math.min(fq.getPrefixLength(), termLength);
        String suffix =
            UnicodeUtil.newString(termText, prefixLength, termText.length - prefixLength);
        LevenshteinAutomata builder = new LevenshteinAutomata(suffix, fq.getTranspositions());
        String prefix = UnicodeUtil.newString(termText, 0, prefixLength);
        Automaton automaton = builder.toAutomaton(fq.getMaxEdits(), prefix);
        list.add(
            new CharacterRunAutomaton(automaton) {
              @Override
              public String toString() {
                return fq.toString();
              }
            });
      }
    } else if (query instanceof TermRangeQuery) {
      final TermRangeQuery tq = (TermRangeQuery) query;
      if (tq.getField().equals(field)) {
        final CharsRef lowerBound;
        if (tq.getLowerTerm() == null) {
          lowerBound = null;
        } else {
          lowerBound = new CharsRef(tq.getLowerTerm().utf8ToString());
        }

        final CharsRef upperBound;
        if (tq.getUpperTerm() == null) {
          upperBound = null;
        } else {
          upperBound = new CharsRef(tq.getUpperTerm().utf8ToString());
        }

        final boolean includeLower = tq.includesLower();
        final boolean includeUpper = tq.includesUpper();
        final CharsRef scratch = new CharsRef();
        final Comparator<CharsRef> comparator = CharsRef.getUTF16SortedAsUTF8Comparator();

        // this is *not* an automaton, but it's very simple
        list.add(
            new CharacterRunAutomaton(Automata.makeEmpty()) {
              @Override
              public boolean run(char[] s, int offset, int length) {
                scratch.chars = s;
                scratch.offset = offset;
                scratch.length = length;

                if (lowerBound != null) {
                  int cmp = comparator.compare(scratch, lowerBound);
                  if (cmp < 0 || (!includeLower && cmp == 0)) {
                    return false;
                  }
                }

                if (upperBound != null) {
                  int cmp = comparator.compare(scratch, upperBound);
                  if (cmp > 0 || (!includeUpper && cmp == 0)) {
                    return false;
                  }
                }
                return true;
              }

              @Override
              public String toString() {
                return tq.toString();
              }
            });
      }
    }
    return list.toArray(new CharacterRunAutomaton[list.size()]);
  }
  public void testWildcard() throws Exception {
    assertQueryEquals("term*", null, "term*");
    assertQueryEquals("term*^2", null, "term*^2.0");
    assertQueryEquals("term~", null, "term~0.5");
    assertQueryEquals("term~0.7", null, "term~0.7");
    assertQueryEquals("term~^2", null, "term~0.5^2.0");
    assertQueryEquals("term^2~", null, "term~0.5^2.0");
    assertQueryEquals("term*germ", null, "term*germ");
    assertQueryEquals("term*germ^3", null, "term*germ^3.0");

    assertTrue(getQuery("term*", null) instanceof PrefixQuery);
    assertTrue(getQuery("term*^2", null) instanceof PrefixQuery);
    assertTrue(getQuery("term~", null) instanceof FuzzyQuery);
    assertTrue(getQuery("term~0.7", null) instanceof FuzzyQuery);
    FuzzyQuery fq = (FuzzyQuery) getQuery("term~0.7", null);
    assertEquals(0.7f, fq.getMinSimilarity(), 0.1f);
    assertEquals(FuzzyQuery.defaultPrefixLength, fq.getPrefixLength());
    fq = (FuzzyQuery) getQuery("term~", null);
    assertEquals(0.5f, fq.getMinSimilarity(), 0.1f);
    assertEquals(FuzzyQuery.defaultPrefixLength, fq.getPrefixLength());

    assertParseException("term~1.1"); // value > 1, throws exception

    assertTrue(getQuery("term*germ", null) instanceof WildcardQuery);

    /* Tests to see that wild card terms are (or are not) properly
     * lower-cased with propery parser configuration
     */
    // First prefix queries:
    // by default, convert to lowercase:
    assertWildcardQueryEquals("Term*", true, "term*");
    // explicitly set lowercase:
    assertWildcardQueryEquals("term*", true, "term*");
    assertWildcardQueryEquals("Term*", true, "term*");
    assertWildcardQueryEquals("TERM*", true, "term*");
    // explicitly disable lowercase conversion:
    assertWildcardQueryEquals("term*", false, "term*");
    assertWildcardQueryEquals("Term*", false, "Term*");
    assertWildcardQueryEquals("TERM*", false, "TERM*");
    // Then 'full' wildcard queries:
    // by default, convert to lowercase:
    assertWildcardQueryEquals("Te?m", "te?m");
    // explicitly set lowercase:
    assertWildcardQueryEquals("te?m", true, "te?m");
    assertWildcardQueryEquals("Te?m", true, "te?m");
    assertWildcardQueryEquals("TE?M", true, "te?m");
    assertWildcardQueryEquals("Te?m*gerM", true, "te?m*germ");
    // explicitly disable lowercase conversion:
    assertWildcardQueryEquals("te?m", false, "te?m");
    assertWildcardQueryEquals("Te?m", false, "Te?m");
    assertWildcardQueryEquals("TE?M", false, "TE?M");
    assertWildcardQueryEquals("Te?m*gerM", false, "Te?m*gerM");
    //  Fuzzy queries:
    assertWildcardQueryEquals("Term~", "term~0.5");
    assertWildcardQueryEquals("Term~", true, "term~0.5");
    assertWildcardQueryEquals("Term~", false, "Term~0.5");
    //  Range queries:
    assertWildcardQueryEquals("[A TO C]", "[a TO c]");
    assertWildcardQueryEquals("[A TO C]", true, "[a TO c]");
    assertWildcardQueryEquals("[A TO C]", false, "[A TO C]");
    // Test suffix queries: first disallow
    try {
      assertWildcardQueryEquals("*Term", true, "*term");
      fail();
    } catch (ParseException pe) {
      // expected exception
    }
    try {
      assertWildcardQueryEquals("?Term", true, "?term");
      fail();
    } catch (ParseException pe) {
      // expected exception
    }
    // Test suffix queries: then allow
    assertWildcardQueryEquals("*Term", true, "*term", true);
    assertWildcardQueryEquals("?Term", true, "?term", true);
  }