Exemplo n.º 1
0
  public void testOverlappedTokensSausage() throws Exception {

    // Two tokens on top of each other (sausage):
    final TokenStream ts =
        new CannedTokenStream(new Token[] {token("abc", 1, 1), token("xyz", 0, 1)});
    final Automaton actual = (new TokenStreamToAutomaton()).toAutomaton(ts);
    final Automaton a1 = BasicAutomata.makeString("abc");
    final Automaton a2 = BasicAutomata.makeString("xyz");
    final Automaton expected = BasicOperations.union(a1, a2);
    assertTrue(BasicOperations.sameLanguage(expected, actual));
  }
Exemplo n.º 2
0
 public void testSynHangingOverEnd() throws Exception {
   final TokenStream ts =
       new CannedTokenStream(
           new Token[] {
             token("a", 1, 1), token("X", 0, 10),
           });
   final Automaton actual = (new TokenStreamToAutomaton()).toAutomaton(ts);
   final Automaton expected =
       BasicOperations.union(BasicAutomata.makeString("a"), BasicAutomata.makeString("X"));
   assertTrue(BasicOperations.sameLanguage(expected, actual));
 }
Exemplo n.º 3
0
 /** Test a configuration that behaves a lot like KeepWordFilter */
 public void testKeep() throws Exception {
   CharacterRunAutomaton keepWords =
       new CharacterRunAutomaton(
           BasicOperations.complement(
               Automaton.union(
                   Arrays.asList(
                       BasicAutomata.makeString("foo"), BasicAutomata.makeString("bar")))));
   Analyzer a = new MockAnalyzer(MockTokenizer.SIMPLE, true, keepWords, true);
   assertAnalyzesTo(
       a,
       "quick foo brown bar bar fox foo",
       new String[] {"foo", "bar", "bar", "foo"},
       new int[] {2, 2, 1, 2});
 }
Exemplo n.º 4
0
 private Automaton join(String... strings) {
   List<Automaton> as = new ArrayList<Automaton>();
   for (String s : strings) {
     as.add(BasicAutomata.makeString(s));
     as.add(SEP_A);
   }
   as.remove(as.size() - 1);
   return BasicOperations.concatenate(as);
 }
Exemplo n.º 5
0
  public void testSingleToken() throws Exception {

    final TokenStream ts =
        new CannedTokenStream(
            new Token[] {
              token("abc", 1, 1),
            });
    final Automaton actual = (new TokenStreamToAutomaton()).toAutomaton(ts);
    final Automaton expected = BasicAutomata.makeString("abc");
    assertTrue(BasicOperations.sameLanguage(expected, actual));
  }
Exemplo n.º 6
0
  public void testSynOverHole2() throws Exception {

    final TokenStream ts =
        new CannedTokenStream(
            new Token[] {
              token("xyz", 1, 1), token("abc", 0, 3), token("def", 2, 1),
            });
    final Automaton actual = (new TokenStreamToAutomaton()).toAutomaton(ts);
    final Automaton expected =
        BasicOperations.union(
            join(s2a("xyz"), SEP_A, HOLE_A, SEP_A, s2a("def")), BasicAutomata.makeString("abc"));
    assertTrue(BasicOperations.sameLanguage(expected, actual));
  }
Exemplo n.º 7
0
  public void testOverlappedTokensLattice2() throws Exception {

    final TokenStream ts =
        new CannedTokenStream(
            new Token[] {
              token("abc", 1, 1), token("xyz", 0, 3), token("def", 1, 1), token("ghi", 1, 1),
            });
    final Automaton actual = (new TokenStreamToAutomaton()).toAutomaton(ts);
    final Automaton a1 = BasicAutomata.makeString("xyz");
    final Automaton a2 = join("abc", "def", "ghi");
    final Automaton expected = BasicOperations.union(a1, a2);
    // toDot(actual);
    assertTrue(BasicOperations.sameLanguage(expected, actual));
  }
Exemplo n.º 8
0
  public void testSynOverHole() throws Exception {

    final TokenStream ts =
        new CannedTokenStream(
            new Token[] {
              token("a", 1, 1), token("X", 0, 2), token("b", 2, 1),
            });
    final Automaton actual = (new TokenStreamToAutomaton()).toAutomaton(ts);
    final Automaton a1 =
        BasicOperations.union(join(s2a("a"), SEP_A, HOLE_A), BasicAutomata.makeString("X"));
    final Automaton expected = BasicOperations.concatenate(a1, join(SEP_A, s2a("b")));
    // toDot(actual);
    assertTrue(BasicOperations.sameLanguage(expected, actual));
  }
Exemplo n.º 9
0
 private Automaton s2a(String s) {
   return BasicAutomata.makeString(s);
 }
  /**
   * Extracts all MultiTermQueries for {@code field}, and returns equivalent automata that will
   * match terms.
   */
  static CharacterRunAutomaton[] extractAutomata(Query query, String field) {
    List<CharacterRunAutomaton> list = new ArrayList<>();
    if (query instanceof BooleanQuery) {
      BooleanClause clauses[] = ((BooleanQuery) query).getClauses();
      for (BooleanClause clause : clauses) {
        if (!clause.isProhibited()) {
          list.addAll(Arrays.asList(extractAutomata(clause.getQuery(), field)));
        }
      }
    } else if (query instanceof DisjunctionMaxQuery) {
      for (Query sub : ((DisjunctionMaxQuery) query).getDisjuncts()) {
        list.addAll(Arrays.asList(extractAutomata(sub, field)));
      }
    } else if (query instanceof SpanOrQuery) {
      for (Query sub : ((SpanOrQuery) query).getClauses()) {
        list.addAll(Arrays.asList(extractAutomata(sub, field)));
      }
    } else if (query instanceof SpanNearQuery) {
      for (Query sub : ((SpanNearQuery) query).getClauses()) {
        list.addAll(Arrays.asList(extractAutomata(sub, field)));
      }
    } else if (query instanceof SpanNotQuery) {
      list.addAll(Arrays.asList(extractAutomata(((SpanNotQuery) query).getInclude(), field)));
    } else if (query instanceof SpanPositionCheckQuery) {
      list.addAll(
          Arrays.asList(extractAutomata(((SpanPositionCheckQuery) query).getMatch(), field)));
    } else if (query instanceof SpanMultiTermQueryWrapper) {
      list.addAll(
          Arrays.asList(
              extractAutomata(((SpanMultiTermQueryWrapper<?>) query).getWrappedQuery(), field)));
    } else if (query instanceof AutomatonQuery) {
      final AutomatonQuery aq = (AutomatonQuery) query;
      if (aq.getField().equals(field)) {
        list.add(
            new CharacterRunAutomaton(aq.getAutomaton()) {
              @Override
              public String toString() {
                return aq.toString();
              }
            });
      }
    } else if (query instanceof PrefixQuery) {
      final PrefixQuery pq = (PrefixQuery) query;
      Term prefix = pq.getPrefix();
      if (prefix.field().equals(field)) {
        list.add(
            new CharacterRunAutomaton(
                BasicOperations.concatenate(
                    BasicAutomata.makeString(prefix.text()), BasicAutomata.makeAnyString())) {
              @Override
              public String toString() {
                return pq.toString();
              }
            });
      }
    } else if (query instanceof FuzzyQuery) {
      final FuzzyQuery fq = (FuzzyQuery) query;
      if (fq.getField().equals(field)) {
        String utf16 = fq.getTerm().text();
        int termText[] = new int[utf16.codePointCount(0, utf16.length())];
        for (int cp, i = 0, j = 0; i < utf16.length(); i += Character.charCount(cp)) {
          termText[j++] = cp = utf16.codePointAt(i);
        }
        int termLength = termText.length;
        int prefixLength = Math.min(fq.getPrefixLength(), termLength);
        String suffix =
            UnicodeUtil.newString(termText, prefixLength, termText.length - prefixLength);
        LevenshteinAutomata builder = new LevenshteinAutomata(suffix, fq.getTranspositions());
        Automaton automaton = builder.toAutomaton(fq.getMaxEdits());
        if (prefixLength > 0) {
          Automaton prefix =
              BasicAutomata.makeString(UnicodeUtil.newString(termText, 0, prefixLength));
          automaton = BasicOperations.concatenate(prefix, automaton);
        }
        list.add(
            new CharacterRunAutomaton(automaton) {
              @Override
              public String toString() {
                return fq.toString();
              }
            });
      }
    } else if (query instanceof TermRangeQuery) {
      final TermRangeQuery tq = (TermRangeQuery) query;
      if (tq.getField().equals(field)) {
        final CharsRef lowerBound;
        if (tq.getLowerTerm() == null) {
          lowerBound = null;
        } else {
          lowerBound = new CharsRef(tq.getLowerTerm().utf8ToString());
        }

        final CharsRef upperBound;
        if (tq.getUpperTerm() == null) {
          upperBound = null;
        } else {
          upperBound = new CharsRef(tq.getUpperTerm().utf8ToString());
        }

        final boolean includeLower = tq.includesLower();
        final boolean includeUpper = tq.includesUpper();
        final CharsRef scratch = new CharsRef();
        final Comparator<CharsRef> comparator = CharsRef.getUTF16SortedAsUTF8Comparator();

        // this is *not* an automaton, but its very simple
        list.add(
            new CharacterRunAutomaton(BasicAutomata.makeEmpty()) {
              @Override
              public boolean run(char[] s, int offset, int length) {
                scratch.chars = s;
                scratch.offset = offset;
                scratch.length = length;

                if (lowerBound != null) {
                  int cmp = comparator.compare(scratch, lowerBound);
                  if (cmp < 0 || (!includeLower && cmp == 0)) {
                    return false;
                  }
                }

                if (upperBound != null) {
                  int cmp = comparator.compare(scratch, upperBound);
                  if (cmp > 0 || (!includeUpper && cmp == 0)) {
                    return false;
                  }
                }
                return true;
              }

              @Override
              public String toString() {
                return tq.toString();
              }
            });
      }
    }
    return list.toArray(new CharacterRunAutomaton[list.size()]);
  }