예제 #1
0
 private Automaton join(String... strings) {
   List<Automaton> as = new ArrayList<Automaton>();
   for (String s : strings) {
     as.add(BasicAutomata.makeString(s));
     as.add(SEP_A);
   }
   as.remove(as.size() - 1);
   return BasicOperations.concatenate(as);
 }
예제 #2
0
  public void testSynOverHole() throws Exception {

    final TokenStream ts =
        new CannedTokenStream(
            new Token[] {
              token("a", 1, 1), token("X", 0, 2), token("b", 2, 1),
            });
    final Automaton actual = (new TokenStreamToAutomaton()).toAutomaton(ts);
    final Automaton a1 =
        BasicOperations.union(join(s2a("a"), SEP_A, HOLE_A), BasicAutomata.makeString("X"));
    final Automaton expected = BasicOperations.concatenate(a1, join(SEP_A, s2a("b")));
    // toDot(actual);
    assertTrue(BasicOperations.sameLanguage(expected, actual));
  }
예제 #3
0
    /**
     * Create a automaton for a given context query this automaton will be used to find the matching
     * paths with the fst
     *
     * @param preserveSep set an additional char (<code>XAnalyzingSuggester.SEP_LABEL</code>)
     *     between each context query
     * @param queries list of {@link ContextQuery} defining the lookup context
     * @return Automaton matching the given Query
     */
    public static Automaton toAutomaton(boolean preserveSep, Iterable<ContextQuery> queries) {
      Automaton a = BasicAutomata.makeEmptyString();

      Automaton gap = BasicAutomata.makeChar(ContextMapping.SEPARATOR);
      if (preserveSep) {
        // if separators are preserved the fst contains a SEP_LABEL
        // behind each gap. To have a matching automaton, we need to
        // include the SEP_LABEL in the query as well
        gap =
            BasicOperations.concatenate(gap, BasicAutomata.makeChar(XAnalyzingSuggester.SEP_LABEL));
      }

      for (ContextQuery query : queries) {
        a = Automaton.concatenate(Arrays.asList(query.toAutomaton(), gap, a));
      }
      BasicOperations.determinize(a);
      return a;
    }
  /** return a random NFA/DFA for testing */
  public static Automaton randomAutomaton(Random random) {
    // get two random Automata from regexps
    Automaton a1 = new RegExp(AutomatonTestUtil.randomRegexp(random), RegExp.NONE).toAutomaton();
    if (random.nextBoolean()) a1 = BasicOperations.complement(a1);

    Automaton a2 = new RegExp(AutomatonTestUtil.randomRegexp(random), RegExp.NONE).toAutomaton();
    if (random.nextBoolean()) a2 = BasicOperations.complement(a2);

    // combine them in random ways
    switch (random.nextInt(4)) {
      case 0:
        return BasicOperations.concatenate(a1, a2);
      case 1:
        return BasicOperations.union(a1, a2);
      case 2:
        return BasicOperations.intersection(a1, a2);
      default:
        return BasicOperations.minus(a1, a2);
    }
  }
예제 #5
0
 private Automaton join(Automaton... as) {
   return BasicOperations.concatenate(Arrays.asList(as));
 }
  /**
   * Extracts all MultiTermQueries for {@code field}, and returns equivalent automata that will
   * match terms.
   */
  static CharacterRunAutomaton[] extractAutomata(Query query, String field) {
    List<CharacterRunAutomaton> list = new ArrayList<>();
    if (query instanceof BooleanQuery) {
      BooleanClause clauses[] = ((BooleanQuery) query).getClauses();
      for (BooleanClause clause : clauses) {
        if (!clause.isProhibited()) {
          list.addAll(Arrays.asList(extractAutomata(clause.getQuery(), field)));
        }
      }
    } else if (query instanceof DisjunctionMaxQuery) {
      for (Query sub : ((DisjunctionMaxQuery) query).getDisjuncts()) {
        list.addAll(Arrays.asList(extractAutomata(sub, field)));
      }
    } else if (query instanceof SpanOrQuery) {
      for (Query sub : ((SpanOrQuery) query).getClauses()) {
        list.addAll(Arrays.asList(extractAutomata(sub, field)));
      }
    } else if (query instanceof SpanNearQuery) {
      for (Query sub : ((SpanNearQuery) query).getClauses()) {
        list.addAll(Arrays.asList(extractAutomata(sub, field)));
      }
    } else if (query instanceof SpanNotQuery) {
      list.addAll(Arrays.asList(extractAutomata(((SpanNotQuery) query).getInclude(), field)));
    } else if (query instanceof SpanPositionCheckQuery) {
      list.addAll(
          Arrays.asList(extractAutomata(((SpanPositionCheckQuery) query).getMatch(), field)));
    } else if (query instanceof SpanMultiTermQueryWrapper) {
      list.addAll(
          Arrays.asList(
              extractAutomata(((SpanMultiTermQueryWrapper<?>) query).getWrappedQuery(), field)));
    } else if (query instanceof AutomatonQuery) {
      final AutomatonQuery aq = (AutomatonQuery) query;
      if (aq.getField().equals(field)) {
        list.add(
            new CharacterRunAutomaton(aq.getAutomaton()) {
              @Override
              public String toString() {
                return aq.toString();
              }
            });
      }
    } else if (query instanceof PrefixQuery) {
      final PrefixQuery pq = (PrefixQuery) query;
      Term prefix = pq.getPrefix();
      if (prefix.field().equals(field)) {
        list.add(
            new CharacterRunAutomaton(
                BasicOperations.concatenate(
                    BasicAutomata.makeString(prefix.text()), BasicAutomata.makeAnyString())) {
              @Override
              public String toString() {
                return pq.toString();
              }
            });
      }
    } else if (query instanceof FuzzyQuery) {
      final FuzzyQuery fq = (FuzzyQuery) query;
      if (fq.getField().equals(field)) {
        String utf16 = fq.getTerm().text();
        int termText[] = new int[utf16.codePointCount(0, utf16.length())];
        for (int cp, i = 0, j = 0; i < utf16.length(); i += Character.charCount(cp)) {
          termText[j++] = cp = utf16.codePointAt(i);
        }
        int termLength = termText.length;
        int prefixLength = Math.min(fq.getPrefixLength(), termLength);
        String suffix =
            UnicodeUtil.newString(termText, prefixLength, termText.length - prefixLength);
        LevenshteinAutomata builder = new LevenshteinAutomata(suffix, fq.getTranspositions());
        Automaton automaton = builder.toAutomaton(fq.getMaxEdits());
        if (prefixLength > 0) {
          Automaton prefix =
              BasicAutomata.makeString(UnicodeUtil.newString(termText, 0, prefixLength));
          automaton = BasicOperations.concatenate(prefix, automaton);
        }
        list.add(
            new CharacterRunAutomaton(automaton) {
              @Override
              public String toString() {
                return fq.toString();
              }
            });
      }
    } else if (query instanceof TermRangeQuery) {
      final TermRangeQuery tq = (TermRangeQuery) query;
      if (tq.getField().equals(field)) {
        final CharsRef lowerBound;
        if (tq.getLowerTerm() == null) {
          lowerBound = null;
        } else {
          lowerBound = new CharsRef(tq.getLowerTerm().utf8ToString());
        }

        final CharsRef upperBound;
        if (tq.getUpperTerm() == null) {
          upperBound = null;
        } else {
          upperBound = new CharsRef(tq.getUpperTerm().utf8ToString());
        }

        final boolean includeLower = tq.includesLower();
        final boolean includeUpper = tq.includesUpper();
        final CharsRef scratch = new CharsRef();
        final Comparator<CharsRef> comparator = CharsRef.getUTF16SortedAsUTF8Comparator();

        // this is *not* an automaton, but its very simple
        list.add(
            new CharacterRunAutomaton(BasicAutomata.makeEmpty()) {
              @Override
              public boolean run(char[] s, int offset, int length) {
                scratch.chars = s;
                scratch.offset = offset;
                scratch.length = length;

                if (lowerBound != null) {
                  int cmp = comparator.compare(scratch, lowerBound);
                  if (cmp < 0 || (!includeLower && cmp == 0)) {
                    return false;
                  }
                }

                if (upperBound != null) {
                  int cmp = comparator.compare(scratch, upperBound);
                  if (cmp > 0 || (!includeUpper && cmp == 0)) {
                    return false;
                  }
                }
                return true;
              }

              @Override
              public String toString() {
                return tq.toString();
              }
            });
      }
    }
    return list.toArray(new CharacterRunAutomaton[list.size()]);
  }