/**
 * Builds an automaton that accepts the given strings in order, with {@code SEP_A} placed between
 * each adjacent pair.
 *
 * <p>Separators are only inserted between elements, never before the first or after the last
 * one. When called with no arguments the empty list is handed to
 * {@code BasicOperations.concatenate} as-is; nothing is removed from the list, so the call cannot
 * fail with an index error.
 *
 * @param strings the literal strings to join, in order; may be empty
 * @return the concatenation of the string automata and the separators between them
 */
private Automaton join(String... strings) {
  List<Automaton> parts = new ArrayList<Automaton>();
  for (String s : strings) {
    // Emit the separator in front of every element except the first, so no trailing
    // separator ever has to be removed afterwards.
    if (!parts.isEmpty()) {
      parts.add(SEP_A);
    }
    parts.add(BasicAutomata.makeString(s));
  }
  return BasicOperations.concatenate(parts);
}
public void testSynOverHole() throws Exception { final TokenStream ts = new CannedTokenStream( new Token[] { token("a", 1, 1), token("X", 0, 2), token("b", 2, 1), }); final Automaton actual = (new TokenStreamToAutomaton()).toAutomaton(ts); final Automaton a1 = BasicOperations.union(join(s2a("a"), SEP_A, HOLE_A), BasicAutomata.makeString("X")); final Automaton expected = BasicOperations.concatenate(a1, join(SEP_A, s2a("b"))); // toDot(actual); assertTrue(BasicOperations.sameLanguage(expected, actual)); }
/** * Create a automaton for a given context query this automaton will be used to find the matching * paths with the fst * * @param preserveSep set an additional char (<code>XAnalyzingSuggester.SEP_LABEL</code>) * between each context query * @param queries list of {@link ContextQuery} defining the lookup context * @return Automaton matching the given Query */ public static Automaton toAutomaton(boolean preserveSep, Iterable<ContextQuery> queries) { Automaton a = BasicAutomata.makeEmptyString(); Automaton gap = BasicAutomata.makeChar(ContextMapping.SEPARATOR); if (preserveSep) { // if separators are preserved the fst contains a SEP_LABEL // behind each gap. To have a matching automaton, we need to // include the SEP_LABEL in the query as well gap = BasicOperations.concatenate(gap, BasicAutomata.makeChar(XAnalyzingSuggester.SEP_LABEL)); } for (ContextQuery query : queries) { a = Automaton.concatenate(Arrays.asList(query.toAutomaton(), gap, a)); } BasicOperations.determinize(a); return a; }
/** return a random NFA/DFA for testing */ public static Automaton randomAutomaton(Random random) { // get two random Automata from regexps Automaton a1 = new RegExp(AutomatonTestUtil.randomRegexp(random), RegExp.NONE).toAutomaton(); if (random.nextBoolean()) a1 = BasicOperations.complement(a1); Automaton a2 = new RegExp(AutomatonTestUtil.randomRegexp(random), RegExp.NONE).toAutomaton(); if (random.nextBoolean()) a2 = BasicOperations.complement(a2); // combine them in random ways switch (random.nextInt(4)) { case 0: return BasicOperations.concatenate(a1, a2); case 1: return BasicOperations.union(a1, a2); case 2: return BasicOperations.intersection(a1, a2); default: return BasicOperations.minus(a1, a2); } }
/**
 * Concatenates the given automata in argument order.
 *
 * @param as the automata to concatenate
 * @return the result of {@code BasicOperations.concatenate} over the arguments
 */
private Automaton join(Automaton... as) {
  final Automaton joined = BasicOperations.concatenate(Arrays.asList(as));
  return joined;
}
/** * Extracts all MultiTermQueries for {@code field}, and returns equivalent automata that will * match terms. */ static CharacterRunAutomaton[] extractAutomata(Query query, String field) { List<CharacterRunAutomaton> list = new ArrayList<>(); if (query instanceof BooleanQuery) { BooleanClause clauses[] = ((BooleanQuery) query).getClauses(); for (BooleanClause clause : clauses) { if (!clause.isProhibited()) { list.addAll(Arrays.asList(extractAutomata(clause.getQuery(), field))); } } } else if (query instanceof DisjunctionMaxQuery) { for (Query sub : ((DisjunctionMaxQuery) query).getDisjuncts()) { list.addAll(Arrays.asList(extractAutomata(sub, field))); } } else if (query instanceof SpanOrQuery) { for (Query sub : ((SpanOrQuery) query).getClauses()) { list.addAll(Arrays.asList(extractAutomata(sub, field))); } } else if (query instanceof SpanNearQuery) { for (Query sub : ((SpanNearQuery) query).getClauses()) { list.addAll(Arrays.asList(extractAutomata(sub, field))); } } else if (query instanceof SpanNotQuery) { list.addAll(Arrays.asList(extractAutomata(((SpanNotQuery) query).getInclude(), field))); } else if (query instanceof SpanPositionCheckQuery) { list.addAll( Arrays.asList(extractAutomata(((SpanPositionCheckQuery) query).getMatch(), field))); } else if (query instanceof SpanMultiTermQueryWrapper) { list.addAll( Arrays.asList( extractAutomata(((SpanMultiTermQueryWrapper<?>) query).getWrappedQuery(), field))); } else if (query instanceof AutomatonQuery) { final AutomatonQuery aq = (AutomatonQuery) query; if (aq.getField().equals(field)) { list.add( new CharacterRunAutomaton(aq.getAutomaton()) { @Override public String toString() { return aq.toString(); } }); } } else if (query instanceof PrefixQuery) { final PrefixQuery pq = (PrefixQuery) query; Term prefix = pq.getPrefix(); if (prefix.field().equals(field)) { list.add( new CharacterRunAutomaton( BasicOperations.concatenate( BasicAutomata.makeString(prefix.text()), BasicAutomata.makeAnyString())) { @Override 
public String toString() { return pq.toString(); } }); } } else if (query instanceof FuzzyQuery) { final FuzzyQuery fq = (FuzzyQuery) query; if (fq.getField().equals(field)) { String utf16 = fq.getTerm().text(); int termText[] = new int[utf16.codePointCount(0, utf16.length())]; for (int cp, i = 0, j = 0; i < utf16.length(); i += Character.charCount(cp)) { termText[j++] = cp = utf16.codePointAt(i); } int termLength = termText.length; int prefixLength = Math.min(fq.getPrefixLength(), termLength); String suffix = UnicodeUtil.newString(termText, prefixLength, termText.length - prefixLength); LevenshteinAutomata builder = new LevenshteinAutomata(suffix, fq.getTranspositions()); Automaton automaton = builder.toAutomaton(fq.getMaxEdits()); if (prefixLength > 0) { Automaton prefix = BasicAutomata.makeString(UnicodeUtil.newString(termText, 0, prefixLength)); automaton = BasicOperations.concatenate(prefix, automaton); } list.add( new CharacterRunAutomaton(automaton) { @Override public String toString() { return fq.toString(); } }); } } else if (query instanceof TermRangeQuery) { final TermRangeQuery tq = (TermRangeQuery) query; if (tq.getField().equals(field)) { final CharsRef lowerBound; if (tq.getLowerTerm() == null) { lowerBound = null; } else { lowerBound = new CharsRef(tq.getLowerTerm().utf8ToString()); } final CharsRef upperBound; if (tq.getUpperTerm() == null) { upperBound = null; } else { upperBound = new CharsRef(tq.getUpperTerm().utf8ToString()); } final boolean includeLower = tq.includesLower(); final boolean includeUpper = tq.includesUpper(); final CharsRef scratch = new CharsRef(); final Comparator<CharsRef> comparator = CharsRef.getUTF16SortedAsUTF8Comparator(); // this is *not* an automaton, but its very simple list.add( new CharacterRunAutomaton(BasicAutomata.makeEmpty()) { @Override public boolean run(char[] s, int offset, int length) { scratch.chars = s; scratch.offset = offset; scratch.length = length; if (lowerBound != null) { int cmp = 
comparator.compare(scratch, lowerBound); if (cmp < 0 || (!includeLower && cmp == 0)) { return false; } } if (upperBound != null) { int cmp = comparator.compare(scratch, upperBound); if (cmp > 0 || (!includeUpper && cmp == 0)) { return false; } } return true; } @Override public String toString() { return tq.toString(); } }); } } return list.toArray(new CharacterRunAutomaton[list.size()]); }