@Override public void setUp() throws Exception { super.setUp(); numIterations = atLeast(50); dir = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter( random(), dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random(), MockTokenizer.KEYWORD, false)) .setMaxBufferedDocs(_TestUtil.nextInt(random(), 50, 1000))); Document doc = new Document(); Field field = newStringField("field", "", Field.Store.YES); doc.add(field); terms = new TreeSet<BytesRef>(); int num = atLeast(200); for (int i = 0; i < num; i++) { String s = _TestUtil.randomUnicodeString(random()); field.setStringValue(s); terms.add(new BytesRef(s)); writer.addDocument(doc); } termsAutomaton = BasicAutomata.makeStringUnion(terms); reader = writer.getReader(); searcher = newSearcher(reader); writer.close(); }
/** tests intersect: TODO start at a random term! */ public void testIntersect() throws Exception { for (int i = 0; i < numIterations; i++) { String reg = AutomatonTestUtil.randomRegexp(random()); Automaton automaton = new RegExp(reg, RegExp.NONE).toAutomaton(); CompiledAutomaton ca = new CompiledAutomaton(automaton, SpecialOperations.isFinite(automaton), false); TermsEnum te = MultiFields.getTerms(reader, "field").intersect(ca, null); Automaton expected = BasicOperations.intersection(termsAutomaton, automaton); TreeSet<BytesRef> found = new TreeSet<BytesRef>(); while (te.next() != null) { found.add(BytesRef.deepCopyOf(te.term())); } Automaton actual = BasicAutomata.makeStringUnion(found); assertTrue(BasicOperations.sameLanguage(expected, actual)); } }
/** tests a pre-intersected automaton against the original */ public void testFiniteVersusInfinite() throws Exception { for (int i = 0; i < numIterations; i++) { String reg = AutomatonTestUtil.randomRegexp(random()); Automaton automaton = new RegExp(reg, RegExp.NONE).toAutomaton(); final List<BytesRef> matchedTerms = new ArrayList<BytesRef>(); for (BytesRef t : terms) { if (BasicOperations.run(automaton, t.utf8ToString())) { matchedTerms.add(t); } } Automaton alternate = BasicAutomata.makeStringUnion(matchedTerms); // System.out.println("match " + matchedTerms.size() + " " + alternate.getNumberOfStates() + " // states, sigma=" + alternate.getStartPoints().length); // AutomatonTestUtil.minimizeSimple(alternate); // System.out.println("minmize done"); AutomatonQuery a1 = new AutomatonQuery(new Term("field", ""), automaton); AutomatonQuery a2 = new AutomatonQuery(new Term("field", ""), alternate); CheckHits.checkEqual( a1, searcher.search(a1, 25).scoreDocs, searcher.search(a2, 25).scoreDocs); } }