/**
 * Builds a two-term phrase query on {@code FIELD} ("w3" followed by "w2") with a slop of 2 and
 * hands it to {@code qtest} together with the document ids {0, 1, 3}.
 */
public void testP6() throws Exception {
  final int[] expectedDocs = {0, 1, 3};

  PhraseQuery sloppyPhrase = new PhraseQuery();
  sloppyPhrase.add(new Term(FIELD, "w3"));
  sloppyPhrase.add(new Term(FIELD, "w2"));
  sloppyPhrase.setSlop(2);

  qtest(sloppyPhrase, expectedDocs);
}
/**
 * Builds an exact-match query for {@code terms} on {@code field}.
 *
 * <p>Two or more terms produce a {@link PhraseQuery} with the terms added in array order;
 * otherwise a {@link TermQuery} on the first term is returned.
 *
 * @param field index field the terms belong to
 * @param terms terms to match; indexing {@code terms[0]} fails on an empty array
 * @param ignoreCase not consulted by this implementation
 * @return the phrase or term query described above
 */
protected Query eq(String field, String[] terms, boolean ignoreCase) {
  if (terms.length <= 1) {
    return new TermQuery(new Term(field, terms[0]));
  }

  PhraseQuery phrase = new PhraseQuery();
  for (int i = 0; i < terms.length; i++) {
    phrase.add(new Term(field, terms[i]));
  }
  return phrase;
}
@Override public Query construct(IndexEnvironment env, Map<String, String[]> querySource) throws ParseException { if ("1".equals(StringTools.arrayToString(querySource.get("queryversion"), ""))) { // preserving old stuff: // 1. all lucene special chars to be quoted // 2. if "wholewords" is "on" or "true" -> don't add *_*, otherwise add *_* BooleanQuery result = new BooleanQuery(); String wholeWords = StringTools.arrayToString(querySource.get("wholewords"), ""); boolean useWildcards = !(null != wholeWords && StringTools.stringToBoolean(wholeWords)); for (Map.Entry<String, String[]> queryItem : querySource.entrySet()) { String field = queryItem.getKey(); if (env.fields.containsKey(field) && queryItem.getValue().length > 0) { for (String value : queryItem.getValue()) { if (null != value) { value = value.trim().toLowerCase(); if (0 != value.length()) { if ("keywords".equals(field) && ACCESSION_REGEX.test(value)) { result.add(new TermQuery(new Term("accession", value)), BooleanClause.Occur.MUST); } else if ("keywords".equals(field) && '"' == value.charAt(0) && '"' == value.charAt(value.length() - 1)) { value = value.substring(1, value.length() - 1); PhraseQuery q = new PhraseQuery(); String[] tokens = value.split("\\s+"); for (String token : tokens) { q.add(new Term(field, token)); } result.add(q, BooleanClause.Occur.MUST); } else { String[] tokens = value.split("\\s+"); for (String token : tokens) { // we use wildcards for keywords depending on "wholewords" switch, // *ALWAYS* for other fields, *NEVER* for user id and accession or boolean // fields Query q = !"boolean".equals(env.fields.get(field).type) && !" userid accession ".contains(" " + field + " ") && (useWildcards || (!" keywords ".contains(" " + field + " "))) ? new WildcardQuery(new Term(field, "*" + token + "*")) : new TermQuery(new Term(field, token)); result.add(q, BooleanClause.Occur.MUST); } } } } } } } return result; } else { return super.construct(env, querySource); } }
/**
 * Combines two optional ({@code SHOULD}) phrase queries for "w1 w3", one on {@code FIELD} and
 * one on {@code ALTFIELD}, in a boolean query and passes it to {@code qtest} with the document
 * ids {1, 3}.
 */
public void testMultiFieldBQofPQ2() throws Exception {
  PhraseQuery onField = new PhraseQuery();
  onField.add(new Term(FIELD, "w1"));
  onField.add(new Term(FIELD, "w3"));

  PhraseQuery onAltField = new PhraseQuery();
  onAltField.add(new Term(ALTFIELD, "w1"));
  onAltField.add(new Term(ALTFIELD, "w3"));

  BooleanQuery eitherField = new BooleanQuery();
  eitherField.add(onField, BooleanClause.Occur.SHOULD);
  eitherField.add(onAltField, BooleanClause.Occur.SHOULD);

  final int[] expectedDocs = {1, 3};
  qtest(eitherField, expectedDocs);
}
/**
 * Parses {@code "test phrase"~2} against {@code STRING_FIELD_NAME} with a field boost of 5 and
 * checks the resulting structure: a {@link DisjunctionMaxQuery} with a single
 * {@link BoostQuery} (boost 5) wrapping a two-term {@link PhraseQuery} with slop 2.
 */
public void testToQueryPhraseQueryBoostAndSlop() throws IOException {
  assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0);

  QueryStringQueryBuilder builder =
      new QueryStringQueryBuilder("\"test phrase\"~2").field(STRING_FIELD_NAME, 5f);
  Query parsed = builder.toQuery(createShardContext());

  // outer layer: dismax with exactly one disjunct
  assertThat(parsed, instanceOf(DisjunctionMaxQuery.class));
  DisjunctionMaxQuery dismax = (DisjunctionMaxQuery) parsed;
  assertThat(dismax.getDisjuncts().size(), equalTo(1));

  // middle layer: the field boost
  Query onlyDisjunct = dismax.getDisjuncts().get(0);
  assertThat(onlyDisjunct, instanceOf(BoostQuery.class));
  BoostQuery boosted = (BoostQuery) onlyDisjunct;
  assertThat(boosted.getBoost(), equalTo(5f));

  // inner layer: the phrase itself
  Query inner = boosted.getQuery();
  assertThat(inner, instanceOf(PhraseQuery.class));
  PhraseQuery phrase = (PhraseQuery) inner;
  assertThat(phrase.getSlop(), Matchers.equalTo(2));
  assertThat(phrase.getTerms().length, equalTo(2));
}
/**
 * Parses the quoted string {@code "term1 term2"} with {@code STRING_FIELD_NAME} as default field
 * and a phrase slop of 3, then checks that the result is a {@link DisjunctionMaxQuery} holding a
 * single {@link PhraseQuery} over both terms, in order, with slop 3.
 */
public void testToQueryPhraseQuery() throws IOException {
  assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0);

  Query parsed =
      queryStringQuery("\"term1 term2\"")
          .defaultField(STRING_FIELD_NAME)
          .phraseSlop(3)
          .toQuery(createShardContext());

  assertThat(parsed, instanceOf(DisjunctionMaxQuery.class));
  DisjunctionMaxQuery dismax = (DisjunctionMaxQuery) parsed;
  assertThat(dismax.getDisjuncts().size(), equalTo(1));

  Query onlyDisjunct = dismax.getDisjuncts().get(0);
  assertThat(onlyDisjunct, instanceOf(PhraseQuery.class));
  PhraseQuery phrase = (PhraseQuery) onlyDisjunct;

  Term[] phraseTerms = phrase.getTerms();
  assertThat(phraseTerms.length, equalTo(2));
  assertThat(phraseTerms[0], equalTo(new Term(STRING_FIELD_NAME, "term1")));
  assertThat(phraseTerms[1], equalTo(new Term(STRING_FIELD_NAME, "term2")));
  assertThat(phrase.getSlop(), equalTo(3));
}
public int doSloppyPhrase(IndexSearcher s, int termsInIndex, int maxClauses, int iter) throws IOException { int ret = 0; for (int i = 0; i < iter; i++) { int nClauses = r.nextInt(maxClauses - 1) + 2; // min 2 clauses PhraseQuery q = new PhraseQuery(); for (int j = 0; j < nClauses; j++) { int tnum = r.nextInt(termsInIndex); q.add(new Term("f", Character.toString((char) (tnum + 'A'))), j); } q.setSlop(termsInIndex); // this could be random too CountingHitCollector hc = new CountingHitCollector(); s.search(q, hc); ret += hc.getSum(); } return ret; }
public void testSimilarity() throws Exception { RAMDirectory store = new RAMDirectory(); IndexWriter writer = new IndexWriter(store, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); writer.setSimilarity(new SimpleSimilarity()); Document d1 = new Document(); d1.add(new Field("field", "a c", Field.Store.YES, Field.Index.ANALYZED)); Document d2 = new Document(); d2.add(new Field("field", "a b c", Field.Store.YES, Field.Index.ANALYZED)); writer.addDocument(d1); writer.addDocument(d2); writer.optimize(); writer.close(); Searcher searcher = new IndexSearcher(store); searcher.setSimilarity(new SimpleSimilarity()); Term a = new Term("field", "a"); Term b = new Term("field", "b"); Term c = new Term("field", "c"); searcher.search( new TermQuery(b), new Collector() { private Scorer scorer; public void setScorer(Scorer scorer) throws IOException { this.scorer = scorer; } public final void collect(int doc) throws IOException { assertTrue(scorer.score() == 1.0f); } public void setNextReader(IndexReader reader, int docBase) {} public boolean acceptsDocsOutOfOrder() { return true; } }); BooleanQuery bq = new BooleanQuery(); bq.add(new TermQuery(a), BooleanClause.Occur.SHOULD); bq.add(new TermQuery(b), BooleanClause.Occur.SHOULD); // System.out.println(bq.toString("field")); searcher.search( bq, new Collector() { private int base = 0; private Scorer scorer; public void setScorer(Scorer scorer) throws IOException { this.scorer = scorer; } public final void collect(int doc) throws IOException { // System.out.println("Doc=" + doc + " score=" + score); assertTrue(scorer.score() == (float) doc + base + 1); } public void setNextReader(IndexReader reader, int docBase) { base = docBase; } public boolean acceptsDocsOutOfOrder() { return true; } }); PhraseQuery pq = new PhraseQuery(); pq.add(a); pq.add(c); // System.out.println(pq.toString("field")); searcher.search( pq, new Collector() { private Scorer scorer; public void setScorer(Scorer scorer) throws IOException { 
this.scorer = scorer; } public final void collect(int doc) throws IOException { // System.out.println("Doc=" + doc + " score=" + score); assertTrue(scorer.score() == 1.0f); } public void setNextReader(IndexReader reader, int docBase) {} public boolean acceptsDocsOutOfOrder() { return true; } }); pq.setSlop(2); // System.out.println(pq.toString("field")); searcher.search( pq, new Collector() { private Scorer scorer; public void setScorer(Scorer scorer) throws IOException { this.scorer = scorer; } public final void collect(int doc) throws IOException { // System.out.println("Doc=" + doc + " score=" + score); assertTrue(scorer.score() == 2.0f); } public void setNextReader(IndexReader reader, int docBase) {} public boolean acceptsDocsOutOfOrder() { return true; } }); }
// Test scores with one field with Term Freqs and one without, otherwise with equal content public void testBasic() throws Exception { Directory dir = newDirectory(); Analyzer analyzer = new MockAnalyzer(random()); IndexWriter writer = new IndexWriter( dir, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer) .setMaxBufferedDocs(2) .setSimilarity(new SimpleSimilarity()) .setMergePolicy(newLogMergePolicy(2))); StringBuilder sb = new StringBuilder(265); String term = "term"; for (int i = 0; i < 30; i++) { Document d = new Document(); sb.append(term).append(" "); String content = sb.toString(); Field noTf = newField("noTf", content + (i % 2 == 0 ? "" : " notf"), omitType); d.add(noTf); Field tf = newField("tf", content + (i % 2 == 0 ? " tf" : ""), normalType); d.add(tf); writer.addDocument(d); // System.out.println(d); } writer.forceMerge(1); // flush writer.close(); /* * Verify the index */ IndexReader reader = DirectoryReader.open(dir); IndexSearcher searcher = newSearcher(reader); searcher.setSimilarity(new SimpleSimilarity()); Term a = new Term("noTf", term); Term b = new Term("tf", term); Term c = new Term("noTf", "notf"); Term d = new Term("tf", "tf"); TermQuery q1 = new TermQuery(a); TermQuery q2 = new TermQuery(b); TermQuery q3 = new TermQuery(c); TermQuery q4 = new TermQuery(d); PhraseQuery pq = new PhraseQuery(); pq.add(a); pq.add(c); try { searcher.search(pq, 10); fail("did not hit expected exception"); } catch (Exception e) { Throwable cause = e; // If the searcher uses an executor service, the IAE is wrapped into other exceptions while (cause.getCause() != null) { cause = cause.getCause(); } assertTrue("Expected an IAE, got " + cause, cause instanceof IllegalStateException); } searcher.search( q1, new CountingHitCollector() { private Scorer scorer; @Override public final void setScorer(Scorer scorer) { this.scorer = scorer; } @Override public final void collect(int doc) throws IOException { // System.out.println("Q1: Doc=" + doc + " score=" + score); 
float score = scorer.score(); assertTrue("got score=" + score, score == 1.0f); super.collect(doc); } }); // System.out.println(CountingHitCollector.getCount()); searcher.search( q2, new CountingHitCollector() { private Scorer scorer; @Override public final void setScorer(Scorer scorer) { this.scorer = scorer; } @Override public final void collect(int doc) throws IOException { // System.out.println("Q2: Doc=" + doc + " score=" + score); float score = scorer.score(); assertEquals(1.0f + doc, score, 0.00001f); super.collect(doc); } }); // System.out.println(CountingHitCollector.getCount()); searcher.search( q3, new CountingHitCollector() { private Scorer scorer; @Override public final void setScorer(Scorer scorer) { this.scorer = scorer; } @Override public final void collect(int doc) throws IOException { // System.out.println("Q1: Doc=" + doc + " score=" + score); float score = scorer.score(); assertTrue(score == 1.0f); assertFalse(doc % 2 == 0); super.collect(doc); } }); // System.out.println(CountingHitCollector.getCount()); searcher.search( q4, new CountingHitCollector() { private Scorer scorer; @Override public final void setScorer(Scorer scorer) { this.scorer = scorer; } @Override public final void collect(int doc) throws IOException { float score = scorer.score(); // System.out.println("Q1: Doc=" + doc + " score=" + score); assertTrue(score == 1.0f); assertTrue(doc % 2 == 0); super.collect(doc); } }); // System.out.println(CountingHitCollector.getCount()); BooleanQuery bq = new BooleanQuery(); bq.add(q1, Occur.MUST); bq.add(q4, Occur.MUST); searcher.search( bq, new CountingHitCollector() { @Override public final void collect(int doc) throws IOException { // System.out.println("BQ: Doc=" + doc + " score=" + score); super.collect(doc); } }); assertEquals(15, CountingHitCollector.getCount()); reader.close(); dir.close(); }
/**
 * Builds an exact (default slop) two-term phrase query on {@code FIELD}, "w1" followed by "w3",
 * and hands it to {@code qtest} together with the document ids {1, 3}.
 */
public void testP2() throws Exception {
  final int[] expectedDocs = {1, 3};

  PhraseQuery exactPhrase = new PhraseQuery();
  exactPhrase.add(new Term(FIELD, "w1"));
  exactPhrase.add(new Term(FIELD, "w3"));

  qtest(exactPhrase, expectedDocs);
}