/** * This is best effort only: the PhraseQuery may contain multiple terms at the same position * (think synonyms) or gaps (think stopwords) and it's in this case impossible to translate it * into a correct ElasticsearchQuery. */ private static JsonObject convertPhraseQuery(PhraseQuery query) { Term[] terms = query.getTerms(); if (terms.length == 0) { throw LOG.cannotQueryOnEmptyPhraseQuery(); } String field = terms[0].field(); // phrase queries are only supporting one field StringBuilder phrase = new StringBuilder(); for (Term term : terms) { phrase.append(" ").append(term.text()); } JsonObject phraseQuery = JsonBuilder.object() .add( "match_phrase", JsonBuilder.object() .add( field, JsonBuilder.object() .addProperty("query", phrase.toString().trim()) .addProperty("slop", query.getSlop()) .addProperty("boost", query.getBoost()))) .build(); return wrapQueryForNestedIfRequired(field, phraseQuery); }
/**
 * Verifies that, with position increments enabled, stopwords removed by the analyzer leave
 * gaps in the term positions of the resulting PhraseQuery.
 */
public void testPositionIncrement() throws Exception {
  // Remember the global default so it can be restored after the test.
  boolean dflt = StopFilter.getEnablePositionIncrementsDefault();
  StopFilter.setEnablePositionIncrementsDefault(true);
  try {
    QueryParserWrapper qp =
        new QueryParserWrapper("a", new StopAnalyzer(new String[] {"the", "in", "are", "this"}));
    qp.setEnablePositionIncrements(true);
    String qtxt = "\"the words in poisitions pos02578 are stopped in this phrasequery\"";
    // 0 2 5 7 8
    // Expected positions of the surviving terms; the gaps mark removed stopwords.
    int expectedPositions[] = {1, 3, 4, 6, 9};
    PhraseQuery pq = (PhraseQuery) qp.parse(qtxt);
    // System.out.println("Query text: "+qtxt);
    // System.out.println("Result: "+pq);
    Term t[] = pq.getTerms();
    int pos[] = pq.getPositions();
    for (int i = 0; i < t.length; i++) {
      // System.out.println(i+". "+t[i]+" pos: "+pos[i]);
      assertEquals(
          "term " + i + " = " + t[i] + " has wrong term-position!", expectedPositions[i], pos[i]);
    }
  } finally {
    // Restore the global default regardless of the test outcome.
    StopFilter.setEnablePositionIncrementsDefault(dflt);
  }
}
/**
 * Creates a phrase-prefix query: the analyzed query text is matched as a phrase whose last
 * position is expanded as a prefix (up to {@code maxExpansions} terms).
 *
 * @param field the field to query
 * @param queryText raw query text, analyzed with this builder's analyzer
 * @param phraseSlop slop applied to the resulting phrase
 * @param maxExpansions maximum number of terms the trailing prefix may expand to
 * @return the prefix query, or the analyzed query unchanged when analysis produced something
 *     other than a term, phrase, or multi-phrase query
 */
public Query createPhrasePrefixQuery(
    String field, String queryText, int phraseSlop, int maxExpansions) {
  final Query query =
      createFieldQuery(getAnalyzer(), Occur.MUST, field, queryText, true, phraseSlop);
  final MultiPhrasePrefixQuery prefixQuery = new MultiPhrasePrefixQuery();
  prefixQuery.setMaxExpansions(maxExpansions);
  prefixQuery.setSlop(phraseSlop);
  if (query instanceof PhraseQuery) {
    // Copy terms with their original positions so stopword gaps are preserved.
    PhraseQuery pq = (PhraseQuery) query;
    Term[] terms = pq.getTerms();
    int[] positions = pq.getPositions();
    for (int i = 0; i < terms.length; i++) {
      prefixQuery.add(new Term[] {terms[i]}, positions[i]);
    }
    return prefixQuery;
  } else if (query instanceof MultiPhraseQuery) {
    // Several terms may share one position (e.g. synonyms); copy the term arrays as-is.
    MultiPhraseQuery pq = (MultiPhraseQuery) query;
    Term[][] terms = pq.getTermArrays();
    int[] positions = pq.getPositions();
    for (int i = 0; i < terms.length; i++) {
      prefixQuery.add(terms[i], positions[i]);
    }
    return prefixQuery;
  } else if (query instanceof TermQuery) {
    prefixQuery.add(((TermQuery) query).getTerm());
    return prefixQuery;
  }
  // Analysis produced something else (e.g. a BooleanQuery); return it unmodified.
  return query;
}
/**
 * Construct a span phrase query: the phrase terms (wrapped as wildcard spans when they contain
 * wildcard characters) must appear, in order, between the scope's begin and end tags.
 *
 * <p>TODO While we have code set up to check for wildcard characters in the query, they have
 * already been stripped so this will never happen. This is one thing that would need to be
 * addressed.
 *
 * @param scope the field/scope the span query runs against
 * @param q the query to convert; must be a {@link PhraseQuery}
 * @return a {@code SpanBetweenQuery} of [beginTag, endTag, orderedPhraseSpan]
 */
private SpanQuery buildPhraseQuery(String scope, Query q) {
  PhraseQuery pq = (PhraseQuery) q;
  Term[] terms = pq.getTerms();
  ArrayList<SpanQuery> spans = new ArrayList<SpanQuery>();
  // Begin/end tags delimiting the region the phrase must fall inside.
  spans.add(
      new SpanTermQuery(new Term(scope, getTag(scope, terms[0].field(), TAG_TYPE.BEG_TAG))));
  spans.add(
      new SpanTermQuery(new Term(scope, getTag(scope, terms[0].field(), TAG_TYPE.END_TAG))));
  ArrayList<SpanQuery> phraseSpan = new ArrayList<SpanQuery>();
  for (int i = 0; i < terms.length; i++) {
    String term = terms[i].text();
    // The original code had two branches with identical bodies (trailing "*" vs. containing
    // a wildcard character); they are merged into a single condition here.
    if (term.endsWith("*") || term.contains(singleCs) || term.contains(multiCs)) {
      WildcardQuery wildcard = new WildcardQuery(new Term(scope, term));
      phraseSpan.add(new SpanMultiTermQueryWrapper<WildcardQuery>(wildcard));
    } else {
      phraseSpan.add(new SpanTermQuery(new Term(scope, term)));
    }
  }
  spans.add(
      new SpanNearQuery(
          phraseSpan.toArray(new SpanQuery[phraseSpan.size()]), phraseSpan.size(), true));
  return new SpanBetweenQuery(spans.toArray(new SpanQuery[spans.size()]));
}
/**
 * Builds a PhraseQuery over {@code field} from the given term bytes, preserving their order.
 */
protected PhraseQuery toPhraseQuery(List<BytesRef> bytesRefs, String field) {
  PhraseQuery result = new PhraseQuery();
  for (int i = 0; i < bytesRefs.size(); i++) {
    result.add(new Term(field, bytesRefs.get(i)));
  }
  return result;
}
/** Demonstrates SpanNearQuery slop behavior and compares it with a sloppy PhraseQuery. */
public void testSpanNearQuery() throws Exception {
  SpanQuery[] quick_brown_dog = new SpanQuery[] {quick, brown, dog};
  // In-order span, slop 0: terms must be adjacent — no match in the fixture.
  SpanNearQuery snq = new SpanNearQuery(quick_brown_dog, 0, true); // #1
  assertNoMatches(snq);
  dumpSpans(snq);
  snq = new SpanNearQuery(quick_brown_dog, 4, true); // #2
  assertNoMatches(snq);
  dumpSpans(snq);
  // Slop 5 is just enough for the three in-order terms to match.
  snq = new SpanNearQuery(quick_brown_dog, 5, true); // #3
  assertOnlyBrownFox(snq);
  dumpSpans(snq);
  // interesting - even a sloppy phrase query would require
  // more slop to match
  snq = new SpanNearQuery(new SpanQuery[] {lazy, fox}, 3, false); // #4
  assertOnlyBrownFox(snq);
  dumpSpans(snq);
  PhraseQuery pq = new PhraseQuery(); // #5
  pq.add(new Term("f", "lazy")); // #5
  pq.add(new Term("f", "fox")); // #5
  pq.setSlop(4); // #5
  assertNoMatches(pq);
  // The equivalent PhraseQuery needs slop 5, more than the span query's 3 above.
  pq.setSlop(5); // #6
  assertOnlyBrownFox(pq); // #6
}
/** Runs a few representative term and phrase queries against the searcher as a sanity check. */
protected void smokeTestSearcher(IndexSearcher s) throws Exception {
  runQuery(s, new TermQuery(new Term("body", "united")));
  runQuery(s, new TermQuery(new Term("titleTokenized", "states")));
  PhraseQuery phrase = new PhraseQuery();
  phrase.add(new Term("body", "united"));
  phrase.add(new Term("body", "states"));
  runQuery(s, phrase);
}
/**
 * Builds a PhraseQuery on field "f" from a whitespace-separated list of terms.
 *
 * <p>Fix: {@code split(" +")} on a string with leading spaces yields an empty first token,
 * which used to be added as an empty term; the input is now trimmed and empty tokens skipped.
 *
 * @param terms space-separated phrase terms
 * @return the phrase query over the non-empty tokens, in order
 */
private static PhraseQuery makePhraseQuery(String terms) {
  PhraseQuery query = new PhraseQuery();
  String[] t = terms.trim().split(" +");
  for (int i = 0; i < t.length; i++) {
    if (!t[i].isEmpty()) {
      query.add(new Term("f", t[i]));
    }
  }
  return query;
}
/**
 * Convenience factory: builds a PhraseQuery over {@code field} from the given term texts,
 * with the supplied boost and slop applied.
 */
protected Query pq(float boost, int slop, String field, String... texts) {
  PhraseQuery phraseQuery = new PhraseQuery();
  phraseQuery.setBoost(boost);
  phraseQuery.setSlop(slop);
  for (int i = 0; i < texts.length; i++) {
    phraseQuery.add(new Term(field, texts[i]));
  }
  return phraseQuery;
}
public void testCJKPhrase() throws Exception { // individual CJK chars as terms StandardAnalyzer analyzer = new StandardAnalyzer(TEST_VERSION_CURRENT); PhraseQuery expected = new PhraseQuery(); expected.add(new Term("field", "ä¸")); expected.add(new Term("field", "国")); assertEquals(expected, getQuery("\"ä¸å›½\"", analyzer)); }
/**
 * Builds an "exact" query for the given terms: a PhraseQuery when more than one term is
 * supplied, otherwise a plain TermQuery on the single term.
 *
 * <p>NOTE(review): the {@code ignoreCase} parameter is currently unused — terms are added
 * verbatim with no case folding; confirm whether lowercasing was intended here.
 *
 * @param field the field to query
 * @param terms one or more term texts; must be non-empty (terms[0] is read unconditionally)
 * @param ignoreCase unused (see note above)
 */
protected Query eq(String field, String[] terms, boolean ignoreCase) {
  if (terms.length > 1) {
    PhraseQuery pq = new PhraseQuery();
    for (String s : terms) {
      pq.add(new Term(field, s));
    }
    return pq;
  }
  return new TermQuery(new Term(field, terms[0]));
}
public void testAutoGeneratePhraseQueriesOn() throws Exception { // individual CJK chars as terms StandardAnalyzer analyzer = new StandardAnalyzer(TEST_VERSION_CURRENT); PhraseQuery expected = new PhraseQuery(); expected.add(new Term("field", "ä¸")); expected.add(new Term("field", "国")); QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, "field", analyzer); parser.setAutoGeneratePhraseQueries(true); assertEquals(expected, parser.parse("ä¸å›½")); }
/**
 * Builds a Lucene query from the raw request parameters. Only requests with
 * "queryversion" == "1" are handled here (legacy behavior); everything else is delegated to
 * the superclass.
 *
 * <p>Legacy rules: accession-like keywords match the "accession" field exactly; quoted
 * keyword values become phrase queries; all other values are tokenized and each token ANDed,
 * optionally wrapped in wildcards depending on the "wholewords" switch and the field type.
 */
@Override
public Query construct(IndexEnvironment env, Map<String, String[]> querySource)
    throws ParseException {
  if ("1".equals(StringTools.arrayToString(querySource.get("queryversion"), ""))) {
    // preserving old stuff:
    // 1. all lucene special chars to be quoted
    // 2. if "wholewords" is "on" or "true" -> don't add *_*, otherwise add *_*
    BooleanQuery result = new BooleanQuery();
    String wholeWords = StringTools.arrayToString(querySource.get("wholewords"), "");
    boolean useWildcards = !(null != wholeWords && StringTools.stringToBoolean(wholeWords));
    for (Map.Entry<String, String[]> queryItem : querySource.entrySet()) {
      String field = queryItem.getKey();
      // Only process parameters that correspond to known index fields.
      if (env.fields.containsKey(field) && queryItem.getValue().length > 0) {
        for (String value : queryItem.getValue()) {
          if (null != value) {
            value = value.trim().toLowerCase();
            if (0 != value.length()) {
              if ("keywords".equals(field) && ACCESSION_REGEX.test(value)) {
                // Keyword that looks like an accession: exact match on the accession field.
                result.add(new TermQuery(new Term("accession", value)), BooleanClause.Occur.MUST);
              } else if ("keywords".equals(field)
                  && '"' == value.charAt(0)
                  && '"' == value.charAt(value.length() - 1)) {
                // Quoted keywords: strip the quotes and match tokens as a phrase.
                value = value.substring(1, value.length() - 1);
                PhraseQuery q = new PhraseQuery();
                String[] tokens = value.split("\\s+");
                for (String token : tokens) {
                  q.add(new Term(field, token));
                }
                result.add(q, BooleanClause.Occur.MUST);
              } else {
                String[] tokens = value.split("\\s+");
                for (String token : tokens) {
                  // we use wildcards for keywords depending on "wholewords" switch,
                  // *ALWAYS* for other fields, *NEVER* for user id and accession or boolean
                  // fields
                  Query q =
                      !"boolean".equals(env.fields.get(field).type)
                              && !" userid accession ".contains(" " + field + " ")
                              && (useWildcards || (!" keywords ".contains(" " + field + " ")))
                          ? new WildcardQuery(new Term(field, "*" + token + "*"))
                          : new TermQuery(new Term(field, token));
                  result.add(q, BooleanClause.Occur.MUST);
                }
              }
            }
          }
        }
      }
    }
    return result;
  } else {
    return super.construct(env, querySource);
  }
}
/**
 * Indexes one large document with many repetitions of the phrase terms and verifies that the
 * fast vector highlighter highlights every occurrence of the phrase "org apache lucene"
 * (no unhighlighted occurrence remains in the returned fragments).
 */
@Test
public void testLotsOfPhrases() throws IOException {
  Directory dir = newDirectory();
  IndexWriter writer =
      new IndexWriter(
          dir,
          newIndexWriterConfig(
              TEST_VERSION_CURRENT,
              new MockAnalyzer(
                  random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET)));
  // Term vectors with positions and offsets are required by the vector highlighter.
  FieldType type = new FieldType(TextField.TYPE_STORED);
  type.setStoreTermVectorOffsets(true);
  type.setStoreTermVectorPositions(true);
  type.setStoreTermVectors(true);
  type.freeze();
  String[] terms = {"org", "apache", "lucene"};
  int iters = atLeast(1000);
  StringBuilder builder = new StringBuilder();
  // Build a long random sequence of the phrase terms, occasionally interleaving a filler word.
  for (int i = 0; i < iters; i++) {
    builder.append(terms[random().nextInt(terms.length)]).append(" ");
    if (random().nextInt(6) == 3) {
      builder.append("elasticsearch").append(" ");
    }
  }
  Document doc = new Document();
  Field field = new Field("field", builder.toString(), type);
  doc.add(field);
  writer.addDocument(doc);
  PhraseQuery query = new PhraseQuery();
  query.add(new Term("field", "org"));
  query.add(new Term("field", "apache"));
  query.add(new Term("field", "lucene"));
  XFastVectorHighlighter highlighter = new XFastVectorHighlighter();
  IndexReader reader = DirectoryReader.open(writer, true);
  IndexSearcher searcher = newSearcher(reader);
  TopDocs hits = searcher.search(query, 10);
  assertEquals(1, hits.totalHits);
  XFieldQuery fieldQuery = highlighter.getFieldQuery(query, reader);
  String[] bestFragments =
      highlighter.getBestFragments(fieldQuery, reader, hits.scoreDocs[0].doc, "field", 1000, 1);
  for (int i = 0; i < bestFragments.length; i++) {
    // Replace each highlighted phrase; any remaining raw phrase means a missed highlight.
    String result = bestFragments[i].replaceAll("<b>org apache lucene</b>", "FOOBAR");
    assertFalse(result.contains("org apache lucene"));
  }
  reader.close();
  writer.close();
  dir.close();
}
/**
 * Builds a sloppy PhraseQuery clause for the given search term.
 *
 * @param s the search term; its (lowercased, trimmed) value is tokenized via getPhrase()
 * @param slop positional slop allowed between the phrase tokens
 * @return a MUST clause when the operator is "=", a MUST_NOT clause when it is "-", or
 *     {@code null} for any other operator — callers must handle the null case
 */
BooleanClause partialMatch(SearchTerm s, int slop) {
  String[] phrase = getPhrase(s.getValue().toLowerCase().trim());
  PhraseQuery query = new PhraseQuery();
  BooleanClause partialMatchClause = null;
  query.setSlop(slop);
  for (int i = 0; i < phrase.length; i++) {
    query.add(new Term(s.getFieldName(), phrase[i].toLowerCase().trim()));
  }
  if (s.getOperator().equalsIgnoreCase("=")) {
    partialMatchClause = new BooleanClause(query, BooleanClause.Occur.MUST);
  } else if (s.getOperator().equalsIgnoreCase("-")) {
    partialMatchClause = new BooleanClause(query, BooleanClause.Occur.MUST_NOT);
  }
  return partialMatchClause;
}
/**
 * Indexes the single given document in a fresh directory, runs the phrase query with the
 * given slop against it, and asserts the expected hit count.
 *
 * @param doc the one document to index
 * @param query the phrase query under test; its slop is overwritten with {@code slop}
 * @param slop slop to apply to the query
 * @param expectedNumResults expected total hit count
 * @return the max score of the result set
 */
private float checkPhraseQuery(Document doc, PhraseQuery query, int slop, int expectedNumResults)
    throws Exception {
  query.setSlop(slop);
  Directory ramDir = newDirectory();
  RandomIndexWriter writer =
      new RandomIndexWriter(random, ramDir, new MockAnalyzer(MockTokenizer.WHITESPACE, false));
  writer.addDocument(doc);
  IndexReader reader = writer.getReader();
  IndexSearcher searcher = newSearcher(reader);
  TopDocs td = searcher.search(query, null, 10);
  // System.out.println("slop: "+slop+" query: "+query+" doc: "+doc+" Expecting number of hits:
  // "+expectedNumResults+" maxScore="+td.getMaxScore());
  assertEquals(
      "slop: " + slop + " query: " + query + " doc: " + doc + " Wrong number of hits",
      expectedNumResults,
      td.totalHits);
  // QueryUtils.check(query,searcher);
  writer.close();
  searcher.close();
  reader.close();
  ramDir.close();
  return td.getMaxScore();
}
/**
 * Checks that a query string like {@code "test phrase"~2} on a boosted field parses into a
 * DisjunctionMaxQuery containing a BoostQuery (boost 5) that wraps a two-term PhraseQuery
 * with slop 2.
 */
public void testToQueryPhraseQueryBoostAndSlop() throws IOException {
  assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0);
  QueryStringQueryBuilder queryStringQueryBuilder =
      new QueryStringQueryBuilder("\"test phrase\"~2").field(STRING_FIELD_NAME, 5f);
  Query query = queryStringQueryBuilder.toQuery(createShardContext());
  assertThat(query, instanceOf(DisjunctionMaxQuery.class));
  DisjunctionMaxQuery disjunctionMaxQuery = (DisjunctionMaxQuery) query;
  assertThat(disjunctionMaxQuery.getDisjuncts().size(), equalTo(1));
  assertThat(disjunctionMaxQuery.getDisjuncts().get(0), instanceOf(BoostQuery.class));
  BoostQuery boostQuery = (BoostQuery) disjunctionMaxQuery.getDisjuncts().get(0);
  assertThat(boostQuery.getBoost(), equalTo(5f));
  assertThat(boostQuery.getQuery(), instanceOf(PhraseQuery.class));
  PhraseQuery phraseQuery = (PhraseQuery) boostQuery.getQuery();
  assertThat(phraseQuery.getSlop(), Matchers.equalTo(2));
  assertThat(phraseQuery.getTerms().length, equalTo(2));
}
/**
 * Checks that a quoted query string with phraseSlop(3) parses into a DisjunctionMaxQuery
 * whose single disjunct is a two-term PhraseQuery carrying the configured slop.
 */
public void testToQueryPhraseQuery() throws IOException {
  assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0);
  Query query =
      queryStringQuery("\"term1 term2\"")
          .defaultField(STRING_FIELD_NAME)
          .phraseSlop(3)
          .toQuery(createShardContext());
  assertThat(query, instanceOf(DisjunctionMaxQuery.class));
  DisjunctionMaxQuery disjunctionMaxQuery = (DisjunctionMaxQuery) query;
  assertThat(disjunctionMaxQuery.getDisjuncts().size(), equalTo(1));
  assertThat(disjunctionMaxQuery.getDisjuncts().get(0), instanceOf(PhraseQuery.class));
  PhraseQuery phraseQuery = (PhraseQuery) disjunctionMaxQuery.getDisjuncts().get(0);
  assertThat(phraseQuery.getTerms().length, equalTo(2));
  assertThat(phraseQuery.getTerms()[0], equalTo(new Term(STRING_FIELD_NAME, "term1")));
  assertThat(phraseQuery.getTerms()[1], equalTo(new Term(STRING_FIELD_NAME, "term2")));
  assertThat(phraseQuery.getSlop(), equalTo(3));
}
/**
 * Looks up taxa by an exact phrase match of {@code fieldValue} in {@code fieldName1},
 * returning at most 3 populated results.
 *
 * <p>Fix: the result array used to be sized {@code docs.totalHits} while only
 * {@code min(totalHits, maxHits)} slots were filled, so callers received trailing null
 * entries whenever more than {@code maxHits} documents matched. The array is now sized by
 * the actual number of results returned.
 *
 * @param fieldName1 index field to search
 * @param fieldValue value to match; blank values short-circuit to an empty result
 * @return an array of fully-populated taxa (never containing nulls), possibly empty
 */
protected Taxon[] findTaxon(String fieldName1, String fieldValue) throws IOException {
  Taxon[] terms = new TaxonImpl[0];
  if (StringUtils.isNotBlank(fieldValue) && indexSearcher != null) {
    PhraseQuery query = new PhraseQuery();
    query.add(new Term(fieldName1, fieldValue));
    int maxHits = 3;
    TopDocs docs = indexSearcher.search(query, maxHits);
    if (docs.totalHits > 0) {
      // Only min(totalHits, maxHits) docs were actually retrieved; size the array to match.
      int numResults = Math.min(docs.totalHits, maxHits);
      terms = new TaxonImpl[numResults];
      for (int i = 0; i < numResults; i++) {
        ScoreDoc scoreDoc = docs.scoreDocs[i];
        Document foundDoc = indexSearcher.doc(scoreDoc.doc);
        Taxon term = new TaxonImpl();
        Fieldable idField = foundDoc.getFieldable(FIELD_ID);
        if (idField != null) {
          term.setExternalId(idField.stringValue());
        }
        Fieldable rankPathField = foundDoc.getFieldable(FIELD_RANK_PATH);
        if (rankPathField != null) {
          term.setPath(rankPathField.stringValue());
        }
        Fieldable rankPathIdsField = foundDoc.getFieldable(FIELD_RANK_PATH_IDS);
        if (rankPathIdsField != null) {
          term.setPathIds(rankPathIdsField.stringValue());
        }
        Fieldable rankPathNamesField = foundDoc.getFieldable(FIELD_RANK_PATH_NAMES);
        if (rankPathNamesField != null) {
          term.setPathNames(rankPathNamesField.stringValue());
        }
        Fieldable commonNamesFields = foundDoc.getFieldable(FIELD_COMMON_NAMES);
        if (commonNamesFields != null) {
          term.setCommonNames(commonNamesFields.stringValue());
        }
        Fieldable fieldName = foundDoc.getFieldable(FIELD_RECOMMENDED_NAME);
        if (fieldName != null) {
          term.setName(fieldName.stringValue());
        }
        terms[i] = term;
      }
    }
  }
  return terms;
}
/**
 * Checks that a PhraseCondition produces a Lucene PhraseQuery carrying the condition's
 * values as terms, the configured slop, and the configured boost.
 */
@Test
public void testPhraseQuery() {
  Map<String, ColumnMapper> map = new HashMap<>();
  map.put("name", new ColumnMapperBoolean());
  Schema mappers = new Schema(map, null, EnglishAnalyzer.class.getName());
  List<String> values = new ArrayList<>();
  values.add("hola");
  values.add("adios");
  // boost 0.5, field "name", two phrase values, slop 2
  PhraseCondition phraseCondition = new PhraseCondition(0.5f, "name", values, 2);
  Query query = phraseCondition.query(mappers);
  Assert.assertNotNull(query);
  Assert.assertEquals(org.apache.lucene.search.PhraseQuery.class, query.getClass());
  org.apache.lucene.search.PhraseQuery luceneQuery = (org.apache.lucene.search.PhraseQuery) query;
  Assert.assertEquals(values.size(), luceneQuery.getTerms().length);
  Assert.assertEquals(2, luceneQuery.getSlop());
  Assert.assertEquals(0.5f, query.getBoost(), 0);
}
/**
 * Verifies addIndexes/forceMerge behavior when the writer has pending deletes: updates create
 * 10 pending deletes, a phrase-query delete removes one more doc, then the auxiliary index is
 * added and everything merged; final doc counts are asserted.
 */
public void testWithPendingDeletes3() throws IOException {
  // main directory
  Directory dir = newDirectory();
  // auxiliary directory
  Directory aux = newDirectory();
  setUpDirs(dir, aux);
  IndexWriter writer =
      newWriter(
          dir,
          newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
              .setOpenMode(OpenMode.APPEND));
  // Adds 10 docs, then replaces them with another 10
  // docs, so 10 pending deletes:
  for (int i = 0; i < 20; i++) {
    Document doc = new Document();
    doc.add(newStringField("id", "" + (i % 10), Field.Store.NO));
    doc.add(newTextField("content", "bbb " + i, Field.Store.NO));
    writer.updateDocument(new Term("id", "" + (i % 10)), doc);
  }
  // Deletes one of the 10 added docs, leaving 9:
  // (the phrase "bbb 14" matches exactly one of the replacement docs)
  PhraseQuery q = new PhraseQuery();
  q.add(new Term("content", "bbb"));
  q.add(new Term("content", "14"));
  writer.deleteDocuments(q);
  writer.addIndexes(aux);
  writer.forceMerge(1);
  writer.commit();
  verifyNumDocs(dir, 1039);
  verifyTermDocs(dir, new Term("content", "aaa"), 1030);
  verifyTermDocs(dir, new Term("content", "bbb"), 9);
  writer.close();
  dir.close();
  aux.close();
}
/**
 * Opens the index at {@code indexDir}, runs a BooleanQuery OR-ing two phrase queries over the
 * "description" field, and prints the formatted top results to the console.
 *
 * @param indexDir filesystem path of the Lucene index directory
 */
@Override
public void execute(String indexDir) {
  ConsoleUtilities.printHeader();
  try {
    Directory indexDirectory = FSDirectory.open(new File(indexDir));
    IndexReader indexReader = DirectoryReader.open(indexDirectory);
    IndexSearcher searcher = new IndexSearcher(indexReader);
    System.out.println("[BooleanQueryOperation.execute]");
    // TODO: Implement and execute BooleanQuery
    // First phrase: "legendary sacred realm" in the description field.
    PhraseQuery phraseQuery1 = new PhraseQuery();
    phraseQuery1.add(new Term("description", "legendary"));
    phraseQuery1.add(new Term("description", "sacred"));
    phraseQuery1.add(new Term("description", "realm"));
    // cursed land
    PhraseQuery phraseQuery2 = new PhraseQuery();
    phraseQuery2.add(new Term("description", "cursed"));
    phraseQuery2.add(new Term("description", "land"));
    // Either phrase may match (SHOULD/SHOULD == OR).
    BooleanQuery booleanQuery = new BooleanQuery();
    booleanQuery.add(new BooleanClause(phraseQuery1, Occur.SHOULD));
    booleanQuery.add(new BooleanClause(phraseQuery2, Occur.SHOULD));
    int topHitNum = 30;
    TopDocs topDocs = searcher.search(booleanQuery, topHitNum);
    for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
      Document doc = searcher.doc(scoreDoc.doc);
      String path = doc.get("path");
      Game game = gameBuilder.build(new File(path));
      System.out.println(ResultFormatter.format(game, scoreDoc.score));
    }
    // BooleanClause.Occur.SHOULD means that the clause is optional,
    // whereas BooleanClause.Occur.Must means that the clause is required.
    // However, if a boolean query only has optional clauses, at least one
    // clause must match for a document to appear in the results.
    // For better control over what documents match a BooleanQuery, there is
    // also a minimumShouldMatch parameter which lets you tell Lucene that at
    // least minimumShouldMatch BooleanClause.Occur.SHOULD clauses must match
    // for a document to appear in the results.
  } catch (IOException e) {
    // TODO Auto-generated catch block
    e.printStackTrace();
  }
}
/**
 * Verifies that the AQP parser, with position increments enabled, preserves the position
 * gaps left by stopword removal in the resulting PhraseQuery.
 */
public void testPositionIncrement() throws Exception {
  AqpQueryParser qp = getParser();
  qp.setAnalyzer(
      new StopAnalyzer(
          TEST_VERSION_CURRENT,
          StopFilter.makeStopSet(TEST_VERSION_CURRENT, "the", "in", "are", "this")));
  qp.setEnablePositionIncrements(true);
  String qtxt = "\"the words in poisitions pos02578 are stopped in this phrasequery\"";
  // 0 2 5 7 8
  // Expected positions of the surviving terms; the gaps mark removed stopwords.
  int expectedPositions[] = {1, 3, 4, 6, 9};
  PhraseQuery pq = (PhraseQuery) qp.parse(qtxt, "a");
  // System.out.println("Query text: "+qtxt);
  // System.out.println("Result: "+pq);
  Term t[] = pq.getTerms();
  int pos[] = pq.getPositions();
  for (int i = 0; i < t.length; i++) {
    // System.out.println(i+". "+t[i]+" pos: "+pos[i]);
    assertEquals(
        "term " + i + " = " + t[i] + " has wrong term-position!", expectedPositions[i], pos[i]);
  }
}
/**
 * ORs the given search terms together: multi-word values become sloppy (slop 3) PhraseQueries,
 * single-word values become TermQueries, all combined with SHOULD.
 *
 * <p>Fix: removed the unused local {@code BooleanClause partialMatchClause} that was declared
 * and never read.
 *
 * @param orSearchTerms terms to combine; values are lowercased and trimmed
 * @return the disjunction of all term/phrase clauses
 */
BooleanQuery orPhraseQuery(List<SearchTerm> orSearchTerms) {
  BooleanQuery orTerms = new BooleanQuery();
  orTerms.setMaxClauseCount(dvnMaxClauseCount);
  for (Iterator it = orSearchTerms.iterator(); it.hasNext(); ) {
    SearchTerm elem = (SearchTerm) it.next();
    String[] phrase = getPhrase(elem.getValue().toLowerCase().trim());
    if (phrase.length > 1) {
      // Multi-word value: match as a phrase allowing up to 3 positions of slop.
      PhraseQuery phraseQuery = new PhraseQuery();
      phraseQuery.setSlop(3);
      for (int i = 0; i < phrase.length; i++) {
        phraseQuery.add(new Term(elem.getFieldName(), phrase[i].toLowerCase().trim()));
      }
      orTerms.add(phraseQuery, BooleanClause.Occur.SHOULD);
    } else {
      Term t = new Term(elem.getFieldName(), elem.getValue().toLowerCase().trim());
      TermQuery orQuery = new TermQuery(t);
      orTerms.add(orQuery, BooleanClause.Occur.SHOULD);
    }
  }
  return orTerms;
}
/**
 * ANDs the given search terms together: "&lt;"/"&gt;" operators become open-ended range
 * queries, the special field "any" rebuilds the whole query via {@code buildAnyQuery},
 * multi-word values are delegated to {@code partialMatch} (slop 3), and single words become
 * MUST/MUST_NOT TermQueries depending on the operator.
 *
 * <p>Fix: removed a dead {@code PhraseQuery} that was constructed and given
 * {@code setSlop(0)} but never used — the actual clause comes from
 * {@code partialMatch(elem, 3)}.
 *
 * @param andSearchTerms terms to combine; values are lowercased and trimmed
 * @return the conjunction of all clauses (or the result of buildAnyQuery for "any" terms)
 */
BooleanQuery andSearchTermClause(List<SearchTerm> andSearchTerms) {
  BooleanQuery andTerms = new BooleanQuery();
  andTerms.setMaxClauseCount(dvnMaxClauseCount);
  Query rQuery = null;
  for (Iterator it = andSearchTerms.iterator(); it.hasNext(); ) {
    SearchTerm elem = (SearchTerm) it.next();
    if (elem.getOperator().equals("<")) {
      // "<": open-ended range up to the value (inclusive).
      Term end = new Term(elem.getFieldName(), elem.getValue().toLowerCase().trim());
      Term begin = null;
      rQuery = new RangeQuery(begin, end, true);
      andTerms.add(rQuery, BooleanClause.Occur.MUST);
    } else if (elem.getOperator().equals(">")) {
      // ">": open-ended range from the value (inclusive).
      Term end = null;
      Term begin = new Term(elem.getFieldName(), elem.getValue().toLowerCase().trim());
      rQuery = new RangeQuery(begin, end, true);
      andTerms.add(rQuery, BooleanClause.Occur.MUST);
    } else if (elem.getFieldName().equalsIgnoreCase("any")) {
      // NOTE: this replaces (not extends) any clauses accumulated so far.
      andTerms = buildAnyQuery(elem.getValue().toLowerCase().trim());
    } else {
      String[] phrase = getPhrase(elem.getValue().toLowerCase().trim());
      if (phrase.length > 1) {
        // Multi-word value: delegate to partialMatch, which builds the sloppy phrase clause.
        andTerms.add(partialMatch(elem, 3));
      } else {
        Term t = new Term(elem.getFieldName(), elem.getValue().toLowerCase().trim());
        TermQuery andQuery = new TermQuery(t);
        if (elem.getOperator().equals("=")) {
          andTerms.add(andQuery, BooleanClause.Occur.MUST);
        } else if (elem.getOperator().equalsIgnoreCase("-")) {
          andTerms.add(andQuery, BooleanClause.Occur.MUST_NOT);
        }
      }
    }
  }
  return andTerms;
}
/**
 * Builds a Lucene query for a single search filter. String values are tokenized on whitespace
 * and combined per the filter's text-search option (at-least-one / all / any-word, or EXACT
 * which yields a phrase query); Date and Number values become exact term matches; a null
 * value produces a term-range query from the filter's start/end bounds. When the filter
 * includes attachments, each clause is duplicated against the attachment field.
 */
private Query createQuery(SearchEngineFilter filter) {
  BooleanQuery fieldQuery = new BooleanQuery();
  String key = filter.getKey();
  String attachmentKey = key + IIndexerDAO.ATTACHMENT_FIELD_SUFFIX;
  Object value = filter.getValue();
  if (null != value) {
    if (value instanceof String) {
      SearchEngineFilter.TextSearchOption option = filter.getTextSearchOption();
      if (null == option) {
        // Default to the least restrictive matching mode.
        option = SearchEngineFilter.TextSearchOption.AT_LEAST_ONE_WORD;
      }
      String stringValue = value.toString();
      String[] values = stringValue.split("\\s+");
      if (!option.equals(SearchEngineFilter.TextSearchOption.EXACT)) {
        // Map the text-search option to a boolean occurrence for each token.
        BooleanClause.Occur bc = BooleanClause.Occur.SHOULD;
        if (option.equals(SearchEngineFilter.TextSearchOption.ALL_WORDS)) {
          bc = BooleanClause.Occur.MUST;
        } else if (option.equals(SearchEngineFilter.TextSearchOption.ANY_WORD)) {
          bc = BooleanClause.Occur.MUST_NOT;
        }
        for (int i = 0; i < values.length; i++) {
          TermQuery term = new TermQuery(new Term(key, values[i].toLowerCase()));
          // NOTE: search lower case....
          if (filter.isIncludeAttachments()) {
            // Token may match either the main field or the attachment field.
            BooleanQuery compositeQuery = new BooleanQuery();
            compositeQuery.add(term, BooleanClause.Occur.SHOULD);
            TermQuery termAttachment =
                new TermQuery(new Term(attachmentKey, values[i].toLowerCase()));
            compositeQuery.add(termAttachment, BooleanClause.Occur.SHOULD);
            fieldQuery.add(compositeQuery, bc);
          } else {
            fieldQuery.add(term, bc);
          }
        }
      } else {
        // EXACT: all tokens must appear as a phrase, in order.
        PhraseQuery phraseQuery = new PhraseQuery();
        for (int i = 0; i < values.length; i++) {
          // NOTE: search lower case....
          phraseQuery.add(new Term(key, values[i].toLowerCase()));
        }
        if (filter.isIncludeAttachments()) {
          fieldQuery.add(phraseQuery, BooleanClause.Occur.SHOULD);
          PhraseQuery phraseQuery2 = new PhraseQuery();
          for (int i = 0; i < values.length; i++) {
            // NOTE: search lower case....
            phraseQuery2.add(new Term(attachmentKey, values[i].toLowerCase()));
          }
          fieldQuery.add(phraseQuery2, BooleanClause.Occur.SHOULD);
        } else {
          return phraseQuery;
        }
      }
    } else if (value instanceof Date) {
      // Dates are indexed at minute resolution.
      String toString =
          DateTools.timeToString(((Date) value).getTime(), DateTools.Resolution.MINUTE);
      TermQuery term = new TermQuery(new Term(filter.getKey(), toString));
      fieldQuery.add(term, BooleanClause.Occur.MUST);
    } else if (value instanceof Number) {
      TermQuery term = new TermQuery(new Term(filter.getKey(), value.toString()));
      fieldQuery.add(term, BooleanClause.Occur.MUST);
    }
  } else {
    if (filter.getStart() instanceof Number || filter.getEnd() instanceof Number) {
      // .............................. TODO
    } else {
      // No value: build an inclusive term-range query from the filter's start/end bounds.
      String start = null;
      String end = null;
      if (filter.getStart() instanceof Date || filter.getEnd() instanceof Date) {
        if (null != filter.getStart()) {
          start =
              DateTools.timeToString(
                  ((Date) filter.getStart()).getTime(), DateTools.Resolution.MINUTE);
        }
        if (null != filter.getEnd()) {
          end =
              DateTools.timeToString(
                  ((Date) filter.getEnd()).getTime(), DateTools.Resolution.MINUTE);
        }
      } else {
        start = (null != filter.getStart()) ? filter.getStart().toString().toLowerCase() : null;
        end = (null != filter.getEnd()) ? filter.getEnd().toString().toLowerCase() : null;
      }
      BytesRef byteStart = (null != start) ? new BytesRef(start.getBytes()) : null;
      BytesRef byteEnd = (null != end) ? new BytesRef(end.getBytes()) : null;
      TermRangeQuery range = new TermRangeQuery(filter.getKey(), byteStart, byteEnd, true, true);
      fieldQuery.add(range, BooleanClause.Occur.MUST);
    }
  }
  return fieldQuery;
}
/**
 * Analyzes the phrase sentence for the given field and builds the most specific query that
 * fits: a TermQuery for one term, a boolean OR for multiple terms at a single position, a
 * PhraseQuery for one term per position, or a MultiPhraseQuery when any position carries
 * several terms (e.g. synonyms).
 */
public Query createQuery(FieldContext fieldContext) {
  final Query perFieldQuery;
  final String fieldName = fieldContext.getField();
  /*
   * Store terms per position and detect if for a given position more than one term is present
   */
  TokenStream stream = null;
  boolean isMultiPhrase = false;
  Map<Integer, List<Term>> termsPerPosition = new HashMap<Integer, List<Term>>();
  final String sentence = phraseContext.getSentence();
  try {
    Reader reader = new StringReader(sentence);
    stream = queryContext.getQueryAnalyzer().tokenStream(fieldName, reader);
    CharTermAttribute termAttribute = stream.addAttribute(CharTermAttribute.class);
    PositionIncrementAttribute positionAttribute =
        stream.addAttribute(PositionIncrementAttribute.class);
    stream.reset();
    int position = -1; // start at -1 since we apply at least one increment
    List<Term> termsAtSamePosition = null;
    while (stream.incrementToken()) {
      int positionIncrement = 1;
      if (positionAttribute != null) {
        positionIncrement = positionAttribute.getPositionIncrement();
      }
      if (positionIncrement > 0) {
        // New position reached; increments > 1 indicate gaps (e.g. removed stopwords).
        position += positionIncrement;
        termsAtSamePosition = termsPerPosition.get(position);
      }
      if (termsAtSamePosition == null) {
        termsAtSamePosition = new ArrayList<Term>();
        termsPerPosition.put(position, termsAtSamePosition);
      }
      String termString = new String(termAttribute.buffer(), 0, termAttribute.length());
      termsAtSamePosition.add(new Term(fieldName, termString));
      if (termsAtSamePosition.size() > 1) {
        // A zero increment put two terms at one position — a MultiPhraseQuery is required.
        isMultiPhrase = true;
      }
    }
  } catch (IOException e) {
    throw new AssertionFailure("IOException while reading a string. Doh!", e);
  } finally {
    if (stream != null) {
      try {
        stream.end();
        stream.close();
      } catch (IOException e) {
        throw new AssertionFailure("IOException while reading a string. Doh!", e);
      }
    }
  }
  /*
   * Create the appropriate query depending on the conditions
   * note that a MultiPhraseQuery is needed if several terms share the same position
   * as it will do a OR and not a AND like PhraseQuery
   */
  final int size = termsPerPosition.size();
  if (size == 0) {
    // Analysis removed everything (e.g. all stopwords): match nothing.
    perFieldQuery = new BooleanQuery.Builder().build();
  } else if (size <= 1) {
    // Single position: TermQuery for one term, OR of TermQueries for synonyms.
    final List<Term> terms = termsPerPosition.values().iterator().next();
    if (terms.size() == 1) {
      perFieldQuery = new TermQuery(terms.get(0));
    } else {
      BooleanQuery.Builder booleanQueryBuilder = new BooleanQuery.Builder();
      for (Term term : terms) {
        booleanQueryBuilder.add(new TermQuery(term), BooleanClause.Occur.SHOULD);
      }
      perFieldQuery = booleanQueryBuilder.build();
    }
  } else {
    if (isMultiPhrase) {
      MultiPhraseQuery query = new MultiPhraseQuery();
      query.setSlop(phraseContext.getSlop());
      for (Map.Entry<Integer, List<Term>> entry : termsPerPosition.entrySet()) {
        final List<Term> value = entry.getValue();
        query.add(value.toArray(new Term[value.size()]), entry.getKey());
      }
      perFieldQuery = query;
    } else {
      PhraseQuery query = new PhraseQuery();
      query.setSlop(phraseContext.getSlop());
      for (Map.Entry<Integer, List<Term>> entry : termsPerPosition.entrySet()) {
        final List<Term> value = entry.getValue();
        query.add(value.get(0), entry.getKey());
      }
      perFieldQuery = query;
    }
  }
  return fieldContext.getFieldCustomizer().setWrappedQuery(perFieldQuery).createQuery();
}
// Test scores with one field with Term Freqs and one without, otherwise with equal content
/**
 * Indexes 30 docs with identical content in a tf-enabled field ("tf") and an omit-tf field
 * ("noTf"), then verifies: a PhraseQuery mixing field types fails with IllegalStateException;
 * omit-tf fields score a constant 1.0; tf fields score with term frequency; and a MUST/MUST
 * BooleanQuery over both fields hits the expected 15 docs.
 */
public void testBasic() throws Exception {
  Directory dir = newDirectory();
  Analyzer analyzer = new MockAnalyzer(random());
  IndexWriter writer =
      new IndexWriter(
          dir,
          newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)
              .setMaxBufferedDocs(2)
              .setSimilarity(new SimpleSimilarity())
              .setMergePolicy(newLogMergePolicy(2)));
  StringBuilder sb = new StringBuilder(265);
  String term = "term";
  // Doc i contains the term (i+1) times; "notf"/"tf" markers alternate between fields.
  for (int i = 0; i < 30; i++) {
    Document d = new Document();
    sb.append(term).append(" ");
    String content = sb.toString();
    Field noTf = newField("noTf", content + (i % 2 == 0 ? "" : " notf"), omitType);
    d.add(noTf);
    Field tf = newField("tf", content + (i % 2 == 0 ? " tf" : ""), normalType);
    d.add(tf);
    writer.addDocument(d);
    // System.out.println(d);
  }
  writer.forceMerge(1);
  // flush
  writer.close();
  /*
   * Verify the index
   */
  IndexReader reader = DirectoryReader.open(dir);
  IndexSearcher searcher = newSearcher(reader);
  searcher.setSimilarity(new SimpleSimilarity());
  Term a = new Term("noTf", term);
  Term b = new Term("tf", term);
  Term c = new Term("noTf", "notf");
  Term d = new Term("tf", "tf");
  TermQuery q1 = new TermQuery(a);
  TermQuery q2 = new TermQuery(b);
  TermQuery q3 = new TermQuery(c);
  TermQuery q4 = new TermQuery(d);
  // A phrase query over an omit-tf field must fail: positions are not indexed there.
  PhraseQuery pq = new PhraseQuery();
  pq.add(a);
  pq.add(c);
  try {
    searcher.search(pq, 10);
    fail("did not hit expected exception");
  } catch (Exception e) {
    Throwable cause = e;
    // If the searcher uses an executor service, the IAE is wrapped into other exceptions
    while (cause.getCause() != null) {
      cause = cause.getCause();
    }
    assertTrue("Expected an IAE, got " + cause, cause instanceof IllegalStateException);
  }
  // Omit-tf field: every hit scores exactly 1.0 regardless of term frequency.
  searcher.search(
      q1,
      new CountingHitCollector() {
        private Scorer scorer;

        @Override
        public final void setScorer(Scorer scorer) {
          this.scorer = scorer;
        }

        @Override
        public final void collect(int doc) throws IOException {
          // System.out.println("Q1: Doc=" + doc + " score=" + score);
          float score = scorer.score();
          assertTrue("got score=" + score, score == 1.0f);
          super.collect(doc);
        }
      });
  // System.out.println(CountingHitCollector.getCount());
  // Normal field: score reflects term frequency, which grows with the doc id.
  searcher.search(
      q2,
      new CountingHitCollector() {
        private Scorer scorer;

        @Override
        public final void setScorer(Scorer scorer) {
          this.scorer = scorer;
        }

        @Override
        public final void collect(int doc) throws IOException {
          // System.out.println("Q2: Doc=" + doc + " score=" + score);
          float score = scorer.score();
          assertEquals(1.0f + doc, score, 0.00001f);
          super.collect(doc);
        }
      });
  // System.out.println(CountingHitCollector.getCount());
  // "notf" marker only appears in odd-numbered docs.
  searcher.search(
      q3,
      new CountingHitCollector() {
        private Scorer scorer;

        @Override
        public final void setScorer(Scorer scorer) {
          this.scorer = scorer;
        }

        @Override
        public final void collect(int doc) throws IOException {
          // System.out.println("Q1: Doc=" + doc + " score=" + score);
          float score = scorer.score();
          assertTrue(score == 1.0f);
          assertFalse(doc % 2 == 0);
          super.collect(doc);
        }
      });
  // System.out.println(CountingHitCollector.getCount());
  // "tf" marker only appears in even-numbered docs.
  searcher.search(
      q4,
      new CountingHitCollector() {
        private Scorer scorer;

        @Override
        public final void setScorer(Scorer scorer) {
          this.scorer = scorer;
        }

        @Override
        public final void collect(int doc) throws IOException {
          float score = scorer.score();
          // System.out.println("Q1: Doc=" + doc + " score=" + score);
          assertTrue(score == 1.0f);
          assertTrue(doc % 2 == 0);
          super.collect(doc);
        }
      });
  // System.out.println(CountingHitCollector.getCount());
  // Conjunction over both fields: the 15 even-numbered docs (those carrying "tf") match.
  BooleanQuery bq = new BooleanQuery();
  bq.add(q1, Occur.MUST);
  bq.add(q4, Occur.MUST);
  searcher.search(
      bq,
      new CountingHitCollector() {
        @Override
        public final void collect(int doc) throws IOException {
          // System.out.println("BQ: Doc=" + doc + " score=" + score);
          super.collect(doc);
        }
      });
  assertEquals(15, CountingHitCollector.getCount());
  reader.close();
  dir.close();
}