@ImmutableCopy
@NotNull
@ThreadSafe
public List<ResultDocument> list(@NotNull Set<String> uids)
    throws SearchException, CheckedOutOfMemoryError {
  // Construct a filter that only matches documents with the given UIDs
  TermsFilter uidFilter = new TermsFilter();
  String fieldName = Fields.UID.key();
  for (String uid : uids)
    uidFilter.addTerm(new Term(fieldName, uid));

  Query query = new MatchAllDocsQuery();

  readLock.lock();
  try {
    checkIndexesExist();

    // Perform search; might throw OutOfMemoryError
    ScoreDoc[] scoreDocs = luceneSearcher.search(query, uidFilter, MAX_RESULTS).scoreDocs;

    // Create result documents
    ResultDocument[] results = new ResultDocument[scoreDocs.length];
    for (int i = 0; i < results.length; i++) {
      Document doc = luceneSearcher.doc(scoreDocs[i].doc);
      float score = scoreDocs[i].score;
      // Look up the sub-index a hit came from via its global document number
      LuceneIndex index = indexes.get(luceneSearcher.subSearcher(scoreDocs[i].doc));
      IndexingConfig config = index.getConfig();
      results[i] =
          new ResultDocument(doc, score, query, true, config, fileFactory, outlookMailFactory);
    }

    // Sort results by title
    Arrays.sort(
        results,
        new Comparator<ResultDocument>() {
          public int compare(ResultDocument o1, ResultDocument o2) {
            return AlphanumComparator.ignoreCaseInstance.compare(o1.getTitle(), o2.getTitle());
          }
        });

    return Arrays.asList(results);
  } catch (IllegalArgumentException e) {
    throw wrapEmptyIndexException(e);
  } catch (IOException e) {
    throw new SearchException(e.getMessage()); // TODO i18n
  } catch (OutOfMemoryError e) {
    throw new CheckedOutOfMemoryError(e);
  } finally {
    readLock.unlock();
  }
}
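// A minimal standalone sketch of the pattern above, for illustration only: the terms
// added to a TermsFilter are OR'ed together, so pairing the filter with a
// MatchAllDocsQuery returns exactly the documents whose "uid" value is in the given
// set. The IndexSearcher, the field name "uid", and the result cap are assumptions,
// not taken from the surrounding codebase.
import java.io.IOException;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermsFilter;

class UidLookupSketch {
  static ScoreDoc[] lookupByUids(IndexSearcher searcher, Iterable<String> uids)
      throws IOException {
    TermsFilter uidFilter = new TermsFilter();
    for (String uid : uids)
      uidFilter.addTerm(new Term("uid", uid)); // OR semantics across added terms
    // The query matches everything; the filter narrows it to the UID set
    return searcher.search(new MatchAllDocsQuery(), uidFilter, 100).scoreDocs;
  }
}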
/**
 * Skips elements whose value repeats in the given field; only the first element with
 * each value is included in the results.
 *
 * <p><b>Note:</b> For performance reasons, call this method last when constructing a
 * query. When called, it projects the query and creates a filter that eliminates the
 * duplicates.
 *
 * @param field the field whose values must be unique across the results
 * @return this query, with the deduplication filter applied
 */
public LuceneQuery<T> skipSame(String field) {
  String idPropertyName =
      getSession().getSessionFactory().getClassMetadata(getType()).getIdentifierPropertyName();
  List<Object> documents = listProjection(idPropertyName, field);
  Set<Object> uniqueFieldValues = new HashSet<Object>();
  TermsFilter termsFilter = new TermsFilter();
  for (Object document : documents) {
    Object[] row = (Object[]) document;
    // Keep only the first document seen for each distinct field value
    if (uniqueFieldValues.add(row[1])) {
      termsFilter.addTerm(new Term(idPropertyName, row[0].toString()));
    }
  }
  buildQuery();
  fullTextQuery.setFilter(termsFilter);
  return this;
}
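// Hypothetical usage of skipSame(), assuming a fluent builder API around this class;
// the entity "Book", the builder method keyword(), and the terminal method list() are
// illustrative names, not confirmed parts of the API. The point is the ordering: build
// the rest of the query first, then call skipSame() last, so that the deduplication
// projection runs against the final query.
LuceneQuery<Book> query =
    new LuceneQuery<Book>(session, Book.class) // assumed constructor
        .keyword("title", "lucene")            // assumed builder method
        .skipSame("isbn");                     // keep only the first result per distinct ISBN
List<Book> distinctByIsbn = query.list();      // assumed terminal method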
protected Filter createBaseFilter(QueryProcessor q, String suffix) {
  if (q.wantPlaces() && q.wantTimes()) {
    TermsFilter timeFilter = new TermsFilter();
    if (q.isTime_key())
      timeFilter.addTerm(new Term(Config.S_HAS_TIME_POINTS_KEY + suffix, "true"));
    else
      timeFilter.addTerm(
          new Term(Config.S_HAS_ANY_TIME_POINT + suffix, "true")); // TODO: opt for durations instead
    TermsFilter filterGeo = new TermsFilter();
    filterGeo.addTerm(new Term(Config.S_GEO_INDEXED + suffix, "true"));
    Filter[] filterChain = new Filter[] {filterGeo, timeFilter};
    int[] actionType = new int[] {SerialChainFilter.AND, SerialChainFilter.AND};
    return new SerialChainFilter(filterChain, actionType);
  } else if (q.wantPlaces()) {
    TermsFilter filter = new TermsFilter();
    filter.addTerm(new Term(Config.S_GEO_INDEXED + suffix, "true"));
    return filter;
  } else if (q.wantTimes()) {
    if (q.isTime_key()) {
      TermsFilter filter = new TermsFilter();
      filter.addTerm(new Term(Config.S_HAS_YYYY_KEY + suffix, "true"));
      return filter;
    } else {
      TermsFilter filter = new TermsFilter();
      filter.addTerm(
          new Term(Config.S_HAS_ANY_TIME_POINT + suffix, "true")); // TODO: opt for durations instead
      return filter;
    }
  } else {
    return null;
  }
}
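// Hedged sketch of how the base filter might be applied, reusing the search API seen
// elsewhere in this listing (LgteIndexSearcherWrapper.search(LgteQuery, Filter)); the
// suffix "_doc" and the surrounding variables are illustrative assumptions. Since both
// actions are SerialChainFilter.AND, the chain intersects the geo and time bitsets, so
// only documents that are both geo-indexed and time-annotated survive.
Filter baseFilter = createBaseFilter(queryProcessor, "_doc"); // assumed call site
if (baseFilter != null) {
  LgteHits hits = searcher.search(lgteQuery, baseFilter);
}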
/**
 * For the given query, returns the requested page of results. This method must not be
 * called after {@link #shutdown()} has been called; otherwise an IOException is thrown.
 */
@NotNull
@ThreadSafe
public ResultPage search(@NotNull WebQuery webQuery)
    throws IOException, SearchException, CheckedOutOfMemoryError {
  Util.checkNotNull(webQuery);
  if (ioException != null) throw ioException;

  List<Filter> filters = new ArrayList<Filter>(3);

  // Add size filter to filter chain
  if (webQuery.minSize != null || webQuery.maxSize != null) {
    filters.add(
        NumericRangeFilter.newLongRange(
            Fields.SIZE.key(), webQuery.minSize, webQuery.maxSize, true, true));
  }

  // Add type filter to filter chain
  if (webQuery.parsers != null) {
    TermsFilter typeFilter = new TermsFilter();
    String fieldName = Fields.PARSER.key();
    typeFilter.addTerm(new Term(fieldName, Fields.EMAIL_PARSER));
    for (Parser parser : webQuery.parsers) {
      String parserName = parser.getClass().getSimpleName();
      typeFilter.addTerm(new Term(fieldName, parserName));
    }
    filters.add(typeFilter);
  }

  // Add location filter to filter chain
  if (webQuery.indexes != null) {
    Filter[] indexFilters = new Filter[webQuery.indexes.size()];
    int i = 0;
    for (LuceneIndex index : webQuery.indexes) {
      Path path = index.getRootFolder().getPath();
      String uid = index.getDocumentType().createUniqueId(path);
      Term prefix = new Term(Fields.UID.key(), uid + "/");
      indexFilters[i++] = new PrefixFilter(prefix);
    }
    filters.add(new ChainedFilter(indexFilters, ChainedFilter.OR));
  }

  // Construct filter chain
  Filter filter =
      filters.size() == 0
          ? null
          : new ChainedFilter(filters.toArray(new Filter[filters.size()]), ChainedFilter.AND);

  // Create query
  QueryWrapper queryWrapper = createQuery(webQuery.query);
  Query query = queryWrapper.query;
  boolean isPhraseQuery = queryWrapper.isPhraseQuery;

  readLock.lock();
  try {
    checkIndexesExist();

    // Perform search; might throw OutOfMemoryError
    int maxResults = (webQuery.pageIndex + 1) * PAGE_SIZE;
    TopDocs topDocs = luceneSearcher.search(query, filter, maxResults);
    ScoreDoc[] scoreDocs = topDocs.scoreDocs;

    // Compute start and end indices of the returned page
    int start;
    int end = scoreDocs.length;
    if (end <= PAGE_SIZE) {
      start = 0;
    } else {
      int r = end % PAGE_SIZE;
      start = end - (r == 0 ? PAGE_SIZE : r);
    }

    // Create and fill list of result documents to return
    ResultDocument[] results = new ResultDocument[end - start];
    for (int i = start; i < end; i++) {
      Document doc = luceneSearcher.doc(scoreDocs[i].doc);
      float score = scoreDocs[i].score;
      LuceneIndex index = indexes.get(luceneSearcher.subSearcher(scoreDocs[i].doc));
      IndexingConfig config = index.getConfig();
      results[i - start] =
          new ResultDocument(
              doc, score, query, isPhraseQuery, config, fileFactory, outlookMailFactory);
    }

    int hitCount = topDocs.totalHits;
    int newPageIndex = start / PAGE_SIZE;
    int pageCount = (int) Math.ceil((float) hitCount / PAGE_SIZE);

    return new ResultPage(Arrays.asList(results), newPageIndex, pageCount, hitCount);
  } catch (IllegalArgumentException e) {
    throw wrapEmptyIndexException(e);
  } catch (OutOfMemoryError e) {
    throw new CheckedOutOfMemoryError(e);
  } finally {
    readLock.unlock();
  }
}
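// A minimal, self-contained sketch of the filter-chain construction above, assuming
// Lucene 3.x contrib classes (TermsFilter from contrib queries, ChainedFilter from
// contrib misc). Field names and bounds are illustrative, not taken from the app.
import org.apache.lucene.index.Term;
import org.apache.lucene.misc.ChainedFilter;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.NumericRangeFilter;
import org.apache.lucene.search.PrefixFilter;
import org.apache.lucene.search.TermsFilter;

class FilterChainSketch {
  static Filter sizeTypeAndLocation() {
    // Size must lie in [1 KiB, 1 MiB]
    Filter size = NumericRangeFilter.newLongRange("size", 1024L, 1048576L, true, true);
    // Parser must be one of the listed types (terms are OR'ed within a TermsFilter)
    TermsFilter type = new TermsFilter();
    type.addTerm(new Term("parser", "PdfParser"));
    type.addTerm(new Term("parser", "TextParser"));
    // UID must start with the index's prefix
    Filter location = new PrefixFilter(new Term("uid", "file:/docs/"));
    // A document must pass all three sub-filters
    return new ChainedFilter(new Filter[] {size, type, location}, ChainedFilter.AND);
  }
}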
public void testTimeFilter() throws IOException, DocumentException, ParseException {
  LgteIndexWriter writer = new LgteIndexWriter(path + "Contents", true);

  LgteDocumentWrapper doc1 = new LgteDocumentWrapper();
  doc1.indexText(Globals.DOCUMENT_ID_FIELD, "1");
  doc1.indexText("contents", "word1 word2 word3");
  doc1.indexStringNoStore(Config.S_HAS_TIMEXES, "true");

  LgteDocumentWrapper doc2 = new LgteDocumentWrapper();
  doc2.indexText(Globals.DOCUMENT_ID_FIELD, "2");
  doc2.indexText("contents", "word2 word3 word4 word55 word96 word2 word54 word33 wordss");

  writer.addDocument(doc1);
  writer.addDocument(doc2);
  writer.close();

  LgteIndexWriter writer2 = new LgteIndexWriter(path + "Sentences", true);

  LgteDocumentWrapper sentence0 = new LgteDocumentWrapper();
  sentence0.indexText(Globals.DOCUMENT_ID_FIELD, "1_0");
  sentence0.indexText("doc_id", "1");
  sentence0.indexText("sentences", "word1 word3");

  LgteDocumentWrapper sentence1 = new LgteDocumentWrapper();
  sentence1.indexText(Globals.DOCUMENT_ID_FIELD, "1_1");
  sentence1.indexText("doc_id", "1");
  sentence1.indexText("sentences", "word1 word2 word3");

  LgteDocumentWrapper sentence2 = new LgteDocumentWrapper();
  sentence2.indexStringNoStore(Config.S_HAS_TIMEXES + "_sentences", "true");
  sentence2.indexText(Globals.DOCUMENT_ID_FIELD, "2_1");
  sentence2.indexText("doc_id", "2");
  sentence2.indexText("sentences", "word2 word3 word4 word55 word96 word2 word54 word33 wordss");

  writer2.addDocument(sentence0);
  writer2.addDocument(sentence1);
  writer2.addDocument(sentence2);
  writer2.close();

  IndexReader readerContents = LgteIndexManager.openReader(path + "Contents", Model.OkapiBM25Model);
  IndexReader readerSentences = LgteIndexManager.openReader(path + "Sentences", Model.OkapiBM25Model);

  Map<String, IndexReader> readers = new HashMap<String, IndexReader>();
  readers.put("contents", readerContents);
  readers.put("sentences", readerSentences);
  readers.put(Config.S_HAS_TIMEXES, readerContents);
  readers.put(Config.S_HAS_TIMEXES + "_sentences", readerSentences);
  readers.put("doc_id", readerSentences);
  readers.put("id", readerSentences);

  LgteIsolatedIndexReader lgteIsolatedIndexReader = new LgteIsolatedIndexReader(readers);
  lgteIsolatedIndexReader.addTreeMapping(readerContents, readerSentences, "doc_id");

  LgteIndexSearcherWrapper searcher =
      new LgteIndexSearcherWrapper(Model.OkapiBM25Model, lgteIsolatedIndexReader);

  QueryConfiguration queryConfiguration = new QueryConfiguration();
  queryConfiguration.setProperty("bm25.idf.policy", "floor_epslon");
  queryConfiguration.setProperty("bm25.idf.epslon", "0.01");
  queryConfiguration.setProperty("bm25.k1", "2.0");
  queryConfiguration.setProperty("bm25.b", "0.75");
  queryConfiguration.setProperty("index.tree", "true");

  // Filter on the document-level marker field, expressed as a parsed query
  QueryFilter queryFilter =
      new QueryFilter(
          org.apache.lucene.queryParser.QueryParser.parse(
              "true", Config.S_HAS_TIMEXES, new LgteNothingAnalyzer()));
  LgteQuery lgteQuery =
      LgteQueryParser.parseQuery(
          "sentences:word2", new LgteNothingAnalyzer(), searcher, queryConfiguration);
  LgteHits lgteHits = searcher.search(lgteQuery, queryFilter);
  assertEquals(lgteHits.length(), 1);
  assertEquals(lgteHits.id(0), 1);

  // The same marker expressed as a TermsFilter yields the same single hit
  TermsFilter termsFilter = new TermsFilter();
  termsFilter.addTerm(new Term(Config.S_HAS_TIMEXES, "true"));
  lgteHits = searcher.search(lgteQuery, termsFilter);
  assertEquals(lgteHits.length(), 1);
  assertEquals(lgteHits.id(0), 1);

  // Filtering on the sentence-level marker selects the other document
  termsFilter = new TermsFilter();
  termsFilter.addTerm(new Term(Config.S_HAS_TIMEXES + "_sentences", "true"));
  lgteHits = searcher.search(lgteQuery, termsFilter);
  assertEquals(lgteHits.length(), 1);
  assertEquals(lgteHits.id(0), 2);

  searcher.close();
  Files.delDirsE(path + "Contents");
  Files.delDirsE(path + "Sentences");
}
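// As the test above shows, a single-term TermsFilter over a boolean marker field is
// interchangeable with a QueryFilter wrapping the corresponding TermQuery; a minimal
// sketch of the equivalence, reusing the names from the test:
TermsFilter viaTerms = new TermsFilter();
viaTerms.addTerm(new Term(Config.S_HAS_TIMEXES, "true"));
QueryFilter viaQuery = new QueryFilter(new TermQuery(new Term(Config.S_HAS_TIMEXES, "true")));
// Both select only documents indexed with S_HAS_TIMEXES == "true".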