/**
 * Runs the request's query, restricted to documents whose date field lies in
 * {@code [request.startTime, request.endTime]} (both inclusive), and writes each distinct value
 * of the request's keyword field to a freshly created temp file, one value per line.
 *
 * <p>Duplicates are detected by {@code String.hashCode()} only, so two different values that
 * share a hash code are written once. Documents for which the keyword field yields {@code null}
 * are skipped.
 *
 * @param user the requesting user; replaced by the result of {@code userService.validate(user)}
 * @param request supplies the query, the time range, the scan size and the keyword field
 * @return the path of the temp file containing the dumped values
 * @throws IOException if the temp file cannot be created or a write to it fails
 */
private Path dumpSearchResults(User user, Request request)
    throws ParseException, IOException, FileNotFoundException {
  user = userService.validate(user);
  Query query = searchService.getQuery(request);
  IndexSearcher searcher = getSearcher(request, user);
  Filter f =
      NumericRangeFilter.newLongRange(
          MailSchemaField.date.name(), request.startTime, request.endTime, true, true);
  TopDocs docs = searcher.search(query, f, request.scanSize);
  Path tempFile =
      Files.createTempFile(
          format("kw-%s-%s", user.id, request.keywordField), ".mailytics.temp");
  TIntHashSet dupSet = new TIntHashSet();
  // try-with-resources: the writer is closed even when loading a document throws.
  try (PrintWriter writer = new PrintWriter(tempFile.toFile())) {
    for (ScoreDoc sd : docs.scoreDocs) {
      Document doc = searcher.doc(sd.doc);
      String string = doc.get(request.keywordField.name());
      if (string == null) {
        // Nothing to dump for this document; avoids a NullPointerException on hashCode().
        continue;
      }
      int hash = string.hashCode();
      if (!dupSet.contains(hash)) {
        writer.println(string);
        dupSet.add(hash);
      }
    }
    // PrintWriter never throws on write failures; ask it explicitly so a truncated dump
    // is reported instead of being returned as if it were complete.
    if (writer.checkError()) {
      throw new IOException("Failed to write search results to " + tempFile);
    }
  }
  return tempFile;
}
/**
 * Builds a {@link NumericRangeFilter} from the attributes of the given element.
 *
 * <p>Required attributes: {@code fieldName} (may be inherited), {@code lowerTerm},
 * {@code upperTerm}. Optional attributes: {@code includeLower} and {@code includeUpper} (both
 * default to {@code true}), {@code precisionStep} (defaults to
 * {@code NumericUtils.PRECISION_STEP_DEFAULT}) and {@code type} (defaults to {@code "int"};
 * compared case-insensitively against int, long, double, float).
 *
 * @param e the element describing the filter
 * @return the range filter, or {@code NO_MATCH_FILTER} when a bound is not a valid number and
 *     strict mode is off
 * @throws ParserException if {@code type} is not supported, or if a bound is not a valid number
 *     and strict mode is on
 */
@Override
public Filter getFilter(Element e) throws ParserException {
  final String fieldName = DOMUtils.getAttributeWithInheritanceOrFail(e, "fieldName");
  final String lowerText = DOMUtils.getAttributeOrFail(e, "lowerTerm");
  final String upperText = DOMUtils.getAttributeOrFail(e, "upperTerm");
  final boolean includeLower = DOMUtils.getAttribute(e, "includeLower", true);
  final boolean includeUpper = DOMUtils.getAttribute(e, "includeUpper", true);
  final int step =
      DOMUtils.getAttribute(e, "precisionStep", NumericUtils.PRECISION_STEP_DEFAULT);
  final String numericType = DOMUtils.getAttribute(e, "type", "int");

  try {
    if (numericType.equalsIgnoreCase("int")) {
      return NumericRangeFilter.newIntRange(
          fieldName,
          step,
          Integer.valueOf(lowerText),
          Integer.valueOf(upperText),
          includeLower,
          includeUpper);
    }
    if (numericType.equalsIgnoreCase("long")) {
      return NumericRangeFilter.newLongRange(
          fieldName,
          step,
          Long.valueOf(lowerText),
          Long.valueOf(upperText),
          includeLower,
          includeUpper);
    }
    if (numericType.equalsIgnoreCase("double")) {
      return NumericRangeFilter.newDoubleRange(
          fieldName,
          step,
          Double.valueOf(lowerText),
          Double.valueOf(upperText),
          includeLower,
          includeUpper);
    }
    if (numericType.equalsIgnoreCase("float")) {
      return NumericRangeFilter.newFloatRange(
          fieldName,
          step,
          Float.valueOf(lowerText),
          Float.valueOf(upperText),
          includeLower,
          includeUpper);
    }
    throw new ParserException("type attribute must be one of: [long, int, double, float]");
  } catch (NumberFormatException nfe) {
    // Lenient mode degrades an unparsable bound to a filter that matches nothing.
    if (!strictMode) {
      return NO_MATCH_FILTER;
    }
    throw new ParserException("Could not parse lowerTerm or upperTerm into a number", nfe);
  }
}
/**
 * Returns an inclusive long range filter whose lower and upper bound are both the configured
 * null value, or {@code null} when no null value is configured.
 */
@Override
public Filter nullValueFilter() {
  if (nullValue != null) {
    final String indexName = names.indexName();
    return NumericRangeFilter.newLongRange(
        indexName, precisionStep, nullValue, nullValue, true, true);
  }
  return null;
}
/**
 * Checks that three ranges which cannot match anything (lower above upper, exclusive lower at
 * {@code Long.MAX_VALUE}, exclusive upper at {@code Long.MIN_VALUE}) all yield the shared
 * {@code DocIdSet.EMPTY_DOCIDSET} instance.
 */
public void testInverseRange() throws Exception {
  // Lower bound greater than upper bound.
  NumericRangeFilter<Long> filter =
      NumericRangeFilter.newLongRange("field8", 8, 1000L, -1000L, true, true);
  DocIdSet actual = filter.getDocIdSet(searcher.getIndexReader());
  assertSame(
      "A inverse range should return the EMPTY_DOCIDSET instance",
      DocIdSet.EMPTY_DOCIDSET,
      actual);

  // Open-ended range starting exclusively at the largest long.
  filter = NumericRangeFilter.newLongRange("field8", 8, Long.MAX_VALUE, null, false, false);
  actual = filter.getDocIdSet(searcher.getIndexReader());
  assertSame(
      "A exclusive range starting with Long.MAX_VALUE should return the EMPTY_DOCIDSET instance",
      DocIdSet.EMPTY_DOCIDSET,
      actual);

  // Open-ended range ending exclusively at the smallest long.
  filter = NumericRangeFilter.newLongRange("field8", 8, null, Long.MIN_VALUE, false, false);
  actual = filter.getDocIdSet(searcher.getIndexReader());
  assertSame(
      "A exclusive range ending with Long.MIN_VALUE should return the EMPTY_DOCIDSET instance",
      DocIdSet.EMPTY_DOCIDSET,
      actual);
}
/**
 * Returns an inclusive long range filter whose lower and upper bound are both the result of
 * {@code parseStringValue(nullValue)}, or {@code null} when no null value is configured.
 */
@Override
public Filter nullValueFilter() {
  if (nullValue != null) {
    final long parsed = parseStringValue(nullValue);
    final String indexName = names.indexName();
    return NumericRangeFilter.newLongRange(
        indexName, precisionStep, parsed, parsed, true, true);
  }
  return null;
}
/**
 * Runs one inclusive long range through three execution paths (constant score filter rewrite,
 * constant score boolean rewrite, and a plain filter over a match-all query) and checks the hit
 * count plus the first and last hit of each. The other tests only use the constant score mode.
 */
private void testRange(int precisionStep) throws Exception {
  final String field = "field" + precisionStep;
  final int count = 3000;
  final long lower = (distance * 3 / 2) + startOffset;
  final long upper = lower + count * distance + (distance / 3);

  final NumericRangeQuery<Long> q =
      NumericRangeQuery.newLongRange(field, precisionStep, lower, upper, true, true);
  final NumericRangeFilter<Long> f =
      NumericRangeFilter.newLongRange(field, precisionStep, lower, upper, true, true);

  int previousTerms = 0;
  for (int mode = 0; mode < 3; mode++) {
    q.clearTotalNumberOfTerms();
    f.clearTotalNumberOfTerms();

    final String type;
    final TopDocs topDocs;
    final int terms;
    if (mode == 0) {
      type = " (constant score filter rewrite)";
      q.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
      topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER);
      terms = q.getTotalNumberOfTerms();
    } else if (mode == 1) {
      type = " (constant score boolean rewrite)";
      q.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE);
      topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER);
      terms = q.getTotalNumberOfTerms();
    } else {
      type = " (filter)";
      topDocs = searcher.search(new MatchAllDocsQuery(), f, noDocs, Sort.INDEXORDER);
      terms = f.getTotalNumberOfTerms();
    }

    if (VERBOSE) {
      System.out.println(
          "Found " + terms + " distinct terms in range for field '" + field + "'" + type + ".");
    }

    final ScoreDoc[] hits = topDocs.scoreDocs;
    assertNotNull(hits);
    assertEquals("Score doc count" + type, count, hits.length);

    final Document firstDoc = searcher.doc(hits[0].doc);
    assertEquals(
        "First doc" + type, 2 * distance + startOffset, Long.parseLong(firstDoc.get(field)));

    final Document lastDoc = searcher.doc(hits[hits.length - 1].doc);
    assertEquals(
        "Last doc" + type,
        (1 + count) * distance + startOffset,
        Long.parseLong(lastDoc.get(field)));

    // Term counts are only compared across modes when the reader has a single sub-reader.
    if (mode > 0 && searcher.getIndexReader().getSequentialSubReaders().length == 1) {
      assertEquals("Distinct term number is equal for all query types", previousTerms, terms);
    }
    previousTerms = terms;
  }
}
/**
 * Creates a long range filter on this field's index name. A {@code null} term leaves the
 * corresponding bound as {@code null}; a non-null term is converted with {@code parseValue}.
 *
 * @param lowerTerm lower bound, or {@code null}
 * @param upperTerm upper bound, or {@code null}
 * @param includeLower whether the lower bound is inclusive
 * @param includeUpper whether the upper bound is inclusive
 * @param context not used by this implementation
 */
@Override
public Filter rangeFilter(
    Object lowerTerm,
    Object upperTerm,
    boolean includeLower,
    boolean includeUpper,
    @Nullable QueryParseContext context) {
  final String indexName = names.indexName();

  Long from = null;
  if (lowerTerm != null) {
    from = parseValue(lowerTerm);
  }

  Long to = null;
  if (upperTerm != null) {
    to = parseValue(upperTerm);
  }

  return NumericRangeFilter.newLongRange(
      indexName, precisionStep, from, to, includeLower, includeUpper);
}
/**
 * Creates a filter for a single value, expressed as an inclusive long range whose lower and
 * upper bound are both {@code parseValue(value)}.
 *
 * @param value the value to match
 * @param context not used by this implementation
 */
@Override
public Filter termFilter(Object value, @Nullable QueryParseContext context) {
  final long bound = parseValue(value);
  final String indexName = names.indexName();
  return NumericRangeFilter.newLongRange(indexName, precisionStep, bound, bound, true, true);
}
/** * For the given query, returns the requested page of results. This method should not be called * anymore after {@link #shutdown()} has been called, otherwise an IOException will be thrown. */ @NotNull @ThreadSafe public ResultPage search(@NotNull WebQuery webQuery) throws IOException, SearchException, CheckedOutOfMemoryError { Util.checkNotNull(webQuery); if (ioException != null) throw ioException; List<Filter> filters = new ArrayList<Filter>(3); // Add size filter to filter chain if (webQuery.minSize != null || webQuery.maxSize != null) { filters.add( NumericRangeFilter.newLongRange( Fields.SIZE.key(), webQuery.minSize, webQuery.maxSize, true, true)); } // Add type filter to filter chain if (webQuery.parsers != null) { TermsFilter typeFilter = new TermsFilter(); String fieldName = Fields.PARSER.key(); typeFilter.addTerm(new Term(fieldName, Fields.EMAIL_PARSER)); for (Parser parser : webQuery.parsers) { String parserName = parser.getClass().getSimpleName(); typeFilter.addTerm(new Term(fieldName, parserName)); } filters.add(typeFilter); } // Add location filter to filter chain if (webQuery.indexes != null) { Filter[] indexFilters = new Filter[webQuery.indexes.size()]; int i = 0; for (LuceneIndex index : webQuery.indexes) { Path path = index.getRootFolder().getPath(); String uid = index.getDocumentType().createUniqueId(path); Term prefix = new Term(Fields.UID.key(), uid + "/"); indexFilters[i++] = new PrefixFilter(prefix); } filters.add(new ChainedFilter(indexFilters, ChainedFilter.OR)); } // Construct filter chain Filter filter = filters.size() == 0 ? 
null : new ChainedFilter(filters.toArray(new Filter[filters.size()]), ChainedFilter.AND); // Create query QueryWrapper queryWrapper = createQuery(webQuery.query); Query query = queryWrapper.query; boolean isPhraseQuery = queryWrapper.isPhraseQuery; readLock.lock(); try { checkIndexesExist(); // Perform search; might throw OutOfMemoryError int maxResults = (webQuery.pageIndex + 1) * PAGE_SIZE; TopDocs topDocs = luceneSearcher.search(query, filter, maxResults); ScoreDoc[] scoreDocs = topDocs.scoreDocs; // Compute start and end indices of returned page int start; int end = scoreDocs.length; if (end <= PAGE_SIZE) { start = 0; } else { int r = end % PAGE_SIZE; start = end - (r == 0 ? PAGE_SIZE : r); } // Create and fill list of result documents to return ResultDocument[] results = new ResultDocument[end - start]; for (int i = start; i < end; i++) { Document doc = luceneSearcher.doc(scoreDocs[i].doc); float score = scoreDocs[i].score; LuceneIndex index = indexes.get(luceneSearcher.subSearcher(i)); IndexingConfig config = index.getConfig(); results[i - start] = new ResultDocument( doc, score, query, isPhraseQuery, config, fileFactory, outlookMailFactory); } int hitCount = topDocs.totalHits; int newPageIndex = start / PAGE_SIZE; int pageCount = (int) Math.ceil((float) hitCount / PAGE_SIZE); return new ResultPage(Arrays.asList(results), newPageIndex, pageCount, hitCount); } catch (IllegalArgumentException e) { throw wrapEmptyIndexException(e); } catch (OutOfMemoryError e) { throw new CheckedOutOfMemoryError(e); } finally { readLock.unlock(); } }
private Filter innerRangeFilter( IndexNumericFieldData fieldData, Object lowerTerm, Object upperTerm, boolean includeLower, boolean includeUpper, @Nullable DateTimeZone timeZone, @Nullable DateMathParser forcedDateParser, @Nullable Boolean explicitCaching) { boolean cache; boolean cacheable = true; Long lowerVal = null; Long upperVal = null; if (lowerTerm != null) { if (lowerTerm instanceof Number) { lowerVal = ((Number) lowerTerm).longValue(); } else { String value = convertToString(lowerTerm); cacheable = !hasDateExpressionWithNoRounding(value); lowerVal = parseToMilliseconds(value, false, timeZone, forcedDateParser); } } if (upperTerm != null) { if (upperTerm instanceof Number) { upperVal = ((Number) upperTerm).longValue(); } else { String value = convertToString(upperTerm); cacheable = cacheable && !hasDateExpressionWithNoRounding(value); upperVal = parseToMilliseconds(value, includeUpper, timeZone, forcedDateParser); } } if (explicitCaching != null) { if (explicitCaching) { cache = cacheable; } else { cache = false; } } else { cache = cacheable; } Filter filter; if (fieldData != null) { filter = NumericRangeFieldDataFilter.newLongRange( fieldData, lowerVal, upperVal, includeLower, includeUpper); } else { filter = NumericRangeFilter.newLongRange( names.indexName(), precisionStep, lowerVal, upperVal, includeLower, includeUpper); } if (!cache) { // We don't cache range filter if `now` date expression is used and also when a compound // filter wraps // a range filter with a `now` date expressions. return NoCacheFilter.wrap(filter); } else { return filter; } }
/**
 * Creates a filter for a single value, expressed as an inclusive long range whose lower and
 * upper bound are both {@code parseToMilliseconds(value)}.
 *
 * @param value the value to match
 * @param context not used by this implementation
 */
@Override
public Filter termFilter(Object value, @Nullable QueryParseContext context) {
  final long millis = parseToMilliseconds(value);
  final String indexName = names.indexName();
  return NumericRangeFilter.newLongRange(indexName, precisionStep, millis, millis, true, true);
}