@Override public Facet facet() { TShortIntHashMap facets = aggregator.facets(); if (facets.isEmpty()) { pushFacets(facets); return new InternalShortTermsFacet( facetName, comparatorType, size, ImmutableList.<InternalShortTermsFacet.ShortEntry>of(), aggregator.missing()); } else { // we need to fetch facets of "size * numberOfShards" because of problems in how they are // distributed across shards BoundedTreeSet<InternalShortTermsFacet.ShortEntry> ordered = new BoundedTreeSet<InternalShortTermsFacet.ShortEntry>( comparatorType.comparator(), size * numberOfShards); for (TShortIntIterator it = facets.iterator(); it.hasNext(); ) { it.advance(); ordered.add(new InternalShortTermsFacet.ShortEntry(it.key(), it.value())); } pushFacets(facets); return new InternalShortTermsFacet( facetName, comparatorType, size, ordered, aggregator.missing()); } }
/**
 * Builds the long-terms facet from the collected HPPC open-hash counts.
 *
 * <p>Fix: the fast-path gate previously checked {@code size < EntryPriorityQueue.LIMIT}
 * while allocating the queue with capacity {@code shardSize}. Since {@code shardSize} is
 * the actual heap capacity (and is at least {@code size}), gating on {@code size} could
 * allocate a priority queue far larger than LIMIT intended to allow; the gate now checks
 * {@code shardSize}. Also reuses the {@code facetEntries} local instead of re-reading
 * {@code facets.v()} three times.
 *
 * @param facetName name to attach to the returned facet
 * @return the reduced facet; the recycled map is always closed before returning
 */
@Override
public InternalFacet buildFacet(String facetName) {
  if (facets.v().isEmpty()) {
    facets.close(); // release the recycled map even when empty
    return new InternalLongTermsFacet(
        facetName,
        comparatorType,
        size,
        ImmutableList.<InternalLongTermsFacet.LongEntry>of(),
        missing,
        total);
  } else {
    LongIntOpenHashMap facetEntries = facets.v();
    // Direct access to the open-addressing backing arrays; states[i] marks a used slot.
    final boolean[] states = facetEntries.allocated;
    final long[] keys = facetEntries.keys;
    final int[] values = facetEntries.values;
    // Gate on shardSize — that is the capacity actually allocated below.
    if (shardSize < EntryPriorityQueue.LIMIT) {
      EntryPriorityQueue ordered = new EntryPriorityQueue(shardSize, comparatorType.comparator());
      for (int i = 0; i < states.length; i++) {
        if (states[i]) {
          ordered.insertWithOverflow(new InternalLongTermsFacet.LongEntry(keys[i], values[i]));
        }
      }
      // Drain the heap back-to-front (pop yields the least entry first).
      InternalLongTermsFacet.LongEntry[] list = new InternalLongTermsFacet.LongEntry[ordered.size()];
      for (int i = ordered.size() - 1; i >= 0; i--) {
        list[i] = (InternalLongTermsFacet.LongEntry) ordered.pop();
      }
      facets.close();
      return new InternalLongTermsFacet(
          facetName, comparatorType, size, Arrays.asList(list), missing, total);
    } else {
      // Large shard size: a bounded tree set keeps at most shardSize entries in order.
      BoundedTreeSet<InternalLongTermsFacet.LongEntry> ordered =
          new BoundedTreeSet<>(comparatorType.comparator(), shardSize);
      for (int i = 0; i < states.length; i++) {
        if (states[i]) {
          ordered.add(new InternalLongTermsFacet.LongEntry(keys[i], values[i]));
        }
      }
      facets.close();
      return new InternalLongTermsFacet(facetName, comparatorType, size, ordered, missing, total);
    }
  }
}
/**
 * Materializes the collected int-term counts into an {@link InternalIntTermsFacet}.
 *
 * <p>The recycled Trove map is always returned to the {@link CacheRecycler} before a
 * result is produced. For small {@code size} a fixed-capacity priority queue is used;
 * otherwise a bounded tree set keeps the top entries.
 */
@Override
public Facet facet() {
  TIntIntHashMap counts = aggregator.facets();
  if (counts.isEmpty()) {
    // No hits at all: recycle the map and answer with an empty facet.
    CacheRecycler.pushIntIntMap(counts);
    return new InternalIntTermsFacet(
        facetName,
        comparatorType,
        size,
        ImmutableList.<InternalIntTermsFacet.IntEntry>of(),
        aggregator.missing());
  }
  if (size < EntryPriorityQueue.LIMIT) {
    // Small requested size: a fixed-capacity heap is cheaper than a tree set.
    EntryPriorityQueue topQueue = new EntryPriorityQueue(size, comparatorType.comparator());
    TIntIntIterator heapIter = counts.iterator();
    while (heapIter.hasNext()) {
      heapIter.advance();
      topQueue.insertWithOverflow(new InternalIntTermsFacet.IntEntry(heapIter.key(), heapIter.value()));
    }
    // Drain the heap back-to-front (pop yields the least entry first).
    InternalIntTermsFacet.IntEntry[] entries = new InternalIntTermsFacet.IntEntry[topQueue.size()];
    for (int slot = topQueue.size() - 1; slot >= 0; slot--) {
      entries[slot] = (InternalIntTermsFacet.IntEntry) topQueue.pop();
    }
    CacheRecycler.pushIntIntMap(counts);
    return new InternalIntTermsFacet(
        facetName, comparatorType, size, Arrays.asList(entries), aggregator.missing());
  }
  // Large requested size: keep the top entries in a bounded, ordered set instead.
  BoundedTreeSet<InternalIntTermsFacet.IntEntry> topSet =
      new BoundedTreeSet<InternalIntTermsFacet.IntEntry>(comparatorType.comparator(), size);
  TIntIntIterator setIter = counts.iterator();
  while (setIter.hasNext()) {
    setIter.advance();
    topSet.add(new InternalIntTermsFacet.IntEntry(setIter.key(), setIter.value()));
  }
  CacheRecycler.pushIntIntMap(counts);
  return new InternalIntTermsFacet(facetName, comparatorType, size, topSet, aggregator.missing());
}
/**
 * Builds the string-terms facet by merging per-reader {@link ReaderAggregator} counts
 * into a single global top-N result.
 *
 * <p>All aggregators are merged through an {@code AggregatorPriorityQueue} keyed on each
 * aggregator's current term, so equal terms from different readers are summed before
 * filtering (minCount, excludes, regex). The per-aggregator count arrays are pushed back
 * to the {@link CacheRecycler} on every return path.
 */
@Override
public Facet facet() {
  if (current != null) {
    // Fold in the still-open aggregator; counts[0] is added to the missing tally
    // (slot 0 is used as the no-value bucket by this code).
    missing += current.counts[0];
    total += current.total - current.counts[0];
    // if we have values for this one, add it
    if (current.values.ordinals().getNumOrds() > 1) {
      aggregators.add(current);
    }
  }
  // Seed the merge queue with every aggregator that has at least one position.
  AggregatorPriorityQueue queue = new AggregatorPriorityQueue(aggregators.size());
  for (ReaderAggregator aggregator : aggregators) {
    if (aggregator.nextPosition()) {
      queue.add(aggregator);
    }
  }
  // YACK, we repeat the same logic, but once with an optimizer priority queue for smaller sizes
  if (size < EntryPriorityQueue.LIMIT) {
    // optimize to use priority size
    EntryPriorityQueue ordered = new EntryPriorityQueue(size, comparatorType.comparator());
    while (queue.size() > 0) {
      ReaderAggregator agg = queue.top();
      // we need to makeSafe it, since we end up pushing it... (can we get around this?)
      BytesRef value = agg.values.makeSafe(agg.current);
      int count = 0;
      // Sum this term's count across every reader currently positioned on it.
      do {
        count += agg.counts[agg.position];
        if (agg.nextPosition()) {
          agg = queue.updateTop();
        } else {
          // we are done with this reader
          queue.pop();
          agg = queue.top(); // may be null when the queue is exhausted
        }
      } while (agg != null && value.equals(agg.current));
      if (count > minCount) {
        if (excluded != null && excluded.contains(value)) {
          continue;
        }
        // LUCENE 4 UPGRADE: use Lucene's RegexCapabilities
        if (matcher != null && !matcher.reset(value.utf8ToString()).matches()) {
          continue;
        }
        InternalStringTermsFacet.TermEntry entry = new InternalStringTermsFacet.TermEntry(value, count);
        ordered.insertWithOverflow(entry);
      }
    }
    // Drain the heap back-to-front into an array (pop yields the least entry first).
    InternalStringTermsFacet.TermEntry[] list = new InternalStringTermsFacet.TermEntry[ordered.size()];
    for (int i = ordered.size() - 1; i >= 0; i--) {
      list[i] = (InternalStringTermsFacet.TermEntry) ordered.pop();
    }
    // Return the recycled count arrays before handing back the result.
    for (ReaderAggregator aggregator : aggregators) {
      CacheRecycler.pushIntArray(aggregator.counts);
    }
    return new InternalStringTermsFacet(
        facetName, comparatorType, size, Arrays.asList(list), missing, total);
  }
  // Same merge as above, but a bounded tree set is used for larger requested sizes.
  BoundedTreeSet<InternalStringTermsFacet.TermEntry> ordered =
      new BoundedTreeSet<InternalStringTermsFacet.TermEntry>(comparatorType.comparator(), size);
  while (queue.size() > 0) {
    ReaderAggregator agg = queue.top();
    // we need to makeSafe it, since we end up pushing it... (can we work around that?)
    BytesRef value = agg.values.makeSafe(agg.current);
    int count = 0;
    do {
      count += agg.counts[agg.position];
      if (agg.nextPosition()) {
        agg = queue.updateTop();
      } else {
        // we are done with this reader
        queue.pop();
        agg = queue.top();
      }
    } while (agg != null && value.equals(agg.current));
    if (count > minCount) {
      if (excluded != null && excluded.contains(value)) {
        continue;
      }
      // LUCENE 4 UPGRADE: use Lucene's RegexCapabilities
      if (matcher != null && !matcher.reset(value.utf8ToString()).matches()) {
        continue;
      }
      InternalStringTermsFacet.TermEntry entry = new InternalStringTermsFacet.TermEntry(value, count);
      ordered.add(entry);
    }
  }
  for (ReaderAggregator aggregator : aggregators) {
    CacheRecycler.pushIntArray(aggregator.counts);
  }
  return new InternalStringTermsFacet(facetName, comparatorType, size, ordered, missing, total);
}
/**
 * Builds the short-terms facet by merging per-reader {@link ReaderAggregator} counts.
 *
 * <p>Same merge structure as the string variant: aggregators are ordered in an
 * {@code AggregatorPriorityQueue} by their current short value, equal values across
 * readers are summed, then filtered by {@code minCount} and the excluded set. The
 * recycled count arrays are pushed back on every return path.
 */
@Override
public Facet facet() {
  if (current != null) {
    // Fold in the still-open aggregator; counts[0] is treated as the missing bucket.
    missing += current.counts[0];
    total += current.total - current.counts[0];
    // if we have values for this one, add it
    if (current.values.length > 1) {
      aggregators.add(current);
    }
  }
  AggregatorPriorityQueue queue = new AggregatorPriorityQueue(aggregators.size());
  for (ReaderAggregator aggregator : aggregators) {
    if (aggregator.nextPosition()) {
      queue.add(aggregator);
    }
  }
  // YACK, we repeat the same logic, but once with an optimizer priority queue for smaller sizes
  if (size < EntryPriorityQueue.LIMIT) {
    // optimize to use priority size
    EntryPriorityQueue ordered = new EntryPriorityQueue(size, comparatorType.comparator());
    while (queue.size() > 0) {
      ReaderAggregator agg = queue.top();
      short value = agg.current;
      int count = 0;
      // Sum this value's count across every reader currently positioned on it.
      do {
        count += agg.counts[agg.position];
        if (agg.nextPosition()) {
          agg = queue.updateTop();
        } else {
          // we are done with this reader
          queue.pop();
          agg = queue.top(); // may be null when the queue is exhausted
        }
      } while (agg != null && value == agg.current);
      if (count > minCount) {
        if (excluded == null || !excluded.contains(value)) {
          InternalShortTermsFacet.ShortEntry entry = new InternalShortTermsFacet.ShortEntry(value, count);
          ordered.insertWithOverflow(entry);
        }
      }
    }
    // Drain the heap back-to-front into an array (pop yields the least entry first).
    InternalShortTermsFacet.ShortEntry[] list = new InternalShortTermsFacet.ShortEntry[ordered.size()];
    for (int i = ordered.size() - 1; i >= 0; i--) {
      list[i] = (InternalShortTermsFacet.ShortEntry) ordered.pop();
    }
    // Return the recycled count arrays before handing back the result.
    for (ReaderAggregator aggregator : aggregators) {
      CacheRecycler.pushIntArray(aggregator.counts);
    }
    return new InternalShortTermsFacet(
        facetName, comparatorType, size, Arrays.asList(list), missing, total);
  }
  // Same merge as above, but a bounded tree set is used for larger requested sizes.
  BoundedTreeSet<InternalShortTermsFacet.ShortEntry> ordered =
      new BoundedTreeSet<InternalShortTermsFacet.ShortEntry>(comparatorType.comparator(), size);
  while (queue.size() > 0) {
    ReaderAggregator agg = queue.top();
    short value = agg.current;
    int count = 0;
    do {
      count += agg.counts[agg.position];
      if (agg.nextPosition()) {
        agg = queue.updateTop();
      } else {
        // we are done with this reader
        queue.pop();
        agg = queue.top();
      }
    } while (agg != null && value == agg.current);
    if (count > minCount) {
      if (excluded == null || !excluded.contains(value)) {
        InternalShortTermsFacet.ShortEntry entry = new InternalShortTermsFacet.ShortEntry(value, count);
        ordered.add(entry);
      }
    }
  }
  for (ReaderAggregator aggregator : aggregators) {
    CacheRecycler.pushIntArray(aggregator.counts);
  }
  return new InternalShortTermsFacet(facetName, comparatorType, size, ordered, missing, total);
}
/**
 * Randomized integration test for string-field terms facets across fielddata formats.
 *
 * <p>Indexes documents with random multi-valued field1 / single-valued field2 values,
 * tracks expected per-query facet counts in control maps, then compares actual facet
 * responses (with random size / order / excludes / regex / allTerms) to the control.
 *
 * <p>Fix: {@code allUniqueFieldValues} is built from a set of random strings and may
 * contain fewer than {@code numOfVals} entries if a random string repeats; indexing it
 * with {@code random.nextInt(numOfVals)} could therefore throw
 * {@code ArrayIndexOutOfBoundsException}. Both lookups now bound the index by
 * {@code allUniqueFieldValues.length}.
 */
@Test
@Slow
public void testTermFacet_stringFields() throws Throwable {
  prepareCreate("test")
      .addMapping(
          "type1",
          jsonBuilder()
              .startObject()
              .startObject("type1")
              .startObject("properties")
              .startObject("field1_paged")
              .field("type", "string")
              .field("index", "not_analyzed")
              .startObject("fielddata")
              .field("format", "paged_bytes")
              .endObject()
              .endObject()
              .startObject("field1_fst")
              .field("type", "string")
              .field("index", "not_analyzed")
              .startObject("fielddata")
              .field("format", "fst")
              .endObject()
              .endObject()
              .startObject("field2")
              .field("type", "string")
              .field("index", "not_analyzed")
              .startObject("fielddata")
              .field("format", "fst")
              .endObject()
              .endObject()
              .startObject("q_field")
              .field("type", "string")
              .field("index", "not_analyzed")
              .endObject()
              .endObject()
              .endObject()
              .endObject())
      .execute()
      .actionGet();
  Random random = getRandom();
  int numOfQueryValues = 50;
  String[] queryValues = new String[numOfQueryValues];
  for (int i = 0; i < numOfQueryValues; i++) {
    queryValues[i] = randomAsciiOfLength(5);
  }
  // Random strings can collide, so the resulting array may be shorter than numOfVals.
  Set<String> uniqueValuesSet = new HashSet<String>();
  int numOfVals = 400;
  for (int i = 0; i < numOfVals; i++) {
    uniqueValuesSet.add(randomAsciiOfLength(10));
  }
  String[] allUniqueFieldValues = uniqueValuesSet.toArray(new String[uniqueValuesSet.size()]);
  Set<String> allField1Values = new HashSet<String>();
  Set<String> allField1AndField2Values = new HashSet<String>();
  Map<String, Map<String, Integer>> queryValToField1FacetEntries =
      new HashMap<String, Map<String, Integer>>();
  Map<String, Map<String, Integer>> queryValToField1and2FacetEntries =
      new HashMap<String, Map<String, Integer>>();
  for (int i = 1; i <= numDocs(); i++) {
    int numField1Values = random.nextInt(17);
    Set<String> field1Values = new HashSet<String>(numField1Values);
    for (int j = 0; j <= numField1Values; j++) {
      // Retry until a value not already picked for this doc is found.
      boolean added = false;
      while (!added) {
        // Index by the array's actual length, not numOfVals (see javadoc).
        added = field1Values.add(allUniqueFieldValues[random.nextInt(allUniqueFieldValues.length)]);
      }
    }
    allField1Values.addAll(field1Values);
    allField1AndField2Values.addAll(field1Values);
    String field2Val = allUniqueFieldValues[random.nextInt(allUniqueFieldValues.length)];
    allField1AndField2Values.add(field2Val);
    String queryVal = queryValues[random.nextInt(numOfQueryValues)];
    client()
        .prepareIndex("test", "type1", Integer.toString(i))
        .setSource(
            jsonBuilder()
                .startObject()
                .field("field1_paged", field1Values)
                .field("field1_fst", field1Values)
                .field("field2", field2Val)
                .field("q_field", queryVal)
                .endObject())
        .execute()
        .actionGet();
    // Occasionally flush so the test covers multi-segment behavior.
    if (random.nextInt(2000) == 854) {
      client().admin().indices().prepareFlush("test").execute().actionGet();
    }
    addControlValues(queryValToField1FacetEntries, field1Values, queryVal);
    addControlValues(queryValToField1and2FacetEntries, field1Values, queryVal);
    addControlValues(queryValToField1and2FacetEntries, field2Val, queryVal);
  }
  client().admin().indices().prepareRefresh().execute().actionGet();
  String[] facetFields = new String[] {"field1_paged", "field1_fst"};
  TermsFacet.ComparatorType[] compTypes = TermsFacet.ComparatorType.values();
  for (String facetField : facetFields) {
    for (String queryVal : queryValToField1FacetEntries.keySet()) {
      Set<String> allFieldValues;
      Map<String, Integer> queryControlFacets;
      TermsFacet.ComparatorType compType = compTypes[random.nextInt(compTypes.length)];
      TermsFacetBuilder termsFacetBuilder = FacetBuilders.termsFacet("facet1").order(compType);
      // Sometimes facet over both field1 and field2 to exercise multi-field facets.
      boolean useFields;
      if (random.nextInt(4) == 3) {
        useFields = true;
        queryControlFacets = queryValToField1and2FacetEntries.get(queryVal);
        allFieldValues = allField1AndField2Values;
        termsFacetBuilder.fields(facetField, "field2");
      } else {
        queryControlFacets = queryValToField1FacetEntries.get(queryVal);
        allFieldValues = allField1Values;
        useFields = false;
        termsFacetBuilder.field(facetField);
      }
      // Count-ordered facets are only exact across shards when size covers all values.
      int size;
      if (numberOfShards() == 1
          || compType == TermsFacet.ComparatorType.TERM
          || compType == TermsFacet.ComparatorType.REVERSE_TERM) {
        size = random.nextInt(queryControlFacets.size());
      } else {
        size = allFieldValues.size();
      }
      termsFacetBuilder.size(size);
      if (random.nextBoolean()) {
        termsFacetBuilder.executionHint("map");
      }
      List<String> excludes = new ArrayList<String>();
      if (random.nextBoolean()) {
        int numExcludes = random.nextInt(5) + 1;
        List<String> facetValues = new ArrayList<String>(queryControlFacets.keySet());
        for (int i = 0; i < numExcludes; i++) {
          excludes.add(facetValues.get(random.nextInt(facetValues.size())));
        }
        termsFacetBuilder.exclude(excludes.toArray());
      }
      String regex = null;
      if (random.nextBoolean()) {
        // Build a prefix regex from half of an actually-occurring facet value.
        List<String> facetValues = new ArrayList<String>(queryControlFacets.keySet());
        regex = facetValues.get(random.nextInt(facetValues.size()));
        regex = "^" + regex.substring(0, regex.length() / 2) + ".*";
        termsFacetBuilder.regex(regex);
      }
      boolean allTerms = random.nextInt(10) == 3;
      termsFacetBuilder.allTerms(allTerms);
      SearchResponse response =
          client()
              .prepareSearch("test")
              .setQuery(QueryBuilders.termQuery("q_field", queryVal))
              .addFacet(termsFacetBuilder)
              .execute()
              .actionGet();
      TermsFacet actualFacetEntries = response.getFacets().facet("facet1");
      List<Tuple<Text, Integer>> expectedFacetEntries =
          getExpectedFacetEntries(
              allFieldValues, queryControlFacets, size, compType, excludes, regex, allTerms);
      String reason =
          String.format(
              Locale.ROOT,
              "query: [%s] field: [%s] size: [%d] order: [%s] all_terms: [%s] fields: [%s] regex: [%s] excludes: [%s]",
              queryVal,
              facetField,
              size,
              compType,
              allTerms,
              useFields,
              regex,
              excludes);
      assertThat(
          reason, actualFacetEntries.getEntries().size(), equalTo(expectedFacetEntries.size()));
      for (int i = 0; i < expectedFacetEntries.size(); i++) {
        assertThat(
            reason,
            actualFacetEntries.getEntries().get(i).getTerm(),
            equalTo(expectedFacetEntries.get(i).v1()));
        assertThat(
            reason,
            actualFacetEntries.getEntries().get(i).getCount(),
            equalTo(expectedFacetEntries.get(i).v2()));
      }
    }
  }
}