@Override
    protected void doSetNextReader(LeafReaderContext context) throws IOException {
      if (segmentFacetCounts != null) {
        segmentResults.add(createSegmentResult());
      }

      groupFieldTermsIndex = DocValues.getSorted(context.reader(), groupField);
      facetFieldTermsIndex = DocValues.getSorted(context.reader(), facetField);

      // 1+ to allow for the -1 "not set":
      segmentFacetCounts = new int[facetFieldTermsIndex.getValueCount() + 1];
      segmentTotalCount = 0;

      segmentGroupedFacetHits.clear();
      for (GroupedFacetHit groupedFacetHit : groupedFacetHits) {
        int facetOrd =
            groupedFacetHit.facetValue == null
                ? -1
                : facetFieldTermsIndex.lookupTerm(groupedFacetHit.facetValue);
        if (groupedFacetHit.facetValue != null && facetOrd < 0) {
          continue;
        }

        int groupOrd =
            groupedFacetHit.groupValue == null
                ? -1
                : groupFieldTermsIndex.lookupTerm(groupedFacetHit.groupValue);
        if (groupedFacetHit.groupValue != null && groupOrd < 0) {
          continue;
        }

        int segmentGroupedFacetsIndex =
            groupOrd * (facetFieldTermsIndex.getValueCount() + 1) + facetOrd;
        segmentGroupedFacetHits.put(segmentGroupedFacetsIndex);
      }

      if (facetPrefix != null) {
        startFacetOrd = facetFieldTermsIndex.lookupTerm(facetPrefix);
        if (startFacetOrd < 0) {
          // Points to the ord one higher than facetPrefix
          startFacetOrd = -startFacetOrd - 1;
        }
        BytesRefBuilder facetEndPrefix = new BytesRefBuilder();
        facetEndPrefix.append(facetPrefix);
        facetEndPrefix.append(UnicodeUtil.BIG_TERM);
        endFacetOrd = facetFieldTermsIndex.lookupTerm(facetEndPrefix.get());
        assert endFacetOrd < 0;
        endFacetOrd = -endFacetOrd - 1; // Points to the ord one higher than facetEndPrefix
      } else {
        startFacetOrd = -1;
        endFacetOrd = facetFieldTermsIndex.getValueCount();
      }
    }
  @Test
  public void testSvValues() throws IOException {
    int numDocs = 1000000;
    int numOrdinals = numDocs / 4;
    Map<Integer, Long> controlDocToOrdinal = new HashMap<>();
    OrdinalsBuilder builder = new OrdinalsBuilder(numDocs);
    long ordinal = builder.currentOrdinal();
    for (int doc = 0; doc < numDocs; doc++) {
      if (doc % numOrdinals == 0) {
        ordinal = builder.nextOrdinal();
      }
      controlDocToOrdinal.put(doc, ordinal);
      builder.addDoc(doc);
    }

    Ordinals ords = builder.build(ImmutableSettings.EMPTY);
    assertThat(ords, instanceOf(SinglePackedOrdinals.class));
    RandomAccessOrds docs = ords.ordinals();
    final SortedDocValues singleOrds = DocValues.unwrapSingleton(docs);
    assertNotNull(singleOrds);

    for (Map.Entry<Integer, Long> entry : controlDocToOrdinal.entrySet()) {
      assertThat(entry.getValue(), equalTo((long) singleOrds.getOrd(entry.getKey())));
    }
  }
 @Override
 public DocIdSet getDocIdSet(LeafReaderContext context, Bits acceptDocs) throws IOException {
   final Bits docsWithField = DocValues.getDocsWithField(context.reader(), field);
   if (negate) {
     if (docsWithField instanceof MatchAllBits) {
       return null;
     }
     return new DocValuesDocIdSet(context.reader().maxDoc(), acceptDocs) {
       @Override
       protected final boolean matchDoc(int doc) {
         return !docsWithField.get(doc);
       }
     };
   } else {
     if (docsWithField instanceof MatchNoBits) {
       return null;
     }
     if (docsWithField instanceof BitSet) {
       // UweSays: this is always the case for our current impl - but who knows
       // :-)
       return BitsFilteredDocIdSet.wrap(new BitDocIdSet((BitSet) docsWithField), acceptDocs);
     }
     return new DocValuesDocIdSet(context.reader().maxDoc(), acceptDocs) {
       @Override
       protected final boolean matchDoc(int doc) {
         return docsWithField.get(doc);
       }
     };
   }
 }
Example #4
0
 /** Returns a SortedSetDocValues view of this instance */
 public SortedSetDocValues iterator(LeafReader reader) throws IOException {
   if (isEmpty()) {
     return DocValues.emptySortedSet();
   } else {
     return new Iterator(reader);
   }
 }
  private static SortedNumericDocValues sparseSingles(
      final NumericDocValues deltas,
      final long minValue,
      final long missingValue,
      final int maxDoc) {
    final NumericDocValues values =
        new NumericDocValues() {
          @Override
          public long get(int docID) {
            final long delta = deltas.get(docID);
            if (delta == missingValue) {
              return 0;
            }
            return minValue + delta;
          }
        };
    final Bits docsWithFields =
        new Bits() {
          @Override
          public boolean get(int index) {
            return deltas.get(index) != missingValue;
          }

          @Override
          public int length() {
            return maxDoc;
          }
        };
    return DocValues.singleton(values, docsWithFields);
  }
 @Override
 public Bits getDocsWithField(FieldInfo field) throws IOException {
   if (field.getDocValuesType() == DocValuesType.SORTED_SET) {
     return DocValues.docsWithValue(getSortedSet(field), maxDoc);
   } else {
     return new Bits.MatchAllBits(maxDoc);
   }
 }
  @Override
  public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException {
    final NumericDocValues arr = DocValues.getNumeric(readerContext.reader(), field);
    final Bits valid = DocValues.getDocsWithField(readerContext.reader(), field);
    return new LongDocValues(this) {
      @Override
      public long longVal(int doc) {
        return arr.get(doc);
      }

      @Override
      public boolean exists(int doc) {
        return valid.get(doc);
      }

      @Override
      public Object objectVal(int doc) {
        return exists(doc) ? longToObject(arr.get(doc)) : null;
      }

      @Override
      public String strVal(int doc) {
        return exists(doc) ? longToString(arr.get(doc)) : null;
      }

      @Override
      public ValueFiller getValueFiller() {
        return new ValueFiller() {
          private final MutableValueDate mval = new MutableValueDate();

          @Override
          public MutableValue getValue() {
            return mval;
          }

          @Override
          public void fillValue(int doc) {
            mval.value = arr.get(doc);
            mval.exists = exists(doc);
          }
        };
      }
    };
  }
 @Override
 public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
   LeafReader reader = context.reader();
   FieldInfo info = reader.getFieldInfos().fieldInfo(field);
   if (info != null) {
     Geo3DDocValuesField.checkCompatible(info);
   }
   currentDocs = DocValues.getSortedNumeric(reader, field);
   return this;
 }
    @Override
    public MultiGeoPointValues getGeoPointValues() {
      final RandomAccessOrds ords = ordinals.ordinals();
      final SortedDocValues singleOrds = DocValues.unwrapSingleton(ords);
      if (singleOrds != null) {
        final GeoPoint point = new GeoPoint();
        final GeoPointValues values =
            new GeoPointValues() {
              @Override
              public GeoPoint get(int docID) {
                final int ord = singleOrds.getOrd(docID);
                if (ord >= 0) {
                  return point.reset(lat.get(ord), lon.get(ord));
                }
                return point.reset(Double.NaN, Double.NaN);
              }
            };
        return FieldData.singleton(values, DocValues.docsWithValue(singleOrds, maxDoc));
      } else {
        final GeoPoint point = new GeoPoint();
        return new MultiGeoPointValues() {

          @Override
          public GeoPoint valueAt(int index) {
            final long ord = ords.ordAt(index);
            if (ord >= 0) {
              return point.reset(lat.get(ord), lon.get(ord));
            }
            return point.reset(Double.NaN, Double.NaN);
          }

          @Override
          public void setDocument(int docId) {
            ords.setDocument(docId);
          }

          @Override
          public int count() {
            return ords.cardinality();
          }
        };
      }
    }
 @Override
 public Explanation explain(LeafReaderContext context, int doc) throws IOException {
   SortedDocValues values = DocValues.getSorted(context.reader(), joinField);
   if (values != null) {
     int segmentOrd = values.getOrd(doc);
     if (segmentOrd != -1) {
       BytesRef joinValue = values.lookupOrd(segmentOrd);
       return Explanation.match(
           queryNorm, "Score based on join value " + joinValue.utf8ToString());
     }
   }
   return Explanation.noMatch("Not a match");
 }
  private static SortedNumericDocValues withOrdinals(
      Ordinals ordinals, final LongValues values, int maxDoc) {
    final RandomAccessOrds ords = ordinals.ordinals();
    final SortedDocValues singleOrds = DocValues.unwrapSingleton(ords);
    if (singleOrds != null) {
      final NumericDocValues singleValues =
          new NumericDocValues() {
            @Override
            public long get(int docID) {
              final int ord = singleOrds.getOrd(docID);
              if (ord >= 0) {
                return values.get(singleOrds.getOrd(docID));
              } else {
                return 0;
              }
            }
          };
      return DocValues.singleton(singleValues, DocValues.docsWithValue(ords, maxDoc));
    } else {
      return new SortedNumericDocValues() {
        @Override
        public long valueAt(int index) {
          return values.get(ords.ordAt(index));
        }

        @Override
        public void setDocument(int doc) {
          ords.setDocument(doc);
        }

        @Override
        public int count() {
          return ords.cardinality();
        }
      };
    }
  }
 private static SortedNumericDocValues pagedSingles(
     final PackedLongValues values, final Bits docsWithValue) {
   return DocValues.singleton(
       new NumericDocValues() {
         // we need to wrap since NumericDocValues must return 0 when a doc has no value
         @Override
         public long get(int docID) {
           if (docsWithValue == null || docsWithValue.get(docID)) {
             return values.get(docID);
           } else {
             return 0;
           }
         }
       },
       docsWithValue);
 }
 private static SortedNumericDocValues singles(
     final NumericDocValues deltas, final long minValue) {
   final NumericDocValues values;
   if (minValue == 0) {
     values = deltas;
   } else {
     values =
         new NumericDocValues() {
           @Override
           public long get(int docID) {
             return minValue + deltas.get(docID);
           }
         };
   }
   return DocValues.singleton(values, null);
 }
    @Override
    public Scorer scorer(LeafReaderContext context) throws IOException {
      SortedDocValues values = DocValues.getSorted(context.reader(), joinField);
      if (values == null) {
        return null;
      }

      Scorer approximationScorer = approximationWeight.scorer(context);
      if (approximationScorer == null) {
        return null;
      } else if (globalOrds != null) {
        return new OrdinalMapScorer(
            this, collector, values, approximationScorer, globalOrds.getGlobalOrds(context.ord));
      } else {
        return new SegmentOrdinalScorer(this, collector, values, approximationScorer);
      }
    }
  @Override
  protected void doSetNextReader(LeafReaderContext context) throws IOException {
    index = DocValues.getSorted(context.reader(), groupField);

    // Clear ordSet and fill it with previous encountered groups that can occur in the current
    // segment.
    ordSet.clear();
    for (BytesRef countedGroup : groups) {
      if (countedGroup == null) {
        ordSet.put(-1);
      } else {
        int ord = index.lookupTerm(countedGroup);
        if (ord >= 0) {
          ordSet.put(ord);
        }
      }
    }
  }
    @Override
    public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
      final int base = context.docBase;
      final NumericDocValues values = DocValues.getNumeric(context.reader(), "sort_i");
      return new LeafCollector() {

        @Override
        public void setScorer(Scorer scorer) throws IOException {}

        public boolean acceptsDocsOutOfOrder() {
          return false;
        }

        public void collect(int doc) {
          list.add(new ScoreDoc(doc + base, (float) values.get(doc)));
        }
      };
    }
 @Override
 public Explanation explain(LeafReaderContext context, int doc) throws IOException {
   SortedDocValues values = DocValues.getSorted(context.reader(), joinField);
   if (values != null) {
     int segmentOrd = values.getOrd(doc);
     if (segmentOrd != -1) {
       final float score;
       if (globalOrds != null) {
         long globalOrd = globalOrds.getGlobalOrds(context.ord).get(segmentOrd);
         score = collector.score((int) globalOrd);
       } else {
         score = collector.score(segmentOrd);
       }
       BytesRef joinValue = values.lookupOrd(segmentOrd);
       return Explanation.match(score, "Score based on join value " + joinValue.utf8ToString());
     }
   }
   return Explanation.noMatch("Not a match");
 }
  @Override
  public AtomicNumericFieldData load(LeafReaderContext context) {
    try {
      final BinaryDocValues values = DocValues.getBinary(context.reader(), fieldNames.indexName());
      if (numericType.isFloatingPoint()) {
        return new AtomicDoubleFieldData(-1) {

          @Override
          public SortedNumericDoubleValues getDoubleValues() {
            switch (numericType) {
              case FLOAT:
                return new BinaryAsSortedNumericFloatValues(values);
              case DOUBLE:
                return new BinaryAsSortedNumericDoubleValues(values);
              default:
                throw new ElasticsearchIllegalArgumentException("" + numericType);
            }
          }

          @Override
          public Collection<Accountable> getChildResources() {
            return Collections.emptyList();
          }
        };
      } else {
        return new AtomicLongFieldData(0) {

          @Override
          public SortedNumericDocValues getLongValues() {
            return new BinaryAsSortedNumericDocValues(values);
          }

          @Override
          public Collection<Accountable> getChildResources() {
            return Collections.emptyList();
          }
        };
      }
    } catch (IOException e) {
      throw new ElasticsearchIllegalStateException("Cannot load doc values", e);
    }
  }
  @Override
  public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException {
    final int off = readerContext.docBase;
    final LeafReader r;
    Object o = context.get("searcher");
    if (o instanceof SolrIndexSearcher) {
      SolrIndexSearcher is = (SolrIndexSearcher) o;
      SchemaField sf = is.getSchema().getFieldOrNull(field);
      if (sf != null
          && sf.hasDocValues() == false
          && sf.multiValued() == false
          && sf.getType().getNumericType() != null) {
        // it's a single-valued numeric field: we must currently create insanity :(
        List<LeafReaderContext> leaves = is.getIndexReader().leaves();
        LeafReader insaneLeaves[] = new LeafReader[leaves.size()];
        int upto = 0;
        for (LeafReaderContext raw : leaves) {
          insaneLeaves[upto++] = Insanity.wrapInsanity(raw.reader(), field);
        }
        r = SlowCompositeReaderWrapper.wrap(new MultiReader(insaneLeaves));
      } else {
        // reuse ordinalmap
        r = ((SolrIndexSearcher) o).getLeafReader();
      }
    } else {
      IndexReader topReader = ReaderUtil.getTopLevelContext(readerContext).reader();
      r = SlowCompositeReaderWrapper.wrap(topReader);
    }
    // if it's e.g. tokenized/multivalued, emulate old behavior of single-valued fc
    final SortedDocValues sindex =
        SortedSetSelector.wrap(DocValues.getSortedSet(r, field), SortedSetSelector.Type.MIN);
    final int end = sindex.getValueCount();

    return new IntDocValues(this) {
      @Override
      public int intVal(int doc) {
        return (end - sindex.getOrd(doc + off) - 1);
      }
    };
  }
    @Override
    public Scorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException {
      SortedDocValues values = DocValues.getSorted(context.reader(), joinField);
      if (values == null) {
        return null;
      }

      Scorer approximationScorer = approximationWeight.scorer(context, acceptDocs);
      if (approximationScorer == null) {
        return null;
      }
      if (globalOrds != null) {
        return new OrdinalMapScorer(
            this,
            queryNorm,
            foundOrds,
            values,
            approximationScorer,
            globalOrds.getGlobalOrds(context.ord));
      }
      {
        return new SegmentOrdinalScorer(this, queryNorm, foundOrds, values, approximationScorer);
      }
    }
 public void setNextReader(LeafReaderContext context) throws IOException {
   this.vals = DocValues.getNumeric(context.reader(), this.field);
 }
    public DelegatingCollector getFilterCollector(IndexSearcher indexSearcher) {
      try {

        SolrIndexSearcher searcher = (SolrIndexSearcher) indexSearcher;

        SortedDocValues docValues = null;
        FunctionQuery funcQuery = null;
        docValues = DocValues.getSorted(searcher.getLeafReader(), this.field);

        FieldType fieldType = null;

        if (this.max != null) {
          if (this.max.indexOf("(") == -1) {
            fieldType = searcher.getSchema().getField(this.max).getType();
          } else {
            LocalSolrQueryRequest request = null;
            try {
              SolrParams params = new ModifiableSolrParams();
              request = new LocalSolrQueryRequest(searcher.getCore(), params);
              FunctionQParser functionQParser = new FunctionQParser(this.max, null, null, request);
              funcQuery = (FunctionQuery) functionQParser.parse();
            } catch (Exception e) {
              throw new IOException(e);
            } finally {
              request.close();
            }
          }
        }

        if (this.min != null) {
          if (this.min.indexOf("(") == -1) {
            fieldType = searcher.getSchema().getField(this.min).getType();
          } else {
            LocalSolrQueryRequest request = null;
            try {
              SolrParams params = new ModifiableSolrParams();
              request = new LocalSolrQueryRequest(searcher.getCore(), params);
              FunctionQParser functionQParser = new FunctionQParser(this.min, null, null, request);
              funcQuery = (FunctionQuery) functionQParser.parse();
            } catch (Exception e) {
              throw new IOException(e);
            } finally {
              request.close();
            }
          }
        }

        int maxDoc = searcher.maxDoc();
        int leafCount = searcher.getTopReaderContext().leaves().size();

        // Deal with boosted docs.
        // We have to deal with it here rather then the constructor because
        // because the QueryElevationComponent runs after the Queries are constructed.

        IntIntOpenHashMap boostDocs = null;
        Map context = null;
        SolrRequestInfo info = SolrRequestInfo.getRequestInfo();
        if (info != null) {
          context = info.getReq().getContext();
        }

        if (this.boosted == null && context != null) {
          this.boosted =
              (Map<BytesRef, Integer>) context.get(QueryElevationComponent.BOOSTED_PRIORITY);
        }

        boostDocs = getBoostDocs(searcher, this.boosted, context);

        if (this.min != null || this.max != null) {

          return new CollapsingFieldValueCollector(
              maxDoc,
              leafCount,
              docValues,
              this.nullPolicy,
              max != null ? this.max : this.min,
              max != null,
              this.needsScores,
              fieldType,
              boostDocs,
              funcQuery,
              searcher);
        } else {
          return new CollapsingScoreCollector(
              maxDoc, leafCount, docValues, this.nullPolicy, boostDocs);
        }
      } catch (Exception e) {
        throw new RuntimeException(e);
      }
    }
  @Override
  public NumericDocValues getNorms(FieldInfo field) throws IOException {
    final NormsEntry entry = norms.get(field.number);
    if (entry.docsWithFieldOffset == -2) {
      // empty
      return DocValues.emptyNumeric();
    } else if (entry.docsWithFieldOffset == -1) {
      // dense
      final LongValues normValues = getNormValues(entry);
      return new NumericDocValues() {

        int doc = -1;

        @Override
        public long longValue() throws IOException {
          return normValues.get(doc);
        }

        @Override
        public int docID() {
          return doc;
        }

        @Override
        public int nextDoc() throws IOException {
          return advance(doc + 1);
        }

        @Override
        public int advance(int target) throws IOException {
          if (target >= maxDoc) {
            return doc = NO_MORE_DOCS;
          }
          return doc = target;
        }

        @Override
        public long cost() {
          return maxDoc;
        }
      };
    } else {
      // sparse
      final LongValues normValues = getNormValues(entry);
      final SparseDISI disi;
      synchronized (data) {
        disi = new SparseDISI(maxDoc, data, entry.docsWithFieldOffset, entry.numDocsWithField);
      }
      return new NumericDocValues() {

        @Override
        public int advance(int target) throws IOException {
          return disi.advance(target);
        }

        @Override
        public int nextDoc() throws IOException {
          return disi.nextDoc();
        }

        @Override
        public int docID() {
          return disi.docID();
        }

        @Override
        public long cost() {
          return entry.numDocsWithField;
        }

        @Override
        public long longValue() throws IOException {
          return normValues.get(disi.index());
        }
      };
    }
  }
  public static NamedList<Integer> getCounts(
      SolrIndexSearcher searcher,
      DocSet docs,
      String fieldName,
      int offset,
      int limit,
      int mincount,
      boolean missing,
      String sort,
      String prefix,
      String contains,
      boolean ignoreCase)
      throws IOException {
    SchemaField schemaField = searcher.getSchema().getField(fieldName);
    FieldType ft = schemaField.getType();
    NamedList<Integer> res = new NamedList<>();

    // TODO: remove multiValuedFieldCache(), check dv type / uninversion type?
    final boolean multiValued = schemaField.multiValued() || ft.multiValuedFieldCache();

    final SortedSetDocValues si; // for term lookups only
    OrdinalMap ordinalMap = null; // for mapping per-segment ords to global ones
    if (multiValued) {
      si = searcher.getLeafReader().getSortedSetDocValues(fieldName);
      if (si instanceof MultiSortedSetDocValues) {
        ordinalMap = ((MultiSortedSetDocValues) si).mapping;
      }
    } else {
      SortedDocValues single = searcher.getLeafReader().getSortedDocValues(fieldName);
      si = single == null ? null : DocValues.singleton(single);
      if (single instanceof MultiSortedDocValues) {
        ordinalMap = ((MultiSortedDocValues) single).mapping;
      }
    }
    if (si == null) {
      return finalize(res, searcher, schemaField, docs, -1, missing);
    }
    if (si.getValueCount() >= Integer.MAX_VALUE) {
      throw new UnsupportedOperationException(
          "Currently this faceting method is limited to " + Integer.MAX_VALUE + " unique terms");
    }

    final BytesRefBuilder prefixRef;
    if (prefix == null) {
      prefixRef = null;
    } else if (prefix.length() == 0) {
      prefix = null;
      prefixRef = null;
    } else {
      prefixRef = new BytesRefBuilder();
      prefixRef.copyChars(prefix);
    }

    int startTermIndex, endTermIndex;
    if (prefix != null) {
      startTermIndex = (int) si.lookupTerm(prefixRef.get());
      if (startTermIndex < 0) startTermIndex = -startTermIndex - 1;
      prefixRef.append(UnicodeUtil.BIG_TERM);
      endTermIndex = (int) si.lookupTerm(prefixRef.get());
      assert endTermIndex < 0;
      endTermIndex = -endTermIndex - 1;
    } else {
      startTermIndex = -1;
      endTermIndex = (int) si.getValueCount();
    }

    final int nTerms = endTermIndex - startTermIndex;
    int missingCount = -1;
    final CharsRefBuilder charsRef = new CharsRefBuilder();
    if (nTerms > 0 && docs.size() >= mincount) {

      // count collection array only needs to be as big as the number of terms we are
      // going to collect counts for.
      final int[] counts = new int[nTerms];

      Filter filter = docs.getTopFilter();
      List<LeafReaderContext> leaves = searcher.getTopReaderContext().leaves();
      for (int subIndex = 0; subIndex < leaves.size(); subIndex++) {
        LeafReaderContext leaf = leaves.get(subIndex);
        DocIdSet dis =
            filter.getDocIdSet(leaf, null); // solr docsets already exclude any deleted docs
        DocIdSetIterator disi = null;
        if (dis != null) {
          disi = dis.iterator();
        }
        if (disi != null) {
          if (multiValued) {
            SortedSetDocValues sub = leaf.reader().getSortedSetDocValues(fieldName);
            if (sub == null) {
              sub = DocValues.emptySortedSet();
            }
            final SortedDocValues singleton = DocValues.unwrapSingleton(sub);
            if (singleton != null) {
              // some codecs may optimize SORTED_SET storage for single-valued fields
              accumSingle(counts, startTermIndex, singleton, disi, subIndex, ordinalMap);
            } else {
              accumMulti(counts, startTermIndex, sub, disi, subIndex, ordinalMap);
            }
          } else {
            SortedDocValues sub = leaf.reader().getSortedDocValues(fieldName);
            if (sub == null) {
              sub = DocValues.emptySorted();
            }
            accumSingle(counts, startTermIndex, sub, disi, subIndex, ordinalMap);
          }
        }
      }

      if (startTermIndex == -1) {
        missingCount = counts[0];
      }

      // IDEA: we could also maintain a count of "other"... everything that fell outside
      // of the top 'N'

      int off = offset;
      int lim = limit >= 0 ? limit : Integer.MAX_VALUE;

      if (sort.equals(FacetParams.FACET_SORT_COUNT)
          || sort.equals(FacetParams.FACET_SORT_COUNT_LEGACY)) {
        int maxsize = limit > 0 ? offset + limit : Integer.MAX_VALUE - 1;
        maxsize = Math.min(maxsize, nTerms);
        LongPriorityQueue queue =
            new LongPriorityQueue(Math.min(maxsize, 1000), maxsize, Long.MIN_VALUE);

        int min = mincount - 1; // the smallest value in the top 'N' values
        for (int i = (startTermIndex == -1) ? 1 : 0; i < nTerms; i++) {
          int c = counts[i];
          if (contains != null) {
            final BytesRef term = si.lookupOrd(startTermIndex + i);
            if (!SimpleFacets.contains(term.utf8ToString(), contains, ignoreCase)) {
              continue;
            }
          }
          if (c > min) {
            // NOTE: we use c>min rather than c>=min as an optimization because we are going in
            // index order, so we already know that the keys are ordered.  This can be very
            // important if a lot of the counts are repeated (like zero counts would be).

            // smaller term numbers sort higher, so subtract the term number instead
            long pair = (((long) c) << 32) + (Integer.MAX_VALUE - i);
            boolean displaced = queue.insert(pair);
            if (displaced) min = (int) (queue.top() >>> 32);
          }
        }

        // if we are deep paging, we don't have to order the highest "offset" counts.
        int collectCount = Math.max(0, queue.size() - off);
        assert collectCount <= lim;

        // the start and end indexes of our list "sorted" (starting with the highest value)
        int sortedIdxStart = queue.size() - (collectCount - 1);
        int sortedIdxEnd = queue.size() + 1;
        final long[] sorted = queue.sort(collectCount);

        for (int i = sortedIdxStart; i < sortedIdxEnd; i++) {
          long pair = sorted[i];
          int c = (int) (pair >>> 32);
          int tnum = Integer.MAX_VALUE - (int) pair;
          final BytesRef term = si.lookupOrd(startTermIndex + tnum);
          ft.indexedToReadable(term, charsRef);
          res.add(charsRef.toString(), c);
        }

      } else {
        // add results in index order
        int i = (startTermIndex == -1) ? 1 : 0;
        if (mincount <= 0 && contains == null) {
          // if mincount<=0 and we're not examining the values for contains, then
          // we won't discard any terms and we know exactly where to start.
          i += off;
          off = 0;
        }

        for (; i < nTerms; i++) {
          int c = counts[i];
          if (c < mincount) continue;
          BytesRef term = null;
          if (contains != null) {
            term = si.lookupOrd(startTermIndex + i);
            if (!SimpleFacets.contains(term.utf8ToString(), contains, ignoreCase)) {
              continue;
            }
          }
          if (--off >= 0) continue;
          if (--lim < 0) break;
          if (term == null) {
            term = si.lookupOrd(startTermIndex + i);
          }
          ft.indexedToReadable(term, charsRef);
          res.add(charsRef.toString(), c);
        }
      }
    }

    return finalize(res, searcher, schemaField, docs, missingCount, missing);
  }
  @Override
  public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException {
    final FSTEntry entry = fsts.get(field.name);
    if (entry.numOrds == 0) {
      return DocValues.emptySortedSet(); // empty FST!
    }
    FST<Long> instance;
    synchronized (this) {
      instance = fstInstances.get(field.name);
      if (instance == null) {
        data.seek(entry.offset);
        instance = new FST<>(data, PositiveIntOutputs.getSingleton());
        if (!merging) {
          ramBytesUsed.addAndGet(instance.ramBytesUsed());
          fstInstances.put(field.name, instance);
        }
      }
    }
    final BinaryDocValues docToOrds = getBinary(field);
    final FST<Long> fst = instance;

    // per-thread resources
    final BytesReader in = fst.getBytesReader();
    final Arc<Long> firstArc = new Arc<>();
    final Arc<Long> scratchArc = new Arc<>();
    final IntsRefBuilder scratchInts = new IntsRefBuilder();
    final BytesRefFSTEnum<Long> fstEnum = new BytesRefFSTEnum<>(fst);
    final ByteArrayDataInput input = new ByteArrayDataInput();
    return new SortedSetDocValues() {
      final BytesRefBuilder term = new BytesRefBuilder();
      BytesRef ordsRef;
      long currentOrd;

      @Override
      public long nextOrd() {
        if (input.eof()) {
          return NO_MORE_ORDS;
        } else {
          currentOrd += input.readVLong();
          return currentOrd;
        }
      }

      @Override
      public void setDocument(int docID) {
        ordsRef = docToOrds.get(docID);
        input.reset(ordsRef.bytes, ordsRef.offset, ordsRef.length);
        currentOrd = 0;
      }

      @Override
      public BytesRef lookupOrd(long ord) {
        try {
          in.setPosition(0);
          fst.getFirstArc(firstArc);
          IntsRef output = Util.getByOutput(fst, ord, in, firstArc, scratchArc, scratchInts);
          term.grow(output.length);
          term.clear();
          return Util.toBytesRef(output, term);
        } catch (IOException bogus) {
          throw new RuntimeException(bogus);
        }
      }

      @Override
      public long lookupTerm(BytesRef key) {
        try {
          InputOutput<Long> o = fstEnum.seekCeil(key);
          if (o == null) {
            return -getValueCount() - 1;
          } else if (o.input.equals(key)) {
            return o.output.intValue();
          } else {
            return -o.output - 1;
          }
        } catch (IOException bogus) {
          throw new RuntimeException(bogus);
        }
      }

      @Override
      public long getValueCount() {
        return entry.numOrds;
      }

      @Override
      public TermsEnum termsEnum() {
        return new FSTTermsEnum(fst);
      }
    };
  }
Example #26
0
 @Override
 protected boolean tryBulkMerge(DocValues docValues) {
   // only bulk merge if value type is the same otherwise size differs
   return super.tryBulkMerge(docValues) && docValues.getType() == template.getType();
 }
    @Override
    protected void doSetNextReader(LeafReaderContext context) throws IOException {
      if (segmentFacetCounts != null) {
        segmentResults.add(createSegmentResult());
      }

      groupFieldTermsIndex = DocValues.getSorted(context.reader(), groupField);
      facetFieldDocTermOrds = DocValues.getSortedSet(context.reader(), facetField);
      facetFieldNumTerms = (int) facetFieldDocTermOrds.getValueCount();
      if (facetFieldNumTerms == 0) {
        facetOrdTermsEnum = null;
      } else {
        facetOrdTermsEnum = facetFieldDocTermOrds.termsEnum();
      }
      // [facetFieldNumTerms() + 1] for all possible facet values and docs not containing facet
      // field
      segmentFacetCounts = new int[facetFieldNumTerms + 1];
      segmentTotalCount = 0;

      segmentGroupedFacetHits.clear();
      for (GroupedFacetHit groupedFacetHit : groupedFacetHits) {
        int groupOrd =
            groupedFacetHit.groupValue == null
                ? -1
                : groupFieldTermsIndex.lookupTerm(groupedFacetHit.groupValue);
        if (groupedFacetHit.groupValue != null && groupOrd < 0) {
          continue;
        }

        int facetOrd;
        if (groupedFacetHit.facetValue != null) {
          if (facetOrdTermsEnum == null
              || !facetOrdTermsEnum.seekExact(groupedFacetHit.facetValue)) {
            continue;
          }
          facetOrd = (int) facetOrdTermsEnum.ord();
        } else {
          facetOrd = facetFieldNumTerms;
        }

        // (facetFieldDocTermOrds.numTerms() + 1) for all possible facet values and docs not
        // containing facet field
        int segmentGroupedFacetsIndex = groupOrd * (facetFieldNumTerms + 1) + facetOrd;
        segmentGroupedFacetHits.put(segmentGroupedFacetsIndex);
      }

      if (facetPrefix != null) {
        TermsEnum.SeekStatus seekStatus;
        if (facetOrdTermsEnum != null) {
          seekStatus = facetOrdTermsEnum.seekCeil(facetPrefix);
        } else {
          seekStatus = TermsEnum.SeekStatus.END;
        }

        if (seekStatus != TermsEnum.SeekStatus.END) {
          startFacetOrd = (int) facetOrdTermsEnum.ord();
        } else {
          startFacetOrd = 0;
          endFacetOrd = 0;
          return;
        }

        BytesRefBuilder facetEndPrefix = new BytesRefBuilder();
        facetEndPrefix.append(facetPrefix);
        facetEndPrefix.append(UnicodeUtil.BIG_TERM);
        seekStatus = facetOrdTermsEnum.seekCeil(facetEndPrefix.get());
        if (seekStatus != TermsEnum.SeekStatus.END) {
          endFacetOrd = (int) facetOrdTermsEnum.ord();
        } else {
          endFacetOrd = facetFieldNumTerms; // Don't include null...
        }
      } else {
        startFacetOrd = 0;
        endFacetOrd = facetFieldNumTerms + 1;
      }
    }