/**
 * Stores the collapse settings and pre-registers any boosted (elevated) documents.
 *
 * <p>When {@code boostDocs} is non-null, every key of that map is marked in the collapsed set,
 * and the ordinals of those documents that have one (ord &gt; -1) are gathered, de-duplicated
 * and kept in ascending order in {@code boostOrds}.
 *
 * @param maxDoc size used for the collapsed-document bit set
 * @param field name of the field, stored as-is
 * @param nullPolicy null-handling policy, stored as-is
 * @param max stored as-is
 * @param needsScores stored as-is
 * @param boostDocs map whose keys are the boosted doc ids; may be {@code null}
 * @param values doc values used to look up the ordinal of each boosted doc; only read when
 *     {@code boostDocs} is non-null
 */
public FieldValueCollapse(
     int maxDoc,
     String field,
     int nullPolicy,
     boolean max,
     boolean needsScores,
     IntIntOpenHashMap boostDocs,
     SortedDocValues values) {
   this.collapsedSet = new FixedBitSet(maxDoc);
   this.field = field;
   this.max = max;
   this.nullPolicy = nullPolicy;
   this.needsScores = needsScores;
   this.boostDocs = boostDocs;

   if (boostDocs != null) {
     IntOpenHashSet elevatedOrds = new IntOpenHashSet();
     for (IntIntCursor entry : boostDocs) {
       int elevatedDoc = entry.key;
       // Boosted docs are always part of the collapsed result.
       this.collapsedSet.set(elevatedDoc);
       int elevatedOrd = values.getOrd(elevatedDoc);
       if (elevatedOrd >= 0) {
         elevatedOrds.add(elevatedOrd);
       }
     }
     int[] sortedOrds = elevatedOrds.toArray();
     Arrays.sort(sortedOrds);
     this.boostOrds = sortedOrds;
   }
 }
    /**
     * Collects one hit, tracking the best-scoring document per ordinal.
     *
     * <p>Documents with an ordinal replace the current group entry when their score is strictly
     * higher. Documents without an ordinal are ignored if already in the collapsed set, and
     * otherwise handled according to the null policy: either tracked as the single best null
     * document, or added to the collapsed set with their score recorded.
     *
     * @param docId segment-relative doc id; {@code docBase} is added to obtain the global id
     */
    @Override
    public void collect(int docId) throws IOException {
      final int globalDoc = this.docBase + docId;
      final int ord = values.getOrd(globalDoc);

      if (ord >= 0) {
        final float candidate = scorer.score();
        if (candidate > scores[ord]) {
          ords[ord] = globalDoc;
          scores[ord] = candidate;
        }
        return;
      }

      if (this.collapsedSet.get(globalDoc)) {
        // The doc is elevated so score does not matter
        // We just want to be sure it doesn't fall into the null policy
        return;
      }

      if (nullPolicy == CollapsingPostFilter.NULL_POLICY_COLLAPSE) {
        final float candidate = scorer.score();
        if (candidate > nullScore) {
          nullScore = candidate;
          nullDoc = globalDoc;
        }
      } else if (nullPolicy == CollapsingPostFilter.NULL_POLICY_EXPAND) {
        collapsedSet.set(globalDoc);
        nullScores.add(scorer.score());
      }
    }
  /**
   * Builds single-valued ordinals for one million docs, requesting a new ordinal every
   * {@code numDocs / 4} docs, and checks that the built ordinals report the same ordinal per
   * doc as the control map recorded while building.
   */
  @Test
  public void testSvValues() throws IOException {
    final int docCount = 1000000;
    final int docsPerOrdinal = docCount / 4;
    OrdinalsBuilder ordinalsBuilder = new OrdinalsBuilder(docCount);
    Map<Integer, Long> expectedOrdByDoc = new HashMap<>();

    long currentOrd = ordinalsBuilder.currentOrdinal();
    for (int docId = 0; docId < docCount; docId++) {
      boolean startsNewRun = docId % docsPerOrdinal == 0;
      if (startsNewRun) {
        currentOrd = ordinalsBuilder.nextOrdinal();
      }
      expectedOrdByDoc.put(docId, currentOrd);
      ordinalsBuilder.addDoc(docId);
    }

    Ordinals built = ordinalsBuilder.build(ImmutableSettings.EMPTY);
    assertThat(built, instanceOf(SinglePackedOrdinals.class));

    RandomAccessOrds randomAccessOrds = built.ordinals();
    final SortedDocValues singleOrds = DocValues.unwrapSingleton(randomAccessOrds);
    assertNotNull(singleOrds);

    for (Map.Entry<Integer, Long> expected : expectedOrdByDoc.entrySet()) {
      Integer docId = expected.getKey();
      Long expectedOrd = expected.getValue();
      assertThat(expectedOrd, equalTo((long) singleOrds.getOrd(docId)));
    }
  }
 /**
  * Marks the ordinal of {@code doc} in {@code parentOrds}. Does nothing when no doc values are
  * available or the doc has a negative ordinal.
  */
 @Override
 public void collect(int doc) throws IOException {
   if (values == null) {
     return;
   }
   final int ord = values.getOrd(doc);
   // TODO: oversize the long bitset and remove the branch
   if (ord < 0) {
     return;
   }
   parentOrds.set(ord);
 }
    /**
     * Replays every document of the collapsed set into the delegate collector.
     *
     * <p>Returns immediately when there are no leaf contexts. Otherwise the collapsed set is
     * walked via its iterator; each global doc id is mapped to the leaf that contains it (by
     * comparing against the leaves' {@code docBase} values) and collected with its
     * leaf-relative id. When scores are needed, the shared dummy scorer is loaded with the group
     * score, {@code 0} for boosted docs, or the null-policy score before the doc is collected.
     * Finally, {@code finish()} is forwarded if the delegate is itself a
     * {@link DelegatingCollector}.
     */
    public void finish() throws IOException {
      if (contexts.length == 0) {
        return;
      }

      int leafIndex = 0;
      int leafDocBase = 0;
      int nextLeafDocBase = contexts.length > 1 ? contexts[1].docBase : maxDoc;
      leafDelegate = delegate.getLeafCollector(contexts[leafIndex]);
      final DummyScorer replayScorer = new DummyScorer();
      leafDelegate.setScorer(replayScorer);

      final DocIdSetIterator collapsedDocs =
          new BitSetIterator(fieldValueCollapse.getCollapsedSet(), 0); // cost is not useful here
      final float[] groupScores = fieldValueCollapse.getScores();
      final FloatArrayList expandedNullScores = fieldValueCollapse.getNullScores();
      final float collapsedNullScore = fieldValueCollapse.getNullScore();
      int nextNullScore = 0;

      for (int globalDoc = collapsedDocs.nextDoc();
          globalDoc != DocIdSetIterator.NO_MORE_DOCS;
          globalDoc = collapsedDocs.nextDoc()) {

        if (this.needsScores) {
          final int ord = values.getOrd(globalDoc);
          if (ord >= 0) {
            replayScorer.score = groupScores[ord];
          } else if (boostDocs != null && boostDocs.containsKey(globalDoc)) {
            // It's an elevated doc so no score is needed
            replayScorer.score = 0F;
          } else if (nullPolicy == CollapsingPostFilter.NULL_POLICY_COLLAPSE) {
            replayScorer.score = collapsedNullScore;
          } else if (nullPolicy == CollapsingPostFilter.NULL_POLICY_EXPAND) {
            // Null scores are consumed in the order the docs are replayed.
            replayScorer.score = expandedNullScores.get(nextNullScore++);
          }
        }

        // Advance to the leaf containing this doc; a leaf may be skipped entirely.
        while (globalDoc >= nextLeafDocBase) {
          leafIndex++;
          leafDocBase = contexts[leafIndex].docBase;
          nextLeafDocBase =
              leafIndex + 1 < contexts.length ? contexts[leafIndex + 1].docBase : maxDoc;
          leafDelegate = delegate.getLeafCollector(contexts[leafIndex]);
          leafDelegate.setScorer(replayScorer);
        }

        final int leafDoc = globalDoc - leafDocBase;
        replayScorer.docId = leafDoc;
        leafDelegate.collect(leafDoc);
      }

      if (delegate instanceof DelegatingCollector) {
        ((DelegatingCollector) delegate).finish();
      }
    }
 /**
  * Explains a hit by its join value: a doc that has an ordinal in the join field matches with
  * {@code queryNorm} as its value; any other doc is reported as "Not a match".
  */
 @Override
 public Explanation explain(LeafReaderContext context, int doc) throws IOException {
   final SortedDocValues joinValues = DocValues.getSorted(context.reader(), joinField);
   final int ord = joinValues == null ? -1 : joinValues.getOrd(doc);
   if (ord == -1) {
     return Explanation.noMatch("Not a match");
   }
   final BytesRef joinTerm = joinValues.lookupOrd(ord);
   return Explanation.match(queryNorm, "Score based on join value " + joinTerm.utf8ToString());
 }
    /**
     * Accepts {@code doc} when its ordinal is set in {@code parentOrds}, decrementing the
     * remaining-match counter on each accepted doc.
     *
     * @throws CollectionTerminatedException if the remaining-match counter is already zero
     */
    @Override
    protected boolean match(int doc) {
      if (parentWeight.remaining == 0) {
        throw new CollectionTerminatedException();
      }

      final long ord = ordinals.getOrd(doc);
      if (ord < 0 || !parentOrds.get(ord)) {
        return false;
      }
      parentWeight.remaining--;
      return true;
    }
Example #8
0
 /**
  * Counts, for every doc produced by {@code disi}, the doc's term ordinal into {@code counts}.
  * Specialized since the single-valued case is different.
  *
  * <p>Non-negative segment ordinals are translated through {@code map} when one is given. The
  * resulting ordinal is shifted by {@code startTermIndex}; ordinals that fall outside
  * {@code counts} after the shift are ignored.
  *
  * @param counts accumulator, indexed by {@code ordinal - startTermIndex}
  * @param startTermIndex ordinal that maps to slot 0 of {@code counts}
  * @param si per-document ordinals
  * @param disi documents to count
  * @param subIndex segment index passed to {@code map}
  * @param map segment-to-global ordinal mapping, or {@code null} to use ordinals as-is
  */
 static void accumSingle(
     int counts[],
     int startTermIndex,
     SortedDocValues si,
     DocIdSetIterator disi,
     int subIndex,
     OrdinalMap map)
     throws IOException {
   final boolean toGlobal = map != null;
   for (int doc = disi.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = disi.nextDoc()) {
     int ord = si.getOrd(doc);
     if (toGlobal && ord >= 0) {
       ord = (int) map.getGlobalOrd(subIndex, ord);
     }
     final int slot = ord - startTermIndex;
     if (slot < 0 || slot >= counts.length) {
       continue;
     }
     counts[slot]++;
   }
 }
 /**
  * Explains a hit by its join value. A doc with an ordinal in the join field matches with the
  * score the collector holds for that ordinal (mapped to a global ordinal first when
  * {@code globalOrds} is set); any other doc is reported as "Not a match".
  */
 @Override
 public Explanation explain(LeafReaderContext context, int doc) throws IOException {
   final SortedDocValues joinValues = DocValues.getSorted(context.reader(), joinField);
   if (joinValues == null) {
     return Explanation.noMatch("Not a match");
   }

   final int ord = joinValues.getOrd(doc);
   if (ord == -1) {
     return Explanation.noMatch("Not a match");
   }

   // The collector is keyed by global ordinals whenever a global mapping exists.
   final int scoreOrd =
       globalOrds == null ? ord : (int) globalOrds.getGlobalOrds(context.ord).get(ord);
   final float score = collector.score(scoreOrd);
   final BytesRef joinTerm = joinValues.lookupOrd(ord);
   return Explanation.match(score, "Score based on join value " + joinTerm.utf8ToString());
 }
  /**
   * "Typical" single-valued faceting: not too many unique values, no prefixing. Counts are
   * taken in segment-ordinal space and mapped to global ordinals as a separate step.
   *
   * <p>Slot {@code 1 + ord} is incremented for each doc, so slot 0 receives docs whose ordinal
   * is -1. Without a {@code map}, counting happens directly in {@code counts}; with one, a
   * scratch array of {@code 1 + si.getValueCount()} slots is filled and then handed to
   * {@code migrateGlobal}.
   */
  static void accumSingleSeg(
      int counts[], SortedDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map)
      throws IOException {
    final boolean needsMigration = map != null;

    // First count in seg-ord space:
    final int[] segCounts = needsMigration ? new int[1 + si.getValueCount()] : counts;

    for (int doc = disi.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = disi.nextDoc()) {
      final int slot = si.getOrd(doc) + 1;
      segCounts[slot]++;
    }

    // migrate to global ords (if necessary)
    if (needsMigration) {
      migrateGlobal(counts, segCounts, subIndex, map);
    }
  }
 /**
  * Sets up the per-ordinal bookkeeping for score-based collapsing.
  *
  * <p>Allocates one slot per distinct value for the group's doc id (initialised to -1) and
  * score (initialised to {@code -Float.MAX_VALUE}). When {@code boostDocs} is non-null, every
  * key of that map is marked in the collapsed set up front, and the ordinals of those docs that
  * have one are kept, de-duplicated and sorted, in {@code boostOrds}. A list for null-doc scores
  * is only created for the EXPAND null policy.
  *
  * @param maxDoc size used for the collapsed-document bit set
  * @param segments number of leaf contexts to reserve room for
  * @param values doc values providing ordinals and the value count
  * @param nullPolicy null-handling policy
  * @param boostDocs map whose keys are the boosted doc ids; may be {@code null}
  */
 public CollapsingScoreCollector(
     int maxDoc,
     int segments,
     SortedDocValues values,
     int nullPolicy,
     IntIntOpenHashMap boostDocs) {
   this.maxDoc = maxDoc;
   this.contexts = new LeafReaderContext[segments];
   this.collapsedSet = new FixedBitSet(maxDoc);
   this.boostDocs = boostDocs;
   this.values = values;
   this.nullPolicy = nullPolicy;

   if (boostDocs != null) {
     // Set the elevated docs now.
     IntOpenHashSet elevatedOrds = new IntOpenHashSet();
     for (IntIntCursor entry : boostDocs) {
       int elevatedDoc = entry.key;
       this.collapsedSet.set(elevatedDoc);
       int elevatedOrd = values.getOrd(elevatedDoc);
       if (elevatedOrd >= 0) {
         elevatedOrds.add(elevatedOrd);
       }
     }
     boostOrds = elevatedOrds.toArray();
     Arrays.sort(boostOrds);
   }

   final int groupCount = values.getValueCount();
   this.ords = new int[groupCount];
   this.scores = new float[groupCount];
   Arrays.fill(this.ords, -1);
   Arrays.fill(this.scores, -Float.MAX_VALUE);

   if (nullPolicy == CollapsingPostFilter.NULL_POLICY_EXPAND) {
     nullScores = new FloatArrayList();
   }
 }
  /**
   * Merges the sorted docvalues from <code>toMerge</code>.
   *
   * <p>The default implementation calls {@link #addSortedField}, passing an Iterable that merges
   * ordinals and values and filters deleted documents.
   *
   * <p>The merge runs in three steps: collect the terms still referenced by live documents in
   * each sub, build an {@link OrdinalMap} over those terms, and then feed {@link
   * #addSortedField} with two lazy views: global ordinal to term, and live document to global
   * ordinal.
   *
   * @param fieldInfo the field being merged, passed through to {@link #addSortedField}
   * @param mergeState merge state providing the readers and the abort checker
   * @param toMerge doc values of each sub; NOTE(review): indexed in parallel with {@code
   *     mergeState.readers}, so both are presumably in the same order and of the same size —
   *     confirm against the caller
   * @throws IOException propagated from the underlying doc values or {@link #addSortedField}
   */
  public void mergeSortedField(
      FieldInfo fieldInfo, final MergeState mergeState, List<SortedDocValues> toMerge)
      throws IOException {

    // Report the merged segment's doc count as work done to the abort checker.
    mergeState.checkAbort.work(mergeState.segmentInfo.getDocCount());

    final AtomicReader readers[] = mergeState.readers.toArray(new AtomicReader[toMerge.size()]);
    final SortedDocValues dvs[] = toMerge.toArray(new SortedDocValues[toMerge.size()]);

    // step 1: iterate thru each sub and mark terms still in use
    TermsEnum liveTerms[] = new TermsEnum[dvs.length];
    long[] weights = new long[liveTerms.length];
    for (int sub = 0; sub < liveTerms.length; sub++) {
      AtomicReader reader = readers[sub];
      SortedDocValues dv = dvs[sub];
      Bits liveDocs = reader.getLiveDocs();
      if (liveDocs == null) {
        // No live-docs bits for this sub: use all of its terms, weighted by the value count.
        liveTerms[sub] = dv.termsEnum();
        weights[sub] = dv.getValueCount();
      } else {
        // Mark only the ordinals referenced by live docs; docs with a negative ord are skipped.
        LongBitSet bitset = new LongBitSet(dv.getValueCount());
        for (int i = 0; i < reader.maxDoc(); i++) {
          if (liveDocs.get(i)) {
            int ord = dv.getOrd(i);
            if (ord >= 0) {
              bitset.set(ord);
            }
          }
        }
        liveTerms[sub] = new BitsFilteredTermsEnum(dv.termsEnum(), bitset);
        weights[sub] = bitset.cardinality();
      }
    }

    // step 2: create ordinal map (this conceptually does the "merging")
    final OrdinalMap map = OrdinalMap.build(this, liveTerms, weights, PackedInts.COMPACT);

    // step 3: add field
    addSortedField(
        fieldInfo,
        // ord -> value
        new Iterable<BytesRef>() {
          @Override
          public Iterator<BytesRef> iterator() {
            return new Iterator<BytesRef>() {
              // Next global ordinal to emit; runs from 0 up to map.getValueCount().
              int currentOrd;

              @Override
              public boolean hasNext() {
                return currentOrd < map.getValueCount();
              }

              @Override
              public BytesRef next() {
                if (!hasNext()) {
                  throw new NoSuchElementException();
                }
                // Resolve the global ord through the segment/ord pair the map reports for it.
                int segmentNumber = map.getFirstSegmentNumber(currentOrd);
                int segmentOrd = (int) map.getFirstSegmentOrd(currentOrd);
                final BytesRef term = dvs[segmentNumber].lookupOrd(segmentOrd);
                currentOrd++;
                return term;
              }

              @Override
              public void remove() {
                throw new UnsupportedOperationException();
              }
            };
          }
        },
        // doc -> ord
        new Iterable<Number>() {
          @Override
          public Iterator<Number> iterator() {
            return new Iterator<Number>() {
              // Index of the reader currently being walked; -1 before the first reader.
              int readerUpto = -1;
              // Next doc id to examine within the current reader.
              int docIDUpto;
              // Value computed by setNext() and handed out by next().
              int nextValue;
              AtomicReader currentReader;
              Bits currentLiveDocs;
              // Segment-ord to global-ord mapping of the current reader.
              LongValues currentMap;
              // True while nextValue holds a value not yet returned by next().
              boolean nextIsSet;

              @Override
              public boolean hasNext() {
                return nextIsSet || setNext();
              }

              @Override
              public void remove() {
                throw new UnsupportedOperationException();
              }

              @Override
              public Number next() {
                if (!hasNext()) {
                  throw new NoSuchElementException();
                }
                assert nextIsSet;
                nextIsSet = false;
                // TODO make a mutable number
                return nextValue;
              }

              /**
               * Advances to the next live document across all readers and stores its global
               * ordinal (or -1 when the doc has no value) in {@code nextValue}.
               *
               * @return false once all readers are exhausted
               */
              private boolean setNext() {
                while (true) {
                  if (readerUpto == readers.length) {
                    return false;
                  }

                  // Move on to the next reader at the start and whenever the current one is done.
                  if (currentReader == null || docIDUpto == currentReader.maxDoc()) {
                    readerUpto++;
                    if (readerUpto < readers.length) {
                      currentReader = readers[readerUpto];
                      currentLiveDocs = currentReader.getLiveDocs();
                      currentMap = map.getGlobalOrds(readerUpto);
                    }
                    docIDUpto = 0;
                    continue;
                  }

                  if (currentLiveDocs == null || currentLiveDocs.get(docIDUpto)) {
                    nextIsSet = true;
                    int segOrd = dvs[readerUpto].getOrd(docIDUpto);
                    // -1 (no value) is passed through unchanged rather than mapped.
                    nextValue = segOrd == -1 ? -1 : (int) currentMap.get(segOrd);
                    docIDUpto++;
                    return true;
                  }

                  // Deleted doc: skip it.
                  docIDUpto++;
                }
              }
            };
          }
        });
  }
 /**
  * Looks up the ordinal of the hit and hands it to {@code fieldValueCollapse}, together with
  * both the segment-relative and the global doc id.
  *
  * @param docId segment-relative doc id; {@code docBase} is added to obtain the global id
  */
 public void collect(int docId) throws IOException {
   final int globalDocId = this.docBase + docId;
   final int docOrd = values.getOrd(globalDocId);
   fieldValueCollapse.collapse(docOrd, docId, globalDocId);
 }
    /**
     * Finalises the collapsed set and replays it into the delegate collector.
     *
     * <p>Returns immediately when there are no leaf contexts. Otherwise it (1) adds the tracked
     * null doc when its score is positive, (2) clears the group entry of every boosted ordinal,
     * (3) adds each remaining group's doc to the collapsed set, and (4) walks the collapsed set,
     * mapping each global doc id to its leaf and collecting it there with the score loaded into
     * a shared dummy scorer. Finally, {@code finish()} is forwarded if the delegate is itself a
     * {@link DelegatingCollector}.
     */
    @Override
    public void finish() throws IOException {
      if (contexts.length == 0) {
        return;
      }

      // NOTE(review): a null doc whose best score is <= 0 is not added here — confirm intended.
      if (nullScore > 0) {
        this.collapsedSet.set(nullDoc);
      }

      // Clear the group entry for every ordinal that has a boosted doc.
      if (this.boostOrds != null) {
        for (int boostedOrd : this.boostOrds) {
          ords[boostedOrd] = -1;
        }
      }

      for (int groupDoc : ords) {
        if (groupDoc > -1) {
          collapsedSet.set(groupDoc);
        }
      }

      int leafIndex = 0;
      int leafDocBase = 0;
      int nextLeafDocBase = contexts.length > 1 ? contexts[1].docBase : maxDoc;
      leafDelegate = delegate.getLeafCollector(contexts[leafIndex]);
      final DummyScorer replayScorer = new DummyScorer();
      leafDelegate.setScorer(replayScorer);
      final DocIdSetIterator collapsedDocs =
          new BitSetIterator(collapsedSet, 0L); // cost is not useful here
      int nextNullScore = 0;

      for (int globalDoc = collapsedDocs.nextDoc();
          globalDoc != DocIdSetIterator.NO_MORE_DOCS;
          globalDoc = collapsedDocs.nextDoc()) {

        final int ord = values.getOrd(globalDoc);

        if (ord >= 0) {
          replayScorer.score = scores[ord];
        } else if (this.boostDocs != null && boostDocs.containsKey(globalDoc)) {
          // Elevated docs don't need a score.
          replayScorer.score = 0F;
        } else if (nullPolicy == CollapsingPostFilter.NULL_POLICY_COLLAPSE) {
          replayScorer.score = nullScore;
        } else if (nullPolicy == CollapsingPostFilter.NULL_POLICY_EXPAND) {
          // Null scores are consumed in the order the docs are replayed.
          replayScorer.score = nullScores.get(nextNullScore++);
        }

        // Advance to the leaf containing this doc; a leaf may be skipped entirely.
        while (globalDoc >= nextLeafDocBase) {
          leafIndex++;
          leafDocBase = contexts[leafIndex].docBase;
          nextLeafDocBase =
              leafIndex + 1 < contexts.length ? contexts[leafIndex + 1].docBase : maxDoc;
          leafDelegate = delegate.getLeafCollector(contexts[leafIndex]);
          leafDelegate.setScorer(replayScorer);
        }

        final int leafDoc = globalDoc - leafDocBase;
        replayScorer.docId = leafDoc;
        leafDelegate.collect(leafDoc);
      }

      if (delegate instanceof DelegatingCollector) {
        ((DelegatingCollector) delegate).finish();
      }
    }
  /**
   * Checks that a {@code MemoryIndex} exposes the same doc values as a regular index.
   *
   * <p>One document is built with numeric, sorted-numeric, binary, sorted and sorted-set
   * doc-values fields (randomly also adding indexed fields under the same names and randomly
   * duplicating multi-valued entries). The document is loaded into a {@code MemoryIndex} and
   * also written through an {@code IndexWriter}; the doc values of doc 0 from both readers are
   * then compared type by type.
   */
  public void testDocValuesMemoryIndexVsNormalIndex() throws Exception {
    // --- build the document ---
    Document doc = new Document();
    long randomLong = random().nextLong();
    doc.add(new NumericDocValuesField("numeric", randomLong));
    if (random().nextBoolean()) {
      doc.add(new LegacyLongField("numeric", randomLong, Field.Store.NO));
    }
    int numValues = atLeast(5);
    for (int i = 0; i < numValues; i++) {
      randomLong = random().nextLong();
      doc.add(new SortedNumericDocValuesField("sorted_numeric", randomLong));
      if (random().nextBoolean()) {
        // randomly duplicate field/value
        doc.add(new SortedNumericDocValuesField("sorted_numeric", randomLong));
      }
      if (random().nextBoolean()) {
        // randomly also add an indexed long under the "numeric" field name
        doc.add(new LegacyLongField("numeric", randomLong, Field.Store.NO));
      }
    }
    BytesRef randomTerm = new BytesRef(randomTerm());
    doc.add(new BinaryDocValuesField("binary", randomTerm));
    if (random().nextBoolean()) {
      doc.add(new StringField("binary", randomTerm, Field.Store.NO));
    }
    randomTerm = new BytesRef(randomTerm());
    doc.add(new SortedDocValuesField("sorted", randomTerm));
    if (random().nextBoolean()) {
      doc.add(new StringField("sorted", randomTerm, Field.Store.NO));
    }
    numValues = atLeast(5);
    for (int i = 0; i < numValues; i++) {
      randomTerm = new BytesRef(randomTerm());
      doc.add(new SortedSetDocValuesField("sorted_set", randomTerm));
      if (random().nextBoolean()) {
        // randomly duplicate field/value
        doc.add(new SortedSetDocValuesField("sorted_set", randomTerm));
      }
      if (random().nextBoolean()) {
        // randomly just add a normal string field
        doc.add(new StringField("sorted_set", randomTerm, Field.Store.NO));
      }
    }

    // --- index under test: the document loaded into a MemoryIndex ---
    MockAnalyzer mockAnalyzer = new MockAnalyzer(random());
    MemoryIndex memoryIndex = MemoryIndex.fromDocument(doc, mockAnalyzer);
    IndexReader indexReader = memoryIndex.createSearcher().getIndexReader();
    LeafReader leafReader = indexReader.leaves().get(0).reader();

    // --- control index: the same document written through an IndexWriter ---
    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(random(), mockAnalyzer));
    writer.addDocument(doc);
    writer.close();
    IndexReader controlIndexReader = DirectoryReader.open(dir);
    LeafReader controlLeafReader = controlIndexReader.leaves().get(0).reader();

    // numeric doc values
    NumericDocValues numericDocValues = leafReader.getNumericDocValues("numeric");
    NumericDocValues controlNumericDocValues = controlLeafReader.getNumericDocValues("numeric");
    assertEquals(controlNumericDocValues.get(0), numericDocValues.get(0));

    // sorted-numeric doc values: same count and same value at every position
    SortedNumericDocValues sortedNumericDocValues =
        leafReader.getSortedNumericDocValues("sorted_numeric");
    sortedNumericDocValues.setDocument(0);
    SortedNumericDocValues controlSortedNumericDocValues =
        controlLeafReader.getSortedNumericDocValues("sorted_numeric");
    controlSortedNumericDocValues.setDocument(0);
    assertEquals(controlSortedNumericDocValues.count(), sortedNumericDocValues.count());
    for (int i = 0; i < controlSortedNumericDocValues.count(); i++) {
      assertEquals(controlSortedNumericDocValues.valueAt(i), sortedNumericDocValues.valueAt(i));
    }

    // binary doc values
    BinaryDocValues binaryDocValues = leafReader.getBinaryDocValues("binary");
    BinaryDocValues controlBinaryDocValues = controlLeafReader.getBinaryDocValues("binary");
    assertEquals(controlBinaryDocValues.get(0), binaryDocValues.get(0));

    // sorted doc values: value count, value, ordinal and ordinal lookup must all agree
    SortedDocValues sortedDocValues = leafReader.getSortedDocValues("sorted");
    SortedDocValues controlSortedDocValues = controlLeafReader.getSortedDocValues("sorted");
    assertEquals(controlSortedDocValues.getValueCount(), sortedDocValues.getValueCount());
    assertEquals(controlSortedDocValues.get(0), sortedDocValues.get(0));
    assertEquals(controlSortedDocValues.getOrd(0), sortedDocValues.getOrd(0));
    assertEquals(controlSortedDocValues.lookupOrd(0), sortedDocValues.lookupOrd(0));

    // sorted-set doc values: both must yield the same ordinal sequence and terms
    SortedSetDocValues sortedSetDocValues = leafReader.getSortedSetDocValues("sorted_set");
    sortedSetDocValues.setDocument(0);
    SortedSetDocValues controlSortedSetDocValues =
        controlLeafReader.getSortedSetDocValues("sorted_set");
    controlSortedSetDocValues.setDocument(0);
    assertEquals(controlSortedSetDocValues.getValueCount(), sortedSetDocValues.getValueCount());
    for (long controlOrd = controlSortedSetDocValues.nextOrd();
        controlOrd != SortedSetDocValues.NO_MORE_ORDS;
        controlOrd = controlSortedSetDocValues.nextOrd()) {
      assertEquals(controlOrd, sortedSetDocValues.nextOrd());
      assertEquals(
          controlSortedSetDocValues.lookupOrd(controlOrd),
          sortedSetDocValues.lookupOrd(controlOrd));
    }
    // the memory index must not report any extra ordinals
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSetDocValues.nextOrd());

    indexReader.close();
    controlIndexReader.close();
    dir.close();
  }