@Test
 public void testNoTerms() throws Exception {
   FieldDataTermsFilter hFilterBytes =
       FieldDataTermsFilter.newBytes(getFieldData(strMapper), new ObjectOpenHashSet<BytesRef>());
   FieldDataTermsFilter hFilterLongs =
       FieldDataTermsFilter.newLongs(getFieldData(lngMapper), new LongOpenHashSet());
   FieldDataTermsFilter hFilterDoubles =
       FieldDataTermsFilter.newDoubles(getFieldData(dblMapper), new DoubleOpenHashSet());
   assertNull(hFilterBytes.getDocIdSet(reader.getContext(), reader.getLiveDocs()));
   assertNull(hFilterLongs.getDocIdSet(reader.getContext(), reader.getLiveDocs()));
   assertNull(hFilterDoubles.getDocIdSet(reader.getContext(), reader.getLiveDocs()));
 }
  @Test
  public void testBytes() throws Exception {
    List<Integer> docs = Arrays.asList(1, 5, 7);

    ObjectOpenHashSet<BytesRef> hTerms = new ObjectOpenHashSet<BytesRef>();
    List<BytesRef> cTerms = new ArrayList<BytesRef>(docs.size());
    for (int i = 0; i < docs.size(); i++) {
      BytesRef term = new BytesRef("str" + docs.get(i));
      hTerms.add(term);
      cTerms.add(term);
    }

    FieldDataTermsFilter hFilter = FieldDataTermsFilter.newBytes(getFieldData(strMapper), hTerms);

    int size = reader.maxDoc();
    FixedBitSet result = new FixedBitSet(size);

    result.clear(0, size);
    assertThat(result.cardinality(), equalTo(0));
    result.or(hFilter.getDocIdSet(reader.getContext(), reader.getLiveDocs()).iterator());
    assertThat(result.cardinality(), equalTo(docs.size()));
    for (int i = 0; i < reader.maxDoc(); i++) {
      assertThat(result.get(i), equalTo(docs.contains(i)));
    }

    // filter from mapper
    result.clear(0, size);
    assertThat(result.cardinality(), equalTo(0));
    result.or(
        strMapper
            .termsFilter(ifdService, cTerms, null)
            .getDocIdSet(reader.getContext(), reader.getLiveDocs())
            .iterator());
    assertThat(result.cardinality(), equalTo(docs.size()));
    for (int i = 0; i < reader.maxDoc(); i++) {
      assertThat(result.get(i), equalTo(docs.contains(i)));
    }

    result.clear(0, size);
    assertThat(result.cardinality(), equalTo(0));

    // filter on a numeric field using BytesRef terms
    // should not match any docs
    hFilter = FieldDataTermsFilter.newBytes(getFieldData(lngMapper), hTerms);
    result.or(hFilter.getDocIdSet(reader.getContext(), reader.getLiveDocs()).iterator());
    assertThat(result.cardinality(), equalTo(0));

    // filter on a numeric field using BytesRef terms
    // should not match any docs
    hFilter = FieldDataTermsFilter.newBytes(getFieldData(dblMapper), hTerms);
    result.or(hFilter.getDocIdSet(reader.getContext(), reader.getLiveDocs()).iterator());
    assertThat(result.cardinality(), equalTo(0));
  }
  @Test
  public void testDoubles() throws Exception {
    List<Integer> docs = Arrays.asList(1, 5, 7);

    DoubleOpenHashSet hTerms = new DoubleOpenHashSet();
    List<Double> cTerms = new ArrayList<Double>(docs.size());
    for (int i = 0; i < docs.size(); i++) {
      double term = Double.valueOf(docs.get(i));
      hTerms.add(term);
      cTerms.add(term);
    }

    FieldDataTermsFilter hFilter = FieldDataTermsFilter.newDoubles(getFieldData(dblMapper), hTerms);

    int size = reader.maxDoc();
    FixedBitSet result = new FixedBitSet(size);

    result.clear(0, size);
    assertThat(result.cardinality(), equalTo(0));
    result.or(hFilter.getDocIdSet(reader.getContext(), reader.getLiveDocs()).iterator());
    assertThat(result.cardinality(), equalTo(docs.size()));
    for (int i = 0; i < reader.maxDoc(); i++) {
      assertThat(result.get(i), equalTo(docs.contains(i)));
    }

    // filter from mapper
    result.clear(0, size);
    assertThat(result.cardinality(), equalTo(0));
    result.or(
        dblMapper
            .termsFilter(ifdService, cTerms, null)
            .getDocIdSet(reader.getContext(), reader.getLiveDocs())
            .iterator());
    assertThat(result.cardinality(), equalTo(docs.size()));
    for (int i = 0; i < reader.maxDoc(); i++) {
      assertThat(result.get(i), equalTo(docs.contains(i)));
    }

    hFilter = FieldDataTermsFilter.newDoubles(getFieldData(lngMapper), hTerms);
    assertNull(hFilter.getDocIdSet(reader.getContext(), reader.getLiveDocs()));
  }
  protected int[] lookupDocIdByPK(final IndexSearcher searcher, final String... ids)
      throws IOException {
    final List<AtomicReaderContext> subReaders = searcher.getIndexReader().leaves();
    final TermsEnum[] termsEnums = new TermsEnum[subReaders.size()];
    final DocsEnum[] docsEnums = new DocsEnum[subReaders.size()];
    for (int subIDX = 0; subIDX < subReaders.size(); subIDX++) {
      termsEnums[subIDX] = subReaders.get(subIDX).reader().fields().terms("id").iterator(null);
    }

    int[] results = new int[ids.length];

    for (int i = 0; i < results.length; i++) {
      results[i] = -1;
    }

    for (int idx = 0; idx < ids.length; idx++) {
      int base = 0;
      final BytesRef id = new BytesRef(ids[idx]);
      for (int subIDX = 0; subIDX < subReaders.size(); subIDX++) {
        final AtomicReader sub = subReaders.get(subIDX).reader();
        final TermsEnum termsEnum = termsEnums[subIDX];
        if (termsEnum.seekExact(id, false)) {
          final DocsEnum docs =
              docsEnums[subIDX] = termsEnum.docs(sub.getLiveDocs(), docsEnums[subIDX], 0);
          if (docs != null) {
            final int docID = docs.nextDoc();
            if (docID != DocIdSetIterator.NO_MORE_DOCS) {
              results[idx] = base + docID;
              break;
            }
          }
        }
        base += sub.maxDoc();
      }
    }

    return results;
  }
  private List<Document> lookupDocs(Term term, final LoadFieldCallback lfc) throws IOException {
    final List<Document> documents = new ArrayList<Document>();
    final TermFilter tf = new TermFilter(term);
    try {
      for (AtomicReaderContext arc : searcher.getIndexReader().leaves()) {
        AtomicReader ar = arc.reader();
        Bits liveDocs = ar.getLiveDocs();
        DocIdSet docSet = tf.getDocIdSet(arc, liveDocs);
        if (docSet != null) {
          DocIdSetIterator disi = docSet.iterator();
          if (disi != null) {
            int docId;
            while ((docId = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
              DocumentStoredFieldVisitor fv =
                  new DocumentStoredFieldVisitor() {
                    @Override
                    public StoredFieldVisitor.Status needsField(FieldInfo fieldInfo)
                        throws IOException {
                      if (lfc == null || lfc.loadField(fieldInfo.name)) {
                        return StoredFieldVisitor.Status.YES;
                      }
                      return StoredFieldVisitor.Status.NO;
                    }
                  };
              ar.document(docId, fv);
              documents.add(fv.getDocument());
            }
          }
        }
      }
    } catch (IOException io) {
      throw new IndexException(io);
    }

    return documents;
  }
  /**
   * Merges the sortedset docvalues from <code>toMerge</code>.
   *
   * <p>The default implementation calls {@link #addSortedSetField}, passing an Iterable that merges
   * ordinals and values and filters deleted documents .
   */
  public void mergeSortedSetField(
      FieldInfo fieldInfo, final MergeState mergeState, List<SortedSetDocValues> toMerge)
      throws IOException {

    mergeState.checkAbort.work(mergeState.segmentInfo.getDocCount());

    final AtomicReader readers[] = mergeState.readers.toArray(new AtomicReader[toMerge.size()]);
    final SortedSetDocValues dvs[] = toMerge.toArray(new SortedSetDocValues[toMerge.size()]);

    // step 1: iterate thru each sub and mark terms still in use
    TermsEnum liveTerms[] = new TermsEnum[dvs.length];
    long[] weights = new long[liveTerms.length];
    for (int sub = 0; sub < liveTerms.length; sub++) {
      AtomicReader reader = readers[sub];
      SortedSetDocValues dv = dvs[sub];
      Bits liveDocs = reader.getLiveDocs();
      if (liveDocs == null) {
        liveTerms[sub] = dv.termsEnum();
        weights[sub] = dv.getValueCount();
      } else {
        LongBitSet bitset = new LongBitSet(dv.getValueCount());
        for (int i = 0; i < reader.maxDoc(); i++) {
          if (liveDocs.get(i)) {
            dv.setDocument(i);
            long ord;
            while ((ord = dv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
              bitset.set(ord);
            }
          }
        }
        liveTerms[sub] = new BitsFilteredTermsEnum(dv.termsEnum(), bitset);
        weights[sub] = bitset.cardinality();
      }
    }

    // step 2: create ordinal map (this conceptually does the "merging")
    final OrdinalMap map = OrdinalMap.build(this, liveTerms, weights, PackedInts.COMPACT);

    // step 3: add field
    addSortedSetField(
        fieldInfo,
        // ord -> value
        new Iterable<BytesRef>() {
          @Override
          public Iterator<BytesRef> iterator() {
            return new Iterator<BytesRef>() {
              long currentOrd;

              @Override
              public boolean hasNext() {
                return currentOrd < map.getValueCount();
              }

              @Override
              public BytesRef next() {
                if (!hasNext()) {
                  throw new NoSuchElementException();
                }
                int segmentNumber = map.getFirstSegmentNumber(currentOrd);
                long segmentOrd = map.getFirstSegmentOrd(currentOrd);
                final BytesRef term = dvs[segmentNumber].lookupOrd(segmentOrd);
                currentOrd++;
                return term;
              }

              @Override
              public void remove() {
                throw new UnsupportedOperationException();
              }
            };
          }
        },
        // doc -> ord count
        new Iterable<Number>() {
          @Override
          public Iterator<Number> iterator() {
            return new Iterator<Number>() {
              int readerUpto = -1;
              int docIDUpto;
              int nextValue;
              AtomicReader currentReader;
              Bits currentLiveDocs;
              boolean nextIsSet;

              @Override
              public boolean hasNext() {
                return nextIsSet || setNext();
              }

              @Override
              public void remove() {
                throw new UnsupportedOperationException();
              }

              @Override
              public Number next() {
                if (!hasNext()) {
                  throw new NoSuchElementException();
                }
                assert nextIsSet;
                nextIsSet = false;
                // TODO make a mutable number
                return nextValue;
              }

              private boolean setNext() {
                while (true) {
                  if (readerUpto == readers.length) {
                    return false;
                  }

                  if (currentReader == null || docIDUpto == currentReader.maxDoc()) {
                    readerUpto++;
                    if (readerUpto < readers.length) {
                      currentReader = readers[readerUpto];
                      currentLiveDocs = currentReader.getLiveDocs();
                    }
                    docIDUpto = 0;
                    continue;
                  }

                  if (currentLiveDocs == null || currentLiveDocs.get(docIDUpto)) {
                    nextIsSet = true;
                    SortedSetDocValues dv = dvs[readerUpto];
                    dv.setDocument(docIDUpto);
                    nextValue = 0;
                    while (dv.nextOrd() != SortedSetDocValues.NO_MORE_ORDS) {
                      nextValue++;
                    }
                    docIDUpto++;
                    return true;
                  }

                  docIDUpto++;
                }
              }
            };
          }
        },
        // ords
        new Iterable<Number>() {
          @Override
          public Iterator<Number> iterator() {
            return new Iterator<Number>() {
              int readerUpto = -1;
              int docIDUpto;
              long nextValue;
              AtomicReader currentReader;
              Bits currentLiveDocs;
              LongValues currentMap;
              boolean nextIsSet;
              long ords[] = new long[8];
              int ordUpto;
              int ordLength;

              @Override
              public boolean hasNext() {
                return nextIsSet || setNext();
              }

              @Override
              public void remove() {
                throw new UnsupportedOperationException();
              }

              @Override
              public Number next() {
                if (!hasNext()) {
                  throw new NoSuchElementException();
                }
                assert nextIsSet;
                nextIsSet = false;
                // TODO make a mutable number
                return nextValue;
              }

              private boolean setNext() {
                while (true) {
                  if (readerUpto == readers.length) {
                    return false;
                  }

                  if (ordUpto < ordLength) {
                    nextValue = ords[ordUpto];
                    ordUpto++;
                    nextIsSet = true;
                    return true;
                  }

                  if (currentReader == null || docIDUpto == currentReader.maxDoc()) {
                    readerUpto++;
                    if (readerUpto < readers.length) {
                      currentReader = readers[readerUpto];
                      currentLiveDocs = currentReader.getLiveDocs();
                      currentMap = map.getGlobalOrds(readerUpto);
                    }
                    docIDUpto = 0;
                    continue;
                  }

                  if (currentLiveDocs == null || currentLiveDocs.get(docIDUpto)) {
                    assert docIDUpto < currentReader.maxDoc();
                    SortedSetDocValues dv = dvs[readerUpto];
                    dv.setDocument(docIDUpto);
                    ordUpto = ordLength = 0;
                    long ord;
                    while ((ord = dv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
                      if (ordLength == ords.length) {
                        ords = ArrayUtil.grow(ords, ordLength + 1);
                      }
                      ords[ordLength] = currentMap.get(ord);
                      ordLength++;
                    }
                    docIDUpto++;
                    continue;
                  }

                  docIDUpto++;
                }
              }
            };
          }
        });
  }
  @Test
  public void testRandom() throws Exception {
    Directory directory = newDirectory();
    RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
    int numUniqueChildValues = 1 + random().nextInt(TEST_NIGHTLY ? 10000 : 1000);
    String[] childValues = new String[numUniqueChildValues];
    for (int i = 0; i < numUniqueChildValues; i++) {
      childValues[i] = Integer.toString(i);
    }

    IntOpenHashSet filteredOrDeletedDocs = new IntOpenHashSet();
    int childDocId = 0;
    int numParentDocs = 1 + random().nextInt(TEST_NIGHTLY ? 20000 : 1000);
    ObjectObjectOpenHashMap<String, NavigableSet<String>> childValueToParentIds =
        new ObjectObjectOpenHashMap<String, NavigableSet<String>>();
    for (int parentDocId = 0; parentDocId < numParentDocs; parentDocId++) {
      boolean markParentAsDeleted = rarely();
      boolean filterMe = rarely();
      String parent = Integer.toString(parentDocId);
      Document document = new Document();
      document.add(
          new StringField(UidFieldMapper.NAME, Uid.createUid("parent", parent), Field.Store.YES));
      document.add(new StringField(TypeFieldMapper.NAME, "parent", Field.Store.NO));
      if (markParentAsDeleted) {
        filteredOrDeletedDocs.add(parentDocId);
        document.add(new StringField("delete", "me", Field.Store.NO));
      }
      if (filterMe) {
        filteredOrDeletedDocs.add(parentDocId);
        document.add(new StringField("filter", "me", Field.Store.NO));
      }
      indexWriter.addDocument(document);

      int numChildDocs;
      if (rarely()) {
        numChildDocs = random().nextInt(TEST_NIGHTLY ? 100 : 25);
      } else {
        numChildDocs = random().nextInt(TEST_NIGHTLY ? 40 : 10);
      }
      for (int i = 0; i < numChildDocs; i++) {
        boolean markChildAsDeleted = rarely();
        String childValue = childValues[random().nextInt(childValues.length)];

        document = new Document();
        document.add(
            new StringField(
                UidFieldMapper.NAME,
                Uid.createUid("child", Integer.toString(childDocId)),
                Field.Store.NO));
        document.add(new StringField(TypeFieldMapper.NAME, "child", Field.Store.NO));
        document.add(
            new StringField(
                ParentFieldMapper.NAME, Uid.createUid("parent", parent), Field.Store.NO));
        document.add(new StringField("field1", childValue, Field.Store.NO));
        if (markChildAsDeleted) {
          document.add(new StringField("delete", "me", Field.Store.NO));
        }
        indexWriter.addDocument(document);

        if (!markChildAsDeleted) {
          NavigableSet<String> parentIds;
          if (childValueToParentIds.containsKey(childValue)) {
            parentIds = childValueToParentIds.lget();
          } else {
            childValueToParentIds.put(childValue, parentIds = new TreeSet<String>());
          }
          if (!markParentAsDeleted && !filterMe) {
            parentIds.add(parent);
          }
        }
      }
    }

    // Delete docs that are marked to be deleted.
    indexWriter.deleteDocuments(new Term("delete", "me"));

    indexWriter.commit();
    IndexReader indexReader = DirectoryReader.open(directory);
    IndexSearcher searcher = new IndexSearcher(indexReader);
    Engine.Searcher engineSearcher =
        new Engine.SimpleSearcher(ChildrenConstantScoreQueryTests.class.getSimpleName(), searcher);
    ((TestSearchContext) SearchContext.current())
        .setSearcher(new ContextIndexSearcher(SearchContext.current(), engineSearcher));

    Filter rawParentFilter = new TermFilter(new Term(TypeFieldMapper.NAME, "parent"));
    Filter rawFilterMe = new NotFilter(new TermFilter(new Term("filter", "me")));
    int max = numUniqueChildValues / 4;
    for (int i = 0; i < max; i++) {
      // Randomly pick a cached version: there is specific logic inside ChildrenQuery that deals
      // with the fact
      // that deletes are applied at the top level when filters are cached.
      Filter parentFilter;
      if (random().nextBoolean()) {
        parentFilter = SearchContext.current().filterCache().cache(rawParentFilter);
      } else {
        parentFilter = rawParentFilter;
      }

      // Using this in FQ, will invoke / test the Scorer#advance(..) and also let the Weight#scorer
      // not get live docs as acceptedDocs
      Filter filterMe;
      if (random().nextBoolean()) {
        filterMe = SearchContext.current().filterCache().cache(rawFilterMe);
      } else {
        filterMe = rawFilterMe;
      }

      // Simulate a parent update
      if (random().nextBoolean()) {
        int numberOfUpdates = 1 + random().nextInt(TEST_NIGHTLY ? 25 : 5);
        for (int j = 0; j < numberOfUpdates; j++) {
          int parentId;
          do {
            parentId = random().nextInt(numParentDocs);
          } while (filteredOrDeletedDocs.contains(parentId));

          String parentUid = Uid.createUid("parent", Integer.toString(parentId));
          indexWriter.deleteDocuments(new Term(UidFieldMapper.NAME, parentUid));

          Document document = new Document();
          document.add(new StringField(UidFieldMapper.NAME, parentUid, Field.Store.YES));
          document.add(new StringField(TypeFieldMapper.NAME, "parent", Field.Store.NO));
          indexWriter.addDocument(document);
        }

        indexReader.close();
        indexReader = DirectoryReader.open(indexWriter.w, true);
        searcher = new IndexSearcher(indexReader);
        engineSearcher =
            new Engine.SimpleSearcher(
                ChildrenConstantScoreQueryTests.class.getSimpleName(), searcher);
        ((TestSearchContext) SearchContext.current())
            .setSearcher(new ContextIndexSearcher(SearchContext.current(), engineSearcher));
      }

      String childValue = childValues[random().nextInt(numUniqueChildValues)];
      TermQuery childQuery = new TermQuery(new Term("field1", childValue));
      int shortCircuitParentDocSet = random().nextInt(numParentDocs);
      Filter nonNestedDocsFilter = random().nextBoolean() ? NonNestedDocsFilter.INSTANCE : null;
      Query query;
      if (random().nextBoolean()) {
        // Usage in HasChildQueryParser
        query =
            new ChildrenConstantScoreQuery(
                childQuery,
                "parent",
                "child",
                parentFilter,
                shortCircuitParentDocSet,
                nonNestedDocsFilter);
      } else {
        // Usage in HasChildFilterParser
        query =
            new XConstantScoreQuery(
                new CustomQueryWrappingFilter(
                    new ChildrenConstantScoreQuery(
                        childQuery,
                        "parent",
                        "child",
                        parentFilter,
                        shortCircuitParentDocSet,
                        nonNestedDocsFilter)));
      }
      query = new XFilteredQuery(query, filterMe);
      BitSetCollector collector = new BitSetCollector(indexReader.maxDoc());
      searcher.search(query, collector);
      FixedBitSet actualResult = collector.getResult();

      FixedBitSet expectedResult = new FixedBitSet(indexReader.maxDoc());
      if (childValueToParentIds.containsKey(childValue)) {
        AtomicReader slowAtomicReader = SlowCompositeReaderWrapper.wrap(indexReader);
        Terms terms = slowAtomicReader.terms(UidFieldMapper.NAME);
        if (terms != null) {
          NavigableSet<String> parentIds = childValueToParentIds.lget();
          TermsEnum termsEnum = terms.iterator(null);
          DocsEnum docsEnum = null;
          for (String id : parentIds) {
            TermsEnum.SeekStatus seekStatus =
                termsEnum.seekCeil(Uid.createUidAsBytes("parent", id));
            if (seekStatus == TermsEnum.SeekStatus.FOUND) {
              docsEnum =
                  termsEnum.docs(slowAtomicReader.getLiveDocs(), docsEnum, DocsEnum.FLAG_NONE);
              expectedResult.set(docsEnum.nextDoc());
            } else if (seekStatus == TermsEnum.SeekStatus.END) {
              break;
            }
          }
        }
      }

      assertBitSet(actualResult, expectedResult, searcher);
    }

    indexWriter.close();
    indexReader.close();
    directory.close();
  }
Ejemplo n.º 8
0
 public SpansFiltered(Spans spans, Filter filter, AtomicReader reader) throws IOException {
   this(spans, filter.getDocIdSet(reader.getContext(), reader.getLiveDocs()));
 }
  @Override
  public DoubleArrayAtomicFieldData loadDirect(AtomicReaderContext context) throws Exception {
    AtomicReader reader = context.reader();

    Terms terms = reader.terms(getFieldNames().indexName());
    if (terms == null) {
      return DoubleArrayAtomicFieldData.EMPTY;
    }

    // TODO: how can we guess the number of terms? numerics end up creating more terms per value...
    final TDoubleArrayList values = new TDoubleArrayList();
    ArrayList<int[]> ordinals = new ArrayList<int[]>();
    int[] idx = new int[reader.maxDoc()];
    ordinals.add(new int[reader.maxDoc()]);

    values.add(0); // first "t" indicates null value
    int termOrd = 1; // current term number

    TermsEnum termsEnum = terms.iterator(null);
    try {
      DocsEnum docsEnum = null;
      for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) {
        values.add(FieldCache.NUMERIC_UTILS_DOUBLE_PARSER.parseDouble(term));
        docsEnum = termsEnum.docs(reader.getLiveDocs(), docsEnum, 0);
        for (int docId = docsEnum.nextDoc();
            docId != DocsEnum.NO_MORE_DOCS;
            docId = docsEnum.nextDoc()) {
          int[] ordinal;
          if (idx[docId] >= ordinals.size()) {
            ordinal = new int[reader.maxDoc()];
            ordinals.add(ordinal);
          } else {
            ordinal = ordinals.get(idx[docId]);
          }
          ordinal[docId] = termOrd;
          idx[docId]++;
        }
        termOrd++;
      }
    } catch (RuntimeException e) {
      if (e.getClass().getName().endsWith("StopFillCacheException")) {
        // all is well, in case numeric parsers are used.
      } else {
        throw e;
      }
    }

    if (ordinals.size() == 1) {
      int[] nativeOrdinals = ordinals.get(0);
      FixedBitSet set = new FixedBitSet(reader.maxDoc());
      double[] sValues = new double[reader.maxDoc()];
      boolean allHaveValue = true;
      for (int i = 0; i < nativeOrdinals.length; i++) {
        int nativeOrdinal = nativeOrdinals[i];
        if (nativeOrdinal == 0) {
          allHaveValue = false;
        } else {
          set.set(i);
          sValues[i] = values.get(nativeOrdinal);
        }
      }
      if (allHaveValue) {
        return new DoubleArrayAtomicFieldData.Single(sValues, reader.maxDoc());
      } else {
        return new DoubleArrayAtomicFieldData.SingleFixedSet(sValues, reader.maxDoc(), set);
      }
    } else {
      int[][] nativeOrdinals = new int[ordinals.size()][];
      for (int i = 0; i < nativeOrdinals.length; i++) {
        nativeOrdinals[i] = ordinals.get(i);
      }
      return new DoubleArrayAtomicFieldData.WithOrdinals(
          values.toArray(new double[values.size()]),
          reader.maxDoc(),
          Ordinals.Factories.createFromFlatOrdinals(
              nativeOrdinals, termOrd, fieldDataType.getSettings()));
    }
  }
Ejemplo n.º 10
0
  @Override
  public void execute(String[] args, PrintStream out) throws Exception {
    String field = null;
    String termVal = null;
    try {
      field = args[0];
    } catch (Exception e) {
      field = null;
    }

    if (field != null) {
      String[] parts = field.split(":");
      if (parts.length > 1) {
        field = parts[0];
        termVal = parts[1];
      }
    }

    if (field == null || termVal == null) {
      out.println("usage: field:term");
      out.flush();
      return;
    }

    IndexReader reader = ctx.getIndexReader();
    List<AtomicReaderContext> leaves = reader.leaves();
    int docBase = 0;
    int numPerPage = 20;
    for (AtomicReaderContext leaf : leaves) {
      AtomicReader atomicReader = leaf.reader();
      Terms terms = atomicReader.terms(field);
      if (terms == null) {
        continue;
      }
      boolean hasPositions = terms.hasPositions();
      if (terms != null && termVal != null) {
        TermsEnum te = terms.iterator(null);
        int count = 0;
        if (te.seekExact(new BytesRef(termVal), true)) {

          if (hasPositions) {
            DocsAndPositionsEnum iter = te.docsAndPositions(atomicReader.getLiveDocs(), null);
            int docid;
            while ((docid = iter.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
              count++;
              out.print("docid: " + (docid + docBase) + ", freq: " + iter.freq() + ", ");
              for (int i = 0; i < iter.freq(); ++i) {
                out.print("pos " + i + ": " + iter.nextPosition());
                BytesRef payload = iter.getPayload();
                if (payload != null) {
                  out.print(",payload: " + payload);
                }
                out.print(";");
              }
              out.println();
              if (ctx.isInteractiveMode()) {
                if (count % numPerPage == 0) {
                  out.println("Ctrl-D to break");
                  int ch = System.in.read();
                  if (ch == -1) {
                    out.flush();
                    return;
                  }
                }
              }
            }
          } else {
            DocsEnum iter = te.docs(atomicReader.getLiveDocs(), null);

            int docid;
            while ((docid = iter.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
              count++;
              out.println("docid: " + (docid + docBase));
              if (ctx.isInteractiveMode()) {
                if (count % numPerPage == 0) {
                  out.println("Ctrl-D to break");
                  int ch = System.in.read();
                  if (ch == -1) {
                    out.flush();
                    return;
                  }
                }
              }
            }
          }
        }
      }
      docBase += atomicReader.maxDoc();
    }
  }
  int resolveParentDocuments(
      TopDocs topDocs,
      SearchContext context,
      Recycler.V<ObjectObjectOpenHashMap<Object, ParentDoc[]>> parentDocs) {
    int parentHitsResolved = 0;
    Recycler.V<ObjectObjectOpenHashMap<Object, Recycler.V<IntObjectOpenHashMap<ParentDoc>>>>
        parentDocsPerReader =
            cacheRecycler.hashMap(context.searcher().getIndexReader().leaves().size());
    for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
      int readerIndex =
          ReaderUtil.subIndex(scoreDoc.doc, context.searcher().getIndexReader().leaves());
      AtomicReaderContext subContext =
          context.searcher().getIndexReader().leaves().get(readerIndex);
      int subDoc = scoreDoc.doc - subContext.docBase;

      // find the parent id
      HashedBytesArray parentId =
          context.idCache().reader(subContext.reader()).parentIdByDoc(parentType, subDoc);
      if (parentId == null) {
        // no parent found
        continue;
      }
      // now go over and find the parent doc Id and reader tuple
      for (AtomicReaderContext atomicReaderContext : context.searcher().getIndexReader().leaves()) {
        AtomicReader indexReader = atomicReaderContext.reader();
        int parentDocId = context.idCache().reader(indexReader).docById(parentType, parentId);
        Bits liveDocs = indexReader.getLiveDocs();
        if (parentDocId != -1 && (liveDocs == null || liveDocs.get(parentDocId))) {
          // we found a match, add it and break

          Recycler.V<IntObjectOpenHashMap<ParentDoc>> readerParentDocs =
              parentDocsPerReader.v().get(indexReader.getCoreCacheKey());
          if (readerParentDocs == null) {
            readerParentDocs = cacheRecycler.intObjectMap(indexReader.maxDoc());
            parentDocsPerReader.v().put(indexReader.getCoreCacheKey(), readerParentDocs);
          }

          ParentDoc parentDoc = readerParentDocs.v().get(parentDocId);
          if (parentDoc == null) {
            parentHitsResolved++; // we have a hit on a parent
            parentDoc = new ParentDoc();
            parentDoc.docId = parentDocId;
            parentDoc.count = 1;
            parentDoc.maxScore = scoreDoc.score;
            parentDoc.sumScores = scoreDoc.score;
            readerParentDocs.v().put(parentDocId, parentDoc);
          } else {
            parentDoc.count++;
            parentDoc.sumScores += scoreDoc.score;
            if (scoreDoc.score > parentDoc.maxScore) {
              parentDoc.maxScore = scoreDoc.score;
            }
          }
        }
      }
    }
    boolean[] states = parentDocsPerReader.v().allocated;
    Object[] keys = parentDocsPerReader.v().keys;
    Object[] values = parentDocsPerReader.v().values;
    for (int i = 0; i < states.length; i++) {
      if (states[i]) {
        Recycler.V<IntObjectOpenHashMap<ParentDoc>> value =
            (Recycler.V<IntObjectOpenHashMap<ParentDoc>>) values[i];
        ParentDoc[] _parentDocs = value.v().values().toArray(ParentDoc.class);
        Arrays.sort(_parentDocs, PARENT_DOC_COMP);
        parentDocs.v().put(keys[i], _parentDocs);
        Releasables.release(value);
      }
    }
    Releasables.release(parentDocsPerReader);
    return parentHitsResolved;
  }
Ejemplo n.º 12
0
  private IndexIterationContext createContext(
      int nDocs,
      RandomIndexWriter fromWriter,
      RandomIndexWriter toWriter,
      boolean multipleValuesPerDocument,
      boolean scoreDocsInOrder)
      throws IOException {
    IndexIterationContext context = new IndexIterationContext();
    int numRandomValues = nDocs / 2;
    context.randomUniqueValues = new String[numRandomValues];
    Set<String> trackSet = new HashSet<String>();
    context.randomFrom = new boolean[numRandomValues];
    for (int i = 0; i < numRandomValues; i++) {
      String uniqueRandomValue;
      do {
        uniqueRandomValue = _TestUtil.randomRealisticUnicodeString(random());
        //        uniqueRandomValue = _TestUtil.randomSimpleString(random);
      } while ("".equals(uniqueRandomValue) || trackSet.contains(uniqueRandomValue));
      // Generate unique values and empty strings aren't allowed.
      trackSet.add(uniqueRandomValue);
      context.randomFrom[i] = random().nextBoolean();
      context.randomUniqueValues[i] = uniqueRandomValue;
    }

    RandomDoc[] docs = new RandomDoc[nDocs];
    for (int i = 0; i < nDocs; i++) {
      String id = Integer.toString(i);
      int randomI = random().nextInt(context.randomUniqueValues.length);
      String value = context.randomUniqueValues[randomI];
      Document document = new Document();
      document.add(newTextField(random(), "id", id, Field.Store.NO));
      document.add(newTextField(random(), "value", value, Field.Store.NO));

      boolean from = context.randomFrom[randomI];
      int numberOfLinkValues = multipleValuesPerDocument ? 2 + random().nextInt(10) : 1;
      docs[i] = new RandomDoc(id, numberOfLinkValues, value, from);
      for (int j = 0; j < numberOfLinkValues; j++) {
        String linkValue =
            context.randomUniqueValues[random().nextInt(context.randomUniqueValues.length)];
        docs[i].linkValues.add(linkValue);
        if (from) {
          if (!context.fromDocuments.containsKey(linkValue)) {
            context.fromDocuments.put(linkValue, new ArrayList<RandomDoc>());
          }
          if (!context.randomValueFromDocs.containsKey(value)) {
            context.randomValueFromDocs.put(value, new ArrayList<RandomDoc>());
          }

          context.fromDocuments.get(linkValue).add(docs[i]);
          context.randomValueFromDocs.get(value).add(docs[i]);
          document.add(newTextField(random(), "from", linkValue, Field.Store.NO));
        } else {
          if (!context.toDocuments.containsKey(linkValue)) {
            context.toDocuments.put(linkValue, new ArrayList<RandomDoc>());
          }
          if (!context.randomValueToDocs.containsKey(value)) {
            context.randomValueToDocs.put(value, new ArrayList<RandomDoc>());
          }

          context.toDocuments.get(linkValue).add(docs[i]);
          context.randomValueToDocs.get(value).add(docs[i]);
          document.add(newTextField(random(), "to", linkValue, Field.Store.NO));
        }
      }

      final RandomIndexWriter w;
      if (from) {
        w = fromWriter;
      } else {
        w = toWriter;
      }

      w.addDocument(document);
      if (random().nextInt(10) == 4) {
        w.commit();
      }
      if (VERBOSE) {
        System.out.println("Added document[" + docs[i].id + "]: " + document);
      }
    }

    // Pre-compute all possible hits for all unique random values. On top of this also compute all
    // possible score for
    // any ScoreMode.
    IndexSearcher fromSearcher = newSearcher(fromWriter.getReader());
    IndexSearcher toSearcher = newSearcher(toWriter.getReader());
    for (int i = 0; i < context.randomUniqueValues.length; i++) {
      String uniqueRandomValue = context.randomUniqueValues[i];
      final String fromField;
      final String toField;
      final Map<String, Map<Integer, JoinScore>> queryVals;
      if (context.randomFrom[i]) {
        fromField = "from";
        toField = "to";
        queryVals = context.fromHitsToJoinScore;
      } else {
        fromField = "to";
        toField = "from";
        queryVals = context.toHitsToJoinScore;
      }
      final Map<BytesRef, JoinScore> joinValueToJoinScores = new HashMap<BytesRef, JoinScore>();
      if (multipleValuesPerDocument) {
        fromSearcher.search(
            new TermQuery(new Term("value", uniqueRandomValue)),
            new Collector() {

              private Scorer scorer;
              private SortedSetDocValues docTermOrds;
              final BytesRef joinValue = new BytesRef();

              @Override
              public void collect(int doc) throws IOException {
                docTermOrds.setDocument(doc);
                long ord;
                while ((ord = docTermOrds.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
                  docTermOrds.lookupOrd(ord, joinValue);
                  JoinScore joinScore = joinValueToJoinScores.get(joinValue);
                  if (joinScore == null) {
                    joinValueToJoinScores.put(
                        BytesRef.deepCopyOf(joinValue), joinScore = new JoinScore());
                  }
                  joinScore.addScore(scorer.score());
                }
              }

              @Override
              public void setNextReader(AtomicReaderContext context) throws IOException {
                docTermOrds = FieldCache.DEFAULT.getDocTermOrds(context.reader(), fromField);
              }

              @Override
              public void setScorer(Scorer scorer) {
                this.scorer = scorer;
              }

              @Override
              public boolean acceptsDocsOutOfOrder() {
                return false;
              }
            });
      } else {
        fromSearcher.search(
            new TermQuery(new Term("value", uniqueRandomValue)),
            new Collector() {

              private Scorer scorer;
              private BinaryDocValues terms;
              private Bits docsWithField;
              private final BytesRef spare = new BytesRef();

              @Override
              public void collect(int doc) throws IOException {
                terms.get(doc, spare);
                BytesRef joinValue = spare;
                if (joinValue.length == 0 && !docsWithField.get(doc)) {
                  return;
                }

                JoinScore joinScore = joinValueToJoinScores.get(joinValue);
                if (joinScore == null) {
                  joinValueToJoinScores.put(
                      BytesRef.deepCopyOf(joinValue), joinScore = new JoinScore());
                }
                joinScore.addScore(scorer.score());
              }

              @Override
              public void setNextReader(AtomicReaderContext context) throws IOException {
                terms = FieldCache.DEFAULT.getTerms(context.reader(), fromField, true);
                docsWithField = FieldCache.DEFAULT.getDocsWithField(context.reader(), fromField);
              }

              @Override
              public void setScorer(Scorer scorer) {
                this.scorer = scorer;
              }

              @Override
              public boolean acceptsDocsOutOfOrder() {
                return false;
              }
            });
      }

      final Map<Integer, JoinScore> docToJoinScore = new HashMap<Integer, JoinScore>();
      if (multipleValuesPerDocument) {
        if (scoreDocsInOrder) {
          AtomicReader slowCompositeReader =
              SlowCompositeReaderWrapper.wrap(toSearcher.getIndexReader());
          Terms terms = slowCompositeReader.terms(toField);
          if (terms != null) {
            DocsEnum docsEnum = null;
            TermsEnum termsEnum = null;
            SortedSet<BytesRef> joinValues =
                new TreeSet<BytesRef>(BytesRef.getUTF8SortedAsUnicodeComparator());
            joinValues.addAll(joinValueToJoinScores.keySet());
            for (BytesRef joinValue : joinValues) {
              termsEnum = terms.iterator(termsEnum);
              if (termsEnum.seekExact(joinValue)) {
                docsEnum =
                    termsEnum.docs(slowCompositeReader.getLiveDocs(), docsEnum, DocsEnum.FLAG_NONE);
                JoinScore joinScore = joinValueToJoinScores.get(joinValue);

                for (int doc = docsEnum.nextDoc();
                    doc != DocIdSetIterator.NO_MORE_DOCS;
                    doc = docsEnum.nextDoc()) {
                  // First encountered join value determines the score.
                  // Something to keep in mind for many-to-many relations.
                  if (!docToJoinScore.containsKey(doc)) {
                    docToJoinScore.put(doc, joinScore);
                  }
                }
              }
            }
          }
        } else {
          toSearcher.search(
              new MatchAllDocsQuery(),
              new Collector() {

                private SortedSetDocValues docTermOrds;
                private final BytesRef scratch = new BytesRef();
                private int docBase;

                @Override
                public void collect(int doc) throws IOException {
                  docTermOrds.setDocument(doc);
                  long ord;
                  while ((ord = docTermOrds.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
                    docTermOrds.lookupOrd(ord, scratch);
                    JoinScore joinScore = joinValueToJoinScores.get(scratch);
                    if (joinScore == null) {
                      continue;
                    }
                    Integer basedDoc = docBase + doc;
                    // First encountered join value determines the score.
                    // Something to keep in mind for many-to-many relations.
                    if (!docToJoinScore.containsKey(basedDoc)) {
                      docToJoinScore.put(basedDoc, joinScore);
                    }
                  }
                }

                @Override
                public void setNextReader(AtomicReaderContext context) throws IOException {
                  docBase = context.docBase;
                  docTermOrds = FieldCache.DEFAULT.getDocTermOrds(context.reader(), toField);
                }

                @Override
                public boolean acceptsDocsOutOfOrder() {
                  return false;
                }

                @Override
                public void setScorer(Scorer scorer) {}
              });
        }
      } else {
        toSearcher.search(
            new MatchAllDocsQuery(),
            new Collector() {

              private BinaryDocValues terms;
              private int docBase;
              private final BytesRef spare = new BytesRef();

              @Override
              public void collect(int doc) {
                terms.get(doc, spare);
                JoinScore joinScore = joinValueToJoinScores.get(spare);
                if (joinScore == null) {
                  return;
                }
                docToJoinScore.put(docBase + doc, joinScore);
              }

              @Override
              public void setNextReader(AtomicReaderContext context) throws IOException {
                terms = FieldCache.DEFAULT.getTerms(context.reader(), toField, false);
                docBase = context.docBase;
              }

              @Override
              public boolean acceptsDocsOutOfOrder() {
                return false;
              }

              @Override
              public void setScorer(Scorer scorer) {}
            });
      }
      queryVals.put(uniqueRandomValue, docToJoinScore);
    }

    fromSearcher.getIndexReader().close();
    toSearcher.getIndexReader().close();

    return context;
  }
  @Test
  public void testRandom() throws Exception {
    Directory directory = newDirectory();
    RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
    int numUniqueChildValues = 1 + random().nextInt(TEST_NIGHTLY ? 6000 : 600);
    String[] childValues = new String[numUniqueChildValues];
    for (int i = 0; i < numUniqueChildValues; i++) {
      childValues[i] = Integer.toString(i);
    }

    int childDocId = 0;
    int numParentDocs = 1 + random().nextInt(TEST_NIGHTLY ? 20000 : 1000);
    ObjectObjectOpenHashMap<String, NavigableMap<String, FloatArrayList>> childValueToParentIds =
        new ObjectObjectOpenHashMap<String, NavigableMap<String, FloatArrayList>>();
    for (int parentDocId = 0; parentDocId < numParentDocs; parentDocId++) {
      boolean markParentAsDeleted = rarely();
      String parent = Integer.toString(parentDocId);
      Document document = new Document();
      document.add(
          new StringField(UidFieldMapper.NAME, Uid.createUid("parent", parent), Field.Store.YES));
      document.add(new StringField(TypeFieldMapper.NAME, "parent", Field.Store.NO));
      if (markParentAsDeleted) {
        document.add(new StringField("delete", "me", Field.Store.NO));
      }
      indexWriter.addDocument(document);

      int numChildDocs = random().nextInt(TEST_NIGHTLY ? 100 : 25);
      for (int i = 0; i < numChildDocs; i++) {
        boolean markChildAsDeleted = rarely();
        String childValue = childValues[random().nextInt(childValues.length)];

        document = new Document();
        document.add(
            new StringField(
                UidFieldMapper.NAME,
                Uid.createUid("child", Integer.toString(childDocId)),
                Field.Store.NO));
        document.add(new StringField(TypeFieldMapper.NAME, "child", Field.Store.NO));
        document.add(
            new StringField(
                ParentFieldMapper.NAME, Uid.createUid("parent", parent), Field.Store.NO));
        document.add(new StringField("field1", childValue, Field.Store.NO));
        if (markChildAsDeleted) {
          document.add(new StringField("delete", "me", Field.Store.NO));
        }
        indexWriter.addDocument(document);

        if (!markChildAsDeleted) {
          NavigableMap<String, FloatArrayList> parentIdToChildScores;
          if (childValueToParentIds.containsKey(childValue)) {
            parentIdToChildScores = childValueToParentIds.lget();
          } else {
            childValueToParentIds.put(
                childValue, parentIdToChildScores = new TreeMap<String, FloatArrayList>());
          }
          if (!markParentAsDeleted) {
            FloatArrayList childScores = parentIdToChildScores.get(parent);
            if (childScores == null) {
              parentIdToChildScores.put(parent, childScores = new FloatArrayList());
            }
            childScores.add(1f);
          }
        }
      }
    }

    // Delete docs that are marked to be deleted.
    indexWriter.deleteDocuments(new Term("delete", "me"));

    indexWriter.close();
    IndexReader indexReader = DirectoryReader.open(directory);
    IndexSearcher searcher = new IndexSearcher(indexReader);
    Engine.Searcher engineSearcher =
        new Engine.SimpleSearcher(ChildrenQueryTests.class.getSimpleName(), searcher);
    ((TestSearchContext) SearchContext.current())
        .setSearcher(new ContextIndexSearcher(SearchContext.current(), engineSearcher));

    TermFilter parentFilter = new TermFilter(new Term(TypeFieldMapper.NAME, "parent"));
    for (String childValue : childValues) {
      Query childQuery = new ConstantScoreQuery(new TermQuery(new Term("field1", childValue)));
      int shortCircuitParentDocSet = random().nextInt(numParentDocs);
      ScoreType scoreType = ScoreType.values()[random().nextInt(ScoreType.values().length)];
      Query query =
          new ChildrenQuery(
              "parent", "child", parentFilter, childQuery, scoreType, shortCircuitParentDocSet);
      BitSetCollector collector = new BitSetCollector(indexReader.maxDoc());
      int numHits = 1 + random().nextInt(25);
      TopScoreDocCollector actualTopDocsCollector = TopScoreDocCollector.create(numHits, false);
      searcher.search(query, MultiCollector.wrap(collector, actualTopDocsCollector));
      FixedBitSet actualResult = collector.getResult();

      FixedBitSet expectedResult = new FixedBitSet(indexReader.maxDoc());
      MockScorer mockScorer = new MockScorer(scoreType);
      TopScoreDocCollector expectedTopDocsCollector = TopScoreDocCollector.create(numHits, false);
      expectedTopDocsCollector.setScorer(mockScorer);
      if (childValueToParentIds.containsKey(childValue)) {
        AtomicReader slowAtomicReader = SlowCompositeReaderWrapper.wrap(indexReader);
        Terms terms = slowAtomicReader.terms(UidFieldMapper.NAME);
        if (terms != null) {
          NavigableMap<String, FloatArrayList> parentIdToChildScores = childValueToParentIds.lget();
          TermsEnum termsEnum = terms.iterator(null);
          DocsEnum docsEnum = null;
          for (Map.Entry<String, FloatArrayList> entry : parentIdToChildScores.entrySet()) {
            TermsEnum.SeekStatus seekStatus =
                termsEnum.seekCeil(Uid.createUidAsBytes("parent", entry.getKey()));
            if (seekStatus == TermsEnum.SeekStatus.FOUND) {
              docsEnum =
                  termsEnum.docs(slowAtomicReader.getLiveDocs(), docsEnum, DocsEnum.FLAG_NONE);
              expectedResult.set(docsEnum.nextDoc());
              mockScorer.scores = entry.getValue();
              expectedTopDocsCollector.collect(docsEnum.docID());
            } else if (seekStatus == TermsEnum.SeekStatus.END) {
              break;
            }
          }
        }
      }

      assertBitSet(actualResult, expectedResult, searcher);
      assertTopDocs(actualTopDocsCollector.topDocs(), expectedTopDocsCollector.topDocs());
    }

    indexReader.close();
    directory.close();
  }