@Test
  public void testBytes() throws Exception {
    List<Integer> docs = Arrays.asList(1, 5, 7);

    ObjectOpenHashSet<BytesRef> hTerms = new ObjectOpenHashSet<BytesRef>();
    List<BytesRef> cTerms = new ArrayList<BytesRef>(docs.size());
    for (int i = 0; i < docs.size(); i++) {
      BytesRef term = new BytesRef("str" + docs.get(i));
      hTerms.add(term);
      cTerms.add(term);
    }

    FieldDataTermsFilter hFilter = FieldDataTermsFilter.newBytes(getFieldData(strMapper), hTerms);

    int size = reader.maxDoc();
    FixedBitSet result = new FixedBitSet(size);

    result.clear(0, size);
    assertThat(result.cardinality(), equalTo(0));
    result.or(hFilter.getDocIdSet(reader.getContext(), reader.getLiveDocs()).iterator());
    assertThat(result.cardinality(), equalTo(docs.size()));
    for (int i = 0; i < reader.maxDoc(); i++) {
      assertThat(result.get(i), equalTo(docs.contains(i)));
    }

    // filter from mapper
    result.clear(0, size);
    assertThat(result.cardinality(), equalTo(0));
    result.or(
        strMapper
            .termsFilter(ifdService, cTerms, null)
            .getDocIdSet(reader.getContext(), reader.getLiveDocs())
            .iterator());
    assertThat(result.cardinality(), equalTo(docs.size()));
    for (int i = 0; i < reader.maxDoc(); i++) {
      assertThat(result.get(i), equalTo(docs.contains(i)));
    }

    result.clear(0, size);
    assertThat(result.cardinality(), equalTo(0));

    // filter on a numeric field using BytesRef terms
    // should not match any docs
    hFilter = FieldDataTermsFilter.newBytes(getFieldData(lngMapper), hTerms);
    result.or(hFilter.getDocIdSet(reader.getContext(), reader.getLiveDocs()).iterator());
    assertThat(result.cardinality(), equalTo(0));

    // filter on a numeric field using BytesRef terms
    // should not match any docs
    hFilter = FieldDataTermsFilter.newBytes(getFieldData(dblMapper), hTerms);
    result.or(hFilter.getDocIdSet(reader.getContext(), reader.getLiveDocs()).iterator());
    assertThat(result.cardinality(), equalTo(0));
  }
Ejemplo n.º 2
0
 /** Creates a {@link DocMap} instance appropriate for this reader. */
 public static DocMap build(AtomicReader reader) {
   final int maxDoc = reader.maxDoc();
   if (!reader.hasDeletions()) {
     return new NoDelDocMap(maxDoc);
   }
   final Bits liveDocs = reader.getLiveDocs();
   return build(maxDoc, liveDocs);
 }
Ejemplo n.º 3
0
  // indexes Integer.MAX_VALUE docs with a fixed binary field
  public void testFixedSorted() throws Exception {
    BaseDirectoryWrapper dir = newFSDirectory(_TestUtil.getTempDir("2BFixedSorted"));
    if (dir instanceof MockDirectoryWrapper) {
      ((MockDirectoryWrapper) dir).setThrottling(MockDirectoryWrapper.Throttling.NEVER);
    }

    IndexWriter w =
        new IndexWriter(
            dir,
            new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
                .setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
                .setRAMBufferSizeMB(256.0)
                .setMergeScheduler(new ConcurrentMergeScheduler())
                .setMergePolicy(newLogMergePolicy(false, 10))
                .setOpenMode(IndexWriterConfig.OpenMode.CREATE));

    Document doc = new Document();
    byte bytes[] = new byte[2];
    BytesRef data = new BytesRef(bytes);
    SortedDocValuesField dvField = new SortedDocValuesField("dv", data);
    doc.add(dvField);

    for (int i = 0; i < Integer.MAX_VALUE; i++) {
      bytes[0] = (byte) (i >> 8);
      bytes[1] = (byte) i;
      w.addDocument(doc);
      if (i % 100000 == 0) {
        System.out.println("indexed: " + i);
        System.out.flush();
      }
    }

    w.forceMerge(1);
    w.close();

    System.out.println("verifying...");
    System.out.flush();

    DirectoryReader r = DirectoryReader.open(dir);
    int expectedValue = 0;
    for (AtomicReaderContext context : r.leaves()) {
      AtomicReader reader = context.reader();
      BytesRef scratch = new BytesRef();
      BinaryDocValues dv = reader.getSortedDocValues("dv");
      for (int i = 0; i < reader.maxDoc(); i++) {
        bytes[0] = (byte) (expectedValue >> 8);
        bytes[1] = (byte) expectedValue;
        dv.get(i, scratch);
        assertEquals(data, scratch);
        expectedValue++;
      }
    }

    r.close();
    dir.close();
  }
  @Test
  public void testDoubles() throws Exception {
    List<Integer> docs = Arrays.asList(1, 5, 7);

    DoubleOpenHashSet hTerms = new DoubleOpenHashSet();
    List<Double> cTerms = new ArrayList<Double>(docs.size());
    for (int i = 0; i < docs.size(); i++) {
      double term = Double.valueOf(docs.get(i));
      hTerms.add(term);
      cTerms.add(term);
    }

    FieldDataTermsFilter hFilter = FieldDataTermsFilter.newDoubles(getFieldData(dblMapper), hTerms);

    int size = reader.maxDoc();
    FixedBitSet result = new FixedBitSet(size);

    result.clear(0, size);
    assertThat(result.cardinality(), equalTo(0));
    result.or(hFilter.getDocIdSet(reader.getContext(), reader.getLiveDocs()).iterator());
    assertThat(result.cardinality(), equalTo(docs.size()));
    for (int i = 0; i < reader.maxDoc(); i++) {
      assertThat(result.get(i), equalTo(docs.contains(i)));
    }

    // filter from mapper
    result.clear(0, size);
    assertThat(result.cardinality(), equalTo(0));
    result.or(
        dblMapper
            .termsFilter(ifdService, cTerms, null)
            .getDocIdSet(reader.getContext(), reader.getLiveDocs())
            .iterator());
    assertThat(result.cardinality(), equalTo(docs.size()));
    for (int i = 0; i < reader.maxDoc(); i++) {
      assertThat(result.get(i), equalTo(docs.contains(i)));
    }

    hFilter = FieldDataTermsFilter.newDoubles(getFieldData(lngMapper), hTerms);
    assertNull(hFilter.getDocIdSet(reader.getContext(), reader.getLiveDocs()));
  }
Ejemplo n.º 5
0
 private static void printDocs(DirectoryReader r) throws Throwable {
   for (AtomicReaderContext ctx : r.leaves()) {
     // TODO: improve this
     AtomicReader sub = ctx.reader();
     Bits liveDocs = sub.getLiveDocs();
     System.out.println("  " + ((SegmentReader) sub).getSegmentInfo());
     for (int docID = 0; docID < sub.maxDoc(); docID++) {
       StoredDocument doc = sub.document(docID);
       if (liveDocs == null || liveDocs.get(docID)) {
         System.out.println("    docID=" + docID + " id:" + doc.get("id"));
       } else {
         System.out.println("    DEL docID=" + docID + " id:" + doc.get("id"));
       }
     }
   }
 }
    public static TermsEnum filter(
        TermsEnum toFilter, Terms terms, AtomicReader reader, Settings settings)
        throws IOException {
      int docCount = terms.getDocCount();
      if (docCount == -1) {
        docCount = reader.maxDoc();
      }
      final double minFrequency = settings.getAsDouble("min", 0d);
      final double maxFrequency = settings.getAsDouble("max", docCount + 1d);
      final double minSegmentSize = settings.getAsInt("min_segment_size", 0);
      if (minSegmentSize < docCount) {
        final int minFreq =
            minFrequency >= 1.0 ? (int) minFrequency : (int) (docCount * minFrequency);
        final int maxFreq =
            maxFrequency >= 1.0 ? (int) maxFrequency : (int) (docCount * maxFrequency);
        assert minFreq < maxFreq;
        return new FrequencyFilter(toFilter, minFreq, maxFreq);
      }

      return toFilter;
    }
  @Override
  public DoubleArrayAtomicFieldData loadDirect(AtomicReaderContext context) throws Exception {
    AtomicReader reader = context.reader();

    Terms terms = reader.terms(getFieldNames().indexName());
    if (terms == null) {
      return DoubleArrayAtomicFieldData.EMPTY;
    }

    // TODO: how can we guess the number of terms? numerics end up creating more terms per value...
    final TDoubleArrayList values = new TDoubleArrayList();
    ArrayList<int[]> ordinals = new ArrayList<int[]>();
    int[] idx = new int[reader.maxDoc()];
    ordinals.add(new int[reader.maxDoc()]);

    values.add(0); // first "t" indicates null value
    int termOrd = 1; // current term number

    TermsEnum termsEnum = terms.iterator(null);
    try {
      DocsEnum docsEnum = null;
      for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) {
        values.add(FieldCache.NUMERIC_UTILS_DOUBLE_PARSER.parseDouble(term));
        docsEnum = termsEnum.docs(reader.getLiveDocs(), docsEnum, 0);
        for (int docId = docsEnum.nextDoc();
            docId != DocsEnum.NO_MORE_DOCS;
            docId = docsEnum.nextDoc()) {
          int[] ordinal;
          if (idx[docId] >= ordinals.size()) {
            ordinal = new int[reader.maxDoc()];
            ordinals.add(ordinal);
          } else {
            ordinal = ordinals.get(idx[docId]);
          }
          ordinal[docId] = termOrd;
          idx[docId]++;
        }
        termOrd++;
      }
    } catch (RuntimeException e) {
      if (e.getClass().getName().endsWith("StopFillCacheException")) {
        // all is well, in case numeric parsers are used.
      } else {
        throw e;
      }
    }

    if (ordinals.size() == 1) {
      int[] nativeOrdinals = ordinals.get(0);
      FixedBitSet set = new FixedBitSet(reader.maxDoc());
      double[] sValues = new double[reader.maxDoc()];
      boolean allHaveValue = true;
      for (int i = 0; i < nativeOrdinals.length; i++) {
        int nativeOrdinal = nativeOrdinals[i];
        if (nativeOrdinal == 0) {
          allHaveValue = false;
        } else {
          set.set(i);
          sValues[i] = values.get(nativeOrdinal);
        }
      }
      if (allHaveValue) {
        return new DoubleArrayAtomicFieldData.Single(sValues, reader.maxDoc());
      } else {
        return new DoubleArrayAtomicFieldData.SingleFixedSet(sValues, reader.maxDoc(), set);
      }
    } else {
      int[][] nativeOrdinals = new int[ordinals.size()][];
      for (int i = 0; i < nativeOrdinals.length; i++) {
        nativeOrdinals[i] = ordinals.get(i);
      }
      return new DoubleArrayAtomicFieldData.WithOrdinals(
          values.toArray(new double[values.size()]),
          reader.maxDoc(),
          Ordinals.Factories.createFromFlatOrdinals(
              nativeOrdinals, termOrd, fieldDataType.getSettings()));
    }
  }
  int resolveParentDocuments(
      TopDocs topDocs,
      SearchContext context,
      Recycler.V<ObjectObjectOpenHashMap<Object, ParentDoc[]>> parentDocs) {
    int parentHitsResolved = 0;
    Recycler.V<ObjectObjectOpenHashMap<Object, Recycler.V<IntObjectOpenHashMap<ParentDoc>>>>
        parentDocsPerReader =
            cacheRecycler.hashMap(context.searcher().getIndexReader().leaves().size());
    for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
      int readerIndex =
          ReaderUtil.subIndex(scoreDoc.doc, context.searcher().getIndexReader().leaves());
      AtomicReaderContext subContext =
          context.searcher().getIndexReader().leaves().get(readerIndex);
      int subDoc = scoreDoc.doc - subContext.docBase;

      // find the parent id
      HashedBytesArray parentId =
          context.idCache().reader(subContext.reader()).parentIdByDoc(parentType, subDoc);
      if (parentId == null) {
        // no parent found
        continue;
      }
      // now go over and find the parent doc Id and reader tuple
      for (AtomicReaderContext atomicReaderContext : context.searcher().getIndexReader().leaves()) {
        AtomicReader indexReader = atomicReaderContext.reader();
        int parentDocId = context.idCache().reader(indexReader).docById(parentType, parentId);
        Bits liveDocs = indexReader.getLiveDocs();
        if (parentDocId != -1 && (liveDocs == null || liveDocs.get(parentDocId))) {
          // we found a match, add it and break

          Recycler.V<IntObjectOpenHashMap<ParentDoc>> readerParentDocs =
              parentDocsPerReader.v().get(indexReader.getCoreCacheKey());
          if (readerParentDocs == null) {
            readerParentDocs = cacheRecycler.intObjectMap(indexReader.maxDoc());
            parentDocsPerReader.v().put(indexReader.getCoreCacheKey(), readerParentDocs);
          }

          ParentDoc parentDoc = readerParentDocs.v().get(parentDocId);
          if (parentDoc == null) {
            parentHitsResolved++; // we have a hit on a parent
            parentDoc = new ParentDoc();
            parentDoc.docId = parentDocId;
            parentDoc.count = 1;
            parentDoc.maxScore = scoreDoc.score;
            parentDoc.sumScores = scoreDoc.score;
            readerParentDocs.v().put(parentDocId, parentDoc);
          } else {
            parentDoc.count++;
            parentDoc.sumScores += scoreDoc.score;
            if (scoreDoc.score > parentDoc.maxScore) {
              parentDoc.maxScore = scoreDoc.score;
            }
          }
        }
      }
    }
    boolean[] states = parentDocsPerReader.v().allocated;
    Object[] keys = parentDocsPerReader.v().keys;
    Object[] values = parentDocsPerReader.v().values;
    for (int i = 0; i < states.length; i++) {
      if (states[i]) {
        Recycler.V<IntObjectOpenHashMap<ParentDoc>> value =
            (Recycler.V<IntObjectOpenHashMap<ParentDoc>>) values[i];
        ParentDoc[] _parentDocs = value.v().values().toArray(ParentDoc.class);
        Arrays.sort(_parentDocs, PARENT_DOC_COMP);
        parentDocs.v().put(keys[i], _parentDocs);
        Releasables.release(value);
      }
    }
    Releasables.release(parentDocsPerReader);
    return parentHitsResolved;
  }
Ejemplo n.º 9
0
  private void verify(AtomicReader r, int[][] idToOrds, BytesRef[] termsArray, BytesRef prefixRef)
      throws Exception {

    final DocTermOrds dto =
        new DocTermOrds(
            r,
            r.getLiveDocs(),
            "field",
            prefixRef,
            Integer.MAX_VALUE,
            _TestUtil.nextInt(random(), 2, 10));

    final FieldCache.Ints docIDToID = FieldCache.DEFAULT.getInts(r, "id", false);
    /*
      for(int docID=0;docID<subR.maxDoc();docID++) {
      System.out.println("  docID=" + docID + " id=" + docIDToID[docID]);
      }
    */

    if (VERBOSE) {
      System.out.println(
          "TEST: verify prefix=" + (prefixRef == null ? "null" : prefixRef.utf8ToString()));
      System.out.println("TEST: all TERMS:");
      TermsEnum allTE = MultiFields.getTerms(r, "field").iterator(null);
      int ord = 0;
      while (allTE.next() != null) {
        System.out.println("  ord=" + (ord++) + " term=" + allTE.term().utf8ToString());
      }
    }

    // final TermsEnum te = subR.fields().terms("field").iterator();
    final TermsEnum te = dto.getOrdTermsEnum(r);
    if (dto.numTerms() == 0) {
      if (prefixRef == null) {
        assertNull(MultiFields.getTerms(r, "field"));
      } else {
        Terms terms = MultiFields.getTerms(r, "field");
        if (terms != null) {
          TermsEnum termsEnum = terms.iterator(null);
          TermsEnum.SeekStatus result = termsEnum.seekCeil(prefixRef, false);
          if (result != TermsEnum.SeekStatus.END) {
            assertFalse(
                "term="
                    + termsEnum.term().utf8ToString()
                    + " matches prefix="
                    + prefixRef.utf8ToString(),
                StringHelper.startsWith(termsEnum.term(), prefixRef));
          } else {
            // ok
          }
        } else {
          // ok
        }
      }
      return;
    }

    if (VERBOSE) {
      System.out.println("TEST: TERMS:");
      te.seekExact(0);
      while (true) {
        System.out.println("  ord=" + te.ord() + " term=" + te.term().utf8ToString());
        if (te.next() == null) {
          break;
        }
      }
    }

    SortedSetDocValues iter = dto.iterator(r);
    for (int docID = 0; docID < r.maxDoc(); docID++) {
      if (VERBOSE) {
        System.out.println(
            "TEST: docID=" + docID + " of " + r.maxDoc() + " (id=" + docIDToID.get(docID) + ")");
      }
      iter.setDocument(docID);
      final int[] answers = idToOrds[docIDToID.get(docID)];
      int upto = 0;
      long ord;
      while ((ord = iter.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
        te.seekExact(ord);
        final BytesRef expected = termsArray[answers[upto++]];
        if (VERBOSE) {
          System.out.println(
              "  exp=" + expected.utf8ToString() + " actual=" + te.term().utf8ToString());
        }
        assertEquals(
            "expected="
                + expected.utf8ToString()
                + " actual="
                + te.term().utf8ToString()
                + " ord="
                + ord,
            expected,
            te.term());
      }
      assertEquals(answers.length, upto);
    }
  }