Java LeafReader.maxDoc 예제들, org.apache.lucene.index.LeafReader.maxDoc Java 예제들

예제 #1

0

파일 보기

파일: TestReuseDocsEnum.java 프로젝트: sail-umkc/Examples

  public void testReuseDocsEnumNoReuse() throws IOException {
    Directory dir = newDirectory();
    Codec cp = TestUtil.alwaysPostingsFormat(new Lucene40RWPostingsFormat());
    RandomIndexWriter writer =
        new RandomIndexWriter(
            random(), dir, newIndexWriterConfig(new MockAnalyzer(random())).setCodec(cp));
    int numdocs = atLeast(20);
    createRandomIndex(numdocs, writer, random());
    writer.commit();

    DirectoryReader open = DirectoryReader.open(dir);
    for (LeafReaderContext ctx : open.leaves()) {
      LeafReader indexReader = ctx.reader();
      Terms terms = indexReader.terms("body");
      TermsEnum iterator = terms.iterator();
      IdentityHashMap<PostingsEnum, Boolean> enums = new IdentityHashMap<>();
      MatchNoBits bits = new Bits.MatchNoBits(indexReader.maxDoc());
      while ((iterator.next()) != null) {
        PostingsEnum docs =
            iterator.postings(
                random().nextBoolean() ? bits : new Bits.MatchNoBits(indexReader.maxDoc()),
                null,
                random().nextBoolean() ? PostingsEnum.FREQS : PostingsEnum.NONE);
        enums.put(docs, true);
      }

      assertEquals(terms.size(), enums.size());
    }
    writer.commit();
    IOUtils.close(writer, open, dir);
  }

예제 #2

0

파일 보기

파일: ShapeFieldCacheProvider.java 프로젝트: Edwin-Ran/Lucene_src_learning

  public synchronized ShapeFieldCache<T> getCache(LeafReader reader) throws IOException {
    ShapeFieldCache<T> idx = sidx.get(reader);
    if (idx != null) {
      return idx;
    }
    long startTime = System.currentTimeMillis();

    log.fine("Building Cache [" + reader.maxDoc() + "]");
    idx = new ShapeFieldCache<>(reader.maxDoc(), defaultSize);
    int count = 0;
    DocsEnum docs = null;
    Terms terms = reader.terms(shapeField);
    TermsEnum te = null;
    if (terms != null) {
      te = terms.iterator(te);
      BytesRef term = te.next();
      while (term != null) {
        T shape = readShape(term);
        if (shape != null) {
          docs = te.docs(null, docs, DocsEnum.FLAG_NONE);
          Integer docid = docs.nextDoc();
          while (docid != DocIdSetIterator.NO_MORE_DOCS) {
            idx.add(docid, shape);
            docid = docs.nextDoc();
            count++;
          }
        }
        term = te.next();
      }
    }
    sidx.put(reader, idx);
    long elapsed = System.currentTimeMillis() - startTime;
    log.fine("Cached: [" + count + " in " + elapsed + "ms] " + idx);
    return idx;
  }

예제 #3

0

파일 보기

파일: Sorter.java 프로젝트: markrmiller/lucene-solr-svn2git

  /**
   * Returns a mapping from the old document ID to its new location in the sorted index.
   * Implementations can use the auxiliary {@link #sort(int, DocComparator)} to compute the
   * old-to-new permutation given a list of documents and their corresponding values.
   *
   * <p>A return value of <tt>null</tt> is allowed and means that <code>reader</code> is already
   * sorted.
   *
   * <p><b>NOTE:</b> deleted documents are expected to appear in the mapping as well, they will
   * however be marked as deleted in the sorted view.
   */
  DocMap sort(LeafReader reader) throws IOException {
    SortField fields[] = sort.getSort();
    final int reverseMul[] = new int[fields.length];
    final LeafFieldComparator comparators[] = new LeafFieldComparator[fields.length];

    for (int i = 0; i < fields.length; i++) {
      reverseMul[i] = fields[i].getReverse() ? -1 : 1;
      comparators[i] = fields[i].getComparator(1, i).getLeafComparator(reader.getContext());
      comparators[i].setScorer(FAKESCORER);
    }
    final DocComparator comparator =
        new DocComparator() {
          @Override
          public int compare(int docID1, int docID2) {
            try {
              for (int i = 0; i < comparators.length; i++) {
                // TODO: would be better if copy() didnt cause a term lookup in TermOrdVal & co,
                // the segments are always the same here...
                comparators[i].copy(0, docID1);
                comparators[i].setBottom(0);
                int comp = reverseMul[i] * comparators[i].compareBottom(docID2);
                if (comp != 0) {
                  return comp;
                }
              }
              return Integer.compare(docID1, docID2); // docid order tiebreak
            } catch (IOException e) {
              throw new RuntimeException(e);
            }
          }
        };
    return sort(reader.maxDoc(), comparator);
  }

예제 #4

0

파일 보기

파일: AssertingLeafReader.java 프로젝트: uknth/lucene-solr

  public AssertingLeafReader(LeafReader in) {
    super(in);
    // check some basic reader sanity
    assert in.maxDoc() >= 0;
    assert in.numDocs() <= in.maxDoc();
    assert in.numDeletedDocs() + in.numDocs() == in.maxDoc();
    assert !in.hasDeletions() || in.numDeletedDocs() > 0 && in.numDocs() < in.maxDoc();

    addCoreClosedListener(
        new CoreClosedListener() {
          @Override
          public void onClose(Object ownerCoreCacheKey) throws IOException {
            final Object expectedKey = getCoreCacheKey();
            assert expectedKey == ownerCoreCacheKey
                : "Core closed listener called on a different key "
                    + expectedKey
                    + " <> "
                    + ownerCoreCacheKey;
          }
        });
  }

예제 #5

0

파일 보기

파일: ESIndexLevelReplicationTestCase.java 프로젝트: yaauie/elasticsearch

 private Set<Uid> getShardDocUIDs(final IndexShard shard) throws IOException {
   shard.refresh("get_uids");
   try (Engine.Searcher searcher = shard.acquireSearcher("test")) {
     Set<Uid> ids = new HashSet<>();
     for (LeafReaderContext leafContext : searcher.reader().leaves()) {
       LeafReader reader = leafContext.reader();
       Bits liveDocs = reader.getLiveDocs();
       for (int i = 0; i < reader.maxDoc(); i++) {
         if (liveDocs == null || liveDocs.get(i)) {
           Document uuid = reader.document(i, Collections.singleton(UidFieldMapper.NAME));
           ids.add(Uid.createUid(uuid.get(UidFieldMapper.NAME)));
         }
       }
     }
     return ids;
   }
 }

예제 #6

0

파일 보기

파일: TestSimilarityProvider.java 프로젝트: PATRIC3/p3_solr

  public void testBasics() throws Exception {
    // sanity check of norms writer
    // TODO: generalize
    LeafReader slow = SlowCompositeReaderWrapper.wrap(reader);
    NumericDocValues fooNorms = slow.getNormValues("foo");
    NumericDocValues barNorms = slow.getNormValues("bar");
    for (int i = 0; i < slow.maxDoc(); i++) {
      assertFalse(fooNorms.get(i) == barNorms.get(i));
    }

    // sanity check of searching
    TopDocs foodocs = searcher.search(new TermQuery(new Term("foo", "brown")), 10);
    assertTrue(foodocs.totalHits > 0);
    TopDocs bardocs = searcher.search(new TermQuery(new Term("bar", "brown")), 10);
    assertTrue(bardocs.totalHits > 0);
    assertTrue(foodocs.scoreDocs[0].score < bardocs.scoreDocs[0].score);
  }

예제 #7

0

파일 보기

파일: DocTermOrds.java 프로젝트: RainingWang/lucene-solr

  /** Call this only once (if you subclass!) */
  protected void uninvert(final LeafReader reader, Bits liveDocs, final BytesRef termPrefix)
      throws IOException {
    final FieldInfo info = reader.getFieldInfos().fieldInfo(field);
    if (checkForDocValues && info != null && info.getDocValuesType() != DocValuesType.NONE) {
      throw new IllegalStateException(
          "Type mismatch: " + field + " was indexed as " + info.getDocValuesType());
    }
    // System.out.println("DTO uninvert field=" + field + " prefix=" + termPrefix);
    final long startTime = System.nanoTime();
    prefix = termPrefix == null ? null : BytesRef.deepCopyOf(termPrefix);

    final int maxDoc = reader.maxDoc();
    final int[] index =
        new int
            [maxDoc]; // immediate term numbers, or the index into the byte[] representing the last
    // number
    final int[] lastTerm = new int[maxDoc]; // last term we saw for this document
    final byte[][] bytes =
        new byte[maxDoc][]; // list of term numbers for the doc (delta encoded vInts)

    final Terms terms = reader.terms(field);
    if (terms == null) {
      // No terms
      return;
    }

    final TermsEnum te = terms.iterator();
    final BytesRef seekStart = termPrefix != null ? termPrefix : new BytesRef();
    // System.out.println("seekStart=" + seekStart.utf8ToString());
    if (te.seekCeil(seekStart) == TermsEnum.SeekStatus.END) {
      // No terms match
      return;
    }

    // For our "term index wrapper"
    final List<BytesRef> indexedTerms = new ArrayList<>();
    final PagedBytes indexedTermsBytes = new PagedBytes(15);

    // we need a minimum of 9 bytes, but round up to 12 since the space would
    // be wasted with most allocators anyway.
    byte[] tempArr = new byte[12];

    //
    // enumerate all terms, and build an intermediate form of the un-inverted field.
    //
    // During this intermediate form, every document has a (potential) byte[]
    // and the int[maxDoc()] array either contains the termNumber list directly
    // or the *end* offset of the termNumber list in its byte array (for faster
    // appending and faster creation of the final form).
    //
    // idea... if things are too large while building, we could do a range of docs
    // at a time (but it would be a fair amount slower to build)
    // could also do ranges in parallel to take advantage of multiple CPUs

    // OPTIONAL: remap the largest df terms to the lowest 128 (single byte)
    // values.  This requires going over the field first to find the most
    // frequent terms ahead of time.

    int termNum = 0;
    postingsEnum = null;

    // Loop begins with te positioned to first term (we call
    // seek above):
    for (; ; ) {
      final BytesRef t = te.term();
      if (t == null || (termPrefix != null && !StringHelper.startsWith(t, termPrefix))) {
        break;
      }
      // System.out.println("visit term=" + t.utf8ToString() + " " + t + " termNum=" + termNum);

      visitTerm(te, termNum);

      if ((termNum & indexIntervalMask) == 0) {
        // Index this term
        sizeOfIndexedStrings += t.length;
        BytesRef indexedTerm = new BytesRef();
        indexedTermsBytes.copy(t, indexedTerm);
        // TODO: really should 1) strip off useless suffix,
        // and 2) use FST not array/PagedBytes
        indexedTerms.add(indexedTerm);
      }

      final int df = te.docFreq();
      if (df <= maxTermDocFreq) {

        postingsEnum = te.postings(postingsEnum, PostingsEnum.NONE);

        // dF, but takes deletions into account
        int actualDF = 0;

        for (; ; ) {
          int doc = postingsEnum.nextDoc();
          if (doc == DocIdSetIterator.NO_MORE_DOCS) {
            break;
          }
          // System.out.println("  chunk=" + chunk + " docs");

          actualDF++;
          termInstances++;

          // System.out.println("    docID=" + doc);
          // add TNUM_OFFSET to the term number to make room for special reserved values:
          // 0 (end term) and 1 (index into byte array follows)
          int delta = termNum - lastTerm[doc] + TNUM_OFFSET;
          lastTerm[doc] = termNum;
          int val = index[doc];

          if ((val & 0xff) == 1) {
            // index into byte array (actually the end of
            // the doc-specific byte[] when building)
            int pos = val >>> 8;
            int ilen = vIntSize(delta);
            byte[] arr = bytes[doc];
            int newend = pos + ilen;
            if (newend > arr.length) {
              // We avoid a doubling strategy to lower memory usage.
              // this faceting method isn't for docs with many terms.
              // In hotspot, objects have 2 words of overhead, then fields, rounded up to a 64-bit
              // boundary.
              // TODO: figure out what array lengths we can round up to w/o actually using more
              // memory
              // (how much space does a byte[] take up?  Is data preceded by a 32 bit length only?
              // It should be safe to round up to the nearest 32 bits in any case.
              int newLen = (newend + 3) & 0xfffffffc; // 4 byte alignment
              byte[] newarr = new byte[newLen];
              System.arraycopy(arr, 0, newarr, 0, pos);
              arr = newarr;
              bytes[doc] = newarr;
            }
            pos = writeInt(delta, arr, pos);
            index[doc] = (pos << 8) | 1; // update pointer to end index in byte[]
          } else {
            // OK, this int has data in it... find the end (a zero starting byte - not
            // part of another number, hence not following a byte with the high bit set).
            int ipos;
            if (val == 0) {
              ipos = 0;
            } else if ((val & 0x0000ff80) == 0) {
              ipos = 1;
            } else if ((val & 0x00ff8000) == 0) {
              ipos = 2;
            } else if ((val & 0xff800000) == 0) {
              ipos = 3;
            } else {
              ipos = 4;
            }

            // System.out.println("      ipos=" + ipos);

            int endPos = writeInt(delta, tempArr, ipos);
            // System.out.println("      endpos=" + endPos);
            if (endPos <= 4) {
              // System.out.println("      fits!");
              // value will fit in the integer... move bytes back
              for (int j = ipos; j < endPos; j++) {
                val |= (tempArr[j] & 0xff) << (j << 3);
              }
              index[doc] = val;
            } else {
              // value won't fit... move integer into byte[]
              for (int j = 0; j < ipos; j++) {
                tempArr[j] = (byte) val;
                val >>>= 8;
              }
              // point at the end index in the byte[]
              index[doc] = (endPos << 8) | 1;
              bytes[doc] = tempArr;
              tempArr = new byte[12];
            }
          }
        }
        setActualDocFreq(termNum, actualDF);
      }

      termNum++;
      if (te.next() == null) {
        break;
      }
    }

    numTermsInField = termNum;

    long midPoint = System.nanoTime();

    if (termInstances == 0) {
      // we didn't invert anything
      // lower memory consumption.
      tnums = null;
    } else {

      this.index = index;

      //
      // transform intermediate form into the final form, building a single byte[]
      // at a time, and releasing the intermediate byte[]s as we go to avoid
      // increasing the memory footprint.
      //

      for (int pass = 0; pass < 256; pass++) {
        byte[] target = tnums[pass];
        int pos = 0; // end in target;
        if (target != null) {
          pos = target.length;
        } else {
          target = new byte[4096];
        }

        // loop over documents, 0x00ppxxxx, 0x01ppxxxx, 0x02ppxxxx
        // where pp is the pass (which array we are building), and xx is all values.
        // each pass shares the same byte[] for termNumber lists.
        for (int docbase = pass << 16; docbase < maxDoc; docbase += (1 << 24)) {
          int lim = Math.min(docbase + (1 << 16), maxDoc);
          for (int doc = docbase; doc < lim; doc++) {
            // System.out.println("  pass="******" process docID=" + doc);
            int val = index[doc];
            if ((val & 0xff) == 1) {
              int len = val >>> 8;
              // System.out.println("    ptr pos=" + pos);
              index[doc] = (pos << 8) | 1; // change index to point to start of array
              if ((pos & 0xff000000) != 0) {
                // we only have 24 bits for the array index
                throw new IllegalStateException(
                    "Too many values for UnInvertedField faceting on field " + field);
              }
              byte[] arr = bytes[doc];
              /*
              for(byte b : arr) {
                //System.out.println("      b=" + Integer.toHexString((int) b));
              }
              */
              bytes[doc] = null; // IMPORTANT: allow GC to avoid OOM
              if (target.length <= pos + len) {
                int newlen = target.length;
                /**
                 * * we don't have to worry about the array getting too large since the "pos" param
                 * will overflow first (only 24 bits available) if ((newlen<<1) <= 0) { //
                 * overflow... newlen = Integer.MAX_VALUE; if (newlen <= pos + len) { throw new
                 * SolrException(400,"Too many terms to uninvert field!"); } } else { while (newlen
                 * <= pos + len) newlen<<=1; // doubling strategy } **
                 */
                while (newlen <= pos + len) newlen <<= 1; // doubling strategy
                byte[] newtarget = new byte[newlen];
                System.arraycopy(target, 0, newtarget, 0, pos);
                target = newtarget;
              }
              System.arraycopy(arr, 0, target, pos, len);
              pos += len + 1; // skip single byte at end and leave it 0 for terminator
            }
          }
        }

        // shrink array
        if (pos < target.length) {
          byte[] newtarget = new byte[pos];
          System.arraycopy(target, 0, newtarget, 0, pos);
          target = newtarget;
        }

        tnums[pass] = target;

        if ((pass << 16) > maxDoc) break;
      }
    }
    indexedTermsArray = indexedTerms.toArray(new BytesRef[indexedTerms.size()]);

    long endTime = System.nanoTime();

    total_time = (int) TimeUnit.MILLISECONDS.convert(endTime - startTime, TimeUnit.NANOSECONDS);
    phase1_time = (int) TimeUnit.MILLISECONDS.convert(midPoint - startTime, TimeUnit.NANOSECONDS);
  }

예제 #8

0

파일 보기

파일: ChildrenQueryTests.java 프로젝트: Rjoydip/elasticsearch

  @Test
  public void testRandom() throws Exception {
    Directory directory = newDirectory();
    final Random r = random();
    final IndexWriterConfig iwc =
        LuceneTestCase.newIndexWriterConfig(r, new MockAnalyzer(r))
            .setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
            .setRAMBufferSizeMB(
                scaledRandomIntBetween(16, 64)); // we might index a lot - don't go crazy here
    RandomIndexWriter indexWriter = new RandomIndexWriter(r, directory, iwc);
    int numUniqueChildValues = scaledRandomIntBetween(100, 2000);
    String[] childValues = new String[numUniqueChildValues];
    for (int i = 0; i < numUniqueChildValues; i++) {
      childValues[i] = Integer.toString(i);
    }

    IntOpenHashSet filteredOrDeletedDocs = new IntOpenHashSet();

    int childDocId = 0;
    int numParentDocs = scaledRandomIntBetween(1, numUniqueChildValues);
    ObjectObjectOpenHashMap<String, NavigableMap<String, FloatArrayList>> childValueToParentIds =
        new ObjectObjectOpenHashMap<>();
    for (int parentDocId = 0; parentDocId < numParentDocs; parentDocId++) {
      boolean markParentAsDeleted = rarely();
      boolean filterMe = rarely();
      String parent = Integer.toString(parentDocId);
      Document document = new Document();
      document.add(
          new StringField(UidFieldMapper.NAME, Uid.createUid("parent", parent), Field.Store.YES));
      document.add(new StringField(TypeFieldMapper.NAME, "parent", Field.Store.NO));
      if (markParentAsDeleted) {
        filteredOrDeletedDocs.add(parentDocId);
        document.add(new StringField("delete", "me", Field.Store.NO));
      }
      if (filterMe) {
        filteredOrDeletedDocs.add(parentDocId);
        document.add(new StringField("filter", "me", Field.Store.NO));
      }
      indexWriter.addDocument(document);

      int numChildDocs = scaledRandomIntBetween(0, 100);
      for (int i = 0; i < numChildDocs; i++) {
        boolean markChildAsDeleted = rarely();
        String childValue = childValues[random().nextInt(childValues.length)];

        document = new Document();
        document.add(
            new StringField(
                UidFieldMapper.NAME,
                Uid.createUid("child", Integer.toString(childDocId++)),
                Field.Store.NO));
        document.add(new StringField(TypeFieldMapper.NAME, "child", Field.Store.NO));
        document.add(
            new StringField(
                ParentFieldMapper.NAME, Uid.createUid("parent", parent), Field.Store.NO));
        document.add(new StringField("field1", childValue, Field.Store.NO));
        if (markChildAsDeleted) {
          document.add(new StringField("delete", "me", Field.Store.NO));
        }
        indexWriter.addDocument(document);

        if (!markChildAsDeleted) {
          NavigableMap<String, FloatArrayList> parentIdToChildScores;
          if (childValueToParentIds.containsKey(childValue)) {
            parentIdToChildScores = childValueToParentIds.lget();
          } else {
            childValueToParentIds.put(childValue, parentIdToChildScores = new TreeMap<>());
          }
          if (!markParentAsDeleted && !filterMe) {
            FloatArrayList childScores = parentIdToChildScores.get(parent);
            if (childScores == null) {
              parentIdToChildScores.put(parent, childScores = new FloatArrayList());
            }
            childScores.add(1f);
          }
        }
      }
    }

    // Delete docs that are marked to be deleted.
    indexWriter.deleteDocuments(new Term("delete", "me"));
    indexWriter.commit();

    IndexReader indexReader = DirectoryReader.open(directory);
    IndexSearcher searcher = new IndexSearcher(indexReader);
    Engine.Searcher engineSearcher =
        new Engine.Searcher(ChildrenQueryTests.class.getSimpleName(), searcher);
    ((TestSearchContext) SearchContext.current())
        .setSearcher(new ContextIndexSearcher(SearchContext.current(), engineSearcher));

    int max = numUniqueChildValues / 4;
    for (int i = 0; i < max; i++) {
      // Simulate a parent update
      if (random().nextBoolean()) {
        final int numberOfUpdatableParents = numParentDocs - filteredOrDeletedDocs.size();
        int numberOfUpdates =
            RandomInts.randomIntBetween(
                random(), 0, Math.min(numberOfUpdatableParents, TEST_NIGHTLY ? 25 : 5));
        for (int j = 0; j < numberOfUpdates; j++) {
          int parentId;
          do {
            parentId = random().nextInt(numParentDocs);
          } while (filteredOrDeletedDocs.contains(parentId));

          String parentUid = Uid.createUid("parent", Integer.toString(parentId));
          indexWriter.deleteDocuments(new Term(UidFieldMapper.NAME, parentUid));

          Document document = new Document();
          document.add(new StringField(UidFieldMapper.NAME, parentUid, Field.Store.YES));
          document.add(new StringField(TypeFieldMapper.NAME, "parent", Field.Store.NO));
          indexWriter.addDocument(document);
        }

        indexReader.close();
        indexReader = DirectoryReader.open(indexWriter.w, true);
        searcher = new IndexSearcher(indexReader);
        engineSearcher =
            new Engine.Searcher(ChildrenConstantScoreQueryTests.class.getSimpleName(), searcher);
        ((TestSearchContext) SearchContext.current())
            .setSearcher(new ContextIndexSearcher(SearchContext.current(), engineSearcher));
      }

      String childValue = childValues[random().nextInt(numUniqueChildValues)];
      int shortCircuitParentDocSet = random().nextInt(numParentDocs);
      ScoreType scoreType = ScoreType.values()[random().nextInt(ScoreType.values().length)];
      // leave min/max set to 0 half the time
      int minChildren = random().nextInt(2) * scaledRandomIntBetween(0, 110);
      int maxChildren = random().nextInt(2) * scaledRandomIntBetween(minChildren, 110);

      QueryBuilder queryBuilder =
          hasChildQuery("child", constantScoreQuery(termQuery("field1", childValue)))
              .scoreType(scoreType.name().toLowerCase(Locale.ENGLISH))
              .minChildren(minChildren)
              .maxChildren(maxChildren)
              .setShortCircuitCutoff(shortCircuitParentDocSet);
      // Using a FQ, will invoke / test the Scorer#advance(..) and also let the Weight#scorer not
      // get live docs as acceptedDocs
      queryBuilder = filteredQuery(queryBuilder, notFilter(termFilter("filter", "me")));
      Query query = parseQuery(queryBuilder);
      BitSetCollector collector = new BitSetCollector(indexReader.maxDoc());
      int numHits = 1 + random().nextInt(25);
      TopScoreDocCollector actualTopDocsCollector = TopScoreDocCollector.create(numHits);
      searcher.search(query, MultiCollector.wrap(collector, actualTopDocsCollector));
      FixedBitSet actualResult = collector.getResult();

      FixedBitSet expectedResult = new FixedBitSet(indexReader.maxDoc());
      TopScoreDocCollector expectedTopDocsCollector = TopScoreDocCollector.create(numHits);
      if (childValueToParentIds.containsKey(childValue)) {
        LeafReader slowLeafReader = SlowCompositeReaderWrapper.wrap(indexReader);
        final FloatArrayList[] scores = new FloatArrayList[slowLeafReader.maxDoc()];
        Terms terms = slowLeafReader.terms(UidFieldMapper.NAME);
        if (terms != null) {
          NavigableMap<String, FloatArrayList> parentIdToChildScores = childValueToParentIds.lget();
          TermsEnum termsEnum = terms.iterator(null);
          DocsEnum docsEnum = null;
          for (Map.Entry<String, FloatArrayList> entry : parentIdToChildScores.entrySet()) {
            int count = entry.getValue().elementsCount;
            if (count >= minChildren && (maxChildren == 0 || count <= maxChildren)) {
              TermsEnum.SeekStatus seekStatus =
                  termsEnum.seekCeil(Uid.createUidAsBytes("parent", entry.getKey()));
              if (seekStatus == TermsEnum.SeekStatus.FOUND) {
                docsEnum =
                    termsEnum.docs(slowLeafReader.getLiveDocs(), docsEnum, DocsEnum.FLAG_NONE);
                expectedResult.set(docsEnum.nextDoc());
                scores[docsEnum.docID()] = new FloatArrayList(entry.getValue());
              } else if (seekStatus == TermsEnum.SeekStatus.END) {
                break;
              }
            }
          }
        }
        MockScorer mockScorer = new MockScorer(scoreType);
        final LeafCollector leafCollector =
            expectedTopDocsCollector.getLeafCollector(slowLeafReader.getContext());
        leafCollector.setScorer(mockScorer);
        for (int doc = expectedResult.nextSetBit(0);
            doc < slowLeafReader.maxDoc();
            doc =
                doc + 1 >= expectedResult.length()
                    ? DocIdSetIterator.NO_MORE_DOCS
                    : expectedResult.nextSetBit(doc + 1)) {
          mockScorer.scores = scores[doc];
          leafCollector.collect(doc);
        }
      }

      assertBitSet(actualResult, expectedResult, searcher);
      assertTopDocs(actualTopDocsCollector.topDocs(), expectedTopDocsCollector.topDocs());
    }

    indexWriter.close();
    indexReader.close();
    directory.close();
  }

예제 #9

0

파일 보기

파일: PackedArrayIndexFieldData.java 프로젝트: nirmalc/elasticsearch

  protected CommonSettings.MemoryStorageFormat chooseStorageFormat(
      LeafReader reader,
      PackedLongValues values,
      Ordinals build,
      RandomAccessOrds ordinals,
      long minValue,
      long maxValue,
      float acceptableOverheadRatio,
      int pageSize) {

    CommonSettings.MemoryStorageFormat format;

    // estimate memory usage for a single packed array
    long packedDelta = maxValue - minValue + 1; // allow for a missing value
    // valuesDelta can be negative if the difference between max and min values overflows the
    // positive side of longs.
    int bitsRequired = packedDelta < 0 ? 64 : PackedInts.bitsRequired(packedDelta);
    PackedInts.FormatAndBits formatAndBits =
        PackedInts.fastestFormatAndBits(reader.maxDoc(), bitsRequired, acceptableOverheadRatio);
    final long singleValuesSize =
        formatAndBits.format.longCount(
                PackedInts.VERSION_CURRENT, reader.maxDoc(), formatAndBits.bitsPerValue)
            * 8L;

    // ordinal memory usage
    final long ordinalsSize = build.ramBytesUsed() + values.ramBytesUsed();

    // estimate the memory signature of paged packing
    long pagedSingleValuesSize =
        (reader.maxDoc() / pageSize + 1) * RamUsageEstimator.NUM_BYTES_OBJECT_REF; // array of pages
    int pageIndex = 0;
    long pageMinOrdinal = Long.MAX_VALUE;
    long pageMaxOrdinal = Long.MIN_VALUE;
    for (int i = 1; i < reader.maxDoc(); ++i, pageIndex = (pageIndex + 1) % pageSize) {
      ordinals.setDocument(i);
      if (ordinals.cardinality() > 0) {
        long ordinal = ordinals.ordAt(0);
        pageMaxOrdinal = Math.max(ordinal, pageMaxOrdinal);
        pageMinOrdinal = Math.min(ordinal, pageMinOrdinal);
      }
      if (pageIndex == pageSize - 1) {
        // end of page, we now know enough to estimate memory usage
        pagedSingleValuesSize +=
            getPageMemoryUsage(
                values, acceptableOverheadRatio, pageSize, pageMinOrdinal, pageMaxOrdinal);

        pageMinOrdinal = Long.MAX_VALUE;
        pageMaxOrdinal = Long.MIN_VALUE;
      }
    }

    if (pageIndex > 0) {
      // last page estimation
      pageIndex++;
      pagedSingleValuesSize +=
          getPageMemoryUsage(
              values, acceptableOverheadRatio, pageSize, pageMinOrdinal, pageMaxOrdinal);
    }

    if (ordinalsSize < singleValuesSize) {
      if (ordinalsSize < pagedSingleValuesSize) {
        format = CommonSettings.MemoryStorageFormat.ORDINALS;
      } else {
        format = CommonSettings.MemoryStorageFormat.PAGED;
      }
    } else {
      if (pagedSingleValuesSize < singleValuesSize) {
        format = CommonSettings.MemoryStorageFormat.PAGED;
      } else {
        format = CommonSettings.MemoryStorageFormat.PACKED;
      }
    }
    return format;
  }

예제 #10

0

파일 보기

파일: PackedArrayIndexFieldData.java 프로젝트: nirmalc/elasticsearch

  @Override
  public AtomicNumericFieldData loadDirect(LeafReaderContext context) throws Exception {
    final LeafReader reader = context.reader();
    Terms terms = reader.terms(getFieldNames().indexName());
    AtomicNumericFieldData data = null;
    PackedArrayEstimator estimator =
        new PackedArrayEstimator(
            breakerService.getBreaker(CircuitBreaker.FIELDDATA),
            getNumericType(),
            getFieldNames().fullName());
    if (terms == null) {
      data = AtomicLongFieldData.empty(reader.maxDoc());
      estimator.adjustForNoTerms(data.ramBytesUsed());
      return data;
    }
    // TODO: how can we guess the number of terms? numerics end up creating more terms per value...
    // Lucene encodes numeric data so that the lexicographical (encoded) order matches the integer
    // order so we know the sequence of
    // longs is going to be monotonically increasing
    final PackedLongValues.Builder valuesBuilder =
        PackedLongValues.monotonicBuilder(PackedInts.COMPACT);

    final float acceptableTransientOverheadRatio =
        fieldDataType
            .getSettings()
            .getAsFloat(
                "acceptable_transient_overhead_ratio",
                OrdinalsBuilder.DEFAULT_ACCEPTABLE_OVERHEAD_RATIO);
    TermsEnum termsEnum = estimator.beforeLoad(terms);
    assert !getNumericType().isFloatingPoint();
    boolean success = false;
    try (OrdinalsBuilder builder =
        new OrdinalsBuilder(-1, reader.maxDoc(), acceptableTransientOverheadRatio)) {
      BytesRefIterator iter = builder.buildFromTerms(termsEnum);
      BytesRef term;
      while ((term = iter.next()) != null) {
        final long value = numericType.toLong(term);
        valuesBuilder.add(value);
      }
      final PackedLongValues values = valuesBuilder.build();
      final Ordinals build = builder.build(fieldDataType.getSettings());
      CommonSettings.MemoryStorageFormat formatHint =
          CommonSettings.getMemoryStorageHint(fieldDataType);

      RandomAccessOrds ordinals = build.ordinals();
      if (FieldData.isMultiValued(ordinals)
          || formatHint == CommonSettings.MemoryStorageFormat.ORDINALS) {
        final long ramBytesUsed = build.ramBytesUsed() + values.ramBytesUsed();
        data =
            new AtomicLongFieldData(ramBytesUsed) {

              @Override
              public SortedNumericDocValues getLongValues() {
                return withOrdinals(build, values, reader.maxDoc());
              }

              @Override
              public Collection<Accountable> getChildResources() {
                List<Accountable> resources = new ArrayList<>();
                resources.add(Accountables.namedAccountable("ordinals", build));
                resources.add(Accountables.namedAccountable("values", values));
                return Collections.unmodifiableList(resources);
              }
            };
      } else {
        final BitSet docsWithValues = builder.buildDocsWithValuesSet();

        long minV, maxV;
        minV = maxV = 0;
        if (values.size() > 0) {
          minV = values.get(0);
          maxV = values.get(values.size() - 1);
        }

        final float acceptableOverheadRatio =
            fieldDataType.getSettings().getAsFloat("acceptable_overhead_ratio", PackedInts.DEFAULT);
        final int pageSize = fieldDataType.getSettings().getAsInt("single_value_page_size", 1024);

        if (formatHint == null) {
          formatHint =
              chooseStorageFormat(
                  reader, values, build, ordinals, minV, maxV, acceptableOverheadRatio, pageSize);
        }

        logger.trace(
            "single value format for field [{}] set to [{}]",
            getFieldNames().fullName(),
            formatHint);

        switch (formatHint) {
          case PACKED:
            // Encode document without a value with a special value
            long missingV = 0;
            if (docsWithValues != null) {
              if ((maxV - minV + 1) == values.size()) {
                // values are dense
                if (minV > Long.MIN_VALUE) {
                  missingV = --minV;
                } else {
                  assert maxV != Long.MAX_VALUE;
                  missingV = ++maxV;
                }
              } else {
                for (long i = 1; i < values.size(); ++i) {
                  if (values.get(i) > values.get(i - 1) + 1) {
                    missingV = values.get(i - 1) + 1;
                    break;
                  }
                }
              }
              missingV -= minV;
            }
            final long missingValue = missingV;
            final long minValue = minV;
            final long maxValue = maxV;

            final long valuesDelta = maxValue - minValue;
            int bitsRequired = valuesDelta < 0 ? 64 : PackedInts.bitsRequired(valuesDelta);
            final PackedInts.Mutable sValues =
                PackedInts.getMutable(reader.maxDoc(), bitsRequired, acceptableOverheadRatio);

            if (docsWithValues != null) {
              sValues.fill(0, sValues.size(), missingV);
            }

            for (int i = 0; i < reader.maxDoc(); i++) {
              ordinals.setDocument(i);
              if (ordinals.cardinality() > 0) {
                final long ord = ordinals.ordAt(0);
                long value = values.get(ord);
                sValues.set(i, value - minValue);
              }
            }
            long ramBytesUsed =
                values.ramBytesUsed()
                    + (docsWithValues == null ? 0 : docsWithValues.ramBytesUsed());
            data =
                new AtomicLongFieldData(ramBytesUsed) {

                  @Override
                  public SortedNumericDocValues getLongValues() {
                    if (docsWithValues == null) {
                      return singles(sValues, minValue);
                    } else {
                      return sparseSingles(sValues, minValue, missingValue, reader.maxDoc());
                    }
                  }

                  @Override
                  public Collection<Accountable> getChildResources() {
                    List<Accountable> resources = new ArrayList<>();
                    resources.add(Accountables.namedAccountable("values", sValues));
                    if (docsWithValues != null) {
                      resources.add(
                          Accountables.namedAccountable("missing bitset", docsWithValues));
                    }
                    return Collections.unmodifiableList(resources);
                  }
                };
            break;
          case PAGED:
            final PackedLongValues.Builder dpValues =
                PackedLongValues.deltaPackedBuilder(pageSize, acceptableOverheadRatio);

            long lastValue = 0;
            for (int i = 0; i < reader.maxDoc(); i++) {
              ordinals.setDocument(i);
              if (ordinals.cardinality() > 0) {
                final long ord = ordinals.ordAt(i);
                lastValue = values.get(ord);
              }
              dpValues.add(lastValue);
            }
            final PackedLongValues pagedValues = dpValues.build();
            ramBytesUsed = pagedValues.ramBytesUsed();
            if (docsWithValues != null) {
              ramBytesUsed += docsWithValues.ramBytesUsed();
            }
            data =
                new AtomicLongFieldData(ramBytesUsed) {

                  @Override
                  public SortedNumericDocValues getLongValues() {
                    return pagedSingles(pagedValues, docsWithValues);
                  }

                  @Override
                  public Collection<Accountable> getChildResources() {
                    List<Accountable> resources = new ArrayList<>();
                    resources.add(Accountables.namedAccountable("values", pagedValues));
                    if (docsWithValues != null) {
                      resources.add(
                          Accountables.namedAccountable("missing bitset", docsWithValues));
                    }
                    return Collections.unmodifiableList(resources);
                  }
                };
            break;
          case ORDINALS:
            ramBytesUsed = build.ramBytesUsed() + values.ramBytesUsed();
            data =
                new AtomicLongFieldData(ramBytesUsed) {

                  @Override
                  public SortedNumericDocValues getLongValues() {
                    return withOrdinals(build, values, reader.maxDoc());
                  }

                  @Override
                  public Collection<Accountable> getChildResources() {
                    List<Accountable> resources = new ArrayList<>();
                    resources.add(Accountables.namedAccountable("ordinals", build));
                    resources.add(Accountables.namedAccountable("values", values));
                    return Collections.unmodifiableList(resources);
                  }
                };
            break;
          default:
            throw new ElasticsearchException("unknown memory format: " + formatHint);
        }
      }

      success = true;
      return data;
    } finally {
      if (!success) {
        // If something went wrong, unwind any current estimations we've made
        estimator.afterLoad(termsEnum, 0);
      } else {
        // Adjust as usual, based on the actual size of the field data
        estimator.afterLoad(termsEnum, data.ramBytesUsed());
      }
    }
  }