/**
 * Writes the {@code count} pending entries (terms and sub-blocks) that share the
 * {@code prefixLength}-byte prefix of {@code prevTerm} out as one or more on-disk blocks,
 * then replaces them on the {@code pending} stack with a single {@link PendingBlock}.
 *
 * <p>If everything fits in one block ({@code count <= maxItemsInBlock}) a single
 * non-floor block is written. Otherwise the entries are greedily segmented into a
 * primary block plus "floor" blocks, keyed by the first suffix byte after the prefix.
 *
 * <p>Side effects: temporarily overwrites {@code prevTerm.ints[prevTerm.offset + prefixLength]}
 * (restored before returning), grows the shared scratch arrays {@code subBytes},
 * {@code subTermCounts}, {@code subSubCounts}, {@code subTermCountSums}, and updates
 * {@code lastBlockIndex}.
 *
 * @param prevTerm     term whose leading bytes supply the shared prefix; mutated in
 *                     place during floor segmentation but restored on exit
 * @param prefixLength number of leading ints of {@code prevTerm} shared by all entries
 * @param count        how many entries at the top of {@code pending} belong to this prefix
 * @throws IOException if writing a block to the terms output fails
 */
void writeBlocks(IntsRef prevTerm, int prefixLength, int count) throws IOException {
  if (prefixLength == 0 || count <= maxItemsInBlock) {
    // Easy case: not floor block.  Eg, prefix is "foo",
    // and we found 30 terms/sub-blocks starting w/ that
    // prefix, and minItemsInBlock <= 30 <=
    // maxItemsInBlock.
    final PendingBlock nonFloorBlock = writeBlock(prevTerm, prefixLength, prefixLength, count, count, 0, false, -1, true);
    nonFloorBlock.compileIndex(null, scratchBytes);
    pending.add(nonFloorBlock);
  } else {
    // Floor block case.  Eg, prefix is "foo" but we
    // have 100 terms/sub-blocks starting w/ that
    // prefix.  We segment the entries into a primary
    // block and following floor blocks using the first
    // label in the suffix to assign to floor blocks.

    // TODO: we could store min & max suffix start byte
    // in each block, to make floor blocks authoritative

    // Save the label currently at the split position; we overwrite it below when
    // writing floor blocks and must restore it before returning.
    final int savLabel = prevTerm.ints[prevTerm.offset + prefixLength];

    // Count up how many items fall under
    // each unique label after the prefix.

    // TODO: this is wasteful since the builder had
    // already done this (partitioned these sub-terms
    // according to their leading prefix byte)

    final List<PendingEntry> slice = pending.subList(pending.size() - count, pending.size());
    int lastSuffixLeadLabel = -1;
    int termCount = 0;   // terms seen under the current lead label
    int subCount = 0;    // sub-blocks seen under the current lead label
    int numSubs = 0;     // number of distinct lead labels recorded so far

    for (PendingEntry ent : slice) {
      // First byte in the suffix of this term
      final int suffixLeadLabel;
      if (ent.isTerm) {
        PendingTerm term = (PendingTerm) ent;
        if (term.term.length == prefixLength) {
          // Suffix is 0, ie prefix 'foo' and term is
          // 'foo' so the term has empty string suffix
          // in this block
          assert lastSuffixLeadLabel == -1;
          assert numSubs == 0;
          suffixLeadLabel = -1;
        } else {
          suffixLeadLabel = term.term.bytes[term.term.offset + prefixLength] & 0xff;
        }
      } else {
        PendingBlock block = (PendingBlock) ent;
        assert block.prefix.length > prefixLength;
        suffixLeadLabel = block.prefix.bytes[block.prefix.offset + prefixLength] & 0xff;
      }

      if (suffixLeadLabel != lastSuffixLeadLabel && (termCount + subCount) != 0) {
        // Lead label changed: flush the counts accumulated for the previous label.
        if (subBytes.length == numSubs) {
          subBytes = ArrayUtil.grow(subBytes);
          subTermCounts = ArrayUtil.grow(subTermCounts);
          subSubCounts = ArrayUtil.grow(subSubCounts);
        }
        subBytes[numSubs] = lastSuffixLeadLabel;
        lastSuffixLeadLabel = suffixLeadLabel;
        subTermCounts[numSubs] = termCount;
        subSubCounts[numSubs] = subCount;
        termCount = subCount = 0;
        numSubs++;
      }

      if (ent.isTerm) {
        termCount++;
      } else {
        subCount++;
      }
    }

    // Flush the counts for the final lead label.
    if (subBytes.length == numSubs) {
      subBytes = ArrayUtil.grow(subBytes);
      subTermCounts = ArrayUtil.grow(subTermCounts);
      subSubCounts = ArrayUtil.grow(subSubCounts);
    }

    subBytes[numSubs] = lastSuffixLeadLabel;
    subTermCounts[numSubs] = termCount;
    subSubCounts[numSubs] = subCount;
    numSubs++;

    if (subTermCountSums.length < numSubs) {
      subTermCountSums = ArrayUtil.grow(subTermCountSums, numSubs);
    }

    // Roll up (backwards) the termCounts; postings impl
    // needs this to know where to pull the term slice
    // from its pending terms stack:
    int sum = 0;
    for (int idx = numSubs - 1; idx >= 0; idx--) {
      sum += subTermCounts[idx];
      subTermCountSums[idx] = sum;
    }

    // TODO: make a better segmenter?  It'd have to
    // absorb the too-small end blocks backwards into
    // the previous blocks

    // Naive greedy segmentation; this is not always
    // best (it can produce a too-small block as the
    // last block):
    int pendingCount = 0;
    int startLabel = subBytes[0];
    int curStart = count;
    subCount = 0;

    final List<PendingBlock> floorBlocks = new ArrayList<PendingBlock>();
    PendingBlock firstBlock = null;

    for (int sub = 0; sub < numSubs; sub++) {
      pendingCount += subTermCounts[sub] + subSubCounts[sub];
      subCount++;

      // Greedily make a floor block as soon as we've
      // crossed the min count
      if (pendingCount >= minItemsInBlock) {
        final int curPrefixLength;
        if (startLabel == -1) {
          // Empty-suffix entry: the block keeps the bare prefix.
          curPrefixLength = prefixLength;
        } else {
          curPrefixLength = 1 + prefixLength;
          // floor term:
          prevTerm.ints[prevTerm.offset + prefixLength] = startLabel;
        }
        final PendingBlock floorBlock = writeBlock(prevTerm, prefixLength, curPrefixLength, curStart, pendingCount, subTermCountSums[1 + sub], true, startLabel, curStart == pendingCount);
        if (firstBlock == null) {
          firstBlock = floorBlock;
        } else {
          floorBlocks.add(floorBlock);
        }
        curStart -= pendingCount;
        pendingCount = 0;

        assert minItemsInBlock == 1 || subCount > 1 : "minItemsInBlock=" + minItemsInBlock + " subCount=" + subCount + " sub=" + sub + " of " + numSubs + " subTermCount=" + subTermCountSums[sub] + " subSubCount=" + subSubCounts[sub] + " depth=" + prefixLength;
        subCount = 0;
        startLabel = subBytes[sub + 1];

        if (curStart == 0) {
          break;
        }

        if (curStart <= maxItemsInBlock) {
          // remainder is small enough to fit into a
          // block.  NOTE that this may be too small (<
          // minItemsInBlock); need a true segmenter
          // here
          assert startLabel != -1;
          assert firstBlock != null;
          prevTerm.ints[prevTerm.offset + prefixLength] = startLabel;
          floorBlocks.add(writeBlock(prevTerm, prefixLength, prefixLength + 1, curStart, curStart, 0, true, startLabel, true));
          break;
        }
      }
    }

    // Restore the label we clobbered during floor segmentation.
    prevTerm.ints[prevTerm.offset + prefixLength] = savLabel;

    assert firstBlock != null;
    firstBlock.compileIndex(floorBlocks, scratchBytes);

    pending.add(firstBlock);
  }
  lastBlockIndex = pending.size() - 1;
}
/**
 * Builds the FST terms index for this block, folding in the indices of all
 * sub-blocks and (when this is a floor block) of the trailing floor blocks.
 *
 * <p>The root output for this block's prefix is serialized through
 * {@code scratchBytes}: the encoded file pointer / hasTerms / isFloor vLong,
 * followed — for floor blocks — by the floor-block count and, per floor block,
 * its lead byte and its delta file pointer shifted left one with the
 * {@code hasTerms} bit in the low position.
 *
 * <p>Side effects: resets {@code scratchBytes}, stores the finished FST in
 * {@code index}, and nulls {@code subIndices} on this block and on every floor
 * block so the absorbed sub-indices can be garbage collected.
 *
 * @param floorBlocks  the floor blocks following this one; must be non-empty when
 *                     {@code isFloor}, and {@code null} otherwise (asserted)
 * @param scratchBytes reusable scratch output; must be empty on entry (asserted)
 * @throws IOException if building the FST fails
 */
public void compileIndex(List<PendingBlock> floorBlocks, RAMOutputStream scratchBytes) throws IOException {

  assert (isFloor && floorBlocks != null && floorBlocks.size() != 0) || (!isFloor && floorBlocks == null) : "isFloor=" + isFloor + " floorBlocks=" + floorBlocks;

  assert scratchBytes.getFilePointer() == 0;

  // TODO: try writing the leading vLong in MSB order
  // (opposite of what Lucene does today), for better
  // outputs sharing in the FST
  scratchBytes.writeVLong(encodeOutput(fp, hasTerms, isFloor));
  if (isFloor) {
    scratchBytes.writeVInt(floorBlocks.size());
    for (PendingBlock sub : floorBlocks) {
      assert sub.floorLeadByte != -1;
      scratchBytes.writeByte((byte) sub.floorLeadByte);
      assert sub.fp > fp;
      // Delta-encode the floor block's file pointer; low bit flags hasTerms.
      scratchBytes.writeVLong((sub.fp - fp) << 1 | (sub.hasTerms ? 1 : 0));
    }
  }

  final ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton();
  final Builder<BytesRef> indexBuilder = new Builder<BytesRef>(FST.INPUT_TYPE.BYTE1, 0, 0, true, false, Integer.MAX_VALUE, outputs, null, false, PackedInts.COMPACT, true, 15);

  // Copy scratchBytes into a fresh array: the FST output must own its bytes
  // since scratchBytes is reset and reused below.
  final byte[] bytes = new byte[(int) scratchBytes.getFilePointer()];
  assert bytes.length > 0;
  scratchBytes.writeTo(bytes, 0);
  indexBuilder.add(Util.toIntsRef(prefix, scratchIntsRef), new BytesRef(bytes, 0, bytes.length));
  scratchBytes.reset();

  // Copy over index for all sub-blocks
  if (subIndices != null) {
    for (FST<BytesRef> subIndex : subIndices) {
      append(indexBuilder, subIndex);
    }
  }

  if (floorBlocks != null) {
    for (PendingBlock sub : floorBlocks) {
      if (sub.subIndices != null) {
        for (FST<BytesRef> subIndex : sub.subIndices) {
          append(indexBuilder, subIndex);
        }
      }
      // Release absorbed sub-indices for GC.
      sub.subIndices = null;
    }
  }

  index = indexBuilder.finish();
  subIndices = null;
}