@Test
  public void testStream_bytesRefArray() throws Exception {
    final BytesRefArray bArr = new BytesRefArray(Counter.newCounter(false));
    bArr.append(new BytesRef("foo"));
    bArr.append(new BytesRef("bar"));
    bArr.append(new BytesRef("baz"));

    Assert.assertEquals("Not all items streamed.", 3L, StreamUtils.stream(bArr).count());

    Assert.assertEquals(
        "Term not found.",
        1L,
        StreamUtils.stream(bArr).filter(br -> br.bytesEquals(new BytesRef("foo"))).count());
    Assert.assertEquals(
        "Term not found.",
        1L,
        StreamUtils.stream(bArr).filter(br -> br.bytesEquals(new BytesRef("bar"))).count());
    Assert.assertEquals(
        "Term not found.",
        1L,
        StreamUtils.stream(bArr).filter(br -> br.bytesEquals(new BytesRef("baz"))).count());

    Assert.assertEquals(
        "Unknown term found.",
        0L,
        StreamUtils.stream(bArr)
            .filter(
                t ->
                    !t.bytesEquals(new BytesRef("foo"))
                        && !t.bytesEquals(new BytesRef("bar"))
                        && !t.bytesEquals(new BytesRef("baz")))
            .count());
  }
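StreamUtils is a project-specific helper, not a Lucene class. A minimal sketch of what such a method could look like, assuming Lucene's BytesRefArray.get(BytesRefBuilder, int) accessor (hypothetical, not the project's actual implementation):

import java.util.stream.IntStream;
import java.util.stream.Stream;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefArray;
import org.apache.lucene.util.BytesRefBuilder;

public final class StreamUtils {
  // Deep-copies each element so callers may retain the returned BytesRefs;
  // the shared spare builder makes the stream sequential-only.
  public static Stream<BytesRef> stream(BytesRefArray arr) {
    final BytesRefBuilder spare = new BytesRefBuilder();
    return IntStream.range(0, arr.size())
        .mapToObj(i -> BytesRef.deepCopyOf(arr.get(spare, i)));
  }
}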
  @Override
  public BytesRef writeToBytes() {
    long start = System.nanoTime();
    int size = set.size();

    BytesRef bytes = new BytesRef(new byte[HEADER_SIZE + (int) bytesUsed.get()]);

    // Encode encoding type
    Bytes.writeInt(bytes, this.getEncoding().ordinal());

    // Encode flag
    bytes.bytes[bytes.offset++] = (byte) (this.isPruned() ? 1 : 0);

    // Encode size of the set
    Bytes.writeInt(bytes, size);

    // Encode longs
    BytesRef reusable = new BytesRef();
    for (int i = 0; i < size; i++) {
      this.set.get(i, reusable);
      Bytes.writeBytesRef(reusable, bytes);
    }

    logger.debug(
        "Serialized {} terms - took {} ms", this.size(), (System.nanoTime() - start) / 1000000);

    bytes.length = bytes.offset;
    bytes.offset = 0;
    return bytes;
  }
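The header written above consists, in order, of a 4-byte encoding ordinal, a 1-byte pruned flag, and a 4-byte set size. A HEADER_SIZE consistent with that layout (inferred from this method, not copied from the source class) would be:

  // 4-byte encoding ordinal + 1-byte pruned flag + 4-byte set size = 9 bytes
  private static final int HEADER_SIZE = Integer.BYTES + Byte.BYTES + Integer.BYTES;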
  @Override
  public void readFrom(StreamInput in) throws IOException {
    this.setIsPruned(in.readBoolean());
    int size = in.readInt();

    bytesUsed = Counter.newCounter();
    pool = new ByteBlockPool(new ByteBlockPool.DirectTrackingAllocator(bytesUsed));
    set = new BytesRefHash(pool);

    for (int i = 0; i < size; i++) {
      set.add(in.readBytesRef());
    }
  }
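A sketch of the matching serializer, assuming Elasticsearch's StreamOutput API; the encoding ordinal is not written here because readFrom above never consumes it (presumably the caller handles it):

  @Override
  public void writeTo(StreamOutput out) throws IOException {
    out.writeBoolean(this.isPruned());
    out.writeInt(set.size());
    BytesRef reusable = new BytesRef();
    for (int i = 0; i < set.size(); i++) {
      set.get(i, reusable); // fills reusable with the i-th hashed term
      out.writeBytesRef(reusable);
    }
  }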
 public SortedDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
   this.fieldInfo = fieldInfo;
   this.iwBytesUsed = iwBytesUsed;
   hash =
       new BytesRefHash(
           new ByteBlockPool(new ByteBlockPool.DirectTrackingAllocator(iwBytesUsed)),
           BytesRefHash.DEFAULT_CAPACITY,
           new DirectBytesStartArray(BytesRefHash.DEFAULT_CAPACITY, iwBytesUsed));
   pending = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
   docsWithField = new DocsWithFieldSet();
   bytesUsed = pending.ramBytesUsed() + docsWithField.ramBytesUsed();
   iwBytesUsed.addAndGet(bytesUsed);
 }
  private void addOneValue(BytesRef value) {
    int termID = hash.add(value);
    if (termID < 0) {
      termID = -termID - 1;
    } else {
      // reserve additional space for each unique value:
      // 1. when indexing, when hash is 50% full, rehash() suddenly needs 2*size ints.
      //    TODO: can this same OOM happen in THPF?
      // 2. when flushing, we need 1 int per value (slot in the ordMap).
      iwBytesUsed.addAndGet(2 * Integer.BYTES);
    }

    pending.add(termID);
    updateBytesUsed();
  }
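The sign check above relies on BytesRefHash.add returning a non-negative id for a newly inserted value and -(id + 1) when the value is already present:

  BytesRefHash hash = new BytesRefHash();
  int first = hash.add(new BytesRef("foo")); // >= 0: newly inserted
  int again = hash.add(new BytesRef("foo")); // < 0: duplicate
  assert again == -first - 1;                // decodes back to the original id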
  private void readFromBytes(BytesRef bytes) {
    // Read pruned flag
    this.setIsPruned(bytes.bytes[bytes.offset++] == 1);

    // Read size of the set
    int size = Bytes.readInt(bytes);

    // Read terms
    bytesUsed = Counter.newCounter();
    pool = new ByteBlockPool(new ByteBlockPool.DirectTrackingAllocator(bytesUsed));
    set = new BytesRefHash(pool);

    BytesRef reusable = new BytesRef();
    for (int i = 0; i < size; i++) {
      Bytes.readBytesRef(bytes, reusable);
      set.add(reusable);
    }
  }
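Bytes is a project-specific helper. A hypothetical shape for readInt, shown only to make the offset bookkeeping concrete (the project's actual implementation may differ):

  static int readInt(BytesRef ref) {
    int v =
        ((ref.bytes[ref.offset] & 0xFF) << 24)
            | ((ref.bytes[ref.offset + 1] & 0xFF) << 16)
            | ((ref.bytes[ref.offset + 2] & 0xFF) << 8)
            | (ref.bytes[ref.offset + 3] & 0xFF);
    ref.offset += Integer.BYTES;
    return v;
  }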
Example #7
  private PossiblyLimitedTopDocs getTopDocs(Query query, Sort sort) throws IOException {
    final TopFieldCollector topCollector =
        TopFieldCollector.create(sort, maxHits, true, false, false, false);
    final Counter clock = Counter.newCounter(true);
    final int waitMillis = 1000;
    // TODO: if we interrupt the whole thread anyway, do we still need the TimeLimitingCollector?
    final TimeLimitingCollector collector =
        new TimeLimitingCollector(topCollector, clock, maxSearchTimeMillis / waitMillis);
    collector.setBaseline(0);
    final Thread counterThread =
        new Thread() {
          @Override
          public void run() {
            final long startTime = System.currentTimeMillis();
            while (true) {
              final long runTimeMillis = System.currentTimeMillis() - startTime;
              if (runTimeMillis > maxSearchTimeMillis) {
                // make sure there's no lingering thread for too long
                return;
              }
              clock.addAndGet(1);
              try {
                Thread.sleep(waitMillis);
              } catch (InterruptedException e) {
                throw new RuntimeException(e);
              }
            }
          }
        };
    counterThread.setName("LuceneSearchTimeoutThread");
    counterThread.start();

    boolean timeLimitActivated = false;
    try {
      indexSearcher.search(query, collector);
    } catch (TimeLimitingCollector.TimeExceededException e) {
      timeLimitActivated = true;
    }
    return new PossiblyLimitedTopDocs(topCollector.topDocs(), timeLimitActivated);
  }
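For comparison, Lucene also provides a shared timer thread, so the hand-rolled counter thread above could be replaced with the built-in one (a sketch; the global counter ticks at Lucene's default resolution, approximating elapsed milliseconds):

  Counter clock = TimeLimitingCollector.getGlobalCounter();
  TimeLimitingCollector collector =
      new TimeLimitingCollector(topCollector, clock, maxSearchTimeMillis);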
Example #8
 /**
  * Expert: This constructor accepts an upper limit for the number of bytes that should be reused
  * if this instance is {@link #reset()}. The payload storage, if used, is unaffected by
  * maxReusedBytes, however.
  *
  * @param storeOffsets <code>true</code> if offsets should be stored
  * @param storePayloads <code>true</code> if payloads should be stored
  * @param maxReusedBytes the number of bytes that should remain in the internal memory pools after
  *     {@link #reset()} is called
  */
 MemoryIndex(boolean storeOffsets, boolean storePayloads, long maxReusedBytes) {
   this.storeOffsets = storeOffsets;
   this.storePayloads = storePayloads;
   this.bytesUsed = Counter.newCounter();
   final int maxBufferedByteBlocks = (int) ((maxReusedBytes / 2) / ByteBlockPool.BYTE_BLOCK_SIZE);
   final int maxBufferedIntBlocks =
       (int)
           ((maxReusedBytes - (maxBufferedByteBlocks * ByteBlockPool.BYTE_BLOCK_SIZE))
               / (IntBlockPool.INT_BLOCK_SIZE * RamUsageEstimator.NUM_BYTES_INT));
   assert (maxBufferedByteBlocks * ByteBlockPool.BYTE_BLOCK_SIZE)
           + (maxBufferedIntBlocks * IntBlockPool.INT_BLOCK_SIZE * RamUsageEstimator.NUM_BYTES_INT)
       <= maxReusedBytes;
   byteBlockPool =
       new ByteBlockPool(
           new RecyclingByteBlockAllocator(
               ByteBlockPool.BYTE_BLOCK_SIZE, maxBufferedByteBlocks, bytesUsed));
   intBlockPool =
       new IntBlockPool(
           new RecyclingIntBlockAllocator(
               IntBlockPool.INT_BLOCK_SIZE, maxBufferedIntBlocks, bytesUsed));
   postingsWriter = new SliceWriter(intBlockPool);
   // TODO refactor BytesRefArray to allow us to apply maxReusedBytes option
   payloadsBytesRefs = storePayloads ? new BytesRefArray(bytesUsed) : null;
 }
 /** Return the memory usage of this object in bytes. */
 public long ramBytesUsed() {
   return bytesUsed.get();
 }
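A minimal usage sketch for MemoryIndex; the public no-arg constructor delegates to the expert constructor above with default settings (StandardAnalyzer, TermQuery, and Term are assumed imports):

  MemoryIndex idx = new MemoryIndex(); // no offsets/payloads, no byte reuse
  idx.addField("body", "the quick brown fox", new StandardAnalyzer());
  float score = idx.search(new TermQuery(new Term("body", "fox")));
  // score > 0.0f means the single in-memory document matched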
 public BytesRefTermsSet(final CircuitBreaker breaker) {
   super(breaker);
   this.bytesUsed = Counter.newCounter();
   this.pool = new ByteBlockPool(new ByteBlockPool.DirectTrackingAllocator(bytesUsed));
   this.set = new BytesRefHash(pool);
 }
 private void updateBytesUsed() {
   final long newBytesUsed = pending.ramBytesUsed() + docsWithField.ramBytesUsed();
   iwBytesUsed.addAndGet(newBytesUsed - bytesUsed);
   bytesUsed = newBytesUsed;
 }
Example #12
 @Test
 public void testStream_bytesRefArray_empty() throws Exception {
   final BytesRefArray bArr = new BytesRefArray(Counter.newCounter(false));
   Assert.assertEquals("Too much items streamed.", 0L, StreamUtils.stream(bArr).count());
 }
/**
 * This wrapper buffers incoming elements.
 *
 * @lucene.experimental
 */
public class BufferedInputIterator implements InputIterator {
  // TODO keep this for now
  /** buffered term entries */
  protected BytesRefArray entries = new BytesRefArray(Counter.newCounter());
  /** buffered payload entries */
  protected BytesRefArray payloads = new BytesRefArray(Counter.newCounter());
  /** buffered context set entries */
  protected List<Set<BytesRef>> contextSets = new ArrayList<>();
  /** current buffer position */
  protected int curPos = -1;
  /** buffered weights, parallel with {@link #entries} */
  protected long[] freqs = new long[1];

  private final BytesRefBuilder spare = new BytesRefBuilder();
  private final BytesRefBuilder payloadSpare = new BytesRefBuilder();
  private final boolean hasPayloads;
  private final boolean hasContexts;

  /** Creates a new iterator, buffering entries from the specified iterator */
  public BufferedInputIterator(InputIterator source) throws IOException {
    BytesRef spare;
    int freqIndex = 0;
    hasPayloads = source.hasPayloads();
    hasContexts = source.hasContexts();
    while ((spare = source.next()) != null) {
      entries.append(spare);
      if (hasPayloads) {
        payloads.append(source.payload());
      }
      if (hasContexts) {
        contextSets.add(source.contexts());
      }
      if (freqIndex >= freqs.length) {
        freqs = ArrayUtil.grow(freqs, freqs.length + 1);
      }
      freqs[freqIndex++] = source.weight();
    }
  }

  @Override
  public long weight() {
    return freqs[curPos];
  }

  @Override
  public BytesRef next() throws IOException {
    if (++curPos < entries.size()) {
      entries.get(spare, curPos);
      return spare.get();
    }
    return null;
  }

  @Override
  public BytesRef payload() {
    if (hasPayloads && curPos < payloads.size()) {
      return payloads.get(payloadSpare, curPos);
    }
    return null;
  }

  @Override
  public boolean hasPayloads() {
    return hasPayloads;
  }

  @Override
  public Set<BytesRef> contexts() {
    if (hasContexts && curPos < contextSets.size()) {
      return contextSets.get(curPos);
    }
    return null;
  }

  @Override
  public boolean hasContexts() {
    return hasContexts;
  }
}
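Usage sketch: buffering drains the (typically one-shot) source up front so its entries can be replayed; source here is a hypothetical InputIterator:

  InputIterator buffered = new BufferedInputIterator(source); // drains source eagerly
  BytesRef term;
  while ((term = buffered.next()) != null) {
    long weight = buffered.weight();       // parallel to the returned term
    BytesRef payload = buffered.payload(); // null unless source.hasPayloads()
    // ... feed term/weight/payload into a suggester build
  }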