@Test
public void testStream_bytesRefArray() throws Exception {
  final BytesRefArray bArr = new BytesRefArray(Counter.newCounter(false));
  bArr.append(new BytesRef("foo"));
  bArr.append(new BytesRef("bar"));
  bArr.append(new BytesRef("baz"));

  Assert.assertEquals("Not all items streamed.", 3L, StreamUtils.stream(bArr).count());
  Assert.assertEquals(
      "Term not found.",
      1L,
      StreamUtils.stream(bArr).filter(br -> br.bytesEquals(new BytesRef("foo"))).count());
  Assert.assertEquals(
      "Term not found.",
      1L,
      StreamUtils.stream(bArr).filter(br -> br.bytesEquals(new BytesRef("bar"))).count());
  Assert.assertEquals(
      "Term not found.",
      1L,
      StreamUtils.stream(bArr).filter(br -> br.bytesEquals(new BytesRef("baz"))).count());
  Assert.assertEquals(
      "Unknown term found.",
      0L,
      StreamUtils.stream(bArr)
          .filter(
              t ->
                  !t.bytesEquals(new BytesRef("foo"))
                      && !t.bytesEquals(new BytesRef("bar"))
                      && !t.bytesEquals(new BytesRef("baz")))
          .count());
}
@Override
public BytesRef writeToBytes() {
  long start = System.nanoTime();
  int size = set.size();
  BytesRef bytes = new BytesRef(new byte[HEADER_SIZE + (int) bytesUsed.get()]);

  // Encode encoding type
  Bytes.writeInt(bytes, this.getEncoding().ordinal());

  // Encode pruned flag
  bytes.bytes[bytes.offset++] = (byte) (this.isPruned() ? 1 : 0);

  // Encode size of the set
  Bytes.writeInt(bytes, size);

  // Encode terms
  BytesRef reusable = new BytesRef();
  for (int i = 0; i < this.set.size(); i++) {
    this.set.get(i, reusable);
    Bytes.writeBytesRef(reusable, bytes);
  }

  logger.debug(
      "Serialized {} terms - took {} ms", this.size(), (System.nanoTime() - start) / 1000000);

  // The write cursor now marks the end of the payload; expose it as the length
  // and rewind the offset so callers read from the start.
  bytes.length = bytes.offset;
  bytes.offset = 0;
  return bytes;
}
@Override
public void readFrom(StreamInput in) throws IOException {
  this.setIsPruned(in.readBoolean());
  int size = in.readInt();

  bytesUsed = Counter.newCounter();
  pool = new ByteBlockPool(new ByteBlockPool.DirectTrackingAllocator(bytesUsed));
  set = new BytesRefHash(pool);

  for (int i = 0; i < size; i++) {
    set.add(in.readBytesRef());
  }
}
public SortedDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
  this.fieldInfo = fieldInfo;
  this.iwBytesUsed = iwBytesUsed;
  hash =
      new BytesRefHash(
          new ByteBlockPool(new ByteBlockPool.DirectTrackingAllocator(iwBytesUsed)),
          BytesRefHash.DEFAULT_CAPACITY,
          new DirectBytesStartArray(BytesRefHash.DEFAULT_CAPACITY, iwBytesUsed));
  pending = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
  docsWithField = new DocsWithFieldSet();
  bytesUsed = pending.ramBytesUsed() + docsWithField.ramBytesUsed();
  iwBytesUsed.addAndGet(bytesUsed);
}
private void addOneValue(BytesRef value) {
  int termID = hash.add(value);
  if (termID < 0) {
    // The value is already in the hash; decode the return code back to its ord.
    termID = -termID - 1;
  } else {
    // Reserve additional space for each unique value:
    // 1. when indexing, when hash is 50% full, rehash() suddenly needs 2*size ints.
    //    TODO: can this same OOM happen in THPF?
    // 2. when flushing, we need 1 int per value (slot in the ordMap).
    iwBytesUsed.addAndGet(2 * Integer.BYTES);
  }

  pending.add(termID);
  updateBytesUsed();
}
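// Illustration (not from the original source): a minimal, self-contained sketch of
// the BytesRefHash.add(...) return convention that addOneValue() relies on. add()
// returns the new ord for an unseen value, and -(ord + 1) when the value is already
// present. The class name below is made up for the example.
import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;
import org.apache.lucene.util.Counter;

public class BytesRefHashAddDemo {
  public static void main(String[] args) {
    Counter bytesUsed = Counter.newCounter();
    BytesRefHash hash =
        new BytesRefHash(new ByteBlockPool(new ByteBlockPool.DirectTrackingAllocator(bytesUsed)));

    int first = hash.add(new BytesRef("foo"));  // >= 0: newly added, this is its ord
    int second = hash.add(new BytesRef("foo")); // < 0: duplicate, encoded as -(ord + 1)

    assert first == 0;
    assert second == -first - 1;
    System.out.println("first=" + first + " second=" + second);
  }
}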
private void readFromBytes(BytesRef bytes) {
  // Read pruned flag
  this.setIsPruned(bytes.bytes[bytes.offset++] == 1);

  // Read size of the set
  int size = Bytes.readInt(bytes);

  // Read terms
  bytesUsed = Counter.newCounter();
  pool = new ByteBlockPool(new ByteBlockPool.DirectTrackingAllocator(bytesUsed));
  set = new BytesRefHash(pool);

  BytesRef reusable = new BytesRef();
  for (int i = 0; i < size; i++) {
    Bytes.readBytesRef(bytes, reusable);
    set.add(reusable);
  }
}
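// Illustration (not from the original source): Bytes.writeInt/readInt and
// writeBytesRef/readBytesRef are project helpers whose implementations are not
// shown here. The sketch below is a hypothetical implementation, consistent with
// how writeToBytes() and readFromBytes() advance bytes.offset as a cursor; the
// actual helpers may differ.
import org.apache.lucene.util.BytesRef;

final class BytesSketch {
  // Big-endian int written at bytes.offset; the cursor advances by 4.
  static void writeInt(BytesRef bytes, int value) {
    bytes.bytes[bytes.offset++] = (byte) (value >>> 24);
    bytes.bytes[bytes.offset++] = (byte) (value >>> 16);
    bytes.bytes[bytes.offset++] = (byte) (value >>> 8);
    bytes.bytes[bytes.offset++] = (byte) value;
  }

  static int readInt(BytesRef bytes) {
    return ((bytes.bytes[bytes.offset++] & 0xFF) << 24)
        | ((bytes.bytes[bytes.offset++] & 0xFF) << 16)
        | ((bytes.bytes[bytes.offset++] & 0xFF) << 8)
        | (bytes.bytes[bytes.offset++] & 0xFF);
  }

  // Length-prefixed term: an int length followed by the raw bytes.
  static void writeBytesRef(BytesRef term, BytesRef bytes) {
    writeInt(bytes, term.length);
    System.arraycopy(term.bytes, term.offset, bytes.bytes, bytes.offset, term.length);
    bytes.offset += term.length;
  }

  static void readBytesRef(BytesRef bytes, BytesRef reusable) {
    int length = readInt(bytes);
    reusable.bytes = bytes.bytes;
    reusable.offset = bytes.offset;
    reusable.length = length;
    bytes.offset += length;
  }
}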
private PossiblyLimitedTopDocs getTopDocs(Query query, Sort sort) throws IOException {
  final TopFieldCollector topCollector =
      TopFieldCollector.create(sort, maxHits, true, false, false, false);
  final Counter clock = Counter.newCounter(true);
  final int waitMillis = 1000;
  // TODO: if we interrupt the whole thread anyway, do we still need the TimeLimitingCollector?
  final TimeLimitingCollector collector =
      new TimeLimitingCollector(topCollector, clock, maxSearchTimeMillis / waitMillis);
  collector.setBaseline(0);
  final Thread counterThread =
      new Thread() {
        @Override
        public void run() {
          final long startTime = System.currentTimeMillis();
          while (true) {
            final long runTimeMillis = System.currentTimeMillis() - startTime;
            if (runTimeMillis > maxSearchTimeMillis) {
              // make sure there's no lingering thread for too long
              return;
            }
            clock.addAndGet(1);
            try {
              Thread.sleep(waitMillis);
            } catch (InterruptedException e) {
              throw new RuntimeException(e);
            }
          }
        }
      };
  counterThread.setName("LuceneSearchTimeoutThread");
  counterThread.start();

  boolean timeLimitActivated = false;
  try {
    indexSearcher.search(query, collector);
  } catch (TimeLimitingCollector.TimeExceededException e) {
    timeLimitActivated = true;
  }
  return new PossiblyLimitedTopDocs(topCollector.topDocs(), timeLimitActivated);
}
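// Illustration (not from the original source): Lucene also ships a shared,
// daemonized TimerThread that ticks a global Counter, which avoids spawning a
// new counter thread per query. A sketch of the same timeout wired through that
// facility (the API names are stock Lucene; treat the exact wiring as an
// assumption, since the method above may predate or deliberately avoid it):
import org.apache.lucene.search.TimeLimitingCollector;
import org.apache.lucene.util.Counter;

// ...
Counter clock = TimeLimitingCollector.getGlobalCounter();
// The global TimerThread advances the counter in (roughly) milliseconds, so the
// ticksAllowed argument becomes a time budget in ms rather than a tick count.
TimeLimitingCollector collector =
    new TimeLimitingCollector(topCollector, clock, maxSearchTimeMillis);
// No-arg setBaseline() reads the current counter value as the starting point.
collector.setBaseline();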
/**
 * Expert: This constructor accepts an upper limit for the number of bytes that should be reused
 * if this instance is {@link #reset()}. The payload storage, if used, is unaffected by
 * maxReusedBytes, however.
 *
 * @param storeOffsets <code>true</code> if offsets should be stored
 * @param storePayloads <code>true</code> if payloads should be stored
 * @param maxReusedBytes the number of bytes that should remain in the internal memory pools after
 *     {@link #reset()} is called
 */
MemoryIndex(boolean storeOffsets, boolean storePayloads, long maxReusedBytes) {
  this.storeOffsets = storeOffsets;
  this.storePayloads = storePayloads;
  this.bytesUsed = Counter.newCounter();
  final int maxBufferedByteBlocks = (int) ((maxReusedBytes / 2) / ByteBlockPool.BYTE_BLOCK_SIZE);
  final int maxBufferedIntBlocks =
      (int)
          ((maxReusedBytes - (maxBufferedByteBlocks * ByteBlockPool.BYTE_BLOCK_SIZE))
              / (IntBlockPool.INT_BLOCK_SIZE * RamUsageEstimator.NUM_BYTES_INT));
  assert (maxBufferedByteBlocks * ByteBlockPool.BYTE_BLOCK_SIZE)
          + (maxBufferedIntBlocks * IntBlockPool.INT_BLOCK_SIZE * RamUsageEstimator.NUM_BYTES_INT)
      <= maxReusedBytes;
  byteBlockPool =
      new ByteBlockPool(
          new RecyclingByteBlockAllocator(
              ByteBlockPool.BYTE_BLOCK_SIZE, maxBufferedByteBlocks, bytesUsed));
  intBlockPool =
      new IntBlockPool(
          new RecyclingIntBlockAllocator(
              IntBlockPool.INT_BLOCK_SIZE, maxBufferedIntBlocks, bytesUsed));
  postingsWriter = new SliceWriter(intBlockPool);
  // TODO refactor BytesRefArray to allow us to apply maxReusedBytes option
  payloadsBytesRefs = storePayloads ? new BytesRefArray(bytesUsed) : null;
}
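// Illustration (not from the original source): the constructor above is
// package-private; typical callers go through MemoryIndex's public API. A minimal
// usage sketch, where the analyzer, field name, and query are made up for the
// example:
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.search.TermQuery;

public class MemoryIndexDemo {
  public static void main(String[] args) {
    MemoryIndex index = new MemoryIndex(); // offsets/payloads off by default
    index.addField("content", "the quick brown fox", new StandardAnalyzer());

    // search() scores the single in-memory document; > 0 means a match.
    float score = index.search(new TermQuery(new Term("content", "fox")));
    System.out.println("score=" + score);

    // reset() recycles the internal block pools, bounded by maxReusedBytes.
    index.reset();
  }
}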
/** Return the memory usage of this object in bytes. */
public long ramBytesUsed() {
  return bytesUsed.get();
}
public BytesRefTermsSet(final CircuitBreaker breaker) {
  super(breaker);
  this.bytesUsed = Counter.newCounter();
  this.pool = new ByteBlockPool(new ByteBlockPool.DirectTrackingAllocator(bytesUsed));
  this.set = new BytesRefHash(pool);
}
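// Illustration (not from the original source): the Counter +
// DirectTrackingAllocator pairing used by this constructor (and several others in
// this section) makes the pool report every block it allocates, so the counter
// tracks live memory. A minimal sketch; the class name is made up for the example:
import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;
import org.apache.lucene.util.Counter;

public class TrackedPoolDemo {
  public static void main(String[] args) {
    Counter bytesUsed = Counter.newCounter();
    ByteBlockPool pool =
        new ByteBlockPool(new ByteBlockPool.DirectTrackingAllocator(bytesUsed));
    BytesRefHash set = new BytesRefHash(pool);

    System.out.println("before: " + bytesUsed.get()); // 0, blocks are allocated lazily
    set.add(new BytesRef("a term"));
    System.out.println("after: " + bytesUsed.get());  // grows once the pool grabs a block
  }
}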
private void updateBytesUsed() {
  final long newBytesUsed = pending.ramBytesUsed() + docsWithField.ramBytesUsed();
  // Publish only the delta so the shared IndexWriter counter stays consistent.
  iwBytesUsed.addAndGet(newBytesUsed - bytesUsed);
  bytesUsed = newBytesUsed;
}
@Test
public void testStream_bytesRefArray_empty() throws Exception {
  final BytesRefArray bArr = new BytesRefArray(Counter.newCounter(false));
  Assert.assertEquals("Too many items streamed.", 0L, StreamUtils.stream(bArr).count());
}
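// Illustration (not from the original source): StreamUtils.stream(BytesRefArray)
// is a project helper whose implementation is not shown in these tests. A
// plausible adapter over BytesRefArray's iterator could look like this sketch;
// the deep copy per element is deliberate, since BytesRefIterator reuses its
// returned BytesRef.
import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.Spliterator;
import java.util.Spliterators;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefArray;
import org.apache.lucene.util.BytesRefIterator;

final class StreamUtilsSketch {
  static Stream<BytesRef> stream(BytesRefArray arr) {
    BytesRefIterator it = arr.iterator();
    Iterator<BytesRef> adapter =
        new Iterator<BytesRef>() {
          private BytesRef next = advance();

          private BytesRef advance() {
            try {
              BytesRef ref = it.next(); // null signals exhaustion
              return ref == null ? null : BytesRef.deepCopyOf(ref);
            } catch (IOException e) {
              throw new UncheckedIOException(e);
            }
          }

          @Override
          public boolean hasNext() {
            return next != null;
          }

          @Override
          public BytesRef next() {
            if (next == null) throw new NoSuchElementException();
            BytesRef current = next;
            next = advance();
            return current;
          }
        };
    return StreamSupport.stream(
        Spliterators.spliterator(adapter, arr.size(), Spliterator.ORDERED | Spliterator.SIZED),
        false);
  }
}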
/**
 * This wrapper buffers incoming elements.
 *
 * @lucene.experimental
 */
public class BufferedInputIterator implements InputIterator {
  // TODO keep this for now

  /** buffered term entries */
  protected BytesRefArray entries = new BytesRefArray(Counter.newCounter());

  /** buffered payload entries */
  protected BytesRefArray payloads = new BytesRefArray(Counter.newCounter());

  /** buffered context set entries */
  protected List<Set<BytesRef>> contextSets = new ArrayList<>();

  /** current buffer position */
  protected int curPos = -1;

  /** buffered weights, parallel with {@link #entries} */
  protected long[] freqs = new long[1];

  private final BytesRefBuilder spare = new BytesRefBuilder();
  private final BytesRefBuilder payloadSpare = new BytesRefBuilder();
  private final boolean hasPayloads;
  private final boolean hasContexts;

  /** Creates a new iterator, buffering entries from the specified iterator */
  public BufferedInputIterator(InputIterator source) throws IOException {
    BytesRef spare;
    int freqIndex = 0;
    hasPayloads = source.hasPayloads();
    hasContexts = source.hasContexts();
    while ((spare = source.next()) != null) {
      entries.append(spare);
      if (hasPayloads) {
        payloads.append(source.payload());
      }
      if (hasContexts) {
        contextSets.add(source.contexts());
      }
      if (freqIndex >= freqs.length) {
        freqs = ArrayUtil.grow(freqs, freqs.length + 1);
      }
      freqs[freqIndex++] = source.weight();
    }
  }

  @Override
  public long weight() {
    return freqs[curPos];
  }

  @Override
  public BytesRef next() throws IOException {
    if (++curPos < entries.size()) {
      entries.get(spare, curPos);
      return spare.get();
    }
    return null;
  }

  @Override
  public BytesRef payload() {
    if (hasPayloads && curPos < payloads.size()) {
      return payloads.get(payloadSpare, curPos);
    }
    return null;
  }

  @Override
  public boolean hasPayloads() {
    return hasPayloads;
  }

  @Override
  public Set<BytesRef> contexts() {
    if (hasContexts && curPos < contextSets.size()) {
      return contextSets.get(curPos);
    }
    return null;
  }

  @Override
  public boolean hasContexts() {
    return hasContexts;
  }
}
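// Illustration (not from the original source): draining a BufferedInputIterator
// after wrapping a source. InputArrayIterator is assumed here as a simple
// in-memory source (Lucene ships one in its suggest test framework); any
// InputIterator works. The class name and sample data are made up.
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.search.suggest.BufferedInputIterator;
import org.apache.lucene.search.suggest.Input;
import org.apache.lucene.search.suggest.InputArrayIterator;
import org.apache.lucene.util.BytesRef;

public class BufferedInputIteratorDemo {
  public static void main(String[] args) throws IOException {
    InputArrayIterator source =
        new InputArrayIterator(
            Arrays.asList(new Input("foo", 5), new Input("bar", 3)).iterator());

    // The constructor eagerly buffers every entry from the source.
    BufferedInputIterator buffered = new BufferedInputIterator(source);

    BytesRef term;
    while ((term = buffered.next()) != null) {
      System.out.println(term.utf8ToString() + " -> " + buffered.weight());
    }
  }
}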