@Override
 public long ramBytesUsed() {
   return BASE_RAM_BYTES_USED
       + RamUsageEstimator.sizeOf(offsets)
       + RamUsageEstimator.sizeOf(scratch.bytes())
       + RamUsageEstimator.sizeOf(scratchUTF16.chars());
 }
 /**
  * Create a {@link SparseFixedBitSet} that can contain bits between <code>0</code> included and
  * <code>length</code> excluded.
  */
 public SparseFixedBitSet(int length) {
   if (length < 1) {
     throw new IllegalArgumentException("length needs to be >= 1");
   }
   this.length = length;
   final int blockCount = blockCount(length);
   indices = new long[blockCount];
   bits = new long[blockCount][];
   ramBytesUsed =
       BASE_RAM_BYTES_USED
           + RamUsageEstimator.shallowSizeOf(indices)
           + RamUsageEstimator.shallowSizeOf(bits);
 }
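The constructor above charges only the two top-level arrays via shallowSizeOf: the jagged bits array is created as an array of null references, and each per-block long[] is added to ramBytesUsed later, when insertBlock/insertLong allocate it. A minimal, self-contained sketch of the difference between the deep and shallow estimator calls (the class name is made up for illustration):

import org.apache.lucene.util.RamUsageEstimator;

public class RamAccountingSketch {
  public static void main(String[] args) {
    long[] packed = new long[64];
    long[][] blocks = new long[64][];

    // sizeOf(long[]) is a deep estimate: array header + 64 * 8 bytes of content.
    System.out.println("deep long[64]      = " + RamUsageEstimator.sizeOf(packed));

    // shallowSizeOf(long[][]) counts only the outer array of references; the
    // (still null) inner long[] blocks are accounted separately once allocated,
    // which is what SparseFixedBitSet does in insertBlock()/insertLong().
    System.out.println("shallow long[64][] = " + RamUsageEstimator.shallowSizeOf(blocks));
  }
}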
 @Override
 public long getMemorySizeInBytes() {
   if (size == -1) {
     size =
         RamUsageEstimator.NUM_BYTES_ARRAY_HEADER
             + values.sizeInBytes()
             + RamUsageEstimator.sizeOf(set.getBits());
   }
   return size;
 }
 private RoaringDocIdSet(DocIdSet[] docIdSets, int cardinality) {
   this.docIdSets = docIdSets;
   long ramBytesUsed = BASE_RAM_BYTES_USED + RamUsageEstimator.shallowSizeOf(docIdSets);
   for (DocIdSet set : this.docIdSets) {
     if (set != null) {
       ramBytesUsed += set.ramBytesUsed();
     }
   }
   this.ramBytesUsed = ramBytesUsed;
   this.cardinality = cardinality;
 }
  @Override
  public V put(K key, V value) {
    synchronized (map) {
      if (getState() == State.LIVE) {
        stats.inserts.increment();
      }

      // increment local inserts regardless of state???
      // it does make it more consistent with the current size...
      inserts++;

      // important to calc and add new ram bytes first so that removeEldestEntry can compare
      // correctly
      long keySize = DEFAULT_RAM_BYTES_USED;
      if (maxRamBytes != Long.MAX_VALUE) {
        if (key != null && key instanceof Accountable) {
          keySize = ((Accountable) key).ramBytesUsed();
        }
        long valueSize = 0;
        if (value != null) {
          if (value instanceof Accountable) {
            Accountable accountable = (Accountable) value;
            valueSize = accountable.ramBytesUsed();
          } else {
            throw new SolrException(
                SolrException.ErrorCode.SERVER_ERROR,
                "Cache: "
                    + getName()
                    + " is configured with maxRamBytes="
                    + RamUsageEstimator.humanReadableUnits(maxRamBytes)
                    + " but its values do not implement org.apache.lucene.util.Accountable");
          }
        }
        ramBytesUsed += keySize + valueSize + LINKED_HASHTABLE_RAM_BYTES_PER_ENTRY;
      }
      V old = map.put(key, value);
      if (maxRamBytes != Long.MAX_VALUE && old != null) {
        long bytesToDecrement = ((Accountable) old).ramBytesUsed();
        // the key existed in the map but we added its size before the put, so let's back out
        bytesToDecrement += LINKED_HASHTABLE_RAM_BYTES_PER_ENTRY;
        if (key != null) {
          if (key instanceof Accountable) {
            Accountable aKey = (Accountable) key;
            bytesToDecrement += aKey.ramBytesUsed();
          } else {
            bytesToDecrement += DEFAULT_RAM_BYTES_USED;
          }
        }
        ramBytesUsed -= bytesToDecrement;
      }
      return old;
    }
  }
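When maxRamBytes is configured, the put above requires every value (and ideally every key) to implement org.apache.lucene.util.Accountable, otherwise it throws the SERVER_ERROR shown. A minimal sketch of a value type that satisfies that contract; the class name and wrapped byte[] are illustrative, not part of Solr:

import java.util.Collection;
import java.util.Collections;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.RamUsageEstimator;

// Illustrative cache value: reports its own size so the cache can enforce maxRamBytes.
public class CachedBlob implements Accountable {
  private static final long BASE_RAM_BYTES_USED =
      RamUsageEstimator.shallowSizeOfInstance(CachedBlob.class);

  private final byte[] data;

  public CachedBlob(byte[] data) {
    this.data = data;
  }

  @Override
  public long ramBytesUsed() {
    // shallow size of this object plus the deep size of the wrapped array
    return BASE_RAM_BYTES_USED + RamUsageEstimator.sizeOf(data);
  }

  @Override
  public Collection<Accountable> getChildResources() {
    return Collections.emptyList();
  }
}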
 private void _benchmark(Lookup lookup, Map<String, Integer> ref, boolean estimate, Bench bench)
     throws Exception {
   long start = System.currentTimeMillis();
   lookup.build(getTFIT());
   long buildTime = System.currentTimeMillis() - start;
   TermFreqIterator tfit = getTFIT();
   long elapsed = 0;
   while (tfit.hasNext()) {
     String key = tfit.next();
     // take only the first part of the key
     int len = key.length() > 4 ? key.length() / 3 : 2;
     String prefix = key.substring(0, len);
     start = System.nanoTime();
     List<LookupResult> res = lookup.lookup(prefix, true, 10);
     elapsed += System.nanoTime() - start;
     assertTrue(res.size() > 0);
     for (LookupResult lr : res) {
       assertTrue(lr.key.startsWith(prefix));
     }
     if (ref != null) { // verify the counts
       Integer Cnt = ref.get(key);
       if (Cnt == null) { // first pass
         ref.put(key, res.size());
       } else {
         assertEquals(key + ", prefix: " + prefix, Cnt.intValue(), res.size());
       }
     }
   }
   if (estimate) {
     RamUsageEstimator rue = new RamUsageEstimator();
     long size = rue.estimateRamUsage(lookup);
     System.err.println(lookup.getClass().getSimpleName() + " - size=" + size);
   }
   if (bench != null) {
     bench.buildTime += buildTime;
     bench.lookupTime += elapsed;
   }
 }
 private void insertLong(int i4096, int i64, int i, long index) {
   indices[i4096] |= 1L << i64; // shifts are mod 64 in java
   // we count the number of bits that are set on the right of i64
   // this gives us the index at which to perform the insertion
   final int o = Long.bitCount(index & ((1L << i64) - 1));
   final long[] bitArray = bits[i4096];
   if (bitArray[bitArray.length - 1] == 0) {
     // since we only store non-zero longs, if the last value is 0, it means
      // that we already have extra space, so make use of it
     System.arraycopy(bitArray, o, bitArray, o + 1, bitArray.length - o - 1);
     bitArray[o] = 1L << i;
   } else {
     // we don't have extra space so we need to resize to insert the new long
     final int newSize = oversize(bitArray.length + 1);
     final long[] newBitArray = new long[newSize];
     System.arraycopy(bitArray, 0, newBitArray, 0, o);
     newBitArray[o] = 1L << i;
     System.arraycopy(bitArray, o, newBitArray, o + 1, bitArray.length - o);
     bits[i4096] = newBitArray;
     ramBytesUsed += RamUsageEstimator.sizeOf(newBitArray) - RamUsageEstimator.sizeOf(bitArray);
   }
   ++nonZeroLongCount;
 }
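insertLong above finds the slot of a 64-bit word inside the packed per-block array by popcounting the index bits below i64. A small self-contained illustration of that offset computation, with arbitrarily chosen bit positions:

public class SparseOffsetDemo {
  public static void main(String[] args) {
    // Pretend longs 3, 17 and 40 of a 4096-bit block are non-zero:
    long index = (1L << 3) | (1L << 17) | (1L << 40);

    // Where does long #17 live in the packed long[]? Count the set bits strictly
    // below position 17: only bit 3 is set, so the offset is 1.
    int i64 = 17;
    System.out.println(Long.bitCount(index & ((1L << i64) - 1))); // prints 1

    // Long #40 has two set bits below it (3 and 17), so its offset is 2.
    System.out.println(Long.bitCount(index & ((1L << 40) - 1))); // prints 2
  }
}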
  private long getPageMemoryUsage(
      PackedLongValues values,
      float acceptableOverheadRatio,
      int pageSize,
      long pageMinOrdinal,
      long pageMaxOrdinal) {
    int bitsRequired;
    long pageMemorySize = 0;
    PackedInts.FormatAndBits formatAndBits;
    if (pageMaxOrdinal == Long.MIN_VALUE) {
      // empty page - will use the null reader which just stores size
      pageMemorySize +=
          RamUsageEstimator.alignObjectSize(
              RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + RamUsageEstimator.NUM_BYTES_INT);

    } else {
      long pageMinValue = values.get(pageMinOrdinal);
      long pageMaxValue = values.get(pageMaxOrdinal);
      long pageDelta = pageMaxValue - pageMinValue;
      if (pageDelta != 0) {
        bitsRequired = pageDelta < 0 ? 64 : PackedInts.bitsRequired(pageDelta);
        formatAndBits =
            PackedInts.fastestFormatAndBits(pageSize, bitsRequired, acceptableOverheadRatio);
        pageMemorySize +=
            formatAndBits.format.longCount(
                    PackedInts.VERSION_CURRENT, pageSize, formatAndBits.bitsPerValue)
                * RamUsageEstimator.NUM_BYTES_LONG;
        pageMemorySize += RamUsageEstimator.NUM_BYTES_LONG; // min value per page storage
      } else {
        // empty page
        pageMemorySize +=
            RamUsageEstimator.alignObjectSize(
                RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + RamUsageEstimator.NUM_BYTES_INT);
      }
    }
    return pageMemorySize;
  }
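A rough worked example of the non-empty branch above, using plain arithmetic and ignoring any per-format padding that PackedInts.fastestFormatAndBits may introduce: a page of 1024 ordinals whose delta needs 5 bits packs into 80 longs (640 bytes) plus one long for the per-page minimum value.

public class PageMemoryDemo {
  public static void main(String[] args) {
    int pageSize = 1024;   // ordinals per page (illustrative)
    int bitsRequired = 5;  // bits needed for (pageMaxValue - pageMinValue)

    // ceil(1024 * 5 / 64) = 80 longs of packed storage, plus the per-page min value.
    long longCount = (pageSize * (long) bitsRequired + 63) / 64;
    long pageBytes = longCount * Long.BYTES + Long.BYTES;
    System.out.println(pageBytes); // prints 648
  }
}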
  long computeSizeInBytes() {
    long sizeInBytes = 0;
    // Ignore type field
    //  sizeInBytes += ((type.length() * RamUsage.NUM_BYTES_CHAR) + (3 * RamUsage.NUM_BYTES_INT)) +
    // RamUsage.NUM_BYTES_OBJECT_HEADER;
    sizeInBytes +=
        RamUsageEstimator.NUM_BYTES_ARRAY_HEADER
            + (idToDoc.values.length * RamUsageEstimator.NUM_BYTES_INT);
    sizeInBytes += RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + (idToDoc.allocated.length);
    final boolean[] states = idToDoc.allocated;
    final Object[] keys = idToDoc.keys;
    for (int i = 0; i < states.length; i++) {
      if (states[i]) {
        HashedBytesArray bytesArray = (HashedBytesArray) keys[i];
        if (bytesArray != null) {
          sizeInBytes +=
              RamUsageEstimator.NUM_BYTES_OBJECT_HEADER
                  + (bytesArray.length() + RamUsageEstimator.NUM_BYTES_INT);
        } else {
          sizeInBytes += RamUsageEstimator.NUM_BYTES_OBJECT_REF;
        }
      }
    }

    // The docIdToId array contains references to idToDoc for this segment or other segments, so we
    // can use OBJECT_REF
    sizeInBytes +=
        RamUsageEstimator.NUM_BYTES_ARRAY_HEADER
            + (RamUsageEstimator.NUM_BYTES_OBJECT_REF * docIdToId.length);
    for (HashedBytesArray bytesArray : parentIdsValues) {
      if (bytesArray == null) {
        sizeInBytes += RamUsageEstimator.NUM_BYTES_OBJECT_REF;
      } else {
        sizeInBytes +=
            RamUsageEstimator.NUM_BYTES_OBJECT_HEADER
                + (bytesArray.length() + RamUsageEstimator.NUM_BYTES_INT);
      }
    }
    sizeInBytes += RamUsageEstimator.sizeOf(parentIdsOrdinals);

    return sizeInBytes;
  }
  private static StringBuilder toString(StringBuilder dest, Accountable a, int depth) {
    for (int i = 1; i < depth; i++) {
      dest.append("    ");
    }

    if (depth > 0) {
      dest.append("|-- ");
    }

    dest.append(a.toString());
    dest.append(": ");
    dest.append(RamUsageEstimator.humanReadableUnits(a.ramBytesUsed()));
    dest.append(System.lineSeparator());

    for (Accountable child : a.getChildResources()) {
      toString(dest, child, depth + 1);
    }

    return dest;
  }
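A hedged usage sketch for the recursive printer above: it builds a tiny parent/child tree of anonymous Accountable instances with made-up sizes and prints it. In Lucene this kind of output is exposed publicly as Accountables.toString(Accountable), which is what the sketch calls; the names and byte counts are illustrative.

import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Accountables;

public class AccountableTreeDemo {
  // leaf resource with a fixed, made-up size
  static Accountable leaf(String name, long bytes) {
    return new Accountable() {
      @Override public long ramBytesUsed() { return bytes; }
      @Override public Collection<Accountable> getChildResources() { return Collections.emptyList(); }
      @Override public String toString() { return name; }
    };
  }

  public static void main(String[] args) {
    Accountable terms = leaf("term index", 1_234);
    Accountable postings = leaf("postings", 56_789);
    Accountable root = new Accountable() {
      @Override public long ramBytesUsed() { return terms.ramBytesUsed() + postings.ramBytesUsed(); }
      @Override public Collection<Accountable> getChildResources() { return Arrays.asList(terms, postings); }
      @Override public String toString() { return "reader"; }
    };
    System.out.print(Accountables.toString(root));
  }
}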
 @Override
 public long sizeInBytes() {
   long mem = RamUsageEstimator.shallowSizeOf(this);
   try {
     if (searcherMgr != null) {
       IndexSearcher searcher = searcherMgr.acquire();
       try {
         for (AtomicReaderContext context : searcher.getIndexReader().leaves()) {
           AtomicReader reader = FilterAtomicReader.unwrap(context.reader());
           if (reader instanceof SegmentReader) {
             mem += ((SegmentReader) context.reader()).ramBytesUsed();
           }
         }
       } finally {
         searcherMgr.release(searcher);
       }
     }
     return mem;
   } catch (IOException ioe) {
     throw new RuntimeException(ioe);
   }
 }
  public void testReferenceSize() {
    if (!isSupportedJVM()) {
      System.err.println("WARN: Your JVM does not support certain Oracle/Sun extensions.");
      System.err.println(" Memory estimates may be inaccurate.");
      System.err.println(" Please report this to the Lucene mailing list.");
      System.err.println("JVM version: " + RamUsageEstimator.JVM_INFO_STRING);
      System.err.println("UnsupportedFeatures:");
      for (JvmFeature f : RamUsageEstimator.getUnsupportedFeatures()) {
        System.err.print(" - " + f.toString());
        if (f == RamUsageEstimator.JvmFeature.OBJECT_ALIGNMENT) {
          System.err.print(
              "; Please note: 32bit Oracle/Sun VMs don't allow exact OBJECT_ALIGNMENT retrieval, this is a known issue.");
        }
        System.err.println();
      }
    }

    assertTrue(NUM_BYTES_OBJECT_REF == 4 || NUM_BYTES_OBJECT_REF == 8);
    if (!Constants.JRE_IS_64BIT) {
      assertEquals("For 32bit JVMs, reference size must always be 4?", 4, NUM_BYTES_OBJECT_REF);
    }
  }
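The test above only pins NUM_BYTES_OBJECT_REF to 4 or 8; on 64-bit JVMs the value depends on whether compressed oops are in effect. A small sketch showing how that constant feeds shallow-size arithmetic (the three-reference layout is purely illustrative):

import org.apache.lucene.util.RamUsageEstimator;

public class ReferenceSizeDemo {
  public static void main(String[] args) {
    // 4 on 32-bit JVMs and on 64-bit JVMs with compressed oops, 8 otherwise.
    System.out.println("ref size = " + RamUsageEstimator.NUM_BYTES_OBJECT_REF);

    // Shallow size of a hypothetical object holding three reference fields:
    long shallow = RamUsageEstimator.alignObjectSize(
        RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + 3L * RamUsageEstimator.NUM_BYTES_OBJECT_REF);
    System.out.println("three-ref object ~ " + shallow + " bytes");
  }
}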
  private static class OrdinalsCollector implements Collector {

    private static final long SHALLOW_FIXEDBITSET_SIZE =
        RamUsageEstimator.shallowSizeOfInstance(FixedBitSet.class);

    /** Return an approximate memory overhead per bucket for this collector. */
    public static long memoryOverhead(long maxOrd) {
      return RamUsageEstimator.NUM_BYTES_OBJECT_REF
          + SHALLOW_FIXEDBITSET_SIZE
          + (maxOrd + 7) / 8; // 1 bit per ord
    }
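     // Worked example (editorial note, not from the original class): with
     // maxOrd = 1_000_000 the per-bucket overhead is one object reference plus the
     // shallow FixedBitSet size plus (1_000_000 + 7) / 8 = 125_000 bytes for the
     // backing bit words -- roughly 122 KB per bucket.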

    private final BigArrays bigArrays;
    private final RandomAccessOrds values;
    private final int maxOrd;
    private final HyperLogLogPlusPlus counts;
    private ObjectArray<FixedBitSet> visitedOrds;

    OrdinalsCollector(HyperLogLogPlusPlus counts, RandomAccessOrds values, BigArrays bigArrays) {
      Preconditions.checkArgument(values.getValueCount() <= Integer.MAX_VALUE);
      maxOrd = (int) values.getValueCount();
      this.bigArrays = bigArrays;
      this.counts = counts;
      this.values = values;
      visitedOrds = bigArrays.newObjectArray(1);
    }

    @Override
    public void collect(int doc, long bucketOrd) {
      visitedOrds = bigArrays.grow(visitedOrds, bucketOrd + 1);
      FixedBitSet bits = visitedOrds.get(bucketOrd);
      if (bits == null) {
        bits = new FixedBitSet(maxOrd);
        visitedOrds.set(bucketOrd, bits);
      }
      values.setDocument(doc);
      final int valueCount = values.cardinality();
      for (int i = 0; i < valueCount; ++i) {
        bits.set((int) values.ordAt(i));
      }
    }

    @Override
    public void postCollect() {
      final FixedBitSet allVisitedOrds = new FixedBitSet(maxOrd);
      for (long bucket = visitedOrds.size() - 1; bucket >= 0; --bucket) {
        final FixedBitSet bits = visitedOrds.get(bucket);
        if (bits != null) {
          allVisitedOrds.or(bits);
        }
      }

      final org.elasticsearch.common.hash.MurmurHash3.Hash128 hash =
          new org.elasticsearch.common.hash.MurmurHash3.Hash128();
      try (LongArray hashes = bigArrays.newLongArray(maxOrd, false)) {
        for (int ord = allVisitedOrds.nextSetBit(0);
            ord != -1;
            ord = ord + 1 < maxOrd ? allVisitedOrds.nextSetBit(ord + 1) : -1) {
          final BytesRef value = values.lookupOrd(ord);
          org.elasticsearch.common.hash.MurmurHash3.hash128(
              value.bytes, value.offset, value.length, 0, hash);
          hashes.set(ord, hash.h1);
        }

        for (long bucket = visitedOrds.size() - 1; bucket >= 0; --bucket) {
          final FixedBitSet bits = visitedOrds.get(bucket);
          if (bits != null) {
            for (int ord = bits.nextSetBit(0);
                ord != -1;
                ord = ord + 1 < maxOrd ? bits.nextSetBit(ord + 1) : -1) {
              counts.collect(bucket, hashes.get(ord));
            }
          }
        }
      }
    }

    @Override
    public void close() throws ElasticsearchException {
      Releasables.close(visitedOrds);
    }
  }
/**
 * Handles a terms dict, but decouples all details of doc/freqs/positions reading to an instance of
 * {@link PostingsReaderBase}. This class is reusable for codecs that use a different format for
 * docs/freqs/positions (though codecs are also free to make their own terms dict impl).
 *
 * <p>This class also interacts with an instance of {@link TermsIndexReaderBase}, to abstract away
 * the specific implementation of the terms dict index.
 *
 * @lucene.experimental
 */
public class BlockTermsReader extends FieldsProducer {
  private static final long BASE_RAM_BYTES_USED =
      RamUsageEstimator.shallowSizeOfInstance(BlockTermsReader.class);
  // Open input to the main terms dict file (_X.tis)
  private final IndexInput in;

  // Reads the terms dict entries, to gather state to
  // produce DocsEnum on demand
  private final PostingsReaderBase postingsReader;

  private final TreeMap<String, FieldReader> fields = new TreeMap<>();

  // Reads the terms index
  private TermsIndexReaderBase indexReader;

  // Used as key for the terms cache
  private static class FieldAndTerm implements Cloneable {
    String field;
    BytesRef term;

    public FieldAndTerm() {}

    public FieldAndTerm(FieldAndTerm other) {
      field = other.field;
      term = BytesRef.deepCopyOf(other.term);
    }

    @Override
    public boolean equals(Object _other) {
      FieldAndTerm other = (FieldAndTerm) _other;
      return other.field.equals(field) && term.bytesEquals(other.term);
    }

    @Override
    public FieldAndTerm clone() {
      return new FieldAndTerm(this);
    }

    @Override
    public int hashCode() {
      return field.hashCode() * 31 + term.hashCode();
    }
  }

  public BlockTermsReader(
      TermsIndexReaderBase indexReader, PostingsReaderBase postingsReader, SegmentReadState state)
      throws IOException {

    this.postingsReader = postingsReader;

    String filename =
        IndexFileNames.segmentFileName(
            state.segmentInfo.name, state.segmentSuffix, BlockTermsWriter.TERMS_EXTENSION);
    in = state.directory.openInput(filename, state.context);

    boolean success = false;
    try {
      CodecUtil.checkIndexHeader(
          in,
          BlockTermsWriter.CODEC_NAME,
          BlockTermsWriter.VERSION_START,
          BlockTermsWriter.VERSION_CURRENT,
          state.segmentInfo.getId(),
          state.segmentSuffix);

      // Have PostingsReader init itself
      postingsReader.init(in, state);

      // NOTE: data file is too costly to verify checksum against all the bytes on open,
      // but for now we at least verify proper structure of the checksum footer: which looks
      // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
      // such as file truncation.
      CodecUtil.retrieveChecksum(in);

      // Read per-field details
      seekDir(in);

      final int numFields = in.readVInt();
      if (numFields < 0) {
        throw new CorruptIndexException("invalid number of fields: " + numFields, in);
      }
      for (int i = 0; i < numFields; i++) {
        final int field = in.readVInt();
        final long numTerms = in.readVLong();
        assert numTerms >= 0;
        final long termsStartPointer = in.readVLong();
        final FieldInfo fieldInfo = state.fieldInfos.fieldInfo(field);
        final long sumTotalTermFreq =
            fieldInfo.getIndexOptions() == IndexOptions.DOCS ? -1 : in.readVLong();
        final long sumDocFreq = in.readVLong();
        final int docCount = in.readVInt();
        final int longsSize = in.readVInt();
        if (docCount < 0
            || docCount > state.segmentInfo.maxDoc()) { // #docs with field must be <= #docs
          throw new CorruptIndexException(
              "invalid docCount: " + docCount + " maxDoc: " + state.segmentInfo.maxDoc(), in);
        }
        if (sumDocFreq < docCount) { // #postings must be >= #docs with field
          throw new CorruptIndexException(
              "invalid sumDocFreq: " + sumDocFreq + " docCount: " + docCount, in);
        }
        if (sumTotalTermFreq != -1
            && sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings
          throw new CorruptIndexException(
              "invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq, in);
        }
        FieldReader previous =
            fields.put(
                fieldInfo.name,
                new FieldReader(
                    fieldInfo,
                    numTerms,
                    termsStartPointer,
                    sumTotalTermFreq,
                    sumDocFreq,
                    docCount,
                    longsSize));
        if (previous != null) {
          throw new CorruptIndexException("duplicate fields: " + fieldInfo.name, in);
        }
      }
      success = true;
    } finally {
      if (!success) {
        in.close();
      }
    }

    this.indexReader = indexReader;
  }

  private void seekDir(IndexInput input) throws IOException {
    input.seek(input.length() - CodecUtil.footerLength() - 8);
    long dirOffset = input.readLong();
    input.seek(dirOffset);
  }

  @Override
  public void close() throws IOException {
    try {
      try {
        if (indexReader != null) {
          indexReader.close();
        }
      } finally {
        // null so if an app hangs on to us (ie, we are not
        // GCable, despite being closed) we still free most
        // ram
        indexReader = null;
        if (in != null) {
          in.close();
        }
      }
    } finally {
      if (postingsReader != null) {
        postingsReader.close();
      }
    }
  }

  @Override
  public Iterator<String> iterator() {
    return Collections.unmodifiableSet(fields.keySet()).iterator();
  }

  @Override
  public Terms terms(String field) throws IOException {
    assert field != null;
    return fields.get(field);
  }

  @Override
  public int size() {
    return fields.size();
  }

  private static final long FIELD_READER_RAM_BYTES_USED =
      RamUsageEstimator.shallowSizeOfInstance(FieldReader.class);

  private class FieldReader extends Terms implements Accountable {
    final long numTerms;
    final FieldInfo fieldInfo;
    final long termsStartPointer;
    final long sumTotalTermFreq;
    final long sumDocFreq;
    final int docCount;
    final int longsSize;

    FieldReader(
        FieldInfo fieldInfo,
        long numTerms,
        long termsStartPointer,
        long sumTotalTermFreq,
        long sumDocFreq,
        int docCount,
        int longsSize) {
      assert numTerms > 0;
      this.fieldInfo = fieldInfo;
      this.numTerms = numTerms;
      this.termsStartPointer = termsStartPointer;
      this.sumTotalTermFreq = sumTotalTermFreq;
      this.sumDocFreq = sumDocFreq;
      this.docCount = docCount;
      this.longsSize = longsSize;
    }

    @Override
    public long ramBytesUsed() {
      return FIELD_READER_RAM_BYTES_USED;
    }

    @Override
    public Collection<Accountable> getChildResources() {
      return Collections.emptyList();
    }

    @Override
    public TermsEnum iterator() throws IOException {
      return new SegmentTermsEnum();
    }

    @Override
    public boolean hasFreqs() {
      return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
    }

    @Override
    public boolean hasOffsets() {
      return fieldInfo
              .getIndexOptions()
              .compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
          >= 0;
    }

    @Override
    public boolean hasPositions() {
      return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
    }

    @Override
    public boolean hasPayloads() {
      return fieldInfo.hasPayloads();
    }

    @Override
    public long size() {
      return numTerms;
    }

    @Override
    public long getSumTotalTermFreq() {
      return sumTotalTermFreq;
    }

    @Override
    public long getSumDocFreq() throws IOException {
      return sumDocFreq;
    }

    @Override
    public int getDocCount() throws IOException {
      return docCount;
    }

    // Iterates through terms in this field
    private final class SegmentTermsEnum extends TermsEnum {
      private final IndexInput in;
      private final BlockTermState state;
      private final boolean doOrd;
      private final FieldAndTerm fieldTerm = new FieldAndTerm();
      private final TermsIndexReaderBase.FieldIndexEnum indexEnum;
      private final BytesRefBuilder term = new BytesRefBuilder();

      /* This is true if indexEnum is "still" seek'd to the index term
      for the current term. We set it to true on seeking, and then it
      remains valid until next() is called enough times to load another
      terms block: */
      private boolean indexIsCurrent;

      /* True if we've already called .next() on the indexEnum, to "bracket"
      the current block of terms: */
      private boolean didIndexNext;

      /* Next index term, bracketing the current block of terms; this is
      only valid if didIndexNext is true: */
      private BytesRef nextIndexTerm;

      /* True after seekExact(TermState); the seek is deferred.  If the app then
      calls next() (which is not "typical"), then we'll do the real seek */
      private boolean seekPending;

      private byte[] termSuffixes;
      private ByteArrayDataInput termSuffixesReader = new ByteArrayDataInput();

      /* Common prefix used for all terms in this block. */
      private int termBlockPrefix;

      /* How many terms in current block */
      private int blockTermCount;

      private byte[] docFreqBytes;
      private final ByteArrayDataInput freqReader = new ByteArrayDataInput();
      private int metaDataUpto;

      private long[] longs;
      private byte[] bytes;
      private ByteArrayDataInput bytesReader;

      public SegmentTermsEnum() throws IOException {
        in = BlockTermsReader.this.in.clone();
        in.seek(termsStartPointer);
        indexEnum = indexReader.getFieldEnum(fieldInfo);
        doOrd = indexReader.supportsOrd();
        fieldTerm.field = fieldInfo.name;
        state = postingsReader.newTermState();
        state.totalTermFreq = -1;
        state.ord = -1;

        termSuffixes = new byte[128];
        docFreqBytes = new byte[64];
        // System.out.println("BTR.enum init this=" + this + " postingsReader=" + postingsReader);
        longs = new long[longsSize];
      }

      // TODO: we may want an alternate mode here which is
      // "if you are about to return NOT_FOUND I won't use
      // the terms data from that"; eg FuzzyTermsEnum will
      // (usually) just immediately call seek again if we
      // return NOT_FOUND so it's a waste for us to fill in
      // the term that was actually NOT_FOUND
      @Override
      public SeekStatus seekCeil(final BytesRef target) throws IOException {

        if (indexEnum == null) {
          throw new IllegalStateException("terms index was not loaded");
        }

        // System.out.println("BTR.seek seg=" + segment + " target=" + fieldInfo.name + ":" +
        // target.utf8ToString() + " " + target + " current=" + term().utf8ToString() + " " + term()
        // + " indexIsCurrent=" + indexIsCurrent + " didIndexNext=" + didIndexNext + " seekPending="
        // + seekPending + " divisor=" + indexReader.getDivisor() + " this="  + this);
        if (didIndexNext) {
          if (nextIndexTerm == null) {
            // System.out.println("  nextIndexTerm=null");
          } else {
            // System.out.println("  nextIndexTerm=" + nextIndexTerm.utf8ToString());
          }
        }

        boolean doSeek = true;

        // See if we can avoid seeking, because target term
        // is after current term but before next index term:
        if (indexIsCurrent) {

          final int cmp = BytesRef.getUTF8SortedAsUnicodeComparator().compare(term.get(), target);

          if (cmp == 0) {
            // Already at the requested term
            return SeekStatus.FOUND;
          } else if (cmp < 0) {

            // Target term is after current term
            if (!didIndexNext) {
              if (indexEnum.next() == -1) {
                nextIndexTerm = null;
              } else {
                nextIndexTerm = indexEnum.term();
              }
              // System.out.println("  now do index next() nextIndexTerm=" + (nextIndexTerm == null
              // ? "null" : nextIndexTerm.utf8ToString()));
              didIndexNext = true;
            }

            if (nextIndexTerm == null
                || BytesRef.getUTF8SortedAsUnicodeComparator().compare(target, nextIndexTerm) < 0) {
              // Optimization: requested term is within the
              // same term block we are now in; skip seeking
              // (but do scanning):
              doSeek = false;
              // System.out.println("  skip seek: nextIndexTerm=" + (nextIndexTerm == null ? "null"
              // : nextIndexTerm.utf8ToString()));
            }
          }
        }

        if (doSeek) {
          // System.out.println("  seek");

          // Ask terms index to find biggest indexed term (=
          // first term in a block) that's <= our text:
          in.seek(indexEnum.seek(target));
          boolean result = nextBlock();

          // Block must exist since, at least, the indexed term
          // is in the block:
          assert result;

          indexIsCurrent = true;
          didIndexNext = false;

          if (doOrd) {
            state.ord = indexEnum.ord() - 1;
          }

          term.copyBytes(indexEnum.term());
          // System.out.println("  seek: term=" + term.utf8ToString());
        } else {
          // System.out.println("  skip seek");
          if (state.termBlockOrd == blockTermCount && !nextBlock()) {
            indexIsCurrent = false;
            return SeekStatus.END;
          }
        }

        seekPending = false;

        int common = 0;

        // Scan within block.  We could do this by calling
        // _next() and testing the resulting term, but this
        // is wasteful.  Instead, we first confirm the
        // target matches the common prefix of this block,
        // and then we scan the term bytes directly from the
        // termSuffixesReader's byte[], saving a copy into
        // the BytesRef term per term.  Only when we return
        // do we then copy the bytes into the term.

        while (true) {

          // First, see if target term matches common prefix
          // in this block:
          if (common < termBlockPrefix) {
            final int cmp =
                (term.byteAt(common) & 0xFF) - (target.bytes[target.offset + common] & 0xFF);
            if (cmp < 0) {

              // TODO: maybe we should store common prefix
              // in block header?  (instead of relying on
              // last term of previous block)

              // Target's prefix is after the common block
              // prefix, so term cannot be in this block
              // but it could be in next block.  We
              // must scan to end-of-block to set common
              // prefix for next block:
              if (state.termBlockOrd < blockTermCount) {
                while (state.termBlockOrd < blockTermCount - 1) {
                  state.termBlockOrd++;
                  state.ord++;
                  termSuffixesReader.skipBytes(termSuffixesReader.readVInt());
                }
                final int suffix = termSuffixesReader.readVInt();
                term.setLength(termBlockPrefix + suffix);
                term.grow(term.length());
                termSuffixesReader.readBytes(term.bytes(), termBlockPrefix, suffix);
              }
              state.ord++;

              if (!nextBlock()) {
                indexIsCurrent = false;
                return SeekStatus.END;
              }
              common = 0;

            } else if (cmp > 0) {
              // Target's prefix is before the common prefix
              // of this block, so we position to start of
              // block and return NOT_FOUND:
              assert state.termBlockOrd == 0;

              final int suffix = termSuffixesReader.readVInt();
              term.setLength(termBlockPrefix + suffix);
              term.grow(term.length());
              termSuffixesReader.readBytes(term.bytes(), termBlockPrefix, suffix);
              return SeekStatus.NOT_FOUND;
            } else {
              common++;
            }

            continue;
          }

          // Test every term in this block
          while (true) {
            state.termBlockOrd++;
            state.ord++;

            final int suffix = termSuffixesReader.readVInt();

            // We know the prefix matches, so just compare the new suffix:
            final int termLen = termBlockPrefix + suffix;
            int bytePos = termSuffixesReader.getPosition();

            boolean next = false;
            final int limit = target.offset + (termLen < target.length ? termLen : target.length);
            int targetPos = target.offset + termBlockPrefix;
            while (targetPos < limit) {
              final int cmp = (termSuffixes[bytePos++] & 0xFF) - (target.bytes[targetPos++] & 0xFF);
              if (cmp < 0) {
                // Current term is still before the target;
                // keep scanning
                next = true;
                break;
              } else if (cmp > 0) {
                // Done!  Current term is after target. Stop
                // here, fill in real term, return NOT_FOUND.
                term.setLength(termBlockPrefix + suffix);
                term.grow(term.length());
                termSuffixesReader.readBytes(term.bytes(), termBlockPrefix, suffix);
                // System.out.println("  NOT_FOUND");
                return SeekStatus.NOT_FOUND;
              }
            }

            if (!next && target.length <= termLen) {
              term.setLength(termBlockPrefix + suffix);
              term.grow(term.length());
              termSuffixesReader.readBytes(term.bytes(), termBlockPrefix, suffix);

              if (target.length == termLen) {
                // Done!  Exact match.  Stop here, fill in
                // real term, return FOUND.
                // System.out.println("  FOUND");
                return SeekStatus.FOUND;
              } else {
                // System.out.println("  NOT_FOUND");
                return SeekStatus.NOT_FOUND;
              }
            }

            if (state.termBlockOrd == blockTermCount) {
              // Must pre-fill term for next block's common prefix
              term.setLength(termBlockPrefix + suffix);
              term.grow(term.length());
              termSuffixesReader.readBytes(term.bytes(), termBlockPrefix, suffix);
              break;
            } else {
              termSuffixesReader.skipBytes(suffix);
            }
          }

          // The purpose of the terms dict index is to seek
          // the enum to the closest index term before the
          // term we are looking for.  So, we should never
          // cross another index term (besides the first
          // one) while we are scanning:

          assert indexIsCurrent;

          if (!nextBlock()) {
            // System.out.println("  END");
            indexIsCurrent = false;
            return SeekStatus.END;
          }
          common = 0;
        }
      }

      @Override
      public BytesRef next() throws IOException {
        // System.out.println("BTR.next() seekPending=" + seekPending + " pendingSeekCount=" +
        // state.termBlockOrd);

        // If seek was previously called and the term was cached,
        // usually the caller is just going to pull a docs/positions enum or get
        // docFreq, etc.  But, if they then call next(),
        // this method catches up all internal state so next()
        // works properly:
        if (seekPending) {
          assert !indexIsCurrent;
          in.seek(state.blockFilePointer);
          final int pendingSeekCount = state.termBlockOrd;
          boolean result = nextBlock();

          final long savOrd = state.ord;

          // Block must exist since seek(TermState) was called w/ a
          // TermState previously returned by this enum when positioned
          // on a real term:
          assert result;

          while (state.termBlockOrd < pendingSeekCount) {
            BytesRef nextResult = _next();
            assert nextResult != null;
          }
          seekPending = false;
          state.ord = savOrd;
        }
        return _next();
      }

      /* Decodes only the term bytes of the next term.  If caller then asks for
      metadata, i.e. docFreq, totalTermFreq, or pulls a docs/positions enum, we then (lazily)
      decode all metadata up to the current term. */
      private BytesRef _next() throws IOException {
        // System.out.println("BTR._next seg=" + segment + " this=" + this + " termCount=" +
        // state.termBlockOrd + " (vs " + blockTermCount + ")");
        if (state.termBlockOrd == blockTermCount && !nextBlock()) {
          // System.out.println("  eof");
          indexIsCurrent = false;
          return null;
        }

        // TODO: cutover to something better for these ints!  simple64?
        final int suffix = termSuffixesReader.readVInt();
        // System.out.println("  suffix=" + suffix);

        term.setLength(termBlockPrefix + suffix);
        term.grow(term.length());
        termSuffixesReader.readBytes(term.bytes(), termBlockPrefix, suffix);
        state.termBlockOrd++;

        // NOTE: meaningless in the non-ord case
        state.ord++;

        // System.out.println("  return term=" + fieldInfo.name + ":" + term.utf8ToString() + " " +
        // term + " tbOrd=" + state.termBlockOrd);
        return term.get();
      }

      @Override
      public BytesRef term() {
        return term.get();
      }

      @Override
      public int docFreq() throws IOException {
        // System.out.println("BTR.docFreq");
        decodeMetaData();
        // System.out.println("  return " + state.docFreq);
        return state.docFreq;
      }

      @Override
      public long totalTermFreq() throws IOException {
        decodeMetaData();
        return state.totalTermFreq;
      }

      @Override
      public PostingsEnum postings(PostingsEnum reuse, int flags) throws IOException {

        if (PostingsEnum.featureRequested(flags, DocsAndPositionsEnum.OLD_NULL_SEMANTICS)) {
          if (fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
              < 0) {
            // Positions were not indexed:
            return null;
          }
        }

        // System.out.println("BTR.docs this=" + this);
        decodeMetaData();
        // System.out.println("BTR.docs:  state.docFreq=" + state.docFreq);
        return postingsReader.postings(fieldInfo, state, reuse, flags);
      }

      @Override
      public void seekExact(BytesRef target, TermState otherState) {
        // System.out.println("BTR.seekExact termState target=" + target.utf8ToString() + " " +
        // target + " this=" + this);
        assert otherState != null && otherState instanceof BlockTermState;
        assert !doOrd || ((BlockTermState) otherState).ord < numTerms;
        state.copyFrom(otherState);
        seekPending = true;
        indexIsCurrent = false;
        term.copyBytes(target);
      }

      @Override
      public TermState termState() throws IOException {
        // System.out.println("BTR.termState this=" + this);
        decodeMetaData();
        TermState ts = state.clone();
        // System.out.println("  return ts=" + ts);
        return ts;
      }

      @Override
      public void seekExact(long ord) throws IOException {
        // System.out.println("BTR.seek by ord ord=" + ord);
        if (indexEnum == null) {
          throw new IllegalStateException("terms index was not loaded");
        }

        assert ord < numTerms;

        // TODO: if ord is in same terms block and
        // after current ord, we should avoid this seek just
        // like we do in the seek(BytesRef) case
        in.seek(indexEnum.seek(ord));
        boolean result = nextBlock();

        // Block must exist since ord < numTerms:
        assert result;

        indexIsCurrent = true;
        didIndexNext = false;
        seekPending = false;

        state.ord = indexEnum.ord() - 1;
        assert state.ord >= -1 : "ord=" + state.ord;
        term.copyBytes(indexEnum.term());

        // Now, scan:
        int left = (int) (ord - state.ord);
        while (left > 0) {
          final BytesRef term = _next();
          assert term != null;
          left--;
          assert indexIsCurrent;
        }
      }

      @Override
      public long ord() {
        if (!doOrd) {
          throw new UnsupportedOperationException();
        }
        return state.ord;
      }

      /* Does initial decode of next block of terms; this
      doesn't actually decode the docFreq, totalTermFreq,
      postings details (frq/prx offset, etc.) metadata;
      it just loads them as byte[] blobs which are then
      decoded on-demand if the metadata is ever requested
      for any term in this block.  This enables terms-only
      intensive consumes (eg certain MTQs, respelling) to
      not pay the price of decoding metadata they won't
      use. */
      private boolean nextBlock() throws IOException {

        // TODO: we still lazy-decode the byte[] for each
        // term (the suffix), but, if we decoded
        // all N terms up front then seeking could do a fast
        // bsearch w/in the block...

        // System.out.println("BTR.nextBlock() fp=" + in.getFilePointer() + " this=" + this);
        state.blockFilePointer = in.getFilePointer();
        blockTermCount = in.readVInt();
        // System.out.println("  blockTermCount=" + blockTermCount);
        if (blockTermCount == 0) {
          return false;
        }
        termBlockPrefix = in.readVInt();

        // term suffixes:
        int len = in.readVInt();
        if (termSuffixes.length < len) {
          termSuffixes = new byte[ArrayUtil.oversize(len, 1)];
        }
        // System.out.println("  termSuffixes len=" + len);
        in.readBytes(termSuffixes, 0, len);
        termSuffixesReader.reset(termSuffixes, 0, len);

        // docFreq, totalTermFreq
        len = in.readVInt();
        if (docFreqBytes.length < len) {
          docFreqBytes = new byte[ArrayUtil.oversize(len, 1)];
        }
        // System.out.println("  freq bytes len=" + len);
        in.readBytes(docFreqBytes, 0, len);
        freqReader.reset(docFreqBytes, 0, len);

        // metadata
        len = in.readVInt();
        if (bytes == null) {
          bytes = new byte[ArrayUtil.oversize(len, 1)];
          bytesReader = new ByteArrayDataInput();
        } else if (bytes.length < len) {
          bytes = new byte[ArrayUtil.oversize(len, 1)];
        }
        in.readBytes(bytes, 0, len);
        bytesReader.reset(bytes, 0, len);

        metaDataUpto = 0;
        state.termBlockOrd = 0;

        indexIsCurrent = false;
        // System.out.println("  indexIsCurrent=" + indexIsCurrent);

        return true;
      }

      private void decodeMetaData() throws IOException {
        // System.out.println("BTR.decodeMetadata mdUpto=" + metaDataUpto + " vs termCount=" +
        // state.termBlockOrd + " state=" + state);
        if (!seekPending) {
          // TODO: cutover to random-access API
          // here.... really stupid that we have to decode N
          // wasted term metadata just to get to the N+1th
          // that we really need...

          // lazily catch up on metadata decode:
          final int limit = state.termBlockOrd;
          boolean absolute = metaDataUpto == 0;
          // TODO: better API would be "jump straight to term=N"???
          while (metaDataUpto < limit) {
            // System.out.println("  decode mdUpto=" + metaDataUpto);
            // TODO: we could make "tiers" of metadata, ie,
            // decode docFreq/totalTF but don't decode postings
            // metadata; this way caller could get
            // docFreq/totalTF w/o paying decode cost for
            // postings

            // TODO: if docFreq were bulk decoded we could
            // just skipN here:

            // docFreq, totalTermFreq
            state.docFreq = freqReader.readVInt();
            // System.out.println("    dF=" + state.docFreq);
            if (fieldInfo.getIndexOptions() != IndexOptions.DOCS) {
              state.totalTermFreq = state.docFreq + freqReader.readVLong();
              // System.out.println("    totTF=" + state.totalTermFreq);
            }
            // metadata
            for (int i = 0; i < longs.length; i++) {
              longs[i] = bytesReader.readVLong();
            }
            postingsReader.decodeTerm(longs, bytesReader, fieldInfo, state, absolute);
            metaDataUpto++;
            absolute = false;
          }
        } else {
          // System.out.println("  skip! seekPending");
        }
      }
    }
  }

  @Override
  public long ramBytesUsed() {
    long ramBytesUsed = BASE_RAM_BYTES_USED;
    ramBytesUsed += (postingsReader != null) ? postingsReader.ramBytesUsed() : 0;
    ramBytesUsed += (indexReader != null) ? indexReader.ramBytesUsed() : 0;
    ramBytesUsed += fields.size() * 2L * RamUsageEstimator.NUM_BYTES_OBJECT_REF;
    for (FieldReader reader : fields.values()) {
      ramBytesUsed += reader.ramBytesUsed();
    }
    return ramBytesUsed;
  }

  @Override
  public Collection<Accountable> getChildResources() {
    List<Accountable> resources = new ArrayList<>();
    if (indexReader != null) {
      resources.add(Accountables.namedAccountable("term index", indexReader));
    }
    if (postingsReader != null) {
      resources.add(Accountables.namedAccountable("delegate", postingsReader));
    }
    return Collections.unmodifiableList(resources);
  }

  @Override
  public String toString() {
    return getClass().getSimpleName()
        + "(index="
        + indexReader
        + ",delegate="
        + postingsReader
        + ")";
  }

  @Override
  public void checkIntegrity() throws IOException {
    // verify terms
    CodecUtil.checksumEntireFile(in);

    // verify postings
    postingsReader.checkIntegrity();
  }
}
 @Override
 public long ramBytesUsed() {
   return RamUsageEstimator.sizeOf(filter.getBits());
 }
  Lucene42DocValuesProducer(
      SegmentReadState state,
      String dataCodec,
      String dataExtension,
      String metaCodec,
      String metaExtension)
      throws IOException {
    maxDoc = state.segmentInfo.maxDoc();
    merging = false;
    String metaName =
        IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
    // read in the entries from the metadata file.
    ChecksumIndexInput in = state.directory.openChecksumInput(metaName, state.context);
    boolean success = false;
    ramBytesUsed = new AtomicLong(RamUsageEstimator.shallowSizeOfInstance(getClass()));
    try {
      version = CodecUtil.checkHeader(in, metaCodec, VERSION_START, VERSION_CURRENT);
      numerics = new HashMap<>();
      binaries = new HashMap<>();
      fsts = new HashMap<>();
      numEntries = readFields(in, state.fieldInfos);

      if (version >= VERSION_CHECKSUM) {
        CodecUtil.checkFooter(in);
      } else {
        CodecUtil.checkEOF(in);
      }

      success = true;
    } finally {
      if (success) {
        IOUtils.close(in);
      } else {
        IOUtils.closeWhileHandlingException(in);
      }
    }

    String dataName =
        IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension);
    this.data = state.directory.openInput(dataName, state.context);
    success = false;
    try {
      final int version2 = CodecUtil.checkHeader(data, dataCodec, VERSION_START, VERSION_CURRENT);
      if (version != version2) {
        throw new CorruptIndexException(
            "Format versions mismatch: meta=" + version + ", data=" + version2, data);
      }

      if (version >= VERSION_CHECKSUM) {
        // NOTE: data file is too costly to verify checksum against all the bytes on open,
        // but for now we at least verify proper structure of the checksum footer: which looks
        // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
        // such as file truncation.
        CodecUtil.retrieveChecksum(data);
      }

      success = true;
    } finally {
      if (!success) {
        IOUtils.closeWhileHandlingException(this.data);
      }
    }
  }
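Like BlockTermsReader earlier, the producer above only validates the structure of the data file's checksum footer at open time and defers full verification to checkIntegrity(). A hedged sketch contrasting the two CodecUtil calls; the Directory handling and file name are placeholders:

import java.io.IOException;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;

public class ChecksumDemo {
  // Cheap: reads only the footer (FOOTER_MAGIC + algorithm id + checksum), which
  // catches truncation without touching the rest of the file. Done at open time.
  static long footerOnly(Directory dir, String fileName) throws IOException {
    try (IndexInput in = dir.openInput(fileName, IOContext.READONCE)) {
      return CodecUtil.retrieveChecksum(in);
    }
  }

  // Expensive: checksums every byte of the file. Deferred to checkIntegrity().
  static long fullVerification(Directory dir, String fileName) throws IOException {
    try (IndexInput in = dir.openInput(fileName, IOContext.READONCE)) {
      return CodecUtil.checksumEntireFile(in);
    }
  }
}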
/**
 * A bit set that only stores longs that have at least one bit which is set. The way it works is
 * that the space of bits is divided into blocks of 4096 bits, which is 64 longs. Then for each
 * block, we have:
 *
 * <ul>
 *   <li>a long[] which stores the non-zero longs for that block
 *   <li>a long so that bit <tt>i</tt> being set means that the <code>i-th</code> long of the block
 *       is non-zero, and its offset in the array of longs is the number of one bits on the right of
 *       the <code>i-th</code> bit.
 * </ul>
 *
 * @lucene.internal
 */
public class SparseFixedBitSet extends BitSet implements Bits, Accountable {

  private static final long BASE_RAM_BYTES_USED =
      RamUsageEstimator.shallowSizeOfInstance(SparseFixedBitSet.class);
  private static final long SINGLE_ELEMENT_ARRAY_BYTES_USED = RamUsageEstimator.sizeOf(new long[1]);
  private static final int MASK_4096 = (1 << 12) - 1;

  private static int blockCount(int length) {
    int blockCount = length >>> 12;
    if ((blockCount << 12) < length) {
      ++blockCount;
    }
    assert (blockCount << 12) >= length;
    return blockCount;
  }

  final long[] indices;
  final long[][] bits;
  final int length;
  int nonZeroLongCount;
  long ramBytesUsed;

  /**
   * Create a {@link SparseFixedBitSet} that can contain bits between <code>0</code> included and
   * <code>length</code> excluded.
   */
  public SparseFixedBitSet(int length) {
    if (length < 1) {
      throw new IllegalArgumentException("length needs to be >= 1");
    }
    this.length = length;
    final int blockCount = blockCount(length);
    indices = new long[blockCount];
    bits = new long[blockCount][];
    ramBytesUsed =
        BASE_RAM_BYTES_USED
            + RamUsageEstimator.shallowSizeOf(indices)
            + RamUsageEstimator.shallowSizeOf(bits);
  }

  @Override
  public int length() {
    return length;
  }

  private boolean consistent(int index) {
    assert index >= 0 && index < length : "index=" + index + ",length=" + length;
    return true;
  }

  @Override
  public int cardinality() {
    int cardinality = 0;
    for (long[] bitArray : bits) {
      if (bitArray != null) {
        for (long bits : bitArray) {
          cardinality += Long.bitCount(bits);
        }
      }
    }
    return cardinality;
  }

  @Override
  public int approximateCardinality() {
    // we are assuming that bits are uniformly set and use the linear counting
    // algorithm to estimate the number of bits that are set based on the number
    // of longs that are different from zero
    final int totalLongs = (length + 63) >>> 6; // total number of longs in the space
    assert totalLongs >= nonZeroLongCount;
    final int zeroLongs = totalLongs - nonZeroLongCount; // number of longs that are zeros
    // No need to guard against division by zero, it will return +Infinity and things will work as
    // expected
    final long estimate = Math.round(totalLongs * Math.log((double) totalLongs / zeroLongs));
    return (int) Math.min(length, estimate);
  }
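  // Worked example (editorial note, not from the original class): for length = 4096
  // there are 64 candidate longs; if nonZeroLongCount = 16 then zeroLongs = 48 and
  // the estimate is round(64 * ln(64 / 48)) = round(18.4) = 18 bits set.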

  @Override
  public boolean get(int i) {
    assert consistent(i);
    final int i4096 = i >>> 12;
    final long index = indices[i4096];
    final int i64 = i >>> 6;
    // first check the index, if the i64-th bit is not set, then i is not set
    // note: this relies on the fact that shifts are mod 64 in java
    if ((index & (1L << i64)) == 0) {
      return false;
    }

    // if it is set, then we count the number of bits that are set on the right
    // of i64, and that gives us the index of the long that stores the bits we
    // are interested in
    final long bits = this.bits[i4096][Long.bitCount(index & ((1L << i64) - 1))];
    return (bits & (1L << i)) != 0;
  }

  private static int oversize(int s) {
    int newSize = s + (s >>> 1);
    if (newSize > 50) {
      newSize = 64;
    }
    return newSize;
  }

  /** Set the bit at index <tt>i</tt>. */
  public void set(int i) {
    assert consistent(i);
    final int i4096 = i >>> 12;
    final long index = indices[i4096];
    final int i64 = i >>> 6;
    if ((index & (1L << i64)) != 0) {
      // in that case the sub 64-bits block we are interested in already exists,
      // we just need to set a bit in an existing long: the number of ones on
      // the right of i64 gives us the index of the long we need to update
      bits[i4096][Long.bitCount(index & ((1L << i64) - 1))] |= 1L << i; // shifts are mod 64 in java
    } else if (index == 0) {
      // if the index is 0, it means that we just found a block of 4096 bits
      // that has no bit that is set yet. So let's initialize a new block:
      insertBlock(i4096, i64, i);
    } else {
      // in that case we found a block of 4096 bits that has some values, but
      // the sub-block of 64 bits that we are interested in has no value yet,
      // so we need to insert a new long
      insertLong(i4096, i64, i, index);
    }
  }

  private void insertBlock(int i4096, int i64, int i) {
    indices[i4096] = 1L << i64; // shifts are mod 64 in java
    assert bits[i4096] == null;
    bits[i4096] = new long[] {1L << i}; // shifts are mod 64 in java
    ++nonZeroLongCount;
    ramBytesUsed += SINGLE_ELEMENT_ARRAY_BYTES_USED;
  }

  private void insertLong(int i4096, int i64, int i, long index) {
    indices[i4096] |= 1L << i64; // shifts are mod 64 in java
    // we count the number of bits that are set on the right of i64
    // this gives us the index at which to perform the insertion
    final int o = Long.bitCount(index & ((1L << i64) - 1));
    final long[] bitArray = bits[i4096];
    if (bitArray[bitArray.length - 1] == 0) {
      // since we only store non-zero longs, if the last value is 0, it means
      // that we already have extra space, so make use of it
      System.arraycopy(bitArray, o, bitArray, o + 1, bitArray.length - o - 1);
      bitArray[o] = 1L << i;
    } else {
      // we don't have extra space so we need to resize to insert the new long
      final int newSize = oversize(bitArray.length + 1);
      final long[] newBitArray = new long[newSize];
      System.arraycopy(bitArray, 0, newBitArray, 0, o);
      newBitArray[o] = 1L << i;
      System.arraycopy(bitArray, o, newBitArray, o + 1, bitArray.length - o);
      bits[i4096] = newBitArray;
      ramBytesUsed += RamUsageEstimator.sizeOf(newBitArray) - RamUsageEstimator.sizeOf(bitArray);
    }
    ++nonZeroLongCount;
  }

  /** Clear the bit at index <tt>i</tt>. */
  public void clear(int i) {
    assert consistent(i);
    final int i4096 = i >>> 12;
    final int i64 = i >>> 6;
    and(i4096, i64, ~(1L << i));
  }

  private void and(int i4096, int i64, long mask) {
    final long index = indices[i4096];
    if ((index & (1L << i64)) != 0) {
      // offset of the long bits we are interested in in the array
      final int o = Long.bitCount(index & ((1L << i64) - 1));
      long bits = this.bits[i4096][o] & mask;
      if (bits == 0) {
        removeLong(i4096, i64, index, o);
      } else {
        this.bits[i4096][o] = bits;
      }
    }
  }

  private void removeLong(int i4096, int i64, long index, int o) {
    index &= ~(1L << i64);
    indices[i4096] = index;
    if (index == 0) {
      // release memory, there is nothing in this block anymore
      this.bits[i4096] = null;
    } else {
      final int length = Long.bitCount(index);
      final long[] bitArray = bits[i4096];
      System.arraycopy(bitArray, o + 1, bitArray, o, length - o);
      bitArray[length] = 0L;
    }
    nonZeroLongCount -= 1;
  }

  @Override
  public void clear(int from, int to) {
    assert from >= 0;
    assert to <= length;
    if (from >= to) {
      return;
    }
    final int firstBlock = from >>> 12;
    final int lastBlock = (to - 1) >>> 12;
    if (firstBlock == lastBlock) {
      clearWithinBlock(firstBlock, from & MASK_4096, (to - 1) & MASK_4096);
    } else {
      clearWithinBlock(firstBlock, from & MASK_4096, MASK_4096);
      for (int i = firstBlock + 1; i < lastBlock; ++i) {
        nonZeroLongCount -= Long.bitCount(indices[i]);
        indices[i] = 0;
        bits[i] = null;
      }
      clearWithinBlock(lastBlock, 0, (to - 1) & MASK_4096);
    }
  }

  // create a long that has bits set to one between from and to, both inclusive
  private static long mask(int from, int to) {
    return ((1L << (to - from) << 1) - 1) << from;
  }

  private void clearWithinBlock(int i4096, int from, int to) {
    int firstLong = from >>> 6;
    int lastLong = to >>> 6;

    if (firstLong == lastLong) {
      and(i4096, firstLong, ~mask(from, to));
    } else {
      assert firstLong < lastLong;
      and(i4096, lastLong, ~mask(0, to));
      for (int i = lastLong - 1; i >= firstLong + 1; --i) {
        and(i4096, i, 0L);
      }
      and(i4096, firstLong, ~mask(from, 63));
    }
  }

  /** Return the first document that occurs on or after the provided block index. */
  private int firstDoc(int i4096) {
    long index = 0;
    while (i4096 < indices.length) {
      index = indices[i4096];
      if (index != 0) {
        final int i64 = Long.numberOfTrailingZeros(index);
        return (i4096 << 12) | (i64 << 6) | Long.numberOfTrailingZeros(bits[i4096][0]);
      }
      i4096 += 1;
    }
    return DocIdSetIterator.NO_MORE_DOCS;
  }

  @Override
  public int nextSetBit(int i) {
    assert i < length;
    final int i4096 = i >>> 12;
    final long index = indices[i4096];
    final long[] bitArray = this.bits[i4096];
    int i64 = i >>> 6;
    int o = Long.bitCount(index & ((1L << i64) - 1));
    if ((index & (1L << i64)) != 0) {
      // There is at least one bit that is set in the current long, check if
      // one of them is after i
      final long bits = bitArray[o] >>> i; // shifts are mod 64
      if (bits != 0) {
        return i + Long.numberOfTrailingZeros(bits);
      }
      o += 1;
    }
    final long indexBits = index >>> i64 >>> 1;
    if (indexBits == 0) {
      // no more bits are set in the current block of 4096 bits, go to the next one
      return firstDoc(i4096 + 1);
    }
    // there are still set bits
    i64 += 1 + Long.numberOfTrailingZeros(indexBits);
    final long bits = bitArray[o];
    return (i64 << 6) | Long.numberOfTrailingZeros(bits);
  }

  /** Return the last document that occurs on or before the provided block index. */
  private int lastDoc(int i4096) {
    long index;
    while (i4096 >= 0) {
      index = indices[i4096];
      if (index != 0) {
        final int i64 = 63 - Long.numberOfLeadingZeros(index);
        final long bits = this.bits[i4096][Long.bitCount(index) - 1];
        return (i4096 << 12) | (i64 << 6) | (63 - Long.numberOfLeadingZeros(bits));
      }
      i4096 -= 1;
    }
    return -1;
  }

  @Override
  public int prevSetBit(int i) {
    assert i >= 0;
    final int i4096 = i >>> 12;
    final long index = indices[i4096];
    final long[] bitArray = this.bits[i4096];
    int i64 = i >>> 6;
    final long indexBits = index & ((1L << i64) - 1);
    final int o = Long.bitCount(indexBits);
    if ((index & (1L << i64)) != 0) {
      // There is at least one bit that is set in the same long, check if there
      // is one bit that is set that is lower than i
      final long bits = bitArray[o] & ((1L << i << 1) - 1);
      if (bits != 0) {
        return (i64 << 6) | (63 - Long.numberOfLeadingZeros(bits));
      }
    }
    if (indexBits == 0) {
      // no more bits are set in this block, go find the last bit in the
      // previous block
      return lastDoc(i4096 - 1);
    }
    // go to the previous long
    i64 = 63 - Long.numberOfLeadingZeros(indexBits);
    final long bits = bitArray[o - 1];
    return (i4096 << 12) | (i64 << 6) | (63 - Long.numberOfLeadingZeros(bits));
  }

  /** Return the long bits at the given <code>i64</code> index. */
  private long longBits(long index, long[] bits, int i64) {
    if ((index & (1L << i64)) == 0) {
      return 0L;
    } else {
      return bits[Long.bitCount(index & ((1L << i64) - 1))];
    }
  }

  private void or(final int i4096, final long index, long[] bits, int nonZeroLongCount) {
    assert Long.bitCount(index) == nonZeroLongCount;
    final long currentIndex = indices[i4096];
    if (currentIndex == 0) {
      // fast path: if we currently have nothing in the block, just copy the data
      // this especially happens all the time if you call OR on an empty set
      indices[i4096] = index;
      this.bits[i4096] = Arrays.copyOf(bits, nonZeroLongCount);
      this.nonZeroLongCount += nonZeroLongCount;
      return;
    }
    final long[] currentBits = this.bits[i4096];
    final long[] newBits;
    final long newIndex = currentIndex | index;
    final int requiredCapacity = Long.bitCount(newIndex);
    if (currentBits.length >= requiredCapacity) {
      newBits = currentBits;
    } else {
      newBits = new long[oversize(requiredCapacity)];
    }
    // we iterate backwards in order to not override data we might need on the next iteration if the
    // array is reused
    for (int i = Long.numberOfLeadingZeros(newIndex), newO = Long.bitCount(newIndex) - 1;
        i < 64;
        i += 1 + Long.numberOfLeadingZeros(newIndex << (i + 1)), newO -= 1) {
      // bitIndex is the index of a bit which is set in newIndex and newO is the number of 1 bits on
      // its right
      final int bitIndex = 63 - i;
      assert newO == Long.bitCount(newIndex & ((1L << bitIndex) - 1));
      newBits[newO] =
          longBits(currentIndex, currentBits, bitIndex) | longBits(index, bits, bitIndex);
    }
    indices[i4096] = newIndex;
    this.bits[i4096] = newBits;
    this.nonZeroLongCount += nonZeroLongCount - Long.bitCount(currentIndex & index);
  }

  private void or(SparseFixedBitSet other) {
    for (int i = 0; i < other.indices.length; ++i) {
      final long index = other.indices[i];
      if (index != 0) {
        or(i, index, other.bits[i], Long.bitCount(index));
      }
    }
  }

  /** {@link #or(DocIdSetIterator)} impl that works best when <code>it</code> is dense */
  private void orDense(DocIdSetIterator it) throws IOException {
    assertUnpositioned(it);
    // The goal here is to try to take advantage of the ordering of documents
    // to build the data-structure more efficiently
    // NOTE: this heavily relies on the fact that shifts are mod 64
    final int firstDoc = it.nextDoc();
    if (firstDoc == DocIdSetIterator.NO_MORE_DOCS) {
      return;
    }
    int i4096 = firstDoc >>> 12;
    int i64 = firstDoc >>> 6;
    long index = 1L << i64;
    long currentLong = 1L << firstDoc;
    // we store at most 64 longs per block so preallocate in order never to have to resize
    long[] longs = new long[64];
    int numLongs = 0;

    for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
      final int doc64 = doc >>> 6;
      if (doc64 == i64) {
        // still in the same long, just set the bit
        currentLong |= 1L << doc;
      } else {
        longs[numLongs++] = currentLong;

        final int doc4096 = doc >>> 12;
        if (doc4096 == i4096) {
          index |= 1L << doc64;
        } else {
          // we are on a new block, flush what we buffered
          or(i4096, index, longs, numLongs);
          // and reset state for the new block
          i4096 = doc4096;
          index = 1L << doc64;
          numLongs = 0;
        }

        // we are on a new long, reset state
        i64 = doc64;
        currentLong = 1L << doc;
      }
    }

    // flush
    longs[numLongs++] = currentLong;
    or(i4096, index, longs, numLongs);
  }

  @Override
  public void or(DocIdSetIterator it) throws IOException {
    {
      // specialize union with another SparseFixedBitSet
      final SparseFixedBitSet other = BitSetIterator.getSparseFixedBitSetOrNull(it);
      if (other != null) {
        assertUnpositioned(it);
        or(other);
        return;
      }
    }

    // We do not specialize the union with a FixedBitSet since FixedBitSets are
    // supposed to be used for dense data and sparse fixed bit sets for sparse
    // data, so a sparse set would likely get upgraded by DocIdSetBuilder before
    // being or'ed with a FixedBitSet

    if (it.cost() < indices.length) {
      // the default impl is good for sparse iterators
      super.or(it);
    } else {
      orDense(it);
    }
  }

  @Override
  public long ramBytesUsed() {
    return ramBytesUsed;
  }

  @Override
  public String toString() {
    return "SparseFixedBitSet(size=" + length + ",cardinality=~" + approximateCardinality();
  }
}
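The set/clear/nextSetBit machinery above is easiest to follow with a tiny driver. Below is a minimal sketch, not part of the original example, assuming the standard org.apache.lucene.util.SparseFixedBitSet API shown in this class (set, clear, nextSetBit, prevSetBit, approximateCardinality, ramBytesUsed); the document ids are made up.

import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.SparseFixedBitSet;

public class SparseFixedBitSetDemo {
  public static void main(String[] args) {
    // a sparse set over ~1M documents with only a handful of bits set
    SparseFixedBitSet bits = new SparseFixedBitSet(1 << 20);
    bits.set(3);
    bits.set(70_000);
    bits.set(70_001);
    // clear one bit again; when a long becomes empty it is removed from its 4096-bit block
    bits.clear(70_001);

    // walk the set bits with nextSetBit; NO_MORE_DOCS marks the end
    int doc = bits.nextSetBit(0);
    while (doc != DocIdSetIterator.NO_MORE_DOCS) {
      System.out.println("set bit: " + doc);
      doc = doc + 1 < bits.length() ? bits.nextSetBit(doc + 1) : DocIdSetIterator.NO_MORE_DOCS;
    }

    System.out.println("prevSetBit(100000) = " + bits.prevSetBit(100_000));
    System.out.println("~cardinality = " + bits.approximateCardinality());
    System.out.println("ramBytesUsed = " + bits.ramBytesUsed());
  }
}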
 private NumericDocValues loadNumeric(FieldInfo field) throws IOException {
   NumericEntry entry = numerics.get(field.name);
   data.seek(entry.offset);
   switch (entry.format) {
     case TABLE_COMPRESSED:
       int size = data.readVInt();
       if (size > 256) {
         throw new CorruptIndexException(
             "TABLE_COMPRESSED cannot have more than 256 distinct values, got=" + size, data);
       }
       final long decode[] = new long[size];
       for (int i = 0; i < decode.length; i++) {
         decode[i] = data.readLong();
       }
       final int formatID = data.readVInt();
       final int bitsPerValue = data.readVInt();
       final PackedInts.Reader ordsReader =
           PackedInts.getReaderNoHeader(
               data,
               PackedInts.Format.byId(formatID),
               entry.packedIntsVersion,
               maxDoc,
               bitsPerValue);
       if (!merging) {
         ramBytesUsed.addAndGet(RamUsageEstimator.sizeOf(decode) + ordsReader.ramBytesUsed());
         numericInfo.put(field.name, ordsReader);
       }
       return new NumericDocValues() {
         @Override
         public long get(int docID) {
           return decode[(int) ordsReader.get(docID)];
         }
       };
     case DELTA_COMPRESSED:
       final int blockSize = data.readVInt();
       final BlockPackedReader reader =
           new BlockPackedReader(data, entry.packedIntsVersion, blockSize, maxDoc, false);
       if (!merging) {
         ramBytesUsed.addAndGet(reader.ramBytesUsed());
         numericInfo.put(field.name, reader);
       }
       return reader;
     case UNCOMPRESSED:
       final byte bytes[] = new byte[maxDoc];
       data.readBytes(bytes, 0, bytes.length);
       if (!merging) {
         ramBytesUsed.addAndGet(RamUsageEstimator.sizeOf(bytes));
         numericInfo.put(field.name, Accountables.namedAccountable("byte array", maxDoc));
       }
       return new NumericDocValues() {
         @Override
         public long get(int docID) {
           return bytes[docID];
         }
       };
     case GCD_COMPRESSED:
       final long min = data.readLong();
       final long mult = data.readLong();
       final int quotientBlockSize = data.readVInt();
       final BlockPackedReader quotientReader =
           new BlockPackedReader(data, entry.packedIntsVersion, quotientBlockSize, maxDoc, false);
       if (!merging) {
         ramBytesUsed.addAndGet(quotientReader.ramBytesUsed());
         numericInfo.put(field.name, quotientReader);
       }
       return new NumericDocValues() {
         @Override
         public long get(int docID) {
           return min + mult * quotientReader.get(docID);
         }
       };
     default:
       throw new AssertionError();
   }
 }
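As a quick illustration of the GCD_COMPRESSED branch above: only the quotients are stored on disk, and every value is rebuilt as min + mult * quotient. A minimal standalone sketch with made-up header values (plain Java, no Lucene types):

public class GcdDecodeDemo {
  public static void main(String[] args) {
    // hypothetical header written by the producer: all values have the form min + mult * q
    long min = 1_000L;
    long mult = 500L;
    // hypothetical packed quotients, one per document (what the BlockPackedReader would return)
    long[] quotients = {0, 1, 2, 7};

    for (int docID = 0; docID < quotients.length; docID++) {
      long value = min + mult * quotients[docID]; // same formula as the GCD_COMPRESSED case
      System.out.println("doc " + docID + " -> " + value);
    }
  }
}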
Example n. 19
/**
 * FST-based terms dictionary reader.
 *
 * <p>The FST directly maps each term to its metadata; the whole dictionary is memory resident.
 *
 * @lucene.experimental
 */
public class FSTTermsReader extends FieldsProducer {
  final TreeMap<String, TermsReader> fields = new TreeMap<>();
  final PostingsReaderBase postingsReader;
  // static boolean TEST = false;
  final int version;

  public FSTTermsReader(SegmentReadState state, PostingsReaderBase postingsReader)
      throws IOException {
    final String termsFileName =
        IndexFileNames.segmentFileName(
            state.segmentInfo.name, state.segmentSuffix, FSTTermsWriter.TERMS_EXTENSION);

    this.postingsReader = postingsReader;
    final IndexInput in = state.directory.openInput(termsFileName, state.context);

    boolean success = false;
    try {
      version = readHeader(in);
      if (version >= FSTTermsWriter.TERMS_VERSION_CHECKSUM) {
        CodecUtil.checksumEntireFile(in);
      }
      this.postingsReader.init(in);
      seekDir(in);

      final FieldInfos fieldInfos = state.fieldInfos;
      final int numFields = in.readVInt();
      for (int i = 0; i < numFields; i++) {
        int fieldNumber = in.readVInt();
        FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldNumber);
        long numTerms = in.readVLong();
        long sumTotalTermFreq =
            fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY ? -1 : in.readVLong();
        long sumDocFreq = in.readVLong();
        int docCount = in.readVInt();
        int longsSize = in.readVInt();
        TermsReader current =
            new TermsReader(
                fieldInfo, in, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize);
        TermsReader previous = fields.put(fieldInfo.name, current);
        checkFieldSummary(state.segmentInfo, in, current, previous);
      }
      success = true;
    } finally {
      if (success) {
        IOUtils.close(in);
      } else {
        IOUtils.closeWhileHandlingException(in);
      }
    }
  }

  private int readHeader(IndexInput in) throws IOException {
    return CodecUtil.checkHeader(
        in,
        FSTTermsWriter.TERMS_CODEC_NAME,
        FSTTermsWriter.TERMS_VERSION_START,
        FSTTermsWriter.TERMS_VERSION_CURRENT);
  }

  private void seekDir(IndexInput in) throws IOException {
    if (version >= FSTTermsWriter.TERMS_VERSION_CHECKSUM) {
      in.seek(in.length() - CodecUtil.footerLength() - 8);
    } else {
      in.seek(in.length() - 8);
    }
    in.seek(in.readLong());
  }

  private void checkFieldSummary(
      SegmentInfo info, IndexInput in, TermsReader field, TermsReader previous) throws IOException {
    // #docs with field must be <= #docs
    if (field.docCount < 0 || field.docCount > info.getDocCount()) {
      throw new CorruptIndexException(
          "invalid docCount: "
              + field.docCount
              + " maxDoc: "
              + info.getDocCount()
              + " (resource="
              + in
              + ")");
    }
    // #postings must be >= #docs with field
    if (field.sumDocFreq < field.docCount) {
      throw new CorruptIndexException(
          "invalid sumDocFreq: "
              + field.sumDocFreq
              + " docCount: "
              + field.docCount
              + " (resource="
              + in
              + ")");
    }
    // #positions must be >= #postings
    if (field.sumTotalTermFreq != -1 && field.sumTotalTermFreq < field.sumDocFreq) {
      throw new CorruptIndexException(
          "invalid sumTotalTermFreq: "
              + field.sumTotalTermFreq
              + " sumDocFreq: "
              + field.sumDocFreq
              + " (resource="
              + in
              + ")");
    }
    if (previous != null) {
      throw new CorruptIndexException(
          "duplicate fields: " + field.fieldInfo.name + " (resource=" + in + ")");
    }
  }

  @Override
  public Iterator<String> iterator() {
    return Collections.unmodifiableSet(fields.keySet()).iterator();
  }

  @Override
  public Terms terms(String field) throws IOException {
    assert field != null;
    return fields.get(field);
  }

  @Override
  public int size() {
    return fields.size();
  }

  @Override
  public void close() throws IOException {
    try {
      IOUtils.close(postingsReader);
    } finally {
      fields.clear();
    }
  }

  private static final long BASE_RAM_BYTES_USED =
      RamUsageEstimator.shallowSizeOfInstance(TermsReader.class);

  final class TermsReader extends Terms implements Accountable {

    final FieldInfo fieldInfo;
    final long numTerms;
    final long sumTotalTermFreq;
    final long sumDocFreq;
    final int docCount;
    final int longsSize;
    final FST<FSTTermOutputs.TermData> dict;

    TermsReader(
        FieldInfo fieldInfo,
        IndexInput in,
        long numTerms,
        long sumTotalTermFreq,
        long sumDocFreq,
        int docCount,
        int longsSize)
        throws IOException {
      this.fieldInfo = fieldInfo;
      this.numTerms = numTerms;
      this.sumTotalTermFreq = sumTotalTermFreq;
      this.sumDocFreq = sumDocFreq;
      this.docCount = docCount;
      this.longsSize = longsSize;
      this.dict = new FST<>(in, new FSTTermOutputs(fieldInfo, longsSize));
    }

    @Override
    public Comparator<BytesRef> getComparator() {
      return BytesRef.getUTF8SortedAsUnicodeComparator();
    }

    @Override
    public long ramBytesUsed() {
      return BASE_RAM_BYTES_USED + dict.ramBytesUsed();
    }

    @Override
    public boolean hasFreqs() {
      return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
    }

    @Override
    public boolean hasOffsets() {
      return fieldInfo
              .getIndexOptions()
              .compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
          >= 0;
    }

    @Override
    public boolean hasPositions() {
      return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
    }

    @Override
    public boolean hasPayloads() {
      return fieldInfo.hasPayloads();
    }

    @Override
    public long size() {
      return numTerms;
    }

    @Override
    public long getSumTotalTermFreq() {
      return sumTotalTermFreq;
    }

    @Override
    public long getSumDocFreq() throws IOException {
      return sumDocFreq;
    }

    @Override
    public int getDocCount() throws IOException {
      return docCount;
    }

    @Override
    public TermsEnum iterator(TermsEnum reuse) throws IOException {
      return new SegmentTermsEnum();
    }

    @Override
    public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
      return new IntersectTermsEnum(compiled, startTerm);
    }

    // Only wraps common operations for PBF interaction
    abstract class BaseTermsEnum extends TermsEnum {
      /* Current term, null when enum ends or unpositioned */
      BytesRef term;

      /* Current term stats + decoded metadata (customized by PBF) */
      final BlockTermState state;

      /* Current term stats + undecoded metadata (long[] & byte[]) */
      FSTTermOutputs.TermData meta;
      ByteArrayDataInput bytesReader;

      /** Decodes metadata into customized term state */
      abstract void decodeMetaData() throws IOException;

      BaseTermsEnum() throws IOException {
        this.state = postingsReader.newTermState();
        this.bytesReader = new ByteArrayDataInput();
        this.term = null;
        // NOTE: metadata will only be initialized in child class
      }

      @Override
      public TermState termState() throws IOException {
        decodeMetaData();
        return state.clone();
      }

      @Override
      public BytesRef term() {
        return term;
      }

      @Override
      public int docFreq() throws IOException {
        return state.docFreq;
      }

      @Override
      public long totalTermFreq() throws IOException {
        return state.totalTermFreq;
      }

      @Override
      public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
        decodeMetaData();
        return postingsReader.docs(fieldInfo, state, liveDocs, reuse, flags);
      }

      @Override
      public DocsAndPositionsEnum docsAndPositions(
          Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
        if (!hasPositions()) {
          return null;
        }
        decodeMetaData();
        return postingsReader.docsAndPositions(fieldInfo, state, liveDocs, reuse, flags);
      }

      @Override
      public void seekExact(long ord) throws IOException {
        throw new UnsupportedOperationException();
      }

      @Override
      public long ord() {
        throw new UnsupportedOperationException();
      }
    }

    // Iterates through all terms in this field
    private final class SegmentTermsEnum extends BaseTermsEnum {
      final BytesRefFSTEnum<FSTTermOutputs.TermData> fstEnum;

      /* True when current term's metadata is decoded */
      boolean decoded;

      /* True when current enum is 'positioned' by seekExact(TermState) */
      boolean seekPending;

      SegmentTermsEnum() throws IOException {
        super();
        this.fstEnum = new BytesRefFSTEnum<>(dict);
        this.decoded = false;
        this.seekPending = false;
        this.meta = null;
      }

      @Override
      public Comparator<BytesRef> getComparator() {
        return BytesRef.getUTF8SortedAsUnicodeComparator();
      }

      // Let PBF decode metadata from long[] and byte[]
      @Override
      void decodeMetaData() throws IOException {
        if (!decoded && !seekPending) {
          if (meta.bytes != null) {
            bytesReader.reset(meta.bytes, 0, meta.bytes.length);
          }
          postingsReader.decodeTerm(meta.longs, bytesReader, fieldInfo, state, true);
          decoded = true;
        }
      }

      // Update current enum according to FSTEnum
      void updateEnum(final InputOutput<FSTTermOutputs.TermData> pair) {
        if (pair == null) {
          term = null;
        } else {
          term = pair.input;
          meta = pair.output;
          state.docFreq = meta.docFreq;
          state.totalTermFreq = meta.totalTermFreq;
        }
        decoded = false;
        seekPending = false;
      }

      @Override
      public BytesRef next() throws IOException {
        if (seekPending) { // previously positioned, but termOutputs not fetched
          seekPending = false;
          SeekStatus status = seekCeil(term);
          assert status == SeekStatus.FOUND; // must be positioned on a valid term
        }
        updateEnum(fstEnum.next());
        return term;
      }

      @Override
      public boolean seekExact(BytesRef target) throws IOException {
        updateEnum(fstEnum.seekExact(target));
        return term != null;
      }

      @Override
      public SeekStatus seekCeil(BytesRef target) throws IOException {
        updateEnum(fstEnum.seekCeil(target));
        if (term == null) {
          return SeekStatus.END;
        } else {
          return term.equals(target) ? SeekStatus.FOUND : SeekStatus.NOT_FOUND;
        }
      }

      @Override
      public void seekExact(BytesRef target, TermState otherState) {
        if (!target.equals(term)) {
          state.copyFrom(otherState);
          term = BytesRef.deepCopyOf(target);
          seekPending = true;
        }
      }
    }

    // Iterates over the intersection with the automaton (cannot seek!)
    private final class IntersectTermsEnum extends BaseTermsEnum {
      /* True when current term's metadata is decoded */
      boolean decoded;

      /* True when there is pending term when calling next() */
      boolean pending;

      /* stack to record how current term is constructed,
       * used to accumulate metadata or rewind term:
       *   level == term.length + 1,
       *         == 0 when term is null */
      Frame[] stack;
      int level;

      /* to which level the metadata is accumulated
       * so that we can accumulate metadata lazily */
      int metaUpto;

      /* term dict fst */
      final FST<FSTTermOutputs.TermData> fst;
      final FST.BytesReader fstReader;
      final Outputs<FSTTermOutputs.TermData> fstOutputs;

      /* query automaton to intersect with */
      final ByteRunAutomaton fsa;

      private final class Frame {
        /* fst stats */
        FST.Arc<FSTTermOutputs.TermData> fstArc;

        /* automaton stats */
        int fsaState;

        Frame() {
          this.fstArc = new FST.Arc<>();
          this.fsaState = -1;
        }

        public String toString() {
          return "arc=" + fstArc + " state=" + fsaState;
        }
      }

      IntersectTermsEnum(CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
        super();
        // if (TEST) System.out.println("Enum init, startTerm=" + startTerm);
        this.fst = dict;
        this.fstReader = fst.getBytesReader();
        this.fstOutputs = dict.outputs;
        this.fsa = compiled.runAutomaton;
        this.level = -1;
        this.stack = new Frame[16];
        for (int i = 0; i < stack.length; i++) {
          this.stack[i] = new Frame();
        }

        Frame frame;
        frame = loadVirtualFrame(newFrame());
        this.level++;
        frame = loadFirstFrame(newFrame());
        pushFrame(frame);

        this.meta = null;
        this.metaUpto = 1;
        this.decoded = false;
        this.pending = false;

        if (startTerm == null) {
          pending = isAccept(topFrame());
        } else {
          doSeekCeil(startTerm);
          pending = !startTerm.equals(term) && isValid(topFrame()) && isAccept(topFrame());
        }
      }

      @Override
      public Comparator<BytesRef> getComparator() {
        return BytesRef.getUTF8SortedAsUnicodeComparator();
      }

      @Override
      void decodeMetaData() throws IOException {
        assert term != null;
        if (!decoded) {
          if (meta.bytes != null) {
            bytesReader.reset(meta.bytes, 0, meta.bytes.length);
          }
          postingsReader.decodeTerm(meta.longs, bytesReader, fieldInfo, state, true);
          decoded = true;
        }
      }

      /** Lazily accumulate metadata when we reach an accepted term */
      void loadMetaData() throws IOException {
        FST.Arc<FSTTermOutputs.TermData> last, next;
        last = stack[metaUpto].fstArc;
        while (metaUpto != level) {
          metaUpto++;
          next = stack[metaUpto].fstArc;
          next.output = fstOutputs.add(next.output, last.output);
          last = next;
        }
        if (last.isFinal()) {
          meta = fstOutputs.add(last.output, last.nextFinalOutput);
        } else {
          meta = last.output;
        }
        state.docFreq = meta.docFreq;
        state.totalTermFreq = meta.totalTermFreq;
      }

      @Override
      public SeekStatus seekCeil(BytesRef target) throws IOException {
        decoded = false;
        term = doSeekCeil(target);
        loadMetaData();
        if (term == null) {
          return SeekStatus.END;
        } else {
          return term.equals(target) ? SeekStatus.FOUND : SeekStatus.NOT_FOUND;
        }
      }

      @Override
      public BytesRef next() throws IOException {
        // if (TEST) System.out.println("Enum next()");
        if (pending) {
          pending = false;
          loadMetaData();
          return term;
        }
        decoded = false;
        DFS:
        while (level > 0) {
          Frame frame = newFrame();
          if (loadExpandFrame(topFrame(), frame) != null) { // has valid target
            pushFrame(frame);
            if (isAccept(frame)) { // gotcha
              break;
            }
            continue; // check next target
          }
          frame = popFrame();
          while (level > 0) {
            if (loadNextFrame(topFrame(), frame) != null) { // has valid sibling
              pushFrame(frame);
              if (isAccept(frame)) { // gotcha
                break DFS;
              }
              continue DFS; // check next target
            }
            frame = popFrame();
          }
          return null;
        }
        loadMetaData();
        return term;
      }

      private BytesRef doSeekCeil(BytesRef target) throws IOException {
        // if (TEST) System.out.println("Enum doSeekCeil()");
        Frame frame = null;
        int label, upto = 0, limit = target.length;
        while (upto < limit) { // to target prefix, or ceil label (rewind prefix)
          frame = newFrame();
          label = target.bytes[upto] & 0xff;
          frame = loadCeilFrame(label, topFrame(), frame);
          if (frame == null || frame.fstArc.label != label) {
            break;
          }
          assert isValid(frame); // target must be fetched from automaton
          pushFrame(frame);
          upto++;
        }
        if (upto == limit) { // got target
          return term;
        }
        if (frame != null) { // got larger term('s prefix)
          pushFrame(frame);
          return isAccept(frame) ? term : next();
        }
        while (level > 0) { // got target's prefix, advance to larger term
          frame = popFrame();
          while (level > 0 && !canRewind(frame)) {
            frame = popFrame();
          }
          if (loadNextFrame(topFrame(), frame) != null) {
            pushFrame(frame);
            return isAccept(frame) ? term : next();
          }
        }
        return null;
      }

      /** Virtual frame, never popped */
      Frame loadVirtualFrame(Frame frame) throws IOException {
        frame.fstArc.output = fstOutputs.getNoOutput();
        frame.fstArc.nextFinalOutput = fstOutputs.getNoOutput();
        frame.fsaState = -1;
        return frame;
      }

      /** Load frame for start arc(node) on fst */
      Frame loadFirstFrame(Frame frame) throws IOException {
        frame.fstArc = fst.getFirstArc(frame.fstArc);
        frame.fsaState = fsa.getInitialState();
        return frame;
      }

      /** Load frame for target arc(node) on fst */
      Frame loadExpandFrame(Frame top, Frame frame) throws IOException {
        if (!canGrow(top)) {
          return null;
        }
        frame.fstArc = fst.readFirstRealTargetArc(top.fstArc.target, frame.fstArc, fstReader);
        frame.fsaState = fsa.step(top.fsaState, frame.fstArc.label);
        // if (TEST) System.out.println(" loadExpand frame="+frame);
        if (frame.fsaState == -1) {
          return loadNextFrame(top, frame);
        }
        return frame;
      }

      /** Load frame for sibling arc(node) on fst */
      Frame loadNextFrame(Frame top, Frame frame) throws IOException {
        if (!canRewind(frame)) {
          return null;
        }
        while (!frame.fstArc.isLast()) {
          frame.fstArc = fst.readNextRealArc(frame.fstArc, fstReader);
          frame.fsaState = fsa.step(top.fsaState, frame.fstArc.label);
          if (frame.fsaState != -1) {
            break;
          }
        }
        // if (TEST) System.out.println(" loadNext frame="+frame);
        if (frame.fsaState == -1) {
          return null;
        }
        return frame;
      }

      /**
       * Load frame for target arc(node) on fst, so that arc.label >= label and
       * !fsa.reject(arc.label)
       */
      Frame loadCeilFrame(int label, Frame top, Frame frame) throws IOException {
        FST.Arc<FSTTermOutputs.TermData> arc = frame.fstArc;
        arc = Util.readCeilArc(label, fst, top.fstArc, arc, fstReader);
        if (arc == null) {
          return null;
        }
        frame.fsaState = fsa.step(top.fsaState, arc.label);
        // if (TEST) System.out.println(" loadCeil frame="+frame);
        if (frame.fsaState == -1) {
          return loadNextFrame(top, frame);
        }
        return frame;
      }

      boolean isAccept(Frame frame) { // reached a term that both fst & fsa accept
        return fsa.isAccept(frame.fsaState) && frame.fstArc.isFinal();
      }

      boolean isValid(Frame frame) { // reach a prefix both fst&fsa won't reject
        return /*frame != null &&*/ frame.fsaState != -1;
      }

      boolean canGrow(Frame frame) { // can walk forward on both fst&fsa
        return frame.fsaState != -1 && FST.targetHasArcs(frame.fstArc);
      }

      boolean canRewind(Frame frame) { // can jump to sibling
        return !frame.fstArc.isLast();
      }

      void pushFrame(Frame frame) {
        term = grow(frame.fstArc.label);
        level++;
        // if (TEST) System.out.println("  term=" + term + " level=" + level);
      }

      Frame popFrame() {
        term = shrink();
        level--;
        metaUpto = metaUpto > level ? level : metaUpto;
        // if (TEST) System.out.println("  term=" + term + " level=" + level);
        return stack[level + 1];
      }

      Frame newFrame() {
        if (level + 1 == stack.length) {
          final Frame[] temp =
              new Frame[ArrayUtil.oversize(level + 2, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
          System.arraycopy(stack, 0, temp, 0, stack.length);
          for (int i = stack.length; i < temp.length; i++) {
            temp[i] = new Frame();
          }
          stack = temp;
        }
        return stack[level + 1];
      }

      Frame topFrame() {
        return stack[level];
      }

      BytesRef grow(int label) {
        if (term == null) {
          term = new BytesRef(new byte[16], 0, 0);
        } else {
          if (term.length == term.bytes.length) {
            term.grow(term.length + 1);
          }
          term.bytes[term.length++] = (byte) label;
        }
        return term;
      }

      BytesRef shrink() {
        if (term.length == 0) {
          term = null;
        } else {
          term.length--;
        }
        return term;
      }
    }
  }

  static <T> void walk(FST<T> fst) throws IOException {
    final ArrayList<FST.Arc<T>> queue = new ArrayList<>();
    final BitSet seen = new BitSet();
    final FST.BytesReader reader = fst.getBytesReader();
    final FST.Arc<T> startArc = fst.getFirstArc(new FST.Arc<T>());
    queue.add(startArc);
    while (!queue.isEmpty()) {
      final FST.Arc<T> arc = queue.remove(0);
      final long node = arc.target;
      // System.out.println(arc);
      if (FST.targetHasArcs(arc) && !seen.get((int) node)) {
        seen.set((int) node);
        fst.readFirstRealTargetArc(node, arc, reader);
        while (true) {
          queue.add(new FST.Arc<T>().copyFrom(arc));
          if (arc.isLast()) {
            break;
          } else {
            fst.readNextRealArc(arc, reader);
          }
        }
      }
    }
  }

  @Override
  public long ramBytesUsed() {
    long ramBytesUsed = postingsReader.ramBytesUsed();
    for (TermsReader r : fields.values()) {
      ramBytesUsed += r.ramBytesUsed();
    }
    return ramBytesUsed;
  }

  @Override
  public void checkIntegrity() throws IOException {
    postingsReader.checkIntegrity();
  }
}
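For context, a consumer of a terms dictionary such as the FSTTermsReader above usually goes through the generic Fields/Terms/TermsEnum API rather than the reader directly. The sketch below is illustrative only; it assumes the same pre-5.0 Lucene API used in this example (Terms.iterator(TermsEnum) with a reuse argument, DocsEnum) and a Fields instance obtained elsewhere.

import java.io.IOException;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BytesRef;

public class TermsDictionaryWalker {
  /** Prints every term of the given field together with its document frequency. */
  static void dumpField(Fields fields, String field) throws IOException {
    Terms terms = fields.terms(field);
    if (terms == null) {
      return; // field is not indexed
    }
    TermsEnum termsEnum = terms.iterator(null); // pre-5.0 signature, no enum to reuse
    for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) {
      DocsEnum docs = termsEnum.docs(null, null); // all live docs, no reuse
      int seen = 0;
      for (int doc = docs.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = docs.nextDoc()) {
        seen++;
      }
      System.out.println(term.utf8ToString() + " docFreq=" + termsEnum.docFreq() + " seen=" + seen);
    }
  }
}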
Example n. 20
/**
 * {@link DocIdSet} implementation inspired from http://roaringbitmap.org/
 *
 * <p>The space is divided into blocks of 2^16 bits and each block is encoded independently. In each
 * block, if less than 2^12 bits are set, then documents are simply stored in a short[]. If more
 * than 2^16-2^12 bits are set, then the inverse of the set is encoded in a simple short[].
 * Otherwise a {@link FixedBitSet} is used.
 *
 * @lucene.internal
 */
public class RoaringDocIdSet extends DocIdSet {

  // Number of documents in a block
  private static final int BLOCK_SIZE = 1 << 16;
  // The maximum length for an array, beyond that point we switch to a bitset
  private static final int MAX_ARRAY_LENGTH = 1 << 12;
  private static final long BASE_RAM_BYTES_USED =
      RamUsageEstimator.shallowSizeOfInstance(RoaringDocIdSet.class);

  /** A builder of {@link RoaringDocIdSet}s. */
  public static class Builder {

    private final int maxDoc;
    private final DocIdSet[] sets;

    private int cardinality;
    private int lastDocId;
    private int currentBlock;
    private int currentBlockCardinality;

    // We start by filling the buffer and when it's full we copy the content of
    // the buffer to the FixedBitSet and put further documents in that bitset
    private final short[] buffer;
    private FixedBitSet denseBuffer;

    /** Sole constructor. */
    public Builder(int maxDoc) {
      this.maxDoc = maxDoc;
      sets = new DocIdSet[(maxDoc + (1 << 16) - 1) >>> 16];
      lastDocId = -1;
      currentBlock = -1;
      buffer = new short[MAX_ARRAY_LENGTH];
    }

    private void flush() {
      assert currentBlockCardinality <= BLOCK_SIZE;
      if (currentBlockCardinality <= MAX_ARRAY_LENGTH) {
        // Use sparse encoding
        assert denseBuffer == null;
        if (currentBlockCardinality > 0) {
          sets[currentBlock] =
              new ShortArrayDocIdSet(Arrays.copyOf(buffer, currentBlockCardinality));
        }
      } else {
        assert denseBuffer != null;
        assert denseBuffer.cardinality() == currentBlockCardinality;
        if (denseBuffer.length() == BLOCK_SIZE
            && BLOCK_SIZE - currentBlockCardinality < MAX_ARRAY_LENGTH) {
          // Doc ids are very dense, inverse the encoding
          final short[] excludedDocs = new short[BLOCK_SIZE - currentBlockCardinality];
          denseBuffer.flip(0, denseBuffer.length());
          int excludedDoc = -1;
          for (int i = 0; i < excludedDocs.length; ++i) {
            excludedDoc = denseBuffer.nextSetBit(excludedDoc + 1);
            assert excludedDoc != DocIdSetIterator.NO_MORE_DOCS;
            excludedDocs[i] = (short) excludedDoc;
          }
          assert excludedDoc + 1 == denseBuffer.length()
              || denseBuffer.nextSetBit(excludedDoc + 1) == DocIdSetIterator.NO_MORE_DOCS;
          sets[currentBlock] = new NotDocIdSet(BLOCK_SIZE, new ShortArrayDocIdSet(excludedDocs));
        } else {
          // Neither sparse nor super dense, use a fixed bit set
          sets[currentBlock] = new BitDocIdSet(denseBuffer, currentBlockCardinality);
        }
        denseBuffer = null;
      }

      cardinality += currentBlockCardinality;
      denseBuffer = null;
      currentBlockCardinality = 0;
    }

    /** Add a new doc-id to this builder. NOTE: doc ids must be added in order. */
    public Builder add(int docId) {
      if (docId <= lastDocId) {
        throw new IllegalArgumentException(
            "Doc ids must be added in-order, got " + docId + " which is <= lastDocID=" + lastDocId);
      }
      final int block = docId >>> 16;
      if (block != currentBlock) {
        // we went to a different block, let's flush what we buffered and start from fresh
        flush();
        currentBlock = block;
      }

      if (currentBlockCardinality < MAX_ARRAY_LENGTH) {
        buffer[currentBlockCardinality] = (short) docId;
      } else {
        if (denseBuffer == null) {
          // the buffer is full, let's move to a fixed bit set
          final int numBits = Math.min(1 << 16, maxDoc - (block << 16));
          denseBuffer = new FixedBitSet(numBits);
          for (short doc : buffer) {
            denseBuffer.set(doc & 0xFFFF);
          }
        }
        denseBuffer.set(docId & 0xFFFF);
      }

      lastDocId = docId;
      currentBlockCardinality += 1;
      return this;
    }

    /** Add the content of the provided {@link DocIdSetIterator}. */
    public Builder add(DocIdSetIterator disi) throws IOException {
      for (int doc = disi.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = disi.nextDoc()) {
        add(doc);
      }
      return this;
    }

    /** Build an instance. */
    public RoaringDocIdSet build() {
      flush();
      return new RoaringDocIdSet(sets, cardinality);
    }
  }

  /** {@link DocIdSet} implementation that can store documents up to 2^16-1 in a short[]. */
  private static class ShortArrayDocIdSet extends DocIdSet {

    private static final long BASE_RAM_BYTES_USED =
        RamUsageEstimator.shallowSizeOfInstance(ShortArrayDocIdSet.class);

    private final short[] docIDs;

    private ShortArrayDocIdSet(short[] docIDs) {
      this.docIDs = docIDs;
    }

    @Override
    public long ramBytesUsed() {
      return BASE_RAM_BYTES_USED + RamUsageEstimator.sizeOf(docIDs);
    }

    @Override
    public DocIdSetIterator iterator() throws IOException {
      return new DocIdSetIterator() {

        int i = -1; // this is the index of the current document in the array
        int doc = -1;

        private int docId(int i) {
          return docIDs[i] & 0xFFFF;
        }

        @Override
        public int nextDoc() throws IOException {
          if (++i >= docIDs.length) {
            return doc = NO_MORE_DOCS;
          }
          return doc = docId(i);
        }

        @Override
        public int docID() {
          return doc;
        }

        @Override
        public long cost() {
          return docIDs.length;
        }

        @Override
        public int advance(int target) throws IOException {
          // binary search
          int lo = i + 1;
          int hi = docIDs.length - 1;
          while (lo <= hi) {
            final int mid = (lo + hi) >>> 1;
            final int midDoc = docId(mid);
            if (midDoc < target) {
              lo = mid + 1;
            } else {
              hi = mid - 1;
            }
          }
          if (lo == docIDs.length) {
            i = docIDs.length;
            return doc = NO_MORE_DOCS;
          } else {
            i = lo;
            return doc = docId(i);
          }
        }
      };
    }
  }

  private final DocIdSet[] docIdSets;
  private final int cardinality;
  private final long ramBytesUsed;

  private RoaringDocIdSet(DocIdSet[] docIdSets, int cardinality) {
    this.docIdSets = docIdSets;
    long ramBytesUsed = BASE_RAM_BYTES_USED + RamUsageEstimator.shallowSizeOf(docIdSets);
    for (DocIdSet set : this.docIdSets) {
      if (set != null) {
        ramBytesUsed += set.ramBytesUsed();
      }
    }
    this.ramBytesUsed = ramBytesUsed;
    this.cardinality = cardinality;
  }

  @Override
  public long ramBytesUsed() {
    return ramBytesUsed;
  }

  @Override
  public DocIdSetIterator iterator() throws IOException {
    if (cardinality == 0) {
      return null;
    }
    return new Iterator();
  }

  private class Iterator extends DocIdSetIterator {

    int block;
    DocIdSetIterator sub = null;
    int doc;

    Iterator() throws IOException {
      doc = -1;
      block = -1;
      sub = DocIdSetIterator.empty();
    }

    @Override
    public int docID() {
      return doc;
    }

    @Override
    public int nextDoc() throws IOException {
      final int subNext = sub.nextDoc();
      if (subNext == NO_MORE_DOCS) {
        return firstDocFromNextBlock();
      }
      return doc = (block << 16) | subNext;
    }

    @Override
    public int advance(int target) throws IOException {
      final int targetBlock = target >>> 16;
      if (targetBlock != block) {
        block = targetBlock;
        if (block >= docIdSets.length) {
          sub = null;
          return doc = NO_MORE_DOCS;
        }
        if (docIdSets[block] == null) {
          return firstDocFromNextBlock();
        }
        sub = docIdSets[block].iterator();
      }
      final int subNext = sub.advance(target & 0xFFFF);
      if (subNext == NO_MORE_DOCS) {
        return firstDocFromNextBlock();
      }
      return doc = (block << 16) | subNext;
    }

    private int firstDocFromNextBlock() throws IOException {
      while (true) {
        block += 1;
        if (block >= docIdSets.length) {
          sub = null;
          return doc = NO_MORE_DOCS;
        } else if (docIdSets[block] != null) {
          sub = docIdSets[block].iterator();
          final int subNext = sub.nextDoc();
          assert subNext != NO_MORE_DOCS;
          return doc = (block << 16) | subNext;
        }
      }
    }

    @Override
    public long cost() {
      return cardinality;
    }
  }

  /** Return the exact number of documents that are contained in this set. */
  public int cardinality() {
    return cardinality;
  }

  @Override
  public String toString() {
    return "RoaringDocIdSet(cardinality=" + cardinality + ")";
  }
}
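A minimal sketch of how the builder above is typically used (maxDoc and the doc ids below are made up): ids must be added in increasing order, and flush() picks the per-block encoding (short[], inverted short[], or FixedBitSet) automatically.

import java.io.IOException;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.RoaringDocIdSet;

public class RoaringDocIdSetDemo {
  public static void main(String[] args) throws IOException {
    int maxDoc = 200_000;
    RoaringDocIdSet.Builder builder = new RoaringDocIdSet.Builder(maxDoc);
    for (int doc = 0; doc < maxDoc; doc += 3) { // doc ids must be added in increasing order
      builder.add(doc);
    }
    RoaringDocIdSet set = builder.build();

    System.out.println("cardinality = " + set.cardinality());
    System.out.println("ramBytesUsed = " + set.ramBytesUsed());

    DocIdSetIterator it = set.iterator(); // null only when the set is empty
    System.out.println("first doc = " + it.nextDoc());
    System.out.println("advance(100000) = " + it.advance(100_000));
  }
}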
/**
 * Reads plain-text stored fields.
 *
 * <p><b>FOR RECREATIONAL USE ONLY</b>
 *
 * @lucene.experimental
 */
public class SimpleTextStoredFieldsReader extends StoredFieldsReader {

  private static final long BASE_RAM_BYTES_USED =
      RamUsageEstimator.shallowSizeOfInstance(SimpleTextStoredFieldsReader.class)
          + RamUsageEstimator.shallowSizeOfInstance(BytesRef.class)
          + RamUsageEstimator.shallowSizeOfInstance(CharsRef.class);

  private long offsets[]; /* docid -> offset in .fld file */
  private IndexInput in;
  private BytesRefBuilder scratch = new BytesRefBuilder();
  private CharsRefBuilder scratchUTF16 = new CharsRefBuilder();
  private final FieldInfos fieldInfos;

  public SimpleTextStoredFieldsReader(
      Directory directory, SegmentInfo si, FieldInfos fn, IOContext context) throws IOException {
    this.fieldInfos = fn;
    boolean success = false;
    try {
      in =
          directory.openInput(
              IndexFileNames.segmentFileName(
                  si.name, "", SimpleTextStoredFieldsWriter.FIELDS_EXTENSION),
              context);
      success = true;
    } finally {
      if (!success) {
        try {
          close();
        } catch (Throwable t) {
        } // ensure we throw our original exception
      }
    }
    readIndex(si.maxDoc());
  }

  // used by clone
  SimpleTextStoredFieldsReader(long offsets[], IndexInput in, FieldInfos fieldInfos) {
    this.offsets = offsets;
    this.in = in;
    this.fieldInfos = fieldInfos;
  }

  // we don't actually write a .fdx-like index; instead we read the
  // stored fields file in its entirety up front and save the offsets
  // so we can seek to the documents later.
  private void readIndex(int size) throws IOException {
    ChecksumIndexInput input = new BufferedChecksumIndexInput(in);
    offsets = new long[size];
    int upto = 0;
    while (!scratch.get().equals(END)) {
      SimpleTextUtil.readLine(input, scratch);
      if (StringHelper.startsWith(scratch.get(), DOC)) {
        offsets[upto] = input.getFilePointer();
        upto++;
      }
    }
    SimpleTextUtil.checkFooter(input);
    assert upto == offsets.length;
  }

  @Override
  public void visitDocument(int n, StoredFieldVisitor visitor) throws IOException {
    in.seek(offsets[n]);

    while (true) {
      readLine();
      if (StringHelper.startsWith(scratch.get(), FIELD) == false) {
        break;
      }
      int fieldNumber = parseIntAt(FIELD.length);
      FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldNumber);
      readLine();
      assert StringHelper.startsWith(scratch.get(), NAME);
      readLine();
      assert StringHelper.startsWith(scratch.get(), TYPE);

      final BytesRef type;
      if (equalsAt(TYPE_STRING, scratch.get(), TYPE.length)) {
        type = TYPE_STRING;
      } else if (equalsAt(TYPE_BINARY, scratch.get(), TYPE.length)) {
        type = TYPE_BINARY;
      } else if (equalsAt(TYPE_INT, scratch.get(), TYPE.length)) {
        type = TYPE_INT;
      } else if (equalsAt(TYPE_LONG, scratch.get(), TYPE.length)) {
        type = TYPE_LONG;
      } else if (equalsAt(TYPE_FLOAT, scratch.get(), TYPE.length)) {
        type = TYPE_FLOAT;
      } else if (equalsAt(TYPE_DOUBLE, scratch.get(), TYPE.length)) {
        type = TYPE_DOUBLE;
      } else {
        throw new RuntimeException("unknown field type");
      }

      switch (visitor.needsField(fieldInfo)) {
        case YES:
          readField(type, fieldInfo, visitor);
          break;
        case NO:
          readLine();
          assert StringHelper.startsWith(scratch.get(), VALUE);
          break;
        case STOP:
          return;
      }
    }
  }

  private void readField(BytesRef type, FieldInfo fieldInfo, StoredFieldVisitor visitor)
      throws IOException {
    readLine();
    assert StringHelper.startsWith(scratch.get(), VALUE);
    if (type == TYPE_STRING) {
      byte[] bytes = new byte[scratch.length() - VALUE.length];
      System.arraycopy(scratch.bytes(), VALUE.length, bytes, 0, bytes.length);
      visitor.stringField(fieldInfo, bytes);
    } else if (type == TYPE_BINARY) {
      byte[] copy = new byte[scratch.length() - VALUE.length];
      System.arraycopy(scratch.bytes(), VALUE.length, copy, 0, copy.length);
      visitor.binaryField(fieldInfo, copy);
    } else if (type == TYPE_INT) {
      scratchUTF16.copyUTF8Bytes(scratch.bytes(), VALUE.length, scratch.length() - VALUE.length);
      visitor.intField(fieldInfo, Integer.parseInt(scratchUTF16.toString()));
    } else if (type == TYPE_LONG) {
      scratchUTF16.copyUTF8Bytes(scratch.bytes(), VALUE.length, scratch.length() - VALUE.length);
      visitor.longField(fieldInfo, Long.parseLong(scratchUTF16.toString()));
    } else if (type == TYPE_FLOAT) {
      scratchUTF16.copyUTF8Bytes(scratch.bytes(), VALUE.length, scratch.length() - VALUE.length);
      visitor.floatField(fieldInfo, Float.parseFloat(scratchUTF16.toString()));
    } else if (type == TYPE_DOUBLE) {
      scratchUTF16.copyUTF8Bytes(scratch.bytes(), VALUE.length, scratch.length() - VALUE.length);
      visitor.doubleField(fieldInfo, Double.parseDouble(scratchUTF16.toString()));
    }
  }

  @Override
  public StoredFieldsReader clone() {
    if (in == null) {
      throw new AlreadyClosedException("this FieldsReader is closed");
    }
    return new SimpleTextStoredFieldsReader(offsets, in.clone(), fieldInfos);
  }

  @Override
  public void close() throws IOException {
    try {
      IOUtils.close(in);
    } finally {
      in = null;
      offsets = null;
    }
  }

  private void readLine() throws IOException {
    SimpleTextUtil.readLine(in, scratch);
  }

  private int parseIntAt(int offset) {
    scratchUTF16.copyUTF8Bytes(scratch.bytes(), offset, scratch.length() - offset);
    return ArrayUtil.parseInt(scratchUTF16.chars(), 0, scratchUTF16.length());
  }

  private boolean equalsAt(BytesRef a, BytesRef b, int bOffset) {
    return a.length == b.length - bOffset
        && ArrayUtil.equals(a.bytes, a.offset, b.bytes, b.offset + bOffset, b.length - bOffset);
  }

  @Override
  public long ramBytesUsed() {
    return BASE_RAM_BYTES_USED
        + RamUsageEstimator.sizeOf(offsets)
        + RamUsageEstimator.sizeOf(scratch.bytes())
        + RamUsageEstimator.sizeOf(scratchUTF16.chars());
  }

  @Override
  public String toString() {
    return getClass().getSimpleName();
  }

  @Override
  public void checkIntegrity() throws IOException {}
}
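A small sketch of how visitDocument above is typically driven: a DocumentStoredFieldVisitor (a standard Lucene helper) collects the visited fields back into a Document. The reader instance is assumed to come from the SimpleText codec elsewhere; this is illustrative only.

import java.io.IOException;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DocumentStoredFieldVisitor;

public class StoredFieldsDump {
  /** Loads all stored fields of one document through the plain-text reader. */
  static Document loadDoc(SimpleTextStoredFieldsReader reader, int docID) throws IOException {
    DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor(); // accepts every field
    reader.visitDocument(docID, visitor);
    return visitor.getDocument();
  }
}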
Example n. 22
 @Override
 public long ramBytesUsed() {
   return BASE_RAM_BYTES_USED + RamUsageEstimator.sizeOf(docIDs);
 }
/**
 * Reads plain-text term vectors.
 *
 * <p><b>FOR RECREATIONAL USE ONLY</b>
 *
 * @lucene.experimental
 */
public class SimpleTextTermVectorsReader extends TermVectorsReader {

  private static final long BASE_RAM_BYTES_USED =
      RamUsageEstimator.shallowSizeOfInstance(SimpleTextTermVectorsReader.class)
          + RamUsageEstimator.shallowSizeOfInstance(BytesRef.class)
          + RamUsageEstimator.shallowSizeOfInstance(CharsRef.class);

  private long offsets[]; /* docid -> offset in .vec file */
  private IndexInput in;
  private BytesRefBuilder scratch = new BytesRefBuilder();
  private CharsRefBuilder scratchUTF16 = new CharsRefBuilder();

  public SimpleTextTermVectorsReader(Directory directory, SegmentInfo si, IOContext context)
      throws IOException {
    boolean success = false;
    try {
      in =
          directory.openInput(
              IndexFileNames.segmentFileName(si.name, "", VECTORS_EXTENSION), context);
      success = true;
    } finally {
      if (!success) {
        try {
          close();
        } catch (Throwable t) {
        } // ensure we throw our original exception
      }
    }
    readIndex(si.maxDoc());
  }

  // used by clone
  SimpleTextTermVectorsReader(long offsets[], IndexInput in) {
    this.offsets = offsets;
    this.in = in;
  }

  // we don't actually write a .tvx-like index; instead we read the
  // vectors file in its entirety up front and save the offsets
  // so we can seek to the data later.
  private void readIndex(int maxDoc) throws IOException {
    ChecksumIndexInput input = new BufferedChecksumIndexInput(in);
    offsets = new long[maxDoc];
    int upto = 0;
    while (!scratch.get().equals(END)) {
      SimpleTextUtil.readLine(input, scratch);
      if (StringHelper.startsWith(scratch.get(), DOC)) {
        offsets[upto] = input.getFilePointer();
        upto++;
      }
    }
    SimpleTextUtil.checkFooter(input);
    assert upto == offsets.length;
  }

  @Override
  public Fields get(int doc) throws IOException {
    SortedMap<String, SimpleTVTerms> fields = new TreeMap<>();
    in.seek(offsets[doc]);
    readLine();
    assert StringHelper.startsWith(scratch.get(), NUMFIELDS);
    int numFields = parseIntAt(NUMFIELDS.length);
    if (numFields == 0) {
      return null; // no vectors for this doc
    }
    for (int i = 0; i < numFields; i++) {
      readLine();
      assert StringHelper.startsWith(scratch.get(), FIELD);
      // skip fieldNumber:
      parseIntAt(FIELD.length);

      readLine();
      assert StringHelper.startsWith(scratch.get(), FIELDNAME);
      String fieldName = readString(FIELDNAME.length, scratch);

      readLine();
      assert StringHelper.startsWith(scratch.get(), FIELDPOSITIONS);
      boolean positions = Boolean.parseBoolean(readString(FIELDPOSITIONS.length, scratch));

      readLine();
      assert StringHelper.startsWith(scratch.get(), FIELDOFFSETS);
      boolean offsets = Boolean.parseBoolean(readString(FIELDOFFSETS.length, scratch));

      readLine();
      assert StringHelper.startsWith(scratch.get(), FIELDPAYLOADS);
      boolean payloads = Boolean.parseBoolean(readString(FIELDPAYLOADS.length, scratch));

      readLine();
      assert StringHelper.startsWith(scratch.get(), FIELDTERMCOUNT);
      int termCount = parseIntAt(FIELDTERMCOUNT.length);

      SimpleTVTerms terms = new SimpleTVTerms(offsets, positions, payloads);
      fields.put(fieldName, terms);

      BytesRefBuilder term = new BytesRefBuilder();
      for (int j = 0; j < termCount; j++) {
        readLine();
        assert StringHelper.startsWith(scratch.get(), TERMTEXT);
        int termLength = scratch.length() - TERMTEXT.length;
        term.grow(termLength);
        term.setLength(termLength);
        System.arraycopy(scratch.bytes(), TERMTEXT.length, term.bytes(), 0, termLength);

        SimpleTVPostings postings = new SimpleTVPostings();
        terms.terms.put(term.toBytesRef(), postings);

        readLine();
        assert StringHelper.startsWith(scratch.get(), TERMFREQ);
        postings.freq = parseIntAt(TERMFREQ.length);

        if (positions || offsets) {
          if (positions) {
            postings.positions = new int[postings.freq];
            if (payloads) {
              postings.payloads = new BytesRef[postings.freq];
            }
          }

          if (offsets) {
            postings.startOffsets = new int[postings.freq];
            postings.endOffsets = new int[postings.freq];
          }

          for (int k = 0; k < postings.freq; k++) {
            if (positions) {
              readLine();
              assert StringHelper.startsWith(scratch.get(), POSITION);
              postings.positions[k] = parseIntAt(POSITION.length);
              if (payloads) {
                readLine();
                assert StringHelper.startsWith(scratch.get(), PAYLOAD);
                if (scratch.length() - PAYLOAD.length == 0) {
                  postings.payloads[k] = null;
                } else {
                  byte payloadBytes[] = new byte[scratch.length() - PAYLOAD.length];
                  System.arraycopy(
                      scratch.bytes(), PAYLOAD.length, payloadBytes, 0, payloadBytes.length);
                  postings.payloads[k] = new BytesRef(payloadBytes);
                }
              }
            }

            if (offsets) {
              readLine();
              assert StringHelper.startsWith(scratch.get(), STARTOFFSET);
              postings.startOffsets[k] = parseIntAt(STARTOFFSET.length);

              readLine();
              assert StringHelper.startsWith(scratch.get(), ENDOFFSET);
              postings.endOffsets[k] = parseIntAt(ENDOFFSET.length);
            }
          }
        }
      }
    }
    return new SimpleTVFields(fields);
  }

  @Override
  public TermVectorsReader clone() {
    if (in == null) {
      throw new AlreadyClosedException("this TermVectorsReader is closed");
    }
    return new SimpleTextTermVectorsReader(offsets, in.clone());
  }

  @Override
  public void close() throws IOException {
    try {
      IOUtils.close(in);
    } finally {
      in = null;
      offsets = null;
    }
  }

  private void readLine() throws IOException {
    SimpleTextUtil.readLine(in, scratch);
  }

  private int parseIntAt(int offset) {
    scratchUTF16.copyUTF8Bytes(scratch.bytes(), offset, scratch.length() - offset);
    return ArrayUtil.parseInt(scratchUTF16.chars(), 0, scratchUTF16.length());
  }

  private String readString(int offset, BytesRefBuilder scratch) {
    scratchUTF16.copyUTF8Bytes(scratch.bytes(), offset, scratch.length() - offset);
    return scratchUTF16.toString();
  }

  private class SimpleTVFields extends Fields {
    private final SortedMap<String, SimpleTVTerms> fields;

    SimpleTVFields(SortedMap<String, SimpleTVTerms> fields) {
      this.fields = fields;
    }

    @Override
    public Iterator<String> iterator() {
      return Collections.unmodifiableSet(fields.keySet()).iterator();
    }

    @Override
    public Terms terms(String field) throws IOException {
      return fields.get(field);
    }

    @Override
    public int size() {
      return fields.size();
    }
  }

  private static class SimpleTVTerms extends Terms {
    final SortedMap<BytesRef, SimpleTVPostings> terms;
    final boolean hasOffsets;
    final boolean hasPositions;
    final boolean hasPayloads;

    SimpleTVTerms(boolean hasOffsets, boolean hasPositions, boolean hasPayloads) {
      this.hasOffsets = hasOffsets;
      this.hasPositions = hasPositions;
      this.hasPayloads = hasPayloads;
      terms = new TreeMap<>();
    }

    @Override
    public TermsEnum iterator() throws IOException {
      // TODO: reuse
      return new SimpleTVTermsEnum(terms);
    }

    @Override
    public long size() throws IOException {
      return terms.size();
    }

    @Override
    public long getSumTotalTermFreq() throws IOException {
      return -1;
    }

    @Override
    public long getSumDocFreq() throws IOException {
      return terms.size();
    }

    @Override
    public int getDocCount() throws IOException {
      return 1;
    }

    @Override
    public boolean hasFreqs() {
      return true;
    }

    @Override
    public boolean hasOffsets() {
      return hasOffsets;
    }

    @Override
    public boolean hasPositions() {
      return hasPositions;
    }

    @Override
    public boolean hasPayloads() {
      return hasPayloads;
    }
  }

  private static class SimpleTVPostings {
    private int freq;
    private int positions[];
    private int startOffsets[];
    private int endOffsets[];
    private BytesRef payloads[];
  }

  private static class SimpleTVTermsEnum extends TermsEnum {
    SortedMap<BytesRef, SimpleTVPostings> terms;
    Iterator<Map.Entry<BytesRef, SimpleTextTermVectorsReader.SimpleTVPostings>> iterator;
    Map.Entry<BytesRef, SimpleTextTermVectorsReader.SimpleTVPostings> current;

    SimpleTVTermsEnum(SortedMap<BytesRef, SimpleTVPostings> terms) {
      this.terms = terms;
      this.iterator = terms.entrySet().iterator();
    }

    @Override
    public SeekStatus seekCeil(BytesRef text) throws IOException {
      iterator = terms.tailMap(text).entrySet().iterator();
      if (!iterator.hasNext()) {
        return SeekStatus.END;
      } else {
        return next().equals(text) ? SeekStatus.FOUND : SeekStatus.NOT_FOUND;
      }
    }

    @Override
    public void seekExact(long ord) throws IOException {
      throw new UnsupportedOperationException();
    }

    @Override
    public BytesRef next() throws IOException {
      if (!iterator.hasNext()) {
        return null;
      } else {
        current = iterator.next();
        return current.getKey();
      }
    }

    @Override
    public BytesRef term() throws IOException {
      return current.getKey();
    }

    @Override
    public long ord() throws IOException {
      throw new UnsupportedOperationException();
    }

    @Override
    public int docFreq() throws IOException {
      return 1;
    }

    @Override
    public long totalTermFreq() throws IOException {
      return current.getValue().freq;
    }

    @Override
    public PostingsEnum postings(PostingsEnum reuse, int flags) throws IOException {

      if (PostingsEnum.featureRequested(flags, PostingsEnum.POSITIONS)) {
        SimpleTVPostings postings = current.getValue();
        if (postings.positions != null || postings.startOffsets != null) {
          // TODO: reuse
          SimpleTVPostingsEnum e = new SimpleTVPostingsEnum();
          e.reset(
              postings.positions, postings.startOffsets, postings.endOffsets, postings.payloads);
          return e;
        }
      }

      // TODO: reuse
      SimpleTVDocsEnum e = new SimpleTVDocsEnum();
      e.reset(
          PostingsEnum.featureRequested(flags, PostingsEnum.FREQS) == false
              ? 1
              : current.getValue().freq);
      return e;
    }
  }

  // note: these two enum classes are exactly like the Default impl...
  private static class SimpleTVDocsEnum extends PostingsEnum {
    private boolean didNext;
    private int doc = -1;
    private int freq;

    @Override
    public int freq() throws IOException {
      assert freq != -1;
      return freq;
    }

    @Override
    public int nextPosition() throws IOException {
      return -1;
    }

    @Override
    public int startOffset() throws IOException {
      return -1;
    }

    @Override
    public int endOffset() throws IOException {
      return -1;
    }

    @Override
    public BytesRef getPayload() throws IOException {
      return null;
    }

    @Override
    public int docID() {
      return doc;
    }

    @Override
    public int nextDoc() {
      if (!didNext) {
        didNext = true;
        return (doc = 0);
      } else {
        return (doc = NO_MORE_DOCS);
      }
    }

    @Override
    public int advance(int target) throws IOException {
      return slowAdvance(target);
    }

    public void reset(int freq) {
      this.freq = freq;
      this.doc = -1;
      didNext = false;
    }

    @Override
    public long cost() {
      return 1;
    }
  }

  private static class SimpleTVPostingsEnum extends PostingsEnum {
    private boolean didNext;
    private int doc = -1;
    private int nextPos;
    private int[] positions;
    private BytesRef[] payloads;
    private int[] startOffsets;
    private int[] endOffsets;

    @Override
    public int freq() throws IOException {
      if (positions != null) {
        return positions.length;
      } else {
        assert startOffsets != null;
        return startOffsets.length;
      }
    }

    @Override
    public int docID() {
      return doc;
    }

    @Override
    public int nextDoc() {
      if (!didNext) {
        didNext = true;
        return (doc = 0);
      } else {
        return (doc = NO_MORE_DOCS);
      }
    }

    @Override
    public int advance(int target) throws IOException {
      return slowAdvance(target);
    }

    public void reset(int[] positions, int[] startOffsets, int[] endOffsets, BytesRef payloads[]) {
      this.positions = positions;
      this.startOffsets = startOffsets;
      this.endOffsets = endOffsets;
      this.payloads = payloads;
      this.doc = -1;
      didNext = false;
      nextPos = 0;
    }

    @Override
    public BytesRef getPayload() {
      return payloads == null ? null : payloads[nextPos - 1];
    }

    @Override
    public int nextPosition() {
      if (positions != null) {
        assert nextPos < positions.length;
        return positions[nextPos++];
      } else {
        assert nextPos < startOffsets.length;
        nextPos++;
        return -1;
      }
    }

    @Override
    public int startOffset() {
      if (startOffsets == null) {
        return -1;
      } else {
        return startOffsets[nextPos - 1];
      }
    }

    @Override
    public int endOffset() {
      if (endOffsets == null) {
        return -1;
      } else {
        return endOffsets[nextPos - 1];
      }
    }

    @Override
    public long cost() {
      return 1;
    }
  }

  @Override
  public long ramBytesUsed() {
    return BASE_RAM_BYTES_USED + RamUsageEstimator.sizeOf(offsets);
  }

  @Override
  public String toString() {
    return getClass().getSimpleName();
  }

  @Override
  public void checkIntegrity() throws IOException {}
}
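A minimal usage sketch, not part of the reader above: reader and docID are assumed names for an open TermVectorsReader and a stored document id. It shows how the Fields object returned by get() is typically walked, down to positions, offsets, and payloads.

// assumed imports: org.apache.lucene.index.Fields, Terms, TermsEnum, PostingsEnum
//                  and org.apache.lucene.util.BytesRef
Fields fields = reader.get(docID);               // the SimpleTVFields built above
for (String field : fields) {
  Terms terms = fields.terms(field);
  TermsEnum termsEnum = terms.iterator();
  BytesRef term;
  while ((term = termsEnum.next()) != null) {
    PostingsEnum postings = termsEnum.postings(null, PostingsEnum.ALL);
    postings.nextDoc();                          // term vectors expose a single pseudo-document
    for (int i = 0; i < postings.freq(); i++) {
      int position = postings.nextPosition();    // -1 when only offsets were stored
      int start = postings.startOffset();        // -1 when offsets were not stored
      int end = postings.endOffset();
      BytesRef payload = postings.getPayload();  // null when no payload was stored
    }
  }
}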
Example n. 24
public class LRUCache<K, V> extends SolrCacheBase implements SolrCache<K, V>, Accountable {
  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

  static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(LRUCache.class);

  ///  Copied from Lucene's LRUQueryCache

  // memory usage of a simple term query
  public static final long DEFAULT_RAM_BYTES_USED = 192;

  public static final long HASHTABLE_RAM_BYTES_PER_ENTRY =
      2
          * RamUsageEstimator.NUM_BYTES_OBJECT_REF // key + value
          * 2; // hash tables need to be oversized to avoid collisions, assume 2x capacity

  static final long LINKED_HASHTABLE_RAM_BYTES_PER_ENTRY =
      HASHTABLE_RAM_BYTES_PER_ENTRY
          + 2 * RamUsageEstimator.NUM_BYTES_OBJECT_REF; // previous & next references
  /// End copied code

  /* An instance of this class will be shared across multiple instances
   * of an LRUCache at the same time.  Make sure everything is thread safe.
   */
  private static class CumulativeStats {
    LongAdder lookups = new LongAdder();
    LongAdder hits = new LongAdder();
    LongAdder inserts = new LongAdder();
    LongAdder evictions = new LongAdder();
    LongAdder evictionsRamUsage = new LongAdder();
  }

  private CumulativeStats stats;

  // per instance stats.  The synchronization used for the map will also be
  // used for updating these statistics (and hence they are not AtomicLongs).
  private long lookups;
  private long hits;
  private long inserts;
  private long evictions;
  private long evictionsRamUsage;

  private long warmupTime = 0;

  private Map<K, V> map;
  private String description = "LRU Cache";

  private long maxRamBytes = Long.MAX_VALUE;
  // The synchronization used for the map will be used to update this,
  // hence not an AtomicLong
  private long ramBytesUsed = 0;

  @Override
  public Object init(Map args, Object persistence, CacheRegenerator regenerator) {
    super.init(args, regenerator);
    String str = (String) args.get("size");
    final int limit = str == null ? 1024 : Integer.parseInt(str);
    str = (String) args.get("initialSize");
    final int initialSize = Math.min(str == null ? 1024 : Integer.parseInt(str), limit);
    str = (String) args.get("maxRamMB");
    final long maxRamBytes =
        this.maxRamBytes =
            str == null ? Long.MAX_VALUE : (long) (Double.parseDouble(str) * 1024L * 1024L);
    description = generateDescription(limit, initialSize);

    map =
        new LinkedHashMap<K, V>(initialSize, 0.75f, true) {
          @Override
          protected boolean removeEldestEntry(Map.Entry eldest) {
            if (size() > limit || ramBytesUsed > maxRamBytes) {
              if (maxRamBytes != Long.MAX_VALUE && ramBytesUsed > maxRamBytes) {
                long bytesToDecrement = 0;

                Iterator<Map.Entry<K, V>> iterator = entrySet().iterator();
                do {
                  Map.Entry<K, V> entry = iterator.next();
                  if (entry.getKey() != null) {
                    if (entry.getKey() instanceof Accountable) {
                      bytesToDecrement += ((Accountable) entry.getKey()).ramBytesUsed();
                    } else {
                      bytesToDecrement += DEFAULT_RAM_BYTES_USED;
                    }
                  }
                  if (entry.getValue() != null) {
                    bytesToDecrement += ((Accountable) entry.getValue()).ramBytesUsed();
                  }
                  bytesToDecrement += LINKED_HASHTABLE_RAM_BYTES_PER_ENTRY;
                  ramBytesUsed -= bytesToDecrement;
                  iterator.remove();
                  evictions++;
                  evictionsRamUsage++;
                  stats.evictions.increment();
                  stats.evictionsRamUsage.increment();
                } while (iterator.hasNext() && ramBytesUsed > maxRamBytes);
                // must return false according to javadocs of removeEldestEntry if we're modifying
                // the map ourselves
                return false;
              } else {
                // increment evictions regardless of state.
                // this doesn't need to be synchronized because it will
                // only be called in the context of a higher level synchronized block.
                evictions++;
                stats.evictions.increment();
                return true;
              }
            }
            return false;
          }
        };

    if (persistence == null) {
      // must be the first time a cache of this type is being created
      persistence = new CumulativeStats();
    }

    stats = (CumulativeStats) persistence;

    return persistence;
  }

  /** @return the description of this cache. */
  private String generateDescription(int limit, int initialSize) {
    String description = "LRU Cache(maxSize=" + limit + ", initialSize=" + initialSize;
    if (isAutowarmingOn()) {
      description += ", " + getAutowarmDescription();
    }
    if (maxRamBytes != Long.MAX_VALUE) {
      description += ", maxRamMB=" + (maxRamBytes / 1024L / 1024L);
    }
    description += ')';
    return description;
  }

  @Override
  public int size() {
    synchronized (map) {
      return map.size();
    }
  }

  @Override
  public V put(K key, V value) {
    synchronized (map) {
      if (getState() == State.LIVE) {
        stats.inserts.increment();
      }

      // increment local inserts regardless of state???
      // it does make it more consistent with the current size...
      inserts++;

      // important to calc and add new ram bytes first so that removeEldestEntry can compare
      // correctly
      long keySize = DEFAULT_RAM_BYTES_USED;
      if (maxRamBytes != Long.MAX_VALUE) {
        if (key != null && key instanceof Accountable) {
          keySize = ((Accountable) key).ramBytesUsed();
        }
        long valueSize = 0;
        if (value != null) {
          if (value instanceof Accountable) {
            Accountable accountable = (Accountable) value;
            valueSize = accountable.ramBytesUsed();
          } else {
            throw new SolrException(
                SolrException.ErrorCode.SERVER_ERROR,
                "Cache: "
                    + getName()
                    + " is configured with maxRamBytes="
                    + RamUsageEstimator.humanReadableUnits(maxRamBytes)
                    + " but its values do not implement org.apache.lucene.util.Accountable");
          }
        }
        ramBytesUsed += keySize + valueSize + LINKED_HASHTABLE_RAM_BYTES_PER_ENTRY;
      }
      V old = map.put(key, value);
      if (maxRamBytes != Long.MAX_VALUE && old != null) {
        long bytesToDecrement = ((Accountable) old).ramBytesUsed();
        // the key existed in the map but we added its size before the put, so let's back out
        bytesToDecrement += LINKED_HASHTABLE_RAM_BYTES_PER_ENTRY;
        if (key != null) {
          if (key instanceof Accountable) {
            Accountable aKey = (Accountable) key;
            bytesToDecrement += aKey.ramBytesUsed();
          } else {
            bytesToDecrement += DEFAULT_RAM_BYTES_USED;
          }
        }
        ramBytesUsed -= bytesToDecrement;
      }
      return old;
    }
  }

  @Override
  public V get(K key) {
    synchronized (map) {
      V val = map.get(key);
      if (getState() == State.LIVE) {
        // only increment lookups and hits if we are live.
        lookups++;
        stats.lookups.increment();
        if (val != null) {
          hits++;
          stats.hits.increment();
        }
      }
      return val;
    }
  }

  @Override
  public void clear() {
    synchronized (map) {
      map.clear();
      ramBytesUsed = 0;
    }
  }

  @Override
  public void warm(SolrIndexSearcher searcher, SolrCache<K, V> old) {
    if (regenerator == null) return;
    long warmingStartTime = System.nanoTime();
    LRUCache<K, V> other = (LRUCache<K, V>) old;

    // warm entries
    if (isAutowarmingOn()) {
      Object[] keys, vals = null;

      // Don't do the autowarming in the synchronized block, just pull out the keys and values.
      synchronized (other.map) {
        int sz = autowarm.getWarmCount(other.map.size());

        keys = new Object[sz];
        vals = new Object[sz];

        Iterator<Map.Entry<K, V>> iter = other.map.entrySet().iterator();

        // iteration goes from oldest (least recently used) to most recently used,
        // so we need to skip over the oldest entries.
        int skip = other.map.size() - sz;
        for (int i = 0; i < skip; i++) iter.next();

        for (int i = 0; i < sz; i++) {
          Map.Entry<K, V> entry = iter.next();
          keys[i] = entry.getKey();
          vals[i] = entry.getValue();
        }
      }

      // autowarm from the oldest to the newest entries so that the ordering will be
      // correct in the new cache.
      for (int i = 0; i < keys.length; i++) {
        try {
          boolean continueRegen = regenerator.regenerateItem(searcher, this, old, keys[i], vals[i]);
          if (!continueRegen) break;
        } catch (Exception e) {
          SolrException.log(log, "Error during auto-warming of key:" + keys[i], e);
        }
      }
    }

    warmupTime =
        TimeUnit.MILLISECONDS.convert(System.nanoTime() - warmingStartTime, TimeUnit.NANOSECONDS);
  }

  @Override
  public void close() {}

  //////////////////////// SolrInfoMBeans methods //////////////////////

  @Override
  public String getName() {
    return LRUCache.class.getName();
  }

  @Override
  public String getDescription() {
    return description;
  }

  @Override
  public String getSource() {
    return null;
  }

  @Override
  public NamedList getStatistics() {
    NamedList lst = new SimpleOrderedMap();
    synchronized (map) {
      lst.add("lookups", lookups);
      lst.add("hits", hits);
      lst.add("hitratio", calcHitRatio(lookups, hits));
      lst.add("inserts", inserts);
      lst.add("evictions", evictions);
      lst.add("size", map.size());
      if (maxRamBytes != Long.MAX_VALUE) {
        lst.add("maxRamMB", maxRamBytes / 1024L / 1024L);
        lst.add("ramBytesUsed", ramBytesUsed());
        lst.add("evictionsRamUsage", evictionsRamUsage);
      }
    }
    lst.add("warmupTime", warmupTime);

    long clookups = stats.lookups.longValue();
    long chits = stats.hits.longValue();
    lst.add("cumulative_lookups", clookups);
    lst.add("cumulative_hits", chits);
    lst.add("cumulative_hitratio", calcHitRatio(clookups, chits));
    lst.add("cumulative_inserts", stats.inserts.longValue());
    lst.add("cumulative_evictions", stats.evictions.longValue());
    if (maxRamBytes != Long.MAX_VALUE) {
      lst.add("cumulative_evictionsRamUsage", stats.evictionsRamUsage.longValue());
    }

    return lst;
  }

  @Override
  public String toString() {
    return name() + getStatistics().toString();
  }

  @Override
  public long ramBytesUsed() {
    synchronized (map) {
      return BASE_RAM_BYTES_USED + ramBytesUsed;
    }
  }

  @Override
  public Collection<Accountable> getChildResources() {
    if (maxRamBytes != Long.MAX_VALUE) {
      synchronized (map) {
        return Accountables.namedAccountables(getName(), (Map<?, ? extends Accountable>) map);
      }
    } else {
      return Collections.emptyList();
    }
  }
}
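A minimal sketch of driving the cache directly, with hand-built init args instead of the solrconfig.xml wiring Solr normally uses; all names below are assumptions. Because maxRamMB is set, the values must implement Accountable (here a FixedBitSet), otherwise put() throws the SolrException shown above; non-Accountable keys fall back to DEFAULT_RAM_BYTES_USED.

// assumed imports: java.util.HashMap, java.util.Map, org.apache.lucene.util.FixedBitSet
Map<String, String> args = new HashMap<>();
args.put("size", "512");         // entry-count limit enforced by removeEldestEntry
args.put("initialSize", "128");  // initial capacity of the backing LinkedHashMap
args.put("maxRamMB", "64");      // optional RAM cap that turns on Accountable-based accounting

LRUCache<String, FixedBitSet> cache = new LRUCache<>();
cache.init(args, null, null);    // null persistence creates fresh CumulativeStats;
                                 // null regenerator disables autowarming

FixedBitSet bits = new FixedBitSet(1024);        // FixedBitSet implements Accountable
cache.put("filter:color:red", bits);
FixedBitSet hit = cache.get("filter:color:red");
long ram = cache.ramBytesUsed();                 // BASE_RAM_BYTES_USED plus the tracked entry sizes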
 /** Returns byte size of the underlying TST */
 @Override
 public long sizeInBytes() {
   return RamUsageEstimator.sizeOf(autocomplete);
 }
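A standalone sketch, unrelated to the lookup snippet above, of what RamUsageEstimator.sizeOf reports for a primitive array and how humanReadableUnits formats it for logging; the numbers in the comments are approximate and JVM-dependent.

long[] offsets = new long[1024];
long bytes = RamUsageEstimator.sizeOf(offsets);                   // array header + 1024 * 8 bytes, object-aligned
System.out.println(bytes);                                        // roughly 8208 on a typical 64-bit JVM
System.out.println(RamUsageEstimator.humanReadableUnits(bytes));  // prints something like "8 KB"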
Example n. 26
  /** {@link DocIdSet} implementation that can store documents up to 2^16-1 in a short[]. */
  private static class ShortArrayDocIdSet extends DocIdSet {

    private static final long BASE_RAM_BYTES_USED =
        RamUsageEstimator.shallowSizeOfInstance(ShortArrayDocIdSet.class);

    private final short[] docIDs;

    private ShortArrayDocIdSet(short[] docIDs) {
      this.docIDs = docIDs;
    }

    @Override
    public long ramBytesUsed() {
      return BASE_RAM_BYTES_USED + RamUsageEstimator.sizeOf(docIDs);
    }

    @Override
    public DocIdSetIterator iterator() throws IOException {
      return new DocIdSetIterator() {

        int i = -1; // this is the index of the current document in the array
        int doc = -1;

        private int docId(int i) {
          return docIDs[i] & 0xFFFF;
        }

        @Override
        public int nextDoc() throws IOException {
          if (++i >= docIDs.length) {
            return doc = NO_MORE_DOCS;
          }
          return doc = docId(i);
        }

        @Override
        public int docID() {
          return doc;
        }

        @Override
        public long cost() {
          return docIDs.length;
        }

        @Override
        public int advance(int target) throws IOException {
          // binary search
          int lo = i + 1;
          int hi = docIDs.length - 1;
          while (lo <= hi) {
            final int mid = (lo + hi) >>> 1;
            final int midDoc = docId(mid);
            if (midDoc < target) {
              lo = mid + 1;
            } else {
              hi = mid - 1;
            }
          }
          if (lo == docIDs.length) {
            i = docIDs.length;
            return doc = NO_MORE_DOCS;
          } else {
            i = lo;
            return doc = docId(i);
          }
        }
      };
    }
  }
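ShortArrayDocIdSet is private to its enclosing class, so the sketch below only shows the general consumption pattern for a DocIdSetIterator such as the one returned by iterator() above; docIdSet is an assumed DocIdSet instance and the target value 42 is arbitrary.

DocIdSetIterator it = docIdSet.iterator();
for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
  // each doc is the next stored docID, in ascending order
}

// Skipping: advance(target) returns the first stored docID >= target (served by the binary
// search above) and never moves backwards.
DocIdSetIterator skipper = docIdSet.iterator();
int doc = skipper.advance(42);
if (doc != DocIdSetIterator.NO_MORE_DOCS) {
  // doc is the smallest stored docID that is >= 42
}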