  @Override
  public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
    PostingsReaderBase postingsReader =
        new SepPostingsReader(
            state.dir,
            state.segmentInfo,
            state.context,
            new MockIntFactory(blockSize),
            state.segmentSuffix);

    TermsIndexReaderBase indexReader;
    boolean success = false;
    try {
      indexReader =
          new FixedGapTermsIndexReader(
              state.dir,
              state.fieldInfos,
              state.segmentInfo.name,
              state.termsIndexDivisor,
              BytesRef.getUTF8SortedAsUnicodeComparator(),
              state.segmentSuffix,
              IOContext.DEFAULT);
      success = true;
    } finally {
      if (!success) {
        // the terms index failed to open: release the postings reader
        // before letting the exception propagate
        postingsReader.close();
      }
    }

    success = false;
    try {
      FieldsProducer ret =
          new BlockTermsReader(
              indexReader,
              state.dir,
              state.fieldInfos,
              state.segmentInfo.name,
              postingsReader,
              state.context,
              1024,
              state.segmentSuffix);
      success = true;
      return ret;
    } finally {
      if (!success) {
        // opening the terms dict failed: close both resources already held
        try {
          postingsReader.close();
        } finally {
          indexReader.close();
        }
      }
    }
  }
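
Every reader on this page opens multiple resources and relies on the same success-flag idiom: set the flag only after the last acquisition succeeds, and in finally release whatever was already open while the flag is still false, letting the original exception propagate. A minimal, self-contained sketch of the idiom follows; the class and resource names are illustrative, not from Lucene.

import java.io.Closeable;
import java.io.IOException;

class SuccessFlagDemo {
  // Hypothetical resources standing in for postingsReader / indexReader.
  static Closeable openFirst() {
    return () -> System.out.println("first closed");
  }

  static Closeable openSecond() throws IOException {
    throw new IOException("simulated failure while opening second resource");
  }

  static void open() throws IOException {
    Closeable first = openFirst();
    boolean success = false;
    try {
      Closeable second = openSecond(); // may throw
      success = true;
      // ... normally both resources are handed to the caller here ...
      second.close();
      first.close();
    } finally {
      if (!success) {
        // opening failed part-way: release what we already hold,
        // then let the original exception propagate
        first.close();
      }
    }
  }

  public static void main(String[] args) {
    try {
      open();
    } catch (IOException e) {
      System.out.println("caught: " + e.getMessage());
    }
  }
}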
  @Override
  public long ramBytesUsed() {
    long ramBytesUsed = postingsReader.ramBytesUsed();
    for (TermsReader r : fields.values()) {
      ramBytesUsed += r.ramBytesUsed();
    }
    return ramBytesUsed;
  }
  @Override
  public void checkIntegrity() throws IOException {
    // term dictionary
    CodecUtil.checksumEntireFile(termsIn);

    // postings
    postingsReader.checkIntegrity();
  }
  @Override
  public long ramBytesUsed() {
    long sizeInBytes = postingsReader.ramBytesUsed();
    for (FieldReader reader : fields.values()) {
      sizeInBytes += reader.ramBytesUsed();
    }
    return sizeInBytes;
  }
Example #5
  @Override
  public void checkIntegrity() throws IOException {
    // verify terms
    CodecUtil.checksumEntireFile(in);

    // verify postings
    postingsReader.checkIntegrity();
  }
Example #6
  @Override
  public long ramBytesUsed() {
    long ramBytesUsed = BASE_RAM_BYTES_USED;
    ramBytesUsed += (postingsReader != null) ? postingsReader.ramBytesUsed() : 0;
    ramBytesUsed += (indexReader != null) ? indexReader.ramBytesUsed() : 0;
    ramBytesUsed += fields.size() * 2L * RamUsageEstimator.NUM_BYTES_OBJECT_REF;
    for (FieldReader reader : fields.values()) {
      ramBytesUsed += reader.ramBytesUsed();
    }
    return ramBytesUsed;
  }
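
These ramBytesUsed() implementations follow Lucene's Accountable pattern: report a precomputed shallow size for the object itself, add an estimate for the field map's key/value references, then recurse into the per-field readers. Below is a sketch of the same accounting on a hypothetical container/child pair; only RamUsageEstimator is real Lucene API, everything else is illustrative.

import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.util.RamUsageEstimator;

// Hypothetical child reader that accounts only for its own shallow footprint.
class ChildReader {
  private static final long SHALLOW_SIZE =
      RamUsageEstimator.shallowSizeOfInstance(ChildReader.class);

  long ramBytesUsed() {
    return SHALLOW_SIZE; // a real reader would also add what it owns
  }
}

// Hypothetical container mirroring the accounting in the snippet above.
class ContainerReader {
  private static final long BASE_RAM_BYTES_USED =
      RamUsageEstimator.shallowSizeOfInstance(ContainerReader.class);

  private final Map<String, ChildReader> fields = new HashMap<>();

  long ramBytesUsed() {
    long bytes = BASE_RAM_BYTES_USED;
    // two object references (key + value) per map entry, as in the snippet above
    bytes += fields.size() * 2L * RamUsageEstimator.NUM_BYTES_OBJECT_REF;
    for (ChildReader child : fields.values()) {
      bytes += child.ramBytesUsed();
    }
    return bytes;
  }
}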
Example #7
  @Override
  public void close() throws IOException {
    try {
      try {
        if (indexReader != null) {
          indexReader.close();
        }
      } finally {
        // null out so that if an app hangs on to us (i.e., we are not
        // GC-able despite being closed) we still free most RAM
        indexReader = null;
        if (in != null) {
          in.close();
        }
      }
    } finally {
      if (postingsReader != null) {
        postingsReader.close();
      }
    }
  }
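
The nested try/finally above guarantees that every close() gets its chance to run even when an earlier one throws, and nulling indexReader frees most heap even if the application keeps a stale reference to the closed producer. Lucene's IOUtils.close can collapse the nesting; the following is a sketch under that assumption, with field names mirroring the snippet rather than real declarations.

import java.io.Closeable;
import java.io.IOException;
import org.apache.lucene.util.IOUtils;

// Sketch only: the fields stand in for the reader's actual resources.
class ReaderLike implements Closeable {
  private Closeable indexReader;
  private Closeable in;
  private Closeable postingsReader;

  @Override
  public void close() throws IOException {
    try {
      // IOUtils.close closes every argument (nulls are skipped) and
      // rethrows the first exception, so each close still runs.
      IOUtils.close(indexReader, in, postingsReader);
    } finally {
      // free most RAM even if the app keeps a reference to this reader
      indexReader = null;
    }
  }
}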
  @Override
  public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {

    final String seedFileName =
        IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, SEED_EXT);
    final IndexInput in = state.dir.openInput(seedFileName, state.context);
    final long seed = in.readLong();
    if (LuceneTestCase.VERBOSE) {
      System.out.println(
          "MockRandomCodec: reading from seg="
              + state.segmentInfo.name
              + " formatID="
              + state.segmentSuffix
              + " seed="
              + seed);
    }
    in.close();

    final Random random = new Random(seed);

    int readBufferSize = _TestUtil.nextInt(random, 1, 4096);
    if (LuceneTestCase.VERBOSE) {
      System.out.println("MockRandomCodec: readBufferSize=" + readBufferSize);
    }

    PostingsReaderBase postingsReader;

    if (random.nextBoolean()) {
      if (LuceneTestCase.VERBOSE) {
        System.out.println("MockRandomCodec: reading Sep postings");
      }
      postingsReader =
          new SepPostingsReader(
              state.dir,
              state.segmentInfo,
              state.context,
              new MockIntStreamFactory(random),
              state.segmentSuffix);
    } else {
      if (LuceneTestCase.VERBOSE) {
        System.out.println("MockRandomCodec: reading Standard postings");
      }
      postingsReader =
          new Lucene40PostingsReader(
              state.dir, state.segmentInfo, state.context, state.segmentSuffix);
    }

    if (random.nextBoolean()) {
      final int totTFCutoff = _TestUtil.nextInt(random, 1, 20);
      if (LuceneTestCase.VERBOSE) {
        System.out.println(
            "MockRandomCodec: reading pulsing postings with totTFCutoff=" + totTFCutoff);
      }
      postingsReader = new PulsingPostingsReader(postingsReader);
    }

    final FieldsProducer fields;

    if (random.nextBoolean()) {
      // Use BlockTree terms dict
      if (LuceneTestCase.VERBOSE) {
        System.out.println("MockRandomCodec: reading BlockTree terms dict");
      }

      boolean success = false;
      try {
        fields =
            new BlockTreeTermsReader(
                state.dir,
                state.fieldInfos,
                state.segmentInfo.name,
                postingsReader,
                state.context,
                state.segmentSuffix,
                state.termsIndexDivisor);
        success = true;
      } finally {
        if (!success) {
          postingsReader.close();
        }
      }
    } else {

      if (LuceneTestCase.VERBOSE) {
        System.out.println("MockRandomCodec: reading Block terms dict");
      }
      final TermsIndexReaderBase indexReader;
      boolean success = false;
      try {
        final boolean doFixedGap = random.nextBoolean();

        // randomness diverges from writer, here:
        // if termsIndexDivisor is set to -1, we must not touch it: a test has
        // explicitly instructed us not to load the terms index.
        if (state.termsIndexDivisor != -1) {
          state.termsIndexDivisor = _TestUtil.nextInt(random, 1, 10);
        }

        if (doFixedGap) {
          if (LuceneTestCase.VERBOSE) {
            System.out.println(
                "MockRandomCodec: fixed-gap terms index (divisor=" + state.termsIndexDivisor + ")");
          }
          indexReader =
              new FixedGapTermsIndexReader(
                  state.dir,
                  state.fieldInfos,
                  state.segmentInfo.name,
                  state.termsIndexDivisor,
                  BytesRef.getUTF8SortedAsUnicodeComparator(),
                  state.segmentSuffix,
                  state.context);
        } else {
          final int n2 = random.nextInt(3);
          // replay the extra random draws the writer made when picking its
          // index-term selection policy, so later draws stay in sync
          if (n2 == 1) {
            random.nextInt();
          } else if (n2 == 2) {
            random.nextLong();
          }
          if (LuceneTestCase.VERBOSE) {
            System.out.println(
                "MockRandomCodec: variable-gap terms index (divisor="
                    + state.termsIndexDivisor
                    + ")");
          }
          indexReader =
              new VariableGapTermsIndexReader(
                  state.dir,
                  state.fieldInfos,
                  state.segmentInfo.name,
                  state.termsIndexDivisor,
                  state.segmentSuffix,
                  state.context);
        }

        success = true;
      } finally {
        if (!success) {
          postingsReader.close();
        }
      }

      final int termsCacheSize = _TestUtil.nextInt(random, 1, 1024);

      success = false;
      try {
        fields =
            new BlockTermsReader(
                indexReader,
                state.dir,
                state.fieldInfos,
                state.segmentInfo.name,
                postingsReader,
                state.context,
                termsCacheSize,
                state.segmentSuffix);
        success = true;
      } finally {
        if (!success) {
          try {
            postingsReader.close();
          } finally {
            indexReader.close();
          }
        }
      }
    }

    return fields;
  }
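
The whole MockRandomCodec scheme hinges on one fact: the writer persists its random seed in the SEED_EXT file, so the reader's new Random(seed) replays exactly the coin flips the writer made and reconstructs the same postings format, terms dict, and index variant. Here is a self-contained demonstration of that replay idea in plain Java; no Lucene types are involved and the format choices are illustrative.

import java.util.Random;

// Two Randoms built from the same seed make identical decisions
// in the same order -- the invariant MockRandomCodec depends on.
class SeedReplayDemo {
  static String chooseFormat(Random random) {
    String postings = random.nextBoolean() ? "Sep" : "Standard";
    boolean pulsing = random.nextBoolean();
    return postings + (pulsing ? "+Pulsing" : "");
  }

  public static void main(String[] args) {
    long seed = new Random().nextLong(); // the writer would persist this

    String writerChoice = chooseFormat(new Random(seed)); // "write" side
    String readerChoice = chooseFormat(new Random(seed)); // "read" side

    System.out.println(
        writerChoice + " == " + readerChoice + " : " + writerChoice.equals(readerChoice));
  }
}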
  @Override
  public void checkIntegrity() throws IOException {
    postingsReader.checkIntegrity();
  }
  /** Sole constructor. */
  public BlockTreeTermsReader(PostingsReaderBase postingsReader, SegmentReadState state)
      throws IOException {
    boolean success = false;
    IndexInput indexIn = null;

    this.postingsReader = postingsReader;
    this.segment = state.segmentInfo.name;

    String termsName =
        IndexFileNames.segmentFileName(segment, state.segmentSuffix, TERMS_EXTENSION);
    try {
      termsIn = state.directory.openInput(termsName, state.context);
      version =
          CodecUtil.checkIndexHeader(
              termsIn,
              TERMS_CODEC_NAME,
              VERSION_START,
              VERSION_CURRENT,
              state.segmentInfo.getId(),
              state.segmentSuffix);

      String indexName =
          IndexFileNames.segmentFileName(segment, state.segmentSuffix, TERMS_INDEX_EXTENSION);
      indexIn = state.directory.openInput(indexName, state.context);
      CodecUtil.checkIndexHeader(
          indexIn,
          TERMS_INDEX_CODEC_NAME,
          version,
          version,
          state.segmentInfo.getId(),
          state.segmentSuffix);
      CodecUtil.checksumEntireFile(indexIn);

      // Have PostingsReader init itself
      postingsReader.init(termsIn, state);

      // NOTE: data file is too costly to verify checksum against all the bytes on open,
      // but for now we at least verify proper structure of the checksum footer: which looks
      // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
      // such as file truncation.
      CodecUtil.retrieveChecksum(termsIn);

      // Read per-field details
      seekDir(termsIn, dirOffset);
      seekDir(indexIn, indexDirOffset);

      final int numFields = termsIn.readVInt();
      if (numFields < 0) {
        throw new CorruptIndexException("invalid numFields: " + numFields, termsIn);
      }

      for (int i = 0; i < numFields; ++i) {
        final int field = termsIn.readVInt();
        final long numTerms = termsIn.readVLong();
        if (numTerms <= 0) {
          throw new CorruptIndexException("Illegal numTerms for field number: " + field, termsIn);
        }
        final int numBytes = termsIn.readVInt();
        if (numBytes < 0) {
          throw new CorruptIndexException(
              "invalid rootCode for field number: " + field + ", numBytes=" + numBytes, termsIn);
        }
        final BytesRef rootCode = new BytesRef(new byte[numBytes]);
        termsIn.readBytes(rootCode.bytes, 0, numBytes);
        rootCode.length = numBytes;
        final FieldInfo fieldInfo = state.fieldInfos.fieldInfo(field);
        if (fieldInfo == null) {
          throw new CorruptIndexException("invalid field number: " + field, termsIn);
        }
        final long sumTotalTermFreq =
            fieldInfo.getIndexOptions() == IndexOptions.DOCS ? -1 : termsIn.readVLong();
        final long sumDocFreq = termsIn.readVLong();
        final int docCount = termsIn.readVInt();
        final int longsSize = termsIn.readVInt();
        if (longsSize < 0) {
          throw new CorruptIndexException(
              "invalid longsSize for field: " + fieldInfo.name + ", longsSize=" + longsSize,
              termsIn);
        }
        BytesRef minTerm = readBytesRef(termsIn);
        BytesRef maxTerm = readBytesRef(termsIn);
        if (docCount < 0
            || docCount > state.segmentInfo.getDocCount()) { // #docs with field must be <= #docs
          throw new CorruptIndexException(
              "invalid docCount: " + docCount + " maxDoc: " + state.segmentInfo.getDocCount(),
              termsIn);
        }
        if (sumDocFreq < docCount) { // #postings must be >= #docs with field
          throw new CorruptIndexException(
              "invalid sumDocFreq: " + sumDocFreq + " docCount: " + docCount, termsIn);
        }
        if (sumTotalTermFreq != -1
            && sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings
          throw new CorruptIndexException(
              "invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq,
              termsIn);
        }
        final long indexStartFP = indexIn.readVLong();
        FieldReader previous =
            fields.put(
                fieldInfo.name,
                new FieldReader(
                    this,
                    fieldInfo,
                    numTerms,
                    rootCode,
                    sumTotalTermFreq,
                    sumDocFreq,
                    docCount,
                    indexStartFP,
                    longsSize,
                    indexIn,
                    minTerm,
                    maxTerm));
        if (previous != null) {
          throw new CorruptIndexException("duplicate field: " + fieldInfo.name, termsIn);
        }
      }

      indexIn.close();
      success = true;
    } finally {
      if (!success) {
        // this.close() will close termsIn:
        IOUtils.closeWhileHandlingException(indexIn, this);
      }
    }
  }
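
Note the asymmetry in the constructor above: the small terms-index file gets CodecUtil.checksumEntireFile at open, while the large terms file only gets CodecUtil.retrieveChecksum, which reads the footer to catch truncation cheaply and defers full verification to checkIntegrity(). The following plain-Java sketch shows the same trade-off with a CRC32 footer; the file layout is illustrative, not Lucene's actual format.

import java.io.*;
import java.nio.file.*;
import java.util.zip.CRC32;

// Illustrative CRC32-footer scheme: body bytes, then an 8-byte
// big-endian checksum of the body.
class ChecksumFooterDemo {
  static void write(Path path, byte[] body) throws IOException {
    CRC32 crc = new CRC32();
    crc.update(body);
    try (DataOutputStream out =
        new DataOutputStream(new BufferedOutputStream(Files.newOutputStream(path)))) {
      out.write(body);
      out.writeLong(crc.getValue());
    }
  }

  // Cheap open-time check: only confirms a well-formed footer exists,
  // catching truncation without reading the whole file.
  static long retrieveChecksum(Path path) throws IOException {
    long size = Files.size(path);
    if (size < Long.BYTES) {
      throw new IOException("truncated: no footer");
    }
    try (RandomAccessFile raf = new RandomAccessFile(path.toFile(), "r")) {
      raf.seek(size - Long.BYTES);
      return raf.readLong();
    }
  }

  // Full check: recompute the checksum over every body byte.
  static void checksumEntireFile(Path path) throws IOException {
    byte[] all = Files.readAllBytes(path);
    if (all.length < Long.BYTES) {
      throw new IOException("truncated: no footer");
    }
    CRC32 crc = new CRC32();
    crc.update(all, 0, all.length - Long.BYTES);
    long stored =
        new DataInputStream(new ByteArrayInputStream(all, all.length - Long.BYTES, Long.BYTES))
            .readLong();
    if (crc.getValue() != stored) {
      throw new IOException("checksum mismatch");
    }
  }
}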
Example #11
  public BlockTermsReader(
      TermsIndexReaderBase indexReader, PostingsReaderBase postingsReader, SegmentReadState state)
      throws IOException {

    this.postingsReader = postingsReader;

    String filename =
        IndexFileNames.segmentFileName(
            state.segmentInfo.name, state.segmentSuffix, BlockTermsWriter.TERMS_EXTENSION);
    in = state.directory.openInput(filename, state.context);

    boolean success = false;
    try {
      CodecUtil.checkIndexHeader(
          in,
          BlockTermsWriter.CODEC_NAME,
          BlockTermsWriter.VERSION_START,
          BlockTermsWriter.VERSION_CURRENT,
          state.segmentInfo.getId(),
          state.segmentSuffix);

      // Have PostingsReader init itself
      postingsReader.init(in, state);

      // NOTE: data file is too costly to verify checksum against all the bytes on open,
      // but for now we at least verify proper structure of the checksum footer: which looks
      // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
      // such as file truncation.
      CodecUtil.retrieveChecksum(in);

      // Read per-field details
      seekDir(in);

      final int numFields = in.readVInt();
      if (numFields < 0) {
        throw new CorruptIndexException("invalid number of fields: " + numFields, in);
      }
      for (int i = 0; i < numFields; i++) {
        final int field = in.readVInt();
        final long numTerms = in.readVLong();
        assert numTerms >= 0;
        final long termsStartPointer = in.readVLong();
        final FieldInfo fieldInfo = state.fieldInfos.fieldInfo(field);
        final long sumTotalTermFreq =
            fieldInfo.getIndexOptions() == IndexOptions.DOCS ? -1 : in.readVLong();
        final long sumDocFreq = in.readVLong();
        final int docCount = in.readVInt();
        final int longsSize = in.readVInt();
        if (docCount < 0
            || docCount > state.segmentInfo.maxDoc()) { // #docs with field must be <= #docs
          throw new CorruptIndexException(
              "invalid docCount: " + docCount + " maxDoc: " + state.segmentInfo.maxDoc(), in);
        }
        if (sumDocFreq < docCount) { // #postings must be >= #docs with field
          throw new CorruptIndexException(
              "invalid sumDocFreq: " + sumDocFreq + " docCount: " + docCount, in);
        }
        if (sumTotalTermFreq != -1
            && sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings
          throw new CorruptIndexException(
              "invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq, in);
        }
        FieldReader previous =
            fields.put(
                fieldInfo.name,
                new FieldReader(
                    fieldInfo,
                    numTerms,
                    termsStartPointer,
                    sumTotalTermFreq,
                    sumDocFreq,
                    docCount,
                    longsSize));
        if (previous != null) {
          throw new CorruptIndexException("duplicate fields: " + fieldInfo.name, in);
        }
      }
      success = true;
    } finally {
      if (!success) {
        in.close();
      }
    }

    this.indexReader = indexReader;
  }
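
Both constructors enforce the same chain of invariants on the per-field statistics: 0 <= docCount <= maxDoc, sumDocFreq >= docCount (each document with the field contributes at least one posting), and, when frequencies are tracked at all, sumTotalTermFreq >= sumDocFreq (each posting carries at least one term occurrence). Restated as a standalone check below; the names are illustrative, and a real reader throws CorruptIndexException with the input as context instead of IllegalStateException.

// Standalone restatement of the invariant checks both constructors perform.
final class FieldStatsValidator {
  static void validate(int docCount, int maxDoc, long sumDocFreq, long sumTotalTermFreq) {
    if (docCount < 0 || docCount > maxDoc) { // #docs with field must be <= #docs
      throw new IllegalStateException("invalid docCount: " + docCount + " maxDoc: " + maxDoc);
    }
    if (sumDocFreq < docCount) { // #postings must be >= #docs with field
      throw new IllegalStateException(
          "invalid sumDocFreq: " + sumDocFreq + " docCount: " + docCount);
    }
    // -1 means frequencies are not tracked (IndexOptions.DOCS)
    if (sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq) { // #positions >= #postings
      throw new IllegalStateException(
          "invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq);
    }
  }
}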