public BloomFilteredFieldsProducer(SegmentReadState state) throws IOException {
      String bloomFileName =
          IndexFileNames.segmentFileName(
              state.segmentInfo.name, state.segmentSuffix, BLOOM_EXTENSION);
      IndexInput bloomIn = null;
      boolean success = false;
      try {
        bloomIn = state.directory.openInput(bloomFileName, state.context);
        CodecUtil.checkHeader(bloomIn, BLOOM_CODEC_NAME, BLOOM_CODEC_VERSION, BLOOM_CODEC_VERSION);
        // // Load the hash function used in the BloomFilter
        // hashFunction = HashFunction.forName(bloomIn.readString());
        // Load the delegate postings format
        PostingsFormat delegatePostingsFormat = PostingsFormat.forName(bloomIn.readString());

        this.delegateFieldsProducer = delegatePostingsFormat.fieldsProducer(state);
        int numBlooms = bloomIn.readInt();
        for (int i = 0; i < numBlooms; i++) {
          int fieldNum = bloomIn.readInt();
          FuzzySet bloom = FuzzySet.deserialize(bloomIn);
          FieldInfo fieldInfo = state.fieldInfos.fieldInfo(fieldNum);
          bloomsByFieldName.put(fieldInfo.name, bloom);
        }
        IOUtils.close(bloomIn);
        success = true;
      } finally {
        if (!success) {
          IOUtils.closeWhileHandlingException(bloomIn, delegateFieldsProducer);
        }
      }
    }
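
The pattern above recurs in every example on this page: open the input, validate the header with CodecUtil.checkHeader (here pinned to exactly BLOOM_CODEC_VERSION by passing it as both the minimum and maximum), read the payload, then roll back via IOUtils.closeWhileHandlingException if anything fails. As a reference point, here is a minimal self-contained round trip of the header/footer APIs, assuming a Lucene 8.x-era classpath; the file name "demo.bin", the codec name "DemoCodec", and the version bounds are illustrative values, not Lucene constants.

import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;

public class HeaderRoundTrip {
  public static void main(String[] args) throws Exception {
    try (Directory dir = new ByteBuffersDirectory()) {
      try (IndexOutput out = dir.createOutput("demo.bin", IOContext.DEFAULT)) {
        CodecUtil.writeHeader(out, "DemoCodec", 1); // magic + codec name + version
        out.writeString("Lucene84");                // e.g. a delegate postings format name
        CodecUtil.writeFooter(out);                 // checksum footer
      }
      try (ChecksumIndexInput in = dir.openChecksumInput("demo.bin", IOContext.DEFAULT)) {
        // Throws CorruptIndexException on a magic/name mismatch and
        // IndexFormatTooOld/NewException when the version is out of range.
        int version = CodecUtil.checkHeader(in, "DemoCodec", 1, 1);
        String delegate = in.readString();
        CodecUtil.checkFooter(in); // verifies the checksum accumulated while reading
        System.out.println("version=" + version + " delegate=" + delegate);
      }
    }
  }
}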
 private int readHeader(IndexInput in) throws IOException {
   return CodecUtil.checkHeader(
       in,
       FSTTermsWriter.TERMS_CODEC_NAME,
       FSTTermsWriter.TERMS_VERSION_START,
       FSTTermsWriter.TERMS_VERSION_CURRENT);
 }
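
Note that checkHeader returns the version it actually found within the accepted [min, max] range, which is why helpers like readHeader simply pass it through: callers can then branch on the format revision, as the next example does after checking against SUGGEST_CODEC_VERSION and SUGGEST_VERSION_CURRENT.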
 public CompletionFieldsProducer(SegmentReadState state) throws IOException {
   String suggestFSTFile =
       IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, EXTENSION);
   IndexInput input = state.directory.openInput(suggestFSTFile, state.context);
   if (state.segmentInfo.getVersion().onOrAfter(Version.LUCENE_6_2_0)) {
     // Lucene 6.2.0+ requires all index files to use index header, but prior to that we used an
     // ordinary codec header:
     version =
         CodecUtil.checkIndexHeader(
             input,
             CODEC_NAME,
             SUGGEST_CODEC_VERSION,
             SUGGEST_VERSION_CURRENT,
             state.segmentInfo.getId(),
             state.segmentSuffix);
   } else {
     version =
         CodecUtil.checkHeader(
             input, CODEC_NAME, SUGGEST_CODEC_VERSION, SUGGEST_VERSION_CURRENT);
   }
   FieldsProducer delegateProducer = null;
   boolean success = false;
   try {
     PostingsFormat delegatePostingsFormat = PostingsFormat.forName(input.readString());
     String providerName = input.readString();
     CompletionLookupProvider completionLookupProvider = providers.get(providerName);
     if (completionLookupProvider == null) {
       throw new IllegalStateException(
           "no provider with name [" + providerName + "] registered");
     }
      // TODO: we could clone the ReadState and make it always forward IOContext.MERGE to prevent
      // unnecessary heap usage?
     delegateProducer = delegatePostingsFormat.fieldsProducer(state);
     /*
      * If we are merging we don't load the FSTs at all such that we
      * don't consume so much memory during merge
      */
     if (state.context.context != Context.MERGE) {
        // TODO: maybe we can do this in a fully lazy fashion based on some configuration.
        // Eventually we should have some kind of circuit breaker that prevents us from
        // going OOM here.
       this.lookupFactory = completionLookupProvider.load(input);
     } else {
       this.lookupFactory = null;
     }
     this.delegateProducer = delegateProducer;
     success = true;
   } finally {
     if (!success) {
       IOUtils.closeWhileHandlingException(delegateProducer, input);
     } else {
       IOUtils.close(input);
     }
   }
 }
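
This reader branches between CodecUtil.checkIndexHeader, which, on top of the codec header, verifies the 16-byte segment ID and the segment suffix, and the plain codec header that pre-6.2.0 files carry. A minimal sketch of the index-header round trip, again with made-up names ("DemoCompletion", "demo.cmp") and version bounds:

import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.StringHelper;

public class IndexHeaderRoundTrip {
  public static void main(String[] args) throws Exception {
    byte[] segmentId = StringHelper.randomId(); // 16-byte id, normally taken from SegmentInfo
    String suffix = "";                         // segment suffix, often empty
    try (Directory dir = new ByteBuffersDirectory()) {
      try (IndexOutput out = dir.createOutput("demo.cmp", IOContext.DEFAULT)) {
        // An index header is a codec header plus the segment id and suffix,
        // tying the file to the exact segment that owns it.
        CodecUtil.writeIndexHeader(out, "DemoCompletion", 1, segmentId, suffix);
        CodecUtil.writeFooter(out);
      }
      try (ChecksumIndexInput in = dir.openChecksumInput("demo.cmp", IOContext.DEFAULT)) {
        // Also fails if the id or suffix doesn't match, catching misplaced or renamed files.
        int version = CodecUtil.checkIndexHeader(in, "DemoCompletion", 1, 1, segmentId, suffix);
        CodecUtil.checkFooter(in);
        System.out.println("version=" + version);
      }
    }
  }
}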
 @Override
 public void init(IndexInput termsIn) throws IOException {
   // Make sure we are talking to the matching past writer
   CodecUtil.checkHeader(
       termsIn,
       SepPostingsWriter.CODEC,
       SepPostingsWriter.VERSION_START,
       SepPostingsWriter.VERSION_START);
   skipInterval = termsIn.readInt();
   maxSkipLevels = termsIn.readInt();
   skipMinimum = termsIn.readInt();
 }
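
Passing SepPostingsWriter.VERSION_START as both bounds pins this reader to a single format revision. The sketch below spells out the three failure modes of such a check; "DemoCodec" and the [1, 1] bounds are illustrative:

import java.io.IOException;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexFormatTooNewException;
import org.apache.lucene.index.IndexFormatTooOldException;
import org.apache.lucene.store.DataInput;

class StrictHeaderCheck {
  static int check(DataInput in) throws IOException {
    try {
      return CodecUtil.checkHeader(in, "DemoCodec", 1, 1); // min == max: one accepted revision
    } catch (IndexFormatTooOldException | IndexFormatTooNewException e) {
      throw e; // valid header, but written by an older/newer release than [1, 1] allows
    } catch (CorruptIndexException e) {
      throw e; // wrong magic number or codec name: not this codec's file at all
    }
  }
}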
  @Override
  public FieldInfos read(
      Directory directory, SegmentInfo segmentInfo, String segmentSuffix, IOContext context)
      throws IOException {
    final String fileName =
        IndexFileNames.segmentFileName(
            segmentInfo.name, segmentSuffix, Lucene46FieldInfosFormat.EXTENSION);
    try (ChecksumIndexInput input = directory.openChecksumInput(fileName, context)) {
      int codecVersion =
          CodecUtil.checkHeader(
              input,
              Lucene46FieldInfosFormat.CODEC_NAME,
              Lucene46FieldInfosFormat.FORMAT_START,
              Lucene46FieldInfosFormat.FORMAT_CURRENT);

      final int size = input.readVInt(); // read in the size
       FieldInfo[] infos = new FieldInfo[size];

      for (int i = 0; i < size; i++) {
        String name = input.readString();
        final int fieldNumber = input.readVInt();
        if (fieldNumber < 0) {
          throw new CorruptIndexException(
              "invalid field number for field: " + name + ", fieldNumber=" + fieldNumber, input);
        }
        byte bits = input.readByte();
        boolean isIndexed = (bits & Lucene46FieldInfosFormat.IS_INDEXED) != 0;
        boolean storeTermVector = (bits & Lucene46FieldInfosFormat.STORE_TERMVECTOR) != 0;
        boolean omitNorms = (bits & Lucene46FieldInfosFormat.OMIT_NORMS) != 0;
        boolean storePayloads = (bits & Lucene46FieldInfosFormat.STORE_PAYLOADS) != 0;
        final IndexOptions indexOptions;
        if (!isIndexed) {
          indexOptions = IndexOptions.NONE;
        } else if ((bits & Lucene46FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS) != 0) {
          indexOptions = IndexOptions.DOCS;
        } else if ((bits & Lucene46FieldInfosFormat.OMIT_POSITIONS) != 0) {
          indexOptions = IndexOptions.DOCS_AND_FREQS;
        } else if ((bits & Lucene46FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS) != 0) {
          indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
        } else {
          indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
        }

        // DV Types are packed in one byte
        byte val = input.readByte();
        final DocValuesType docValuesType = getDocValuesType(input, (byte) (val & 0x0F));
        final DocValuesType normsType = getDocValuesType(input, (byte) ((val >>> 4) & 0x0F));
        final long dvGen = input.readLong();
        final Map<String, String> attributes = input.readStringStringMap();

        if (isIndexed && omitNorms == false && normsType == DocValuesType.NONE) {
          // Undead norms!  Lucene42NormsProducer will check this and bring norms back from the
          // dead:
          UndeadNormsProducer.setUndead(attributes);
        }

        infos[i] =
            new FieldInfo(
                name,
                fieldNumber,
                storeTermVector,
                omitNorms,
                storePayloads,
                indexOptions,
                docValuesType,
                dvGen,
                Collections.unmodifiableMap(attributes));
        infos[i].checkConsistency();
      }

      if (codecVersion >= Lucene46FieldInfosFormat.FORMAT_CHECKSUM) {
        CodecUtil.checkFooter(input);
      } else {
        CodecUtil.checkEOF(input);
      }
      return new FieldInfos(infos);
    }
  }
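
The flags byte and the doc-values byte above are plain bit twiddling: two 4-bit type codes share one byte. Here is a standalone sketch of the packing that the reader's (val & 0x0F) and ((val >>> 4) & 0x0F) undo; the codes 2 and 1 are arbitrary stand-ins for the real Lucene46FieldInfosFormat constants:

public class NibblePacking {
  public static void main(String[] args) {
    int dv = 2, norms = 1;                      // illustrative 4-bit type codes
    byte packed = (byte) ((norms << 4) | dv);   // high nibble = norms type, low nibble = DV type
    int dvOut = packed & 0x0F;                  // -> 2, matches (val & 0x0F) in the reader
    int normsOut = (packed >>> 4) & 0x0F;       // -> 1, matches ((val >>> 4) & 0x0F)
    System.out.println(dvOut + " " + normsOut);
  }
}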
  Lucene42DocValuesProducer(
      SegmentReadState state,
      String dataCodec,
      String dataExtension,
      String metaCodec,
      String metaExtension)
      throws IOException {
    maxDoc = state.segmentInfo.maxDoc();
    merging = false;
    String metaName =
        IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
    // read in the entries from the metadata file.
    ChecksumIndexInput in = state.directory.openChecksumInput(metaName, state.context);
    boolean success = false;
    ramBytesUsed = new AtomicLong(RamUsageEstimator.shallowSizeOfInstance(getClass()));
    try {
      version = CodecUtil.checkHeader(in, metaCodec, VERSION_START, VERSION_CURRENT);
      numerics = new HashMap<>();
      binaries = new HashMap<>();
      fsts = new HashMap<>();
      numEntries = readFields(in, state.fieldInfos);

      if (version >= VERSION_CHECKSUM) {
        CodecUtil.checkFooter(in);
      } else {
        CodecUtil.checkEOF(in);
      }

      success = true;
    } finally {
      if (success) {
        IOUtils.close(in);
      } else {
        IOUtils.closeWhileHandlingException(in);
      }
    }

    String dataName =
        IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension);
    this.data = state.directory.openInput(dataName, state.context);
    success = false;
    try {
      final int version2 = CodecUtil.checkHeader(data, dataCodec, VERSION_START, VERSION_CURRENT);
      if (version != version2) {
        throw new CorruptIndexException(
            "Format versions mismatch: meta=" + version + ", data=" + version2, data);
      }

      if (version >= VERSION_CHECKSUM) {
        // NOTE: the data file is too costly to verify checksum against all bytes on open,
        // but for now we at least verify proper structure of the checksum footer, which looks
        // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
        // such as file truncation.
        CodecUtil.retrieveChecksum(data);
      }

      success = true;
    } finally {
      if (!success) {
        IOUtils.closeWhileHandlingException(this.data);
      }
    }
  }
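
Both constructors in this last example lean on the same rollback idiom: flip success only after every read has succeeded, and close with IOUtils.closeWhileHandlingException on the failure path so a secondary close failure cannot mask the original exception. Distilled into a sketch with a hypothetical file name:

import java.io.IOException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.IOUtils;

class OpenWithRollback {
  final IndexInput data;

  OpenWithRollback(Directory dir) throws IOException {
    IndexInput in = dir.openInput("demo.dat", IOContext.DEFAULT); // hypothetical file
    boolean success = false;
    try {
      // ... validate the header and read whatever metadata is needed ...
      success = true;
    } finally {
      if (!success) {
        // Close quietly so the exception already in flight propagates unchanged.
        IOUtils.closeWhileHandlingException(in);
      }
    }
    this.data = in; // kept open for the producer's lifetime
  }
}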