Example #1
Lucene50NormsConsumer(
     SegmentWriteState state,
     String dataCodec,
     String dataExtension,
     String metaCodec,
     String metaExtension)
     throws IOException {
   boolean success = false;
   try {
     String dataName =
         IndexFileNames.segmentFileName(
             state.segmentInfo.name, state.segmentSuffix, dataExtension);
     data = state.directory.createOutput(dataName, state.context);
     CodecUtil.writeIndexHeader(
         data, dataCodec, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
     String metaName =
         IndexFileNames.segmentFileName(
             state.segmentInfo.name, state.segmentSuffix, metaExtension);
     meta = state.directory.createOutput(metaName, state.context);
     CodecUtil.writeIndexHeader(
         meta, metaCodec, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
     success = true;
   } finally {
     if (!success) {
       IOUtils.closeWhileHandlingException(this);
     }
   }
 }
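For reference, a minimal sketch of the matching read side, assuming the same codec and extension constants; VERSION_START/VERSION_CURRENT stand in for the producer's actual version bounds and checkNormsHeader is a hypothetical helper, not Lucene's API:

  static int checkNormsHeader(SegmentReadState state, String dataCodec, String dataExtension)
      throws IOException {
    String dataName =
        IndexFileNames.segmentFileName(
            state.segmentInfo.name, state.segmentSuffix, dataExtension);
    try (IndexInput in = state.directory.openInput(dataName, state.context)) {
      // checkIndexHeader verifies the codec name, version range, segment id, and
      // suffix written by writeIndexHeader above; it throws CorruptIndexException
      // (or IndexFormatTooOld/NewException) on any mismatch.
      return CodecUtil.checkIndexHeader(
          in, dataCodec, VERSION_START, VERSION_CURRENT,
          state.segmentInfo.getId(), state.segmentSuffix);
    }
  }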
Example #2
  public CompletionFieldsProducer(SegmentReadState state) throws IOException {
   String suggestFSTFile =
       IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, EXTENSION);
   IndexInput input = state.directory.openInput(suggestFSTFile, state.context);
   if (state.segmentInfo.getVersion().onOrAfter(Version.LUCENE_6_2_0)) {
     // Lucene 6.2.0+ requires all index files to use index header, but prior to that we used an
     // ordinary codec header:
     version =
         CodecUtil.checkIndexHeader(
             input,
             CODEC_NAME,
             SUGGEST_CODEC_VERSION,
             SUGGEST_VERSION_CURRENT,
             state.segmentInfo.getId(),
             state.segmentSuffix);
   } else {
     version =
         CodecUtil.checkHeader(
             input, CODEC_NAME, SUGGEST_CODEC_VERSION, SUGGEST_VERSION_CURRENT);
   }
   FieldsProducer delegateProducer = null;
   boolean success = false;
   try {
     PostingsFormat delegatePostingsFormat = PostingsFormat.forName(input.readString());
     String providerName = input.readString();
     CompletionLookupProvider completionLookupProvider = providers.get(providerName);
     if (completionLookupProvider == null) {
       throw new IllegalStateException(
           "no provider with name [" + providerName + "] registered");
     }
      // TODO: we could clone the ReadState and make it always forward IOContext.MERGE to prevent
      // unnecessary heap usage?
     delegateProducer = delegatePostingsFormat.fieldsProducer(state);
     /*
      * If we are merging we don't load the FSTs at all such that we
      * don't consume so much memory during merge
      */
     if (state.context.context != Context.MERGE) {
        // TODO: maybe we can do this in a fully lazy fashion based on some configuration;
        // eventually we should have some kind of circuit breaker that prevents us from
        // going OOM here
       this.lookupFactory = completionLookupProvider.load(input);
     } else {
       this.lookupFactory = null;
     }
     this.delegateProducer = delegateProducer;
     success = true;
   } finally {
     if (!success) {
       IOUtils.closeWhileHandlingException(delegateProducer, input);
     } else {
       IOUtils.close(input);
     }
   }
 }
Example #3
   @Override
  public void write(
      Directory directory,
      SegmentInfo segmentInfo,
      String segmentSuffix,
      FieldInfos infos,
      IOContext context)
      throws IOException {
    final String fileName =
        IndexFileNames.segmentFileName(
            segmentInfo.name, segmentSuffix, Lucene46FieldInfosFormat.EXTENSION);
    try (IndexOutput output = directory.createOutput(fileName, context)) {
      CodecUtil.writeHeader(
          output, Lucene46FieldInfosFormat.CODEC_NAME, Lucene46FieldInfosFormat.FORMAT_CURRENT);
      output.writeVInt(infos.size());
      for (FieldInfo fi : infos) {
        IndexOptions indexOptions = fi.getIndexOptions();
        byte bits = 0x0;
        if (fi.hasVectors()) bits |= Lucene46FieldInfosFormat.STORE_TERMVECTOR;
        if (fi.omitsNorms()) bits |= Lucene46FieldInfosFormat.OMIT_NORMS;
        if (fi.hasPayloads()) bits |= Lucene46FieldInfosFormat.STORE_PAYLOADS;
        if (fi.getIndexOptions() != IndexOptions.NONE) {
          bits |= Lucene46FieldInfosFormat.IS_INDEXED;
          assert indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0
              || !fi.hasPayloads();
          if (indexOptions == IndexOptions.DOCS) {
            bits |= Lucene46FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS;
          } else if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) {
            bits |= Lucene46FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS;
          } else if (indexOptions == IndexOptions.DOCS_AND_FREQS) {
            bits |= Lucene46FieldInfosFormat.OMIT_POSITIONS;
          }
        }
        output.writeString(fi.name);
        output.writeVInt(fi.number);
        output.writeByte(bits);

        // pack the DV types in one byte
        final byte dv = docValuesByte(fi.getDocValuesType());
        final byte nrm = docValuesByte(fi.hasNorms() ? DocValuesType.NUMERIC : DocValuesType.NONE);
        assert (dv & (~0xF)) == 0 && (nrm & (~0x0F)) == 0;
        byte val = (byte) (0xff & ((nrm << 4) | dv));
        output.writeByte(val);
        output.writeLong(fi.getDocValuesGen());
        output.writeStringStringMap(fi.attributes());
      }
      CodecUtil.writeFooter(output);
    }
  }
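The write side above packs two 4-bit type codes into one byte: the doc-values type in the low nibble and the norms type in the high nibble. A hedged sketch of the round trip (nibblePackingRoundTrip is a hypothetical helper; the unpacking matches the read side shown later in Lucene46FieldInfosFormat.read):

  static void nibblePackingRoundTrip() {
    byte dv = 0x2, nrm = 0x1;                        // example type codes, each in 0..15
    byte packed = (byte) (0xff & ((nrm << 4) | dv)); // same packing as the writer above
    assert (packed & 0x0F) == dv;                    // read side: val & 0x0F
    assert ((packed >>> 4) & 0x0F) == nrm;           // read side: (val >>> 4) & 0x0F
  }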
Example #4
  /** Full constructor */
 public Lucene60PointsWriter(
     SegmentWriteState writeState, int maxPointsInLeafNode, double maxMBSortInHeap)
     throws IOException {
   assert writeState.fieldInfos.hasPointValues();
   this.writeState = writeState;
   this.maxPointsInLeafNode = maxPointsInLeafNode;
   this.maxMBSortInHeap = maxMBSortInHeap;
   String dataFileName =
       IndexFileNames.segmentFileName(
           writeState.segmentInfo.name,
           writeState.segmentSuffix,
           Lucene60PointsFormat.DATA_EXTENSION);
   dataOut = writeState.directory.createOutput(dataFileName, writeState.context);
   boolean success = false;
   try {
     CodecUtil.writeIndexHeader(
         dataOut,
         Lucene60PointsFormat.DATA_CODEC_NAME,
         Lucene60PointsFormat.DATA_VERSION_CURRENT,
         writeState.segmentInfo.getId(),
         writeState.segmentSuffix);
     success = true;
   } finally {
     if (success == false) {
       IOUtils.closeWhileHandlingException(dataOut);
     }
   }
 }
Example #5
  @Test
  public void testCleanUpWithLegacyChecksums() throws IOException {
    Map<String, StoreFileMetaData> metaDataMap = new HashMap<>();
    metaDataMap.put(
        "segments_1",
        new StoreFileMetaData("segments_1", 50, null, null, new BytesRef(new byte[] {1})));
    metaDataMap.put(
        "_0_1.del", new StoreFileMetaData("_0_1.del", 42, "foobarbaz", null, new BytesRef()));
    Store.MetadataSnapshot snapshot = new Store.MetadataSnapshot(metaDataMap);

    final ShardId shardId = new ShardId(new Index("index"), 1);
    DirectoryService directoryService = new LuceneManagedDirectoryService(random());
    Store store =
        new Store(
            shardId,
            ImmutableSettings.EMPTY,
            directoryService,
            randomDistributor(directoryService),
            new DummyShardLock(shardId));
    for (String file : metaDataMap.keySet()) {
      try (IndexOutput output = store.directory().createOutput(file, IOContext.DEFAULT)) {
        BytesRef bytesRef = new BytesRef(TestUtil.randomRealisticUnicodeString(random(), 10, 1024));
        output.writeBytes(bytesRef.bytes, bytesRef.offset, bytesRef.length);
        CodecUtil.writeFooter(output);
      }
    }

    store.verifyAfterCleanup(snapshot, snapshot);
    store.deleteContent();
    IOUtils.close(store);
  }
Example #6
 private int readHeader(IndexInput in) throws IOException {
   return CodecUtil.checkHeader(
       in,
       FSTTermsWriter.TERMS_CODEC_NAME,
       FSTTermsWriter.TERMS_VERSION_START,
       FSTTermsWriter.TERMS_VERSION_CURRENT);
 }
Example #7
    public BloomFilteredFieldsProducer(SegmentReadState state) throws IOException {

      String bloomFileName =
          IndexFileNames.segmentFileName(
              state.segmentInfo.name, state.segmentSuffix, BLOOM_EXTENSION);
      IndexInput bloomIn = null;
      boolean success = false;
      try {
        bloomIn = state.directory.openInput(bloomFileName, state.context);
        CodecUtil.checkHeader(bloomIn, BLOOM_CODEC_NAME, BLOOM_CODEC_VERSION, BLOOM_CODEC_VERSION);
        // // Load the hash function used in the BloomFilter
        // hashFunction = HashFunction.forName(bloomIn.readString());
        // Load the delegate postings format
        PostingsFormat delegatePostingsFormat = PostingsFormat.forName(bloomIn.readString());

        this.delegateFieldsProducer = delegatePostingsFormat.fieldsProducer(state);
        int numBlooms = bloomIn.readInt();
        for (int i = 0; i < numBlooms; i++) {
          int fieldNum = bloomIn.readInt();
          FuzzySet bloom = FuzzySet.deserialize(bloomIn);
          FieldInfo fieldInfo = state.fieldInfos.fieldInfo(fieldNum);
          bloomsByFieldName.put(fieldInfo.name, bloom);
        }
        IOUtils.close(bloomIn);
        success = true;
      } finally {
        if (!success) {
          IOUtils.closeWhileHandlingException(bloomIn, delegateFieldsProducer);
        }
      }
    }
Example #8
  public CompletionFieldsConsumer(SegmentWriteState state) throws IOException {
   this.delegatesFieldsConsumer = delegatePostingsFormat.fieldsConsumer(state);
   String suggestFSTFile =
       IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, EXTENSION);
   IndexOutput output = null;
   boolean success = false;
   try {
     output = state.directory.createOutput(suggestFSTFile, state.context);
     CodecUtil.writeIndexHeader(
         output,
         CODEC_NAME,
         SUGGEST_VERSION_CURRENT,
         state.segmentInfo.getId(),
         state.segmentSuffix);
     /*
      * we write the delegate postings format name so we can load it
      * without getting an instance in the ctor
      */
     output.writeString(delegatePostingsFormat.getName());
     output.writeString(writeProvider.getName());
     this.suggestFieldsConsumer = writeProvider.consumer(output);
     success = true;
   } finally {
     if (!success) {
       IOUtils.closeWhileHandlingException(output);
     }
   }
 }
Example #9
  @Override
  public void checkIntegrity() throws IOException {
    // verify terms
    CodecUtil.checksumEntireFile(in);

    // verify postings
    postingsReader.checkIntegrity();
  }
Example #10
   @Override
  public void checkIntegrity() throws IOException {
    // term dictionary
    CodecUtil.checksumEntireFile(termsIn);

    // postings
    postingsReader.checkIntegrity();
  }
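Both overrides delegate whole-file verification to CodecUtil.checksumEntireFile. A hedged sketch of invoking the same check on a standalone file (verifyWholeFile is a hypothetical helper):

  static long verifyWholeFile(Directory dir, String fileName) throws IOException {
    try (IndexInput in = dir.openInput(fileName, IOContext.READONCE)) {
      // Reads every byte and compares the computed checksum against the footer
      // written by CodecUtil.writeFooter; throws CorruptIndexException on mismatch.
      return CodecUtil.checksumEntireFile(in);
    }
  }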
Example #11
 private void seekDir(IndexInput in) throws IOException {
   if (version >= FSTTermsWriter.TERMS_VERSION_CHECKSUM) {
     in.seek(in.length() - CodecUtil.footerLength() - 8);
   } else {
     in.seek(in.length() - 8);
   }
   in.seek(in.readLong());
 }
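seekDir works because, for checksummed versions, the writer stores the directory offset as a fixed 8-byte long immediately before the footer, so it lives at length() - footerLength() - 8. A hedged sketch of that writer-side layout (writeDirPointerAndFooter is a hypothetical helper):

  static void writeDirPointerAndFooter(IndexOutput out, long dirStart) throws IOException {
    out.writeLong(dirStart);    // the fixed 8-byte pointer seekDir() reads back
    CodecUtil.writeFooter(out); // the footer (CodecUtil.footerLength() bytes) goes last
  }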
Example #12
  @Override
 public void init(IndexInput termsIn, SegmentReadState state) throws IOException {
   // Make sure we are talking to the matching postings writer
   CodecUtil.checkIndexHeader(
       termsIn,
       IDVersionPostingsWriter.TERMS_CODEC,
       IDVersionPostingsWriter.VERSION_START,
       IDVersionPostingsWriter.VERSION_CURRENT,
       state.segmentInfo.getId(),
       state.segmentSuffix);
 }
Example #13
 @Test
 public void testVerifyingIndexOutput() throws IOException {
   Directory dir = newDirectory();
   IndexOutput output = dir.createOutput("foo.bar", IOContext.DEFAULT);
   int iters = scaledRandomIntBetween(10, 100);
   for (int i = 0; i < iters; i++) {
     BytesRef bytesRef = new BytesRef(TestUtil.randomRealisticUnicodeString(random(), 10, 1024));
     output.writeBytes(bytesRef.bytes, bytesRef.offset, bytesRef.length);
   }
   CodecUtil.writeFooter(output);
   output.close();
   IndexInput indexInput = dir.openInput("foo.bar", IOContext.DEFAULT);
   String checksum = Store.digestToString(CodecUtil.retrieveChecksum(indexInput));
   indexInput.seek(0);
   BytesRef ref = new BytesRef(scaledRandomIntBetween(1, 1024));
   long length = indexInput.length();
   IndexOutput verifyingOutput =
       new Store.LuceneVerifyingIndexOutput(
           new StoreFileMetaData("foo1.bar", length, checksum),
           dir.createOutput("foo1.bar", IOContext.DEFAULT));
   while (length > 0) {
     if (random().nextInt(10) == 0) {
       verifyingOutput.writeByte(indexInput.readByte());
       length--;
     } else {
       int min = (int) Math.min(length, ref.bytes.length);
       indexInput.readBytes(ref.bytes, ref.offset, min);
       verifyingOutput.writeBytes(ref.bytes, ref.offset, min);
       length -= min;
     }
   }
   Store.verify(verifyingOutput);
   verifyingOutput.writeByte((byte) 0x0);
   try {
     Store.verify(verifyingOutput);
     fail("should be a corrupted index");
   } catch (CorruptIndexException | IndexFormatTooOldException | IndexFormatTooNewException ex) {
     // ok
   }
   IOUtils.close(indexInput, verifyingOutput, dir);
 }
Example #14
  @Override
 public void close() throws IOException {
   boolean success = false;
   try {
     if (meta != null) {
       meta.writeVInt(-1); // write EOF marker
       CodecUtil.writeFooter(meta); // write checksum
     }
     if (data != null) {
       CodecUtil.writeFooter(data); // write checksum
     }
     success = true;
   } finally {
     if (success) {
       IOUtils.close(data, meta);
     } else {
       IOUtils.closeWhileHandlingException(data, meta);
     }
     meta = data = null;
   }
 }
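The -1 vint works as an EOF marker because real field numbers are never negative. A hedged sketch of the consumer-side loop such a marker implies (skipMetaEntries is a hypothetical helper, not the actual norms producer):

  static void skipMetaEntries(IndexInput meta) throws IOException {
    for (int fieldNumber = meta.readVInt(); fieldNumber != -1; fieldNumber = meta.readVInt()) {
      // ... read this field's norms metadata here ...
    }
  }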
Example #15
  @Override
 public void init(IndexInput termsIn) throws IOException {
    // Make sure we are talking to the matching postings writer
   CodecUtil.checkHeader(
       termsIn,
       SepPostingsWriter.CODEC,
       SepPostingsWriter.VERSION_START,
       SepPostingsWriter.VERSION_START);
   skipInterval = termsIn.readInt();
   maxSkipLevels = termsIn.readInt();
   skipMinimum = termsIn.readInt();
 }
Example #16
  @Test
  public void testVerifyingIndexInput() throws IOException {
    Directory dir = newDirectory();
    IndexOutput output = dir.createOutput("foo.bar", IOContext.DEFAULT);
    int iters = scaledRandomIntBetween(10, 100);
    for (int i = 0; i < iters; i++) {
      BytesRef bytesRef = new BytesRef(TestUtil.randomRealisticUnicodeString(random(), 10, 1024));
      output.writeBytes(bytesRef.bytes, bytesRef.offset, bytesRef.length);
    }
    CodecUtil.writeFooter(output);
    output.close();

    // Check file
    IndexInput indexInput = dir.openInput("foo.bar", IOContext.DEFAULT);
    long checksum = CodecUtil.retrieveChecksum(indexInput);
    indexInput.seek(0);
    IndexInput verifyingIndexInput =
        new Store.VerifyingIndexInput(dir.openInput("foo.bar", IOContext.DEFAULT));
    readIndexInputFullyWithRandomSeeks(verifyingIndexInput);
    Store.verify(verifyingIndexInput);
    assertThat(checksum, equalTo(((ChecksumIndexInput) verifyingIndexInput).getChecksum()));
    IOUtils.close(indexInput, verifyingIndexInput);

    // Corrupt file and check again
    corruptFile(dir, "foo.bar", "foo1.bar");
    verifyingIndexInput =
        new Store.VerifyingIndexInput(dir.openInput("foo1.bar", IOContext.DEFAULT));
    readIndexInputFullyWithRandomSeeks(verifyingIndexInput);
    try {
      Store.verify(verifyingIndexInput);
      fail("should be a corrupted index");
    } catch (CorruptIndexException | IndexFormatTooOldException | IndexFormatTooNewException ex) {
      // ok
    }
    IOUtils.close(verifyingIndexInput);
    IOUtils.close(dir);
  }
Example #17
   @Override
  public void finish() throws IOException {
    if (finished) {
      throw new IllegalStateException("already finished");
    }
    finished = true;
    CodecUtil.writeFooter(dataOut);

    String indexFileName =
        IndexFileNames.segmentFileName(
            writeState.segmentInfo.name,
            writeState.segmentSuffix,
            Lucene60PointsFormat.INDEX_EXTENSION);
    // Write index file
    try (IndexOutput indexOut =
        writeState.directory.createOutput(indexFileName, writeState.context)) {
      CodecUtil.writeIndexHeader(
          indexOut,
          Lucene60PointsFormat.META_CODEC_NAME,
          Lucene60PointsFormat.INDEX_VERSION_CURRENT,
          writeState.segmentInfo.getId(),
          writeState.segmentSuffix);
      int count = indexFPs.size();
      indexOut.writeVInt(count);
      for (Map.Entry<String, Long> ent : indexFPs.entrySet()) {
        FieldInfo fieldInfo = writeState.fieldInfos.fieldInfo(ent.getKey());
        if (fieldInfo == null) {
          throw new IllegalStateException(
              "wrote field=\"" + ent.getKey() + "\" but that field doesn't exist in FieldInfos");
        }
        indexOut.writeVInt(fieldInfo.number);
        indexOut.writeVLong(ent.getValue());
      }
      CodecUtil.writeFooter(indexOut);
    }
  }
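The index file written above is just a count followed by (field number, file pointer) pairs. A hedged sketch of the matching reader loop (readPointIndex is a hypothetical helper, assuming java.util.Map/HashMap):

  static Map<Integer, Long> readPointIndex(IndexInput indexIn) throws IOException {
    int count = indexIn.readVInt();
    Map<Integer, Long> fps = new HashMap<>(count);
    for (int i = 0; i < count; i++) {
      fps.put(indexIn.readVInt(), indexIn.readVLong()); // field number -> data file pointer
    }
    return fps;
  }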
Example #18
  public FSTTermsReader(SegmentReadState state, PostingsReaderBase postingsReader)
      throws IOException {
    final String termsFileName =
        IndexFileNames.segmentFileName(
            state.segmentInfo.name, state.segmentSuffix, FSTTermsWriter.TERMS_EXTENSION);

    this.postingsReader = postingsReader;
    final IndexInput in = state.directory.openInput(termsFileName, state.context);

    boolean success = false;
    try {
      version = readHeader(in);
      if (version >= FSTTermsWriter.TERMS_VERSION_CHECKSUM) {
        CodecUtil.checksumEntireFile(in);
      }
      this.postingsReader.init(in);
      seekDir(in);

      final FieldInfos fieldInfos = state.fieldInfos;
      final int numFields = in.readVInt();
      for (int i = 0; i < numFields; i++) {
        int fieldNumber = in.readVInt();
        FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldNumber);
        long numTerms = in.readVLong();
        long sumTotalTermFreq =
            fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY ? -1 : in.readVLong();
        long sumDocFreq = in.readVLong();
        int docCount = in.readVInt();
        int longsSize = in.readVInt();
        TermsReader current =
            new TermsReader(
                fieldInfo, in, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize);
        TermsReader previous = fields.put(fieldInfo.name, current);
        checkFieldSummary(state.segmentInfo, in, current, previous);
      }
      success = true;
    } finally {
      if (success) {
        IOUtils.close(in);
      } else {
        IOUtils.closeWhileHandlingException(in);
      }
    }
  }
Example #19
    @Override
    public void close() throws IOException {
      delegateFieldsConsumer.close();
      // Now we are done accumulating values for these fields
      List<Entry<FieldInfo, FuzzySet>> nonSaturatedBlooms =
          new ArrayList<Map.Entry<FieldInfo, FuzzySet>>();

      for (Entry<FieldInfo, FuzzySet> entry : bloomFilters.entrySet()) {
        FuzzySet bloomFilter = entry.getValue();
        if (!bloomFilterFactory.isSaturated(bloomFilter, entry.getKey())) {
          nonSaturatedBlooms.add(entry);
        }
      }
      String bloomFileName =
          IndexFileNames.segmentFileName(
              state.segmentInfo.name, state.segmentSuffix, BLOOM_EXTENSION);
      IndexOutput bloomOutput = null;
      try {
        bloomOutput = state.directory.createOutput(bloomFileName, state.context);
        CodecUtil.writeHeader(bloomOutput, BLOOM_CODEC_NAME, BLOOM_CODEC_VERSION);
        // remember the name of the postings format we will delegate to
        bloomOutput.writeString(delegatePostingsFormat.getName());

        // First field in the output file is the number of fields+blooms saved
        bloomOutput.writeInt(nonSaturatedBlooms.size());
        for (Entry<FieldInfo, FuzzySet> entry : nonSaturatedBlooms) {
          FieldInfo fieldInfo = entry.getKey();
          FuzzySet bloomFilter = entry.getValue();
          bloomOutput.writeInt(fieldInfo.number);
          saveAppropriatelySizedBloomFilter(bloomOutput, bloomFilter, fieldInfo);
        }
      } finally {
        IOUtils.close(bloomOutput);
      }
      // We are done with large bitsets so no need to keep them hanging around
      bloomFilters.clear();
    }
Example #20
  @Test
  public void testRenameFile() throws IOException {
    final ShardId shardId = new ShardId(new Index("index"), 1);
    DirectoryService directoryService = new LuceneManagedDirectoryService(random(), false);
    Store store =
        new Store(
            shardId,
            ImmutableSettings.EMPTY,
            directoryService,
            randomDistributor(directoryService),
            new DummyShardLock(shardId));
    {
      IndexOutput output = store.directory().createOutput("foo.bar", IOContext.DEFAULT);
      int iters = scaledRandomIntBetween(10, 100);
      for (int i = 0; i < iters; i++) {
        BytesRef bytesRef = new BytesRef(TestUtil.randomRealisticUnicodeString(random(), 10, 1024));
        output.writeBytes(bytesRef.bytes, bytesRef.offset, bytesRef.length);
      }
      CodecUtil.writeFooter(output);
      output.close();
    }
    store.renameFile("foo.bar", "bar.foo");
    assertThat(store.directory().listAll().length, is(1));
    final long lastChecksum;
    try (IndexInput input = store.directory().openInput("bar.foo", IOContext.DEFAULT)) {
      lastChecksum = CodecUtil.checksumEntireFile(input);
    }

    try {
      store.directory().openInput("foo.bar", IOContext.DEFAULT);
      fail("file was renamed");
    } catch (FileNotFoundException | NoSuchFileException ex) {
      // expected
    }
    {
      IndexOutput output = store.directory().createOutput("foo.bar", IOContext.DEFAULT);
      int iters = scaledRandomIntBetween(10, 100);
      for (int i = 0; i < iters; i++) {
        BytesRef bytesRef = new BytesRef(TestUtil.randomRealisticUnicodeString(random(), 10, 1024));
        output.writeBytes(bytesRef.bytes, bytesRef.offset, bytesRef.length);
      }
      CodecUtil.writeFooter(output);
      output.close();
    }
    DistributorDirectory distributorDirectory =
        DirectoryUtils.getLeaf(store.directory(), DistributorDirectory.class);
    if (distributorDirectory != null
        && distributorDirectory.getDirectory("foo.bar")
            != distributorDirectory.getDirectory("bar.foo")) {
      try {
        store.renameFile("foo.bar", "bar.foo");
        fail("target file already exists in a different directory");
      } catch (IOException ex) {
        // expected
      }

      try (IndexInput input = store.directory().openInput("bar.foo", IOContext.DEFAULT)) {
        assertThat(lastChecksum, equalTo(CodecUtil.checksumEntireFile(input)));
      }
      assertThat(store.directory().listAll().length, is(2));
      assertDeleteContent(store, directoryService);
      IOUtils.close(store);
    } else {
      store.renameFile("foo.bar", "bar.foo");
      assertThat(store.directory().listAll().length, is(1));
      assertDeleteContent(store, directoryService);
      IOUtils.close(store);
    }
  }
Example #21
  /**
   * Given a file, return a translog reader based on an optionally-existing header in the
   * file. If the file does not exist, or has zero length, returns the latest version. If the header
   * does not exist, assumes Version 0 of the translog file format.
   */
  public static ImmutableTranslogReader open(
      ChannelReference channelReference, Checkpoint checkpoint, String translogUUID)
      throws IOException {
    final FileChannel channel = channelReference.getChannel();
    final Path path = channelReference.getPath();
    assert channelReference.getGeneration() == checkpoint.generation
        : "expected generation: "
            + channelReference.getGeneration()
            + " but got: "
            + checkpoint.generation;

    try {
      if (checkpoint.offset == 0
          && checkpoint.numOps == TranslogReader.UNKNOWN_OP_COUNT) { // only old files can be empty
        return new LegacyTranslogReader(channelReference.getGeneration(), channelReference, 0);
      }

      InputStreamStreamInput headerStream =
          new InputStreamStreamInput(Channels.newInputStream(channel)); // don't close
      // Lucene's CodecUtil writes a magic number of 0x3FD76C17 with the
      // header, in binary this looks like:
      //
      // binary: 0011 1111 1101 0111 0110 1100 0001 0111
      // hex   :    3    f    d    7    6    c    1    7
      //
      // With version 0 of the translog, the first byte is the
      // Operation.Type, which will always be between 0-4, so we know if
      // we grab the first byte, it can be:
      // 0x3f => Lucene's magic number, so we can assume it's version 1 or later
      // 0x00 => version 0 of the translog
      //
      // otherwise the first byte of the translog is corrupted and we
      // should bail
      byte b1 = headerStream.readByte();
      if (b1 == LUCENE_CODEC_HEADER_BYTE) {
        // Read 3 more bytes, meaning a whole integer has been read
        byte b2 = headerStream.readByte();
        byte b3 = headerStream.readByte();
        byte b4 = headerStream.readByte();
        // Convert the 4 bytes that were read into an integer
        int header =
            ((b1 & 0xFF) << 24) + ((b2 & 0xFF) << 16) + ((b3 & 0xFF) << 8) + ((b4 & 0xFF) << 0);
        // We confirm CodecUtil's CODEC_MAGIC number (0x3FD76C17)
        // ourselves here, because it allows us to read the first
        // byte separately
        if (header != CodecUtil.CODEC_MAGIC) {
          throw new TranslogCorruptedException(
              "translog looks like version 1 or later, but has corrupted header");
        }
        // Confirm the rest of the header using CodecUtil, extracting
        // the translog version
        int version =
            CodecUtil.checkHeaderNoMagic(
                new InputStreamDataInput(headerStream),
                TranslogWriter.TRANSLOG_CODEC,
                1,
                Integer.MAX_VALUE);
        switch (version) {
          case TranslogWriter.VERSION_CHECKSUMS:
            assert checkpoint.numOps == TranslogReader.UNKNOWN_OP_COUNT
                : "expected unknown op count but got: " + checkpoint.numOps;
            assert checkpoint.offset == Files.size(path)
                : "offset("
                    + checkpoint.offset
                    + ") != file_size("
                    + Files.size(path)
                    + ") for: "
                    + path;
            // legacy - we still have to support it somehow
            return new LegacyTranslogReaderBase(
                channelReference.getGeneration(),
                channelReference,
                CodecUtil.headerLength(TranslogWriter.TRANSLOG_CODEC),
                checkpoint.offset);
          case TranslogWriter.VERSION_CHECKPOINTS:
            assert path.getFileName().toString().endsWith(Translog.TRANSLOG_FILE_SUFFIX)
                : "new file ends with old suffix: " + path;
            assert checkpoint.numOps > TranslogReader.UNKNOWN_OP_COUNT
                : "expected at least 0 operatin but got: " + checkpoint.numOps;
            assert checkpoint.offset <= channel.size()
                : "checkpoint is inconsistent with channel length: "
                    + channel.size()
                    + " "
                    + checkpoint;
            int len = headerStream.readInt();
            if (len > channel.size()) {
              throw new TranslogCorruptedException("uuid length can't be larger than the translog");
            }
            BytesRef ref = new BytesRef(len);
            ref.length = len;
            headerStream.read(ref.bytes, ref.offset, ref.length);
            BytesRef uuidBytes = new BytesRef(translogUUID);
            if (uuidBytes.bytesEquals(ref) == false) {
              throw new TranslogCorruptedException(
                  "expected shard UUID ["
                      + uuidBytes
                      + "] but got: ["
                      + ref
                      + "] this translog file belongs to a different translog");
            }
            return new ImmutableTranslogReader(
                channelReference.getGeneration(),
                channelReference,
                ref.length
                    + CodecUtil.headerLength(TranslogWriter.TRANSLOG_CODEC)
                    + RamUsageEstimator.NUM_BYTES_INT,
                checkpoint.offset,
                checkpoint.numOps);
          default:
            throw new TranslogCorruptedException(
                "No known translog stream version: " + version + " path:" + path);
        }
      } else if (b1 == UNVERSIONED_TRANSLOG_HEADER_BYTE) {
        assert checkpoint.numOps == TranslogReader.UNKNOWN_OP_COUNT
            : "expected unknown op count but got: " + checkpoint.numOps;
        assert checkpoint.offset == Files.size(path)
            : "offset("
                + checkpoint.offset
                + ") != file_size("
                + Files.size(path)
                + ") for: "
                + path;
        return new LegacyTranslogReader(
            channelReference.getGeneration(), channelReference, checkpoint.offset);
      } else {
        throw new TranslogCorruptedException(
            "Invalid first byte in translog file, got: "
                + Long.toHexString(b1)
                + ", expected 0x00 or 0x3f");
      }
    } catch (CorruptIndexException | IndexFormatTooOldException | IndexFormatTooNewException e) {
      throw new TranslogCorruptedException("Translog header corrupted", e);
    }
  }
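The single-byte sniffing above is sound because the most significant byte of CodecUtil.CODEC_MAGIC (0x3FD76C17) is 0x3F, while a version-0 translog always starts with an Operation.Type byte in 0..4. A hedged one-line restatement (looksLikeCodecHeader is a hypothetical helper):

  static boolean looksLikeCodecHeader(byte firstByte) {
    // 0x3F can never collide with a version-0 Operation.Type byte (0..4).
    return (firstByte & 0xFF) == (CodecUtil.CODEC_MAGIC >>> 24);
  }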
Example #22
  /** Sole constructor. */
  public BlockTreeTermsReader(PostingsReaderBase postingsReader, SegmentReadState state)
      throws IOException {
    boolean success = false;
    IndexInput indexIn = null;

    this.postingsReader = postingsReader;
    this.segment = state.segmentInfo.name;

    String termsName =
        IndexFileNames.segmentFileName(segment, state.segmentSuffix, TERMS_EXTENSION);
    try {
      termsIn = state.directory.openInput(termsName, state.context);
      version =
          CodecUtil.checkIndexHeader(
              termsIn,
              TERMS_CODEC_NAME,
              VERSION_START,
              VERSION_CURRENT,
              state.segmentInfo.getId(),
              state.segmentSuffix);

      String indexName =
          IndexFileNames.segmentFileName(segment, state.segmentSuffix, TERMS_INDEX_EXTENSION);
      indexIn = state.directory.openInput(indexName, state.context);
      CodecUtil.checkIndexHeader(
          indexIn,
          TERMS_INDEX_CODEC_NAME,
          version,
          version,
          state.segmentInfo.getId(),
          state.segmentSuffix);
      CodecUtil.checksumEntireFile(indexIn);

      // Have PostingsReader init itself
      postingsReader.init(termsIn, state);

      // NOTE: data file is too costly to verify checksum against all the bytes on open,
      // but for now we at least verify proper structure of the checksum footer: which looks
      // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
      // such as file truncation.
      CodecUtil.retrieveChecksum(termsIn);

      // Read per-field details
      seekDir(termsIn, dirOffset);
      seekDir(indexIn, indexDirOffset);

      final int numFields = termsIn.readVInt();
      if (numFields < 0) {
        throw new CorruptIndexException("invalid numFields: " + numFields, termsIn);
      }

      for (int i = 0; i < numFields; ++i) {
        final int field = termsIn.readVInt();
        final long numTerms = termsIn.readVLong();
        if (numTerms <= 0) {
          throw new CorruptIndexException("Illegal numTerms for field number: " + field, termsIn);
        }
        final int numBytes = termsIn.readVInt();
        if (numBytes < 0) {
          throw new CorruptIndexException(
              "invalid rootCode for field number: " + field + ", numBytes=" + numBytes, termsIn);
        }
        final BytesRef rootCode = new BytesRef(new byte[numBytes]);
        termsIn.readBytes(rootCode.bytes, 0, numBytes);
        rootCode.length = numBytes;
        final FieldInfo fieldInfo = state.fieldInfos.fieldInfo(field);
        if (fieldInfo == null) {
          throw new CorruptIndexException("invalid field number: " + field, termsIn);
        }
        final long sumTotalTermFreq =
            fieldInfo.getIndexOptions() == IndexOptions.DOCS ? -1 : termsIn.readVLong();
        final long sumDocFreq = termsIn.readVLong();
        final int docCount = termsIn.readVInt();
        final int longsSize = termsIn.readVInt();
        if (longsSize < 0) {
          throw new CorruptIndexException(
              "invalid longsSize for field: " + fieldInfo.name + ", longsSize=" + longsSize,
              termsIn);
        }
        BytesRef minTerm = readBytesRef(termsIn);
        BytesRef maxTerm = readBytesRef(termsIn);
        if (docCount < 0
            || docCount > state.segmentInfo.getDocCount()) { // #docs with field must be <= #docs
          throw new CorruptIndexException(
              "invalid docCount: " + docCount + " maxDoc: " + state.segmentInfo.getDocCount(),
              termsIn);
        }
        if (sumDocFreq < docCount) { // #postings must be >= #docs with field
          throw new CorruptIndexException(
              "invalid sumDocFreq: " + sumDocFreq + " docCount: " + docCount, termsIn);
        }
        if (sumTotalTermFreq != -1
            && sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings
          throw new CorruptIndexException(
              "invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq,
              termsIn);
        }
        final long indexStartFP = indexIn.readVLong();
        FieldReader previous =
            fields.put(
                fieldInfo.name,
                new FieldReader(
                    this,
                    fieldInfo,
                    numTerms,
                    rootCode,
                    sumTotalTermFreq,
                    sumDocFreq,
                    docCount,
                    indexStartFP,
                    longsSize,
                    indexIn,
                    minTerm,
                    maxTerm));
        if (previous != null) {
          throw new CorruptIndexException("duplicate field: " + fieldInfo.name, termsIn);
        }
      }

      indexIn.close();
      success = true;
    } finally {
      if (!success) {
        // this.close() will close in:
        IOUtils.closeWhileHandlingException(indexIn, this);
      }
    }
  }
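The NOTE inside the constructor captures the trade-off between the two verification calls. A hedged side-by-side sketch (both helper names are hypothetical):

  static long footerStructureOnly(IndexInput in) throws IOException {
    // Seeks to the footer and validates its structure (FOOTER_MAGIC + algorithm id);
    // cheap, and still catches gross corruption such as truncation.
    return CodecUtil.retrieveChecksum(in);
  }

  static long everyByte(IndexInput in) throws IOException {
    // Re-reads the whole file and verifies the stored checksum; thorough, but costs
    // a full sequential pass.
    return CodecUtil.checksumEntireFile(in);
  }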
Example #23
  private boolean doRestore() throws Exception {

    Path backupPath = Paths.get(backupLocation).resolve(backupName);
    SimpleDateFormat dateFormat = new SimpleDateFormat(SnapShooter.DATE_FMT, Locale.ROOT);
    String restoreIndexName = "restore." + dateFormat.format(new Date());
    String restoreIndexPath = core.getDataDir() + restoreIndexName;

    Directory restoreIndexDir = null;
    Directory indexDir = null;
    try (Directory backupDir = FSDirectory.open(backupPath)) {

      final Version version =
          IndexFetcher.checkOldestVersion(SegmentInfos.readLatestCommit(backupDir));

      restoreIndexDir =
          core.getDirectoryFactory()
              .get(
                  restoreIndexPath,
                  DirectoryFactory.DirContext.DEFAULT,
                  core.getSolrConfig().indexConfig.lockType);

      // Prefer local copy.
      indexDir =
          core.getDirectoryFactory()
              .get(
                  core.getIndexDir(),
                  DirectoryFactory.DirContext.DEFAULT,
                  core.getSolrConfig().indexConfig.lockType);

      // Move all files from backupDir to restoreIndexDir
      for (String filename : backupDir.listAll()) {
        checkInterrupted();
        log.info("Copying file {} to restore directory ", filename);
        try (IndexInput indexInput = backupDir.openInput(filename, IOContext.READONCE)) {
          Long checksum = null;
          try {
            checksum = CodecUtil.retrieveChecksum(indexInput);
          } catch (Exception e) {
            log.warn("Could not read checksum from index file: " + filename, e);
          }
          long length = indexInput.length();
          IndexFetcher.CompareResult compareResult =
              IndexFetcher.compareFile(indexDir, version, filename, length, checksum);
          if (!compareResult.equal
              || (!compareResult.checkSummed
                  && (filename.endsWith(".si")
                      || filename.endsWith(".liv")
                      || filename.startsWith("segments_")))) {
            restoreIndexDir.copyFrom(backupDir, filename, filename, IOContext.READONCE);
          } else {
            // prefer local copy
            restoreIndexDir.copyFrom(indexDir, filename, filename, IOContext.READONCE);
          }
        } catch (Exception e) {
          throw new SolrException(
              SolrException.ErrorCode.UNKNOWN, "Exception while restoring the backup index", e);
        }
      }
      log.debug("Switching directories");
      IndexFetcher.modifyIndexProps(core, restoreIndexName);

      boolean success;
      try {
        core.getUpdateHandler().newIndexWriter(false);
        openNewSearcher();
        success = true;
        log.info("Successfully restored to the backup index");
      } catch (Exception e) {
        // Rollback to the old index directory. Delete the restore index directory and mark the
        // restore as failed.
        log.warn("Could not switch to restored index. Rolling back to the current index");
        Directory dir = null;
        try {
          dir =
              core.getDirectoryFactory()
                  .get(
                      core.getDataDir(),
                      DirectoryFactory.DirContext.META_DATA,
                      core.getSolrConfig().indexConfig.lockType);
          dir.deleteFile(IndexFetcher.INDEX_PROPERTIES);
        } finally {
          if (dir != null) {
            core.getDirectoryFactory().release(dir);
          }
        }

        core.getDirectoryFactory().doneWithDirectory(restoreIndexDir);
        core.getDirectoryFactory().remove(restoreIndexDir);
        core.getUpdateHandler().newIndexWriter(false);
        openNewSearcher();
        throw new SolrException(
            SolrException.ErrorCode.UNKNOWN, "Exception while restoring the backup index", e);
      }
      if (success) {
        core.getDirectoryFactory().doneWithDirectory(indexDir);
        core.getDirectoryFactory().remove(indexDir);
      }

      return true;
    } finally {
      if (restoreIndexDir != null) {
        core.getDirectoryFactory().release(restoreIndexDir);
      }
      if (indexDir != null) {
        core.getDirectoryFactory().release(indexDir);
      }
    }
  }
Example #24
  /**
   * Read a particular segmentFileName. Note that this may throw an IOException if a commit is in
   * process.
   *
   * @param directory -- directory containing the segments file
   * @param segmentFileName -- segment file to load
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public static final SegmentInfos readCommit(Directory directory, String segmentFileName)
      throws IOException {

    long generation = generationFromSegmentsFileName(segmentFileName);
    try (ChecksumIndexInput input = directory.openChecksumInput(segmentFileName, IOContext.READ)) {
      // NOTE: as long as we want to throw indexformattooold (vs corruptindexexception), we need
      // to read the magic ourselves.
      int magic = input.readInt();
      if (magic != CodecUtil.CODEC_MAGIC) {
        throw new IndexFormatTooOldException(
            input, magic, CodecUtil.CODEC_MAGIC, CodecUtil.CODEC_MAGIC);
      }
      // 4.0+
      int format = CodecUtil.checkHeaderNoMagic(input, "segments", VERSION_40, VERSION_CURRENT);
      // 5.0+
      byte id[] = null;
      if (format >= VERSION_50) {
        id = new byte[StringHelper.ID_LENGTH];
        input.readBytes(id, 0, id.length);
        CodecUtil.checkIndexHeaderSuffix(input, Long.toString(generation, Character.MAX_RADIX));
      }

      SegmentInfos infos = new SegmentInfos();
      infos.id = id;
      infos.generation = generation;
      infos.lastGeneration = generation;
      if (format >= VERSION_53) {
        // TODO: in the future (7.0?  sigh) we can use this to throw IndexFormatTooOldException ...
        // or just rely on the
        // minSegmentLuceneVersion check instead:
        infos.luceneVersion =
            Version.fromBits(input.readVInt(), input.readVInt(), input.readVInt());
      } else {
        // else compute the min version down below in the for loop
      }

      infos.version = input.readLong();
      infos.counter = input.readInt();
      int numSegments = input.readInt();
      if (numSegments < 0) {
        throw new CorruptIndexException("invalid segment count: " + numSegments, input);
      }

      if (format >= VERSION_53) {
        if (numSegments > 0) {
          infos.minSegmentLuceneVersion =
              Version.fromBits(input.readVInt(), input.readVInt(), input.readVInt());
          if (infos.minSegmentLuceneVersion.onOrAfter(Version.LUCENE_4_0_0_ALPHA) == false) {
            throw new IndexFormatTooOldException(
                input,
                "this index contains a too-old segment (version: "
                    + infos.minSegmentLuceneVersion
                    + ")");
          }
        } else {
          // else leave as null: no segments
        }
      } else {
        // else we recompute it below as we visit segments; it can't be used for throwing
        // IndexFormatTooOldExc, but consumers of
        // SegmentInfos can maybe still use it for other reasons
      }

      long totalDocs = 0;
      for (int seg = 0; seg < numSegments; seg++) {
        String segName = input.readString();
        final byte segmentID[];
        if (format >= VERSION_50) {
          byte hasID = input.readByte();
          if (hasID == 1) {
            segmentID = new byte[StringHelper.ID_LENGTH];
            input.readBytes(segmentID, 0, segmentID.length);
          } else if (hasID == 0) {
            segmentID = null; // 4.x segment, doesn't have an ID
          } else {
            throw new CorruptIndexException("invalid hasID byte, got: " + hasID, input);
          }
        } else {
          segmentID = null;
        }
        Codec codec = readCodec(input, format < VERSION_53);
        SegmentInfo info =
            codec.segmentInfoFormat().read(directory, segName, segmentID, IOContext.READ);
        info.setCodec(codec);
        totalDocs += info.maxDoc();
        long delGen = input.readLong();
        int delCount = input.readInt();
        if (delCount < 0 || delCount > info.maxDoc()) {
          throw new CorruptIndexException(
              "invalid deletion count: " + delCount + " vs maxDoc=" + info.maxDoc(), input);
        }
        long fieldInfosGen = -1;
        if (format >= VERSION_46) {
          fieldInfosGen = input.readLong();
        }
        long dvGen = -1;
        if (format >= VERSION_49) {
          dvGen = input.readLong();
        } else {
          dvGen = fieldInfosGen;
        }
        SegmentCommitInfo siPerCommit =
            new SegmentCommitInfo(info, delCount, delGen, fieldInfosGen, dvGen);
        if (format >= VERSION_46) {
          if (format < VERSION_49) {
            // Recorded per-generation files, which were buggy (see
            // LUCENE-5636). We need to read and keep them so we continue to
            // reference those files. Unfortunately it means that the files will
            // be referenced even if the fields are updated again, until the
            // segment is merged.
            final int numGensUpdatesFiles = input.readInt();
            final Map<Long, Set<String>> genUpdatesFiles;
            if (numGensUpdatesFiles == 0) {
              genUpdatesFiles = Collections.emptyMap();
            } else {
              genUpdatesFiles = new HashMap<>(numGensUpdatesFiles);
              for (int i = 0; i < numGensUpdatesFiles; i++) {
                genUpdatesFiles.put(input.readLong(), input.readStringSet());
              }
            }
            siPerCommit.setGenUpdatesFiles(genUpdatesFiles);
          } else {
            if (format >= VERSION_51) {
              siPerCommit.setFieldInfosFiles(input.readSetOfStrings());
            } else {
              siPerCommit.setFieldInfosFiles(Collections.unmodifiableSet(input.readStringSet()));
            }
            final Map<Integer, Set<String>> dvUpdateFiles;
            final int numDVFields = input.readInt();
            if (numDVFields == 0) {
              dvUpdateFiles = Collections.emptyMap();
            } else {
              Map<Integer, Set<String>> map = new HashMap<>(numDVFields);
              for (int i = 0; i < numDVFields; i++) {
                if (format >= VERSION_51) {
                  map.put(input.readInt(), input.readSetOfStrings());
                } else {
                  map.put(input.readInt(), Collections.unmodifiableSet(input.readStringSet()));
                }
              }
              dvUpdateFiles = Collections.unmodifiableMap(map);
            }
            siPerCommit.setDocValuesUpdatesFiles(dvUpdateFiles);
          }
        }
        infos.add(siPerCommit);

        Version segmentVersion = info.getVersion();
        if (format < VERSION_53) {
          if (infos.minSegmentLuceneVersion == null
              || segmentVersion.onOrAfter(infos.minSegmentLuceneVersion) == false) {
            infos.minSegmentLuceneVersion = segmentVersion;
          }
        } else if (segmentVersion.onOrAfter(infos.minSegmentLuceneVersion) == false) {
          throw new CorruptIndexException(
              "segments file recorded minSegmentLuceneVersion="
                  + infos.minSegmentLuceneVersion
                  + " but segment="
                  + info
                  + " has older version="
                  + segmentVersion,
              input);
        }
      }

      if (format >= VERSION_51) {
        infos.userData = input.readMapOfStrings();
      } else {
        infos.userData = Collections.unmodifiableMap(input.readStringStringMap());
      }

      if (format >= VERSION_48) {
        CodecUtil.checkFooter(input);
      } else {
        final long checksumNow = input.getChecksum();
        final long checksumThen = input.readLong();
        if (checksumNow != checksumThen) {
          throw new CorruptIndexException(
              "checksum failed (hardware problem?) : expected="
                  + Long.toHexString(checksumThen)
                  + " actual="
                  + Long.toHexString(checksumNow),
              input);
        }
        CodecUtil.checkEOF(input);
      }

      // LUCENE-6299: check we are in bounds
      if (totalDocs > IndexWriter.getActualMaxDocs()) {
        throw new CorruptIndexException(
            "Too many documents: an index cannot exceed "
                + IndexWriter.getActualMaxDocs()
                + " but readers have total maxDoc="
                + totalDocs,
            input);
      }

      return infos;
    }
  }
Example #25
   @Override
  public FieldInfos read(
      Directory directory, SegmentInfo segmentInfo, String segmentSuffix, IOContext context)
      throws IOException {
    final String fileName =
        IndexFileNames.segmentFileName(
            segmentInfo.name, segmentSuffix, Lucene46FieldInfosFormat.EXTENSION);
    try (ChecksumIndexInput input = directory.openChecksumInput(fileName, context)) {
      int codecVersion =
          CodecUtil.checkHeader(
              input,
              Lucene46FieldInfosFormat.CODEC_NAME,
              Lucene46FieldInfosFormat.FORMAT_START,
              Lucene46FieldInfosFormat.FORMAT_CURRENT);

      final int size = input.readVInt(); // read in the size
      FieldInfo infos[] = new FieldInfo[size];

      for (int i = 0; i < size; i++) {
        String name = input.readString();
        final int fieldNumber = input.readVInt();
        if (fieldNumber < 0) {
          throw new CorruptIndexException(
              "invalid field number for field: " + name + ", fieldNumber=" + fieldNumber, input);
        }
        byte bits = input.readByte();
        boolean isIndexed = (bits & Lucene46FieldInfosFormat.IS_INDEXED) != 0;
        boolean storeTermVector = (bits & Lucene46FieldInfosFormat.STORE_TERMVECTOR) != 0;
        boolean omitNorms = (bits & Lucene46FieldInfosFormat.OMIT_NORMS) != 0;
        boolean storePayloads = (bits & Lucene46FieldInfosFormat.STORE_PAYLOADS) != 0;
        final IndexOptions indexOptions;
        if (!isIndexed) {
          indexOptions = IndexOptions.NONE;
        } else if ((bits & Lucene46FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS) != 0) {
          indexOptions = IndexOptions.DOCS;
        } else if ((bits & Lucene46FieldInfosFormat.OMIT_POSITIONS) != 0) {
          indexOptions = IndexOptions.DOCS_AND_FREQS;
        } else if ((bits & Lucene46FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS) != 0) {
          indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
        } else {
          indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
        }

        // DV Types are packed in one byte
        byte val = input.readByte();
        final DocValuesType docValuesType = getDocValuesType(input, (byte) (val & 0x0F));
        final DocValuesType normsType = getDocValuesType(input, (byte) ((val >>> 4) & 0x0F));
        final long dvGen = input.readLong();
        final Map<String, String> attributes = input.readStringStringMap();

        if (isIndexed && omitNorms == false && normsType == DocValuesType.NONE) {
          // Undead norms!  Lucene42NormsProducer will check this and bring norms back from the
          // dead:
          UndeadNormsProducer.setUndead(attributes);
        }

        infos[i] =
            new FieldInfo(
                name,
                fieldNumber,
                storeTermVector,
                omitNorms,
                storePayloads,
                indexOptions,
                docValuesType,
                dvGen,
                Collections.unmodifiableMap(attributes));
        infos[i].checkConsistency();
      }

      if (codecVersion >= Lucene46FieldInfosFormat.FORMAT_CHECKSUM) {
        CodecUtil.checkFooter(input);
      } else {
        CodecUtil.checkEOF(input);
      }
      return new FieldInfos(infos);
    }
  }
Example #26
  private void write(Directory directory) throws IOException {

    long nextGeneration = getNextPendingGeneration();
    String segmentFileName =
        IndexFileNames.fileNameFromGeneration(IndexFileNames.PENDING_SEGMENTS, "", nextGeneration);

    // Always advance the generation on write:
    generation = nextGeneration;

    IndexOutput segnOutput = null;
    boolean success = false;

    try {
      segnOutput = directory.createOutput(segmentFileName, IOContext.DEFAULT);
      CodecUtil.writeIndexHeader(
          segnOutput,
          "segments",
          VERSION_CURRENT,
          StringHelper.randomId(),
          Long.toString(nextGeneration, Character.MAX_RADIX));
      segnOutput.writeVInt(Version.LATEST.major);
      segnOutput.writeVInt(Version.LATEST.minor);
      segnOutput.writeVInt(Version.LATEST.bugfix);

      segnOutput.writeLong(version);
      segnOutput.writeInt(counter); // write counter
      segnOutput.writeInt(size());

      if (size() > 0) {

        Version minSegmentVersion = null;

        // We do a separate loop up front so we can write the minSegmentVersion before
        // any SegmentInfo; this makes it cleaner to throw IndexFormatTooOldExc at read time:
        for (SegmentCommitInfo siPerCommit : this) {
          Version segmentVersion = siPerCommit.info.getVersion();
          if (minSegmentVersion == null || segmentVersion.onOrAfter(minSegmentVersion) == false) {
            minSegmentVersion = segmentVersion;
          }
        }

        segnOutput.writeVInt(minSegmentVersion.major);
        segnOutput.writeVInt(minSegmentVersion.minor);
        segnOutput.writeVInt(minSegmentVersion.bugfix);
      }

      // write infos
      for (SegmentCommitInfo siPerCommit : this) {
        SegmentInfo si = siPerCommit.info;
        segnOutput.writeString(si.name);
        byte segmentID[] = si.getId();
        // TODO: remove this in lucene 6, we don't need to include 4.x segments in commits anymore
        if (segmentID == null) {
          segnOutput.writeByte((byte) 0);
        } else {
          if (segmentID.length != StringHelper.ID_LENGTH) {
            throw new IllegalStateException(
                "cannot write segment: invalid id segment="
                    + si.name
                    + "id="
                    + StringHelper.idToString(segmentID));
          }
          segnOutput.writeByte((byte) 1);
          segnOutput.writeBytes(segmentID, segmentID.length);
        }
        segnOutput.writeString(si.getCodec().getName());
        segnOutput.writeLong(siPerCommit.getDelGen());
        int delCount = siPerCommit.getDelCount();
        if (delCount < 0 || delCount > si.maxDoc()) {
          throw new IllegalStateException(
              "cannot write segment: invalid maxDoc segment="
                  + si.name
                  + " maxDoc="
                  + si.maxDoc()
                  + " delCount="
                  + delCount);
        }
        segnOutput.writeInt(delCount);
        segnOutput.writeLong(siPerCommit.getFieldInfosGen());
        segnOutput.writeLong(siPerCommit.getDocValuesGen());
        segnOutput.writeSetOfStrings(siPerCommit.getFieldInfosFiles());
        final Map<Integer, Set<String>> dvUpdatesFiles = siPerCommit.getDocValuesUpdatesFiles();
        segnOutput.writeInt(dvUpdatesFiles.size());
        for (Entry<Integer, Set<String>> e : dvUpdatesFiles.entrySet()) {
          segnOutput.writeInt(e.getKey());
          segnOutput.writeSetOfStrings(e.getValue());
        }
      }
      segnOutput.writeMapOfStrings(userData);
      CodecUtil.writeFooter(segnOutput);
      segnOutput.close();
      directory.sync(Collections.singleton(segmentFileName));
      success = true;
    } finally {
      if (success) {
        pendingCommit = true;
      } else {
        // We hit an exception above; try to close the file
        // but suppress any exception:
        IOUtils.closeWhileHandlingException(segnOutput);
        // Try not to leave a truncated segments_N file in
        // the index:
        IOUtils.deleteFilesIgnoringExceptions(directory, segmentFileName);
      }
    }
  }
Example #27
 /** Seek {@code input} to the directory offset. */
 private void seekDir(IndexInput input, long dirOffset) throws IOException {
   input.seek(input.length() - CodecUtil.footerLength() - 8);
   dirOffset = input.readLong();
   input.seek(dirOffset);
 }
Example #28
  public void testCheckIntegrity() throws IOException {
    Directory dir = newDirectory();
    long luceneFileLength = 0;

    try (IndexOutput output = dir.createOutput("lucene_checksum.bin", IOContext.DEFAULT)) {
      int iters = scaledRandomIntBetween(10, 100);
      for (int i = 0; i < iters; i++) {
        BytesRef bytesRef = new BytesRef(TestUtil.randomRealisticUnicodeString(random(), 10, 1024));
        output.writeBytes(bytesRef.bytes, bytesRef.offset, bytesRef.length);
        luceneFileLength += bytesRef.length;
      }
      CodecUtil.writeFooter(output);
      luceneFileLength += CodecUtil.footerLength();
    }

    final Adler32 adler32 = new Adler32();
    long legacyFileLength = 0;
    try (IndexOutput output = dir.createOutput("legacy.bin", IOContext.DEFAULT)) {
      int iters = scaledRandomIntBetween(10, 100);
      for (int i = 0; i < iters; i++) {
        BytesRef bytesRef = new BytesRef(TestUtil.randomRealisticUnicodeString(random(), 10, 1024));
        output.writeBytes(bytesRef.bytes, bytesRef.offset, bytesRef.length);
        adler32.update(bytesRef.bytes, bytesRef.offset, bytesRef.length);
        legacyFileLength += bytesRef.length;
      }
    }
    final long luceneChecksum;
    final long adler32LegacyChecksum = adler32.getValue();
    try (IndexInput indexInput = dir.openInput("lucene_checksum.bin", IOContext.DEFAULT)) {
      assertEquals(luceneFileLength, indexInput.length());
      luceneChecksum = CodecUtil.retrieveChecksum(indexInput);
    }

    { // positive check
      StoreFileMetaData lucene =
          new StoreFileMetaData(
              "lucene_checksum.bin",
              luceneFileLength,
              Store.digestToString(luceneChecksum),
              Version.LUCENE_4_8_0);
      StoreFileMetaData legacy =
          new StoreFileMetaData(
              "legacy.bin", legacyFileLength, Store.digestToString(adler32LegacyChecksum));
      assertTrue(legacy.hasLegacyChecksum());
      assertFalse(lucene.hasLegacyChecksum());
      assertTrue(Store.checkIntegrityNoException(lucene, dir));
      assertTrue(Store.checkIntegrityNoException(legacy, dir));
    }

    { // negative check - wrong checksum
      StoreFileMetaData lucene =
          new StoreFileMetaData(
              "lucene_checksum.bin",
              luceneFileLength,
              Store.digestToString(luceneChecksum + 1),
              Version.LUCENE_4_8_0);
      StoreFileMetaData legacy =
          new StoreFileMetaData(
              "legacy.bin", legacyFileLength, Store.digestToString(adler32LegacyChecksum + 1));
      assertTrue(legacy.hasLegacyChecksum());
      assertFalse(lucene.hasLegacyChecksum());
      assertFalse(Store.checkIntegrityNoException(lucene, dir));
      assertFalse(Store.checkIntegrityNoException(legacy, dir));
    }

    { // negative check - wrong length
      StoreFileMetaData lucene =
          new StoreFileMetaData(
              "lucene_checksum.bin",
              luceneFileLength + 1,
              Store.digestToString(luceneChecksum),
              Version.LUCENE_4_8_0);
      StoreFileMetaData legacy =
          new StoreFileMetaData(
              "legacy.bin", legacyFileLength + 1, Store.digestToString(adler32LegacyChecksum));
      assertTrue(legacy.hasLegacyChecksum());
      assertFalse(lucene.hasLegacyChecksum());
      assertFalse(Store.checkIntegrityNoException(lucene, dir));
      assertFalse(Store.checkIntegrityNoException(legacy, dir));
    }

    { // negative check - wrong file
      StoreFileMetaData lucene =
          new StoreFileMetaData(
              "legacy.bin",
              luceneFileLength,
              Store.digestToString(luceneChecksum),
              Version.LUCENE_4_8_0);
      StoreFileMetaData legacy =
          new StoreFileMetaData(
              "lucene_checksum.bin", legacyFileLength, Store.digestToString(adler32LegacyChecksum));
      assertTrue(legacy.hasLegacyChecksum());
      assertFalse(lucene.hasLegacyChecksum());
      assertFalse(Store.checkIntegrityNoException(lucene, dir));
      assertFalse(Store.checkIntegrityNoException(legacy, dir));
    }
    dir.close();
  }
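For footer-checksummed files like lucene_checksum.bin above, the verification the test performs piecewise can also be done in one call; a minimal sketch using the real CodecUtil API:

    // Reads the whole file, recomputes the checksum, and compares it against
    // the footer; throws CorruptIndexException on any mismatch.
    try (IndexInput in = dir.openInput("lucene_checksum.bin", IOContext.DEFAULT)) {
      long checksum = CodecUtil.checksumEntireFile(in);
      assert checksum == luceneChecksum;
    }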
Example #29
  @Test
  public void testMixedChecksums() throws IOException {
    final ShardId shardId = new ShardId(new Index("index"), 1);
    DirectoryService directoryService = new LuceneManagedDirectoryService(random());
    Store store =
        new Store(
            shardId,
            ImmutableSettings.EMPTY,
            directoryService,
            randomDistributor(directoryService),
            new DummyShardLock(shardId));
    // this time random codec....
    IndexWriter writer =
        new IndexWriter(
            store.directory(),
            newIndexWriterConfig(random(), new MockAnalyzer(random()))
                .setCodec(actualDefaultCodec()));
    int docs = 1 + random().nextInt(100);

    for (int i = 0; i < docs; i++) {
      Document doc = new Document();
      doc.add(
          new TextField("id", "" + i, random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
      doc.add(
          new TextField(
              "body",
              TestUtil.randomRealisticUnicodeString(random()),
              random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
      doc.add(
          new SortedDocValuesField(
              "dv", new BytesRef(TestUtil.randomRealisticUnicodeString(random()))));
      writer.addDocument(doc);
    }
    if (random().nextBoolean()) {
      for (int i = 0; i < docs; i++) {
        if (random().nextBoolean()) {
          Document doc = new Document();
          doc.add(
              new TextField(
                  "id", "" + i, random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
          doc.add(
              new TextField(
                  "body",
                  TestUtil.randomRealisticUnicodeString(random()),
                  random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
          writer.updateDocument(new Term("id", "" + i), doc);
        }
      }
    }
    if (random().nextBoolean()) {
      DirectoryReader.open(writer, random().nextBoolean()).close(); // flush
    }
    Store.MetadataSnapshot metadata;
    // check before we committed
    try {
      store.getMetadata();
      fail("no index present - expected exception");
    } catch (IndexNotFoundException ex) {
      // expected
    }
    assertThat(store.getMetadataOrEmpty(), is(Store.MetadataSnapshot.EMPTY)); // nothing committed
    writer.commit();
    writer.close();
    Store.LegacyChecksums checksums = new Store.LegacyChecksums();
    metadata = store.getMetadata();
    assertThat(metadata.asMap().isEmpty(), is(false));
    for (StoreFileMetaData meta : metadata) {
      try (IndexInput input = store.directory().openInput(meta.name(), IOContext.DEFAULT)) {
        if (meta.checksum() == null) {
          String checksum = null;
          try {
            CodecUtil.retrieveChecksum(input);
            fail("expected a corrupt index - posting format has not checksums");
          } catch (CorruptIndexException
              | IndexFormatTooOldException
              | IndexFormatTooNewException ex) {
            try (ChecksumIndexInput checksumIndexInput =
                store.directory().openChecksumInput(meta.name(), IOContext.DEFAULT)) {
              checksumIndexInput.seek(meta.length());
              checksum = Store.digestToString(checksumIndexInput.getChecksum());
            }
            // fine - it's a postings format without checksums
            checksums.add(new StoreFileMetaData(meta.name(), meta.length(), checksum, null));
          }
        } else {
          String checksum = Store.digestToString(CodecUtil.retrieveChecksum(input));
          assertThat(
              "File: " + meta.name() + " has a different checksum",
              meta.checksum(),
              equalTo(checksum));
          assertThat(meta.hasLegacyChecksum(), equalTo(false));
          assertThat(meta.writtenBy(), equalTo(Version.LATEST));
        }
      }
    }
    assertConsistent(store, metadata);
    checksums.write(store);
    metadata = store.getMetadata();
    assertThat(metadata.asMap().isEmpty(), is(false));
    for (StoreFileMetaData meta : metadata) {
      assertThat(
          "file: " + meta.name() + " has a null checksum", meta.checksum(), not(nullValue()));
      if (meta.hasLegacyChecksum()) {
        try (ChecksumIndexInput checksumIndexInput =
            store.directory().openChecksumInput(meta.name(), IOContext.DEFAULT)) {
          checksumIndexInput.seek(meta.length());
          assertThat(
              meta.checksum(), equalTo(Store.digestToString(checksumIndexInput.getChecksum())));
        }
      } else {
        try (IndexInput input = store.directory().openInput(meta.name(), IOContext.DEFAULT)) {
          String checksum = Store.digestToString(CodecUtil.retrieveChecksum(input));
          assertThat(
              "File: " + meta.name() + " has a different checksum",
              meta.checksum(),
              equalTo(checksum));
          assertThat(meta.hasLegacyChecksum(), equalTo(false));
          assertThat(meta.writtenBy(), equalTo(Version.LATEST));
        }
      }
    }
    assertConsistent(store, metadata);
    TestUtil.checkIndex(store.directory());
    assertDeleteContent(store, directoryService);
    IOUtils.close(store);
  }
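Files that predate Lucene's footers (the hasLegacyChecksum case above) cannot be verified through CodecUtil; their digest has to be recomputed over the raw bytes. A sketch of that fallback, assuming the Adler32 convention used in testCheckIntegrity (legacyDigest is a hypothetical helper, not an Elasticsearch API):

  // Hypothetical helper: recompute an Adler32 digest for a pre-footer "legacy" file.
  static String legacyDigest(Directory dir, String fileName) throws IOException {
    Adler32 adler32 = new Adler32(); // java.util.zip.Adler32
    try (IndexInput in = dir.openInput(fileName, IOContext.DEFAULT)) {
      byte[] buffer = new byte[8192];
      long remaining = in.length();
      while (remaining > 0) {
        int chunk = (int) Math.min(buffer.length, remaining);
        in.readBytes(buffer, 0, chunk);
        adler32.update(buffer, 0, chunk);
        remaining -= chunk;
      }
    }
    return Store.digestToString(adler32.getValue());
  }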
Example #30
  @Test
  public void testNewChecksums() throws IOException {
    final ShardId shardId = new ShardId(new Index("index"), 1);
    DirectoryService directoryService = new LuceneManagedDirectoryService(random());
    Store store =
        new Store(
            shardId,
            ImmutableSettings.EMPTY,
            directoryService,
            randomDistributor(directoryService),
            new DummyShardLock(shardId));
    // set default codec - all segments need checksums
    IndexWriter writer =
        new IndexWriter(
            store.directory(),
            newIndexWriterConfig(random(), new MockAnalyzer(random()))
                .setCodec(actualDefaultCodec()));
    int docs = 1 + random().nextInt(100);

    for (int i = 0; i < docs; i++) {
      Document doc = new Document();
      doc.add(
          new TextField("id", "" + i, random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
      doc.add(
          new TextField(
              "body",
              TestUtil.randomRealisticUnicodeString(random()),
              random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
      doc.add(
          new SortedDocValuesField(
              "dv", new BytesRef(TestUtil.randomRealisticUnicodeString(random()))));
      writer.addDocument(doc);
    }
    if (random().nextBoolean()) {
      for (int i = 0; i < docs; i++) {
        if (random().nextBoolean()) {
          Document doc = new Document();
          doc.add(
              new TextField(
                  "id", "" + i, random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
          doc.add(
              new TextField(
                  "body",
                  TestUtil.randomRealisticUnicodeString(random()),
                  random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
          writer.updateDocument(new Term("id", "" + i), doc);
        }
      }
    }
    if (random().nextBoolean()) {
      DirectoryReader.open(writer, random().nextBoolean()).close(); // flush
    }
    Store.MetadataSnapshot metadata;
    // check before we committed
    try {
      store.getMetadata();
      fail("no index present - expected exception");
    } catch (IndexNotFoundException ex) {
      // expected
    }
    assertThat(store.getMetadataOrEmpty(), is(Store.MetadataSnapshot.EMPTY)); // nothing committed
    writer.commit();
    writer.close();
    metadata = store.getMetadata();
    assertThat(metadata.asMap().isEmpty(), is(false));
    for (StoreFileMetaData meta : metadata) {
      try (IndexInput input = store.directory().openInput(meta.name(), IOContext.DEFAULT)) {
        String checksum = Store.digestToString(CodecUtil.retrieveChecksum(input));
        assertThat(
            "File: " + meta.name() + " has a different checksum",
            meta.checksum(),
            equalTo(checksum));
        assertThat(meta.hasLegacyChecksum(), equalTo(false));
        assertThat(meta.writtenBy(), equalTo(Version.LATEST));
        if (meta.name().endsWith(".si") || meta.name().startsWith("segments_")) {
          assertThat(meta.hash().length, greaterThan(0));
        }
      }
    }
    assertConsistent(store, metadata);

    TestUtil.checkIndex(store.directory());
    assertDeleteContent(store, directoryService);
    IOUtils.close(store);
  }