Esempio n. 1
0
  /**
   * Given a file, return a VersionedTranslogStream based on an optionally-existing header in the
   * file. If the file does not exist, or has zero length, returns the latest version. If the header
   * does not exist, assumes Version 0 of the translog file format.
   */
  public static ImmutableTranslogReader open(
      ChannelReference channelReference, Checkpoint checkpoint, String translogUUID)
      throws IOException {
    final FileChannel channel = channelReference.getChannel();
    final Path path = channelReference.getPath();
    assert channelReference.getGeneration() == checkpoint.generation
        : "expected generation: "
            + channelReference.getGeneration()
            + " but got: "
            + checkpoint.generation;

    try {
      if (checkpoint.offset == 0
          && checkpoint.numOps == TranslogReader.UNKNOWN_OP_COUNT) { // only old files can be empty
        return new LegacyTranslogReader(channelReference.getGeneration(), channelReference, 0);
      }

      InputStreamStreamInput headerStream =
          new InputStreamStreamInput(Channels.newInputStream(channel)); // don't close
      // Lucene's CodecUtil writes a magic number of 0x3FD76C17 with the
      // header, in binary this looks like:
      //
      // binary: 0011 1111 1101 0111 0110 1100 0001 0111
      // hex   :    3    f    d    7    6    c    1    7
      //
      // With version 0 of the translog, the first byte is the
      // Operation.Type, which will always be between 0-4, so we know if
      // we grab the first byte, it can be:
      // 0x3f => Lucene's magic number, so we can assume it's version 1 or later
      // 0x00 => version 0 of the translog
      //
      // otherwise the first byte of the translog is corrupted and we
      // should bail
      byte b1 = headerStream.readByte();
      if (b1 == LUCENE_CODEC_HEADER_BYTE) {
        // Read 3 more bytes, meaning a whole integer has been read
        byte b2 = headerStream.readByte();
        byte b3 = headerStream.readByte();
        byte b4 = headerStream.readByte();
        // Convert the 4 bytes that were read into an integer
        int header =
            ((b1 & 0xFF) << 24) + ((b2 & 0xFF) << 16) + ((b3 & 0xFF) << 8) + ((b4 & 0xFF) << 0);
        // We confirm CodecUtil's CODEC_MAGIC number (0x3FD76C17)
        // ourselves here, because it allows us to read the first
        // byte separately
        if (header != CodecUtil.CODEC_MAGIC) {
          throw new TranslogCorruptedException(
              "translog looks like version 1 or later, but has corrupted header");
        }
        // Confirm the rest of the header using CodecUtil, extracting
        // the translog version
        int version =
            CodecUtil.checkHeaderNoMagic(
                new InputStreamDataInput(headerStream),
                TranslogWriter.TRANSLOG_CODEC,
                1,
                Integer.MAX_VALUE);
        switch (version) {
          case TranslogWriter.VERSION_CHECKSUMS:
            assert checkpoint.numOps == TranslogReader.UNKNOWN_OP_COUNT
                : "expected unknown op count but got: " + checkpoint.numOps;
            assert checkpoint.offset == Files.size(path)
                : "offset("
                    + checkpoint.offset
                    + ") != file_size("
                    + Files.size(path)
                    + ") for: "
                    + path;
            // legacy - we still have to support it somehow
            return new LegacyTranslogReaderBase(
                channelReference.getGeneration(),
                channelReference,
                CodecUtil.headerLength(TranslogWriter.TRANSLOG_CODEC),
                checkpoint.offset);
          case TranslogWriter.VERSION_CHECKPOINTS:
            assert path.getFileName().toString().endsWith(Translog.TRANSLOG_FILE_SUFFIX)
                : "new file ends with old suffix: " + path;
            assert checkpoint.numOps > TranslogReader.UNKNOWN_OP_COUNT
                : "expected at least 0 operatin but got: " + checkpoint.numOps;
            assert checkpoint.offset <= channel.size()
                : "checkpoint is inconsistent with channel length: "
                    + channel.size()
                    + " "
                    + checkpoint;
            int len = headerStream.readInt();
            if (len > channel.size()) {
              throw new TranslogCorruptedException("uuid length can't be larger than the translog");
            }
            BytesRef ref = new BytesRef(len);
            ref.length = len;
            headerStream.read(ref.bytes, ref.offset, ref.length);
            BytesRef uuidBytes = new BytesRef(translogUUID);
            if (uuidBytes.bytesEquals(ref) == false) {
              throw new TranslogCorruptedException(
                  "expected shard UUID ["
                      + uuidBytes
                      + "] but got: ["
                      + ref
                      + "] this translog file belongs to a different translog");
            }
            return new ImmutableTranslogReader(
                channelReference.getGeneration(),
                channelReference,
                ref.length
                    + CodecUtil.headerLength(TranslogWriter.TRANSLOG_CODEC)
                    + RamUsageEstimator.NUM_BYTES_INT,
                checkpoint.offset,
                checkpoint.numOps);
          default:
            throw new TranslogCorruptedException(
                "No known translog stream version: " + version + " path:" + path);
        }
      } else if (b1 == UNVERSIONED_TRANSLOG_HEADER_BYTE) {
        assert checkpoint.numOps == TranslogReader.UNKNOWN_OP_COUNT
            : "expected unknown op count but got: " + checkpoint.numOps;
        assert checkpoint.offset == Files.size(path)
            : "offset("
                + checkpoint.offset
                + ") != file_size("
                + Files.size(path)
                + ") for: "
                + path;
        return new LegacyTranslogReader(
            channelReference.getGeneration(), channelReference, checkpoint.offset);
      } else {
        throw new TranslogCorruptedException(
            "Invalid first byte in translog file, got: "
                + Long.toHexString(b1)
                + ", expected 0x00 or 0x3f");
      }
    } catch (CorruptIndexException | IndexFormatTooOldException | IndexFormatTooNewException e) {
      throw new TranslogCorruptedException("Translog header corrupted", e);
    }
  }
Esempio n. 2
0
  /**
   * Given a file, return a VersionedTranslogStream based on an optionally-existing header in the
   * file. If the file does not exist, or has zero length, returns the latest version. If the header
   * does not exist, assumes Version 0 of the translog file format.
   *
   * <p>
   *
   * @throws IOException
   */
  public static TranslogStream translogStreamFor(Path translogFile) throws IOException {

    try (InputStreamStreamInput headerStream =
        new InputStreamStreamInput(Files.newInputStream(translogFile))) {
      if (Files.exists(translogFile) == false || Files.size(translogFile) == 0) {
        // if it doesn't exist or has no data, use the latest version,
        // there aren't any backwards compatibility issues
        return CHECKSUMMED_TRANSLOG_STREAM;
      }
      // Lucene's CodecUtil writes a magic number of 0x3FD76C17 with the
      // header, in binary this looks like:
      //
      // binary: 0011 1111 1101 0111 0110 1100 0001 0111
      // hex   :    3    f    d    7    6    c    1    7
      //
      // With version 0 of the translog, the first byte is the
      // Operation.Type, which will always be between 0-4, so we know if
      // we grab the first byte, it can be:
      // 0x3f => Lucene's magic number, so we can assume it's version 1 or later
      // 0x00 => version 0 of the translog
      //
      // otherwise the first byte of the translog is corrupted and we
      // should bail
      byte b1 = headerStream.readByte();
      if (b1 == LUCENE_CODEC_HEADER_BYTE) {
        // Read 3 more bytes, meaning a whole integer has been read
        byte b2 = headerStream.readByte();
        byte b3 = headerStream.readByte();
        byte b4 = headerStream.readByte();
        // Convert the 4 bytes that were read into an integer
        int header =
            ((b1 & 0xFF) << 24) + ((b2 & 0xFF) << 16) + ((b3 & 0xFF) << 8) + ((b4 & 0xFF) << 0);
        // We confirm CodecUtil's CODEC_MAGIC number (0x3FD76C17)
        // ourselves here, because it allows us to read the first
        // byte separately
        if (header != CodecUtil.CODEC_MAGIC) {
          throw new TranslogCorruptedException(
              "translog looks like version 1 or later, but has corrupted header");
        }
        // Confirm the rest of the header using CodecUtil, extracting
        // the translog version
        int version =
            CodecUtil.checkHeaderNoMagic(
                new InputStreamDataInput(headerStream), TRANSLOG_CODEC, 1, Integer.MAX_VALUE);
        switch (version) {
          case ChecksummedTranslogStream.VERSION:
            return CHECKSUMMED_TRANSLOG_STREAM;
          default:
            throw new TranslogCorruptedException("No known translog stream version: " + version);
        }
      } else if (b1 == UNVERSIONED_TRANSLOG_HEADER_BYTE) {
        return LEGACY_TRANSLOG_STREAM;
      } else {
        throw new TranslogCorruptedException(
            "Invalid first byte in translog file, got: "
                + Long.toHexString(b1)
                + ", expected 0x00 or 0x3f");
      }
    } catch (CorruptIndexException | IndexFormatTooOldException | IndexFormatTooNewException e) {
      throw new TranslogCorruptedException("Translog header corrupted", e);
    }
  }