Beispiel #1
0
  /**
   * Reads the next block from the underlying stream, verifies its checksum and feeds the block
   * payload into the decompressor.
   *
   * <p>A block is read as a 4-byte length, a 4-byte checksum and then that many payload bytes;
   * {@code noCompressedBytes} is advanced by every byte consumed.
   *
   * @return the number of payload bytes handed to the decompressor
   * @throws IOException if the stored length is negative or larger than
   *     {@link FourMcCodec#FOURMC_MAX_BLOCK_SIZE}, if the payload checksum does not match, or if
   *     reading from the underlying stream fails
   */
  @Override
  protected int getCompressedData() throws IOException {
    checkStream();

    // Get the size of the compressed chunk
    int compressedLen = readInt(in, buf, 4);
    noCompressedBytes += 4;

    // Get the checksum of the compressed chunk
    int checksum = readInt(in, buf, 4);
    noCompressedBytes += 4;

    // A negative length can only come from corrupted data. Reject it here so it never reaches
    // the read/checksum calls below, where it would fail with an unchecked exception instead.
    if (compressedLen < 0) {
      throw new IOException("Invalid negative compressed length " + compressedLen);
    }
    if (compressedLen > FourMcCodec.FOURMC_MAX_BLOCK_SIZE) {
      throw new IOException(
          "Compressed length "
              + compressedLen
              + " exceeds max block size "
              + FourMcCodec.FOURMC_MAX_BLOCK_SIZE);
    }

    Lz4Decompressor lz4dec = (Lz4Decompressor) decompressor;

    // if compressed len is not smaller than uncompressedBlockSize, 4mc wrote data w/o compression
    lz4dec.setCurrentBlockUncompressed(compressedLen >= uncompressedBlockSize);

    // Read len bytes from underlying stream, growing the reusable buffer when it is too small
    if (compressedLen > buffer.length) {
      buffer = new byte[compressedLen];
    }
    readFully(in, buffer, 0, compressedLen);
    noCompressedBytes += compressedLen;

    // checksum check: the stored value must equal the xxhash32 (seed 0) of the payload
    if (checksum != Lz4Decompressor.xxhash32(buffer, 0, compressedLen, 0)) {
      if (lz4dec.isCurrentBlockUncompressed()) {
        throw new IOException("Corrupted uncompressed block (invalid checksum)");
      } else {
        throw new IOException("Corrupted compressed block (invalid checksum)");
      }
    }

    // Send the read data to the decompressor.
    lz4dec.setInput(buffer, 0, compressedLen);

    return compressedLen;
  }
Beispiel #2
0
  // Class initialization: report, via the error log, why the native 4mc library is unusable.
  static {
    if (!FourMcNativeCodeLoader.isNativeCodeLoaded()) {
      // Native code loader did not load, so the decompressor is not probed at all.
      LOG.error("Cannot load native-4mc without native-hadoop");
    } else if (!Lz4Decompressor.isNativeLoaded()) {
      LOG.error("Failed to load/initialize native-4mc library");
    }
  }
Beispiel #3
0
  /**
   * Drains any output still pending in the decompressor, delegates to the superclass
   * {@code close()} and then releases the decompressor's direct buffers.
   *
   * <p>Once the decompressor has been released this method is a no-op. The superclass close and
   * the buffer release run even when an earlier step throws, so a failure while draining does not
   * leak the direct buffers.
   *
   * @throws IOException if draining the decompressor or the superclass close fails
   */
  @Override
  public void close() throws IOException {
    if (decompressor == null) {
      return;
    }
    try {
      // Consume remaining output into a scratch buffer until the decompressor reports finished.
      byte[] scratch = new byte[4096];
      while (!decompressor.finished()) {
        decompressor.decompress(scratch, 0, scratch.length);
      }
    } finally {
      try {
        super.close();
      } finally {
        // force release direct buffers of decompressor, even if draining or closing failed
        ((Lz4Decompressor) decompressor).releaseDirectBuffers();
        decompressor = null;
      }
    }
  }
Beispiel #4
0
  /**
   * Reads the 12-byte 4mc header from {@code in} and validates it.
   *
   * <p>The header is interpreted as three ints: magic at offset 0, version at offset 4 and, at
   * offset 8, the xxhash32 (seed 0) of the first 8 bytes.
   *
   * @param in stream positioned at the start of the header
   * @throws IOException if the magic, version or checksum is wrong, or if reading fails
   */
  protected void readHeader(InputStream in) throws IOException {
    readFully(in, buf, 0, 12);

    if (getInt(buf, 0) != FourMcCodec.FOURMC_MAGIC) {
      throw new IOException("Invalid 4mc header (wrong magic)");
    }

    if (getInt(buf, 4) != FourMcCodec.FOURMC_VERSION) {
      throw new IOException("Invalid 4mc header (wrong version)");
    }

    // The stored checksum covers the magic and version fields only.
    final int storedChecksum = getInt(buf, 8);
    final int computedChecksum = Lz4Decompressor.xxhash32(buf, 0, 8, 0);
    if (storedChecksum != computedChecksum) {
      throw new IOException("Invalid 4mc header (invalid checksum)");
    }
  }
Beispiel #5
0
  /**
   * Reads the block index stored in the footer at the tail of a 4mc file.
   *
   * <p>The file is opened, its tail is read and validated (magic, footer size, version and
   * checksum), and the per-block delta offsets are accumulated into absolute offsets. The input
   * stream is always closed before this method returns or throws.
   *
   * @param fs filesystem
   * @param file path to 4mc file
   * @return block index; an empty index when the file is shorter than 32 bytes
   * @throws IOException if the footer magic, size, version or checksum is invalid, or if reading
   *     the file fails
   */
  public static FourMcBlockIndex readIndex(FileSystem fs, Path file) throws IOException {

    long fileSize = fs.getFileStatus(file).getLen();
    if (fileSize < (12 + 20)) { // file too small
      return new FourMcBlockIndex();
    }

    /*
        4mc Footer:
         Footer size:        4 bytes
         Footer version:     4 bytes (1)
         Block index offset: 4 bytes per stored block, each the delta between the previous
                             file position and the next block
         Footer size:        4 bytes (repeated to be able to read from end of file)
         MAGIC SIGNATURE:    4 bytes: "4MC\0"
         Footer checksum:    4 bytes (always in XXHASH32)

        The five fixed fields make 20 bytes, so that is the smallest possible footer.
    */
    final int minFooterSize = 20;

    byte[] buf;
    int readTailSize;
    int footerSize;
    int checksum;

    FSDataInputStream indexIn = fs.open(file);
    try {
      // Jump to the file tail and read ahead the last 4KB of the file (never reaching into the
      // first 12 bytes), which should be enough in most cases.
      // Improvement: we could estimate a best case compression factor of 10% and calc forecast
      // based on filesize and blocksize, to see if better to read ahead more.
      readTailSize = (int) Math.min(4 * 1024L, fileSize - 12);

      indexIn.seek(fileSize - readTailSize);
      buf = new byte[readTailSize];
      readFully(indexIn, buf, 0, buf.length);

      // The last three ints of the file: repeated footer size, magic, checksum.
      footerSize = getInt(buf, buf.length - 12);
      int magic = getInt(buf, buf.length - 8);
      checksum = getInt(buf, buf.length - 4);

      if (magic != FourMcCodec.FOURMC_MAGIC) {
        throw new IOException("Invalid 4mc footer magic");
      }
      // A footer smaller than its fixed fields (or negative) or one that would not fit in the
      // file is corrupt; rejecting it here keeps the offsets computed below inside the buffer.
      if (footerSize < minFooterSize || footerSize >= (fileSize - 12)) {
        throw new IOException("Invalid 4mc footer size");
      }

      // very rare case: read ahead was not enough! seek back and read the whole footer
      if (footerSize > readTailSize) {
        readTailSize = footerSize;
        indexIn.seek(fileSize - readTailSize);
        buf = new byte[readTailSize];
        readFully(indexIn, buf, 0, buf.length);
      }
    } finally {
      indexIn.close();
    }

    int startFooterOffset = readTailSize - footerSize;

    if (getInt(buf, startFooterOffset) != footerSize) { // size again
      throw new IOException("Invalid 4mc footer size");
    }

    int footerVersion = getInt(buf, startFooterOffset + 4);
    if (footerVersion != FourMcCodec.FOURMC_VERSION) { // version
      throw new IOException("Invalid 4mc footer version (" + footerVersion + ")");
    }

    // The checksum covers the whole footer except its own trailing 4 bytes.
    if (checksum != Lz4Decompressor.xxhash32(buf, startFooterOffset, footerSize - 4, 0)) {
      throw new IOException("Invalid 4mc footer checksum");
    }

    // Everything beyond the fixed fields is the table of 4-byte delta offsets.
    int totalBlocks = (footerSize - minFooterSize) / 4;
    FourMcBlockIndex index = new FourMcBlockIndex(totalBlocks);
    long curOffset = 0;
    for (int i = 0; i < totalBlocks; ++i) {
      curOffset += getInt(buf, startFooterOffset + 8 + (i * 4));
      index.set(i, curOffset);
    }

    return index;
  }