int addSomeBlocks(SimulatedFSDataset fsdataset, int startingBlockId) throws IOException {
   int bytesAdded = 0;
   for (int i = startingBlockId; i < startingBlockId + NUMBLOCKS; ++i) {
     ExtendedBlock b = new ExtendedBlock(bpid, i, 0, 0);
      // we pass the expected length as zero; fsdataset should use the size of
      // the actual data written
     ReplicaInPipelineInterface bInfo =
         fsdataset.createRbw(StorageType.DEFAULT, b, false).getReplica();
     ReplicaOutputStreams out =
         bInfo.createStreams(true, DataChecksum.newDataChecksum(DataChecksum.Type.CRC32, 512));
     try {
       OutputStream dataOut = out.getDataOut();
       assertEquals(0, fsdataset.getLength(b));
       for (int j = 1; j <= blockIdToLen(i); ++j) {
         dataOut.write(j);
         assertEquals(j, bInfo.getBytesOnDisk()); // correct length even as we write
         bytesAdded++;
       }
     } finally {
       out.close();
     }
     b.setNumBytes(blockIdToLen(i));
     fsdataset.finalizeBlock(b);
     assertEquals(blockIdToLen(i), fsdataset.getLength(b));
   }
   return bytesAdded;
 }
  private static void truncateBlock(File blockFile, File metaFile, long oldlen, long newlen)
      throws IOException {
    if (newlen == oldlen) {
      return;
    }
    if (newlen > oldlen) {
      throw new IOException(
          "Cannout truncate block to from oldlen (=" + oldlen + ") to newlen (=" + newlen + ")");
    }

    DataChecksum dcs = BlockMetadataHeader.readHeader(metaFile).getChecksum();
    int checksumsize = dcs.getChecksumSize();
    int bpc = dcs.getBytesPerChecksum();
    long n = (newlen - 1) / bpc + 1;
    long newmetalen = BlockMetadataHeader.getHeaderSize() + n * checksumsize;
    long lastchunkoffset = (n - 1) * bpc;
    int lastchunksize = (int) (newlen - lastchunkoffset);
    byte[] b = new byte[Math.max(lastchunksize, checksumsize)];

    RandomAccessFile blockRAF = new RandomAccessFile(blockFile, "rw");
    try {
      // truncate blockFile
      blockRAF.setLength(newlen);

      // read last chunk
      blockRAF.seek(lastchunkoffset);
      blockRAF.readFully(b, 0, lastchunksize);
    } finally {
      blockRAF.close();
    }

    // compute checksum
    dcs.update(b, 0, lastchunksize);
    dcs.writeValue(b, 0, false);

    // update metaFile
    RandomAccessFile metaRAF = new RandomAccessFile(metaFile, "rw");
    try {
      metaRAF.setLength(newmetalen);
      metaRAF.seek(newmetalen - checksumsize);
      metaRAF.write(b, 0, checksumsize);
    } finally {
      metaRAF.close();
    }
  }
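To make the chunk arithmetic above concrete, here is the same computation on illustrative numbers (a 1300-byte target length, 512-byte chunks, 4-byte CRC32 checksums):

public class TruncateBlockMath {
  public static void main(String[] args) {
    long newlen = 1300;   // illustrative target block length
    int bpc = 512;        // bytes per checksum chunk
    int checksumsize = 4; // a CRC32 checksum is 4 bytes wide
    long n = (newlen - 1) / bpc + 1;      // 3 chunks cover 1300 bytes
    long lastchunkoffset = (n - 1) * bpc; // last chunk starts at 1024
    int lastchunksize = (int) (newlen - lastchunkoffset); // 276 bytes remain
    System.out.println(n + " chunks; last chunk [" + lastchunkoffset + ", "
        + newlen + ") holds " + lastchunksize + " bytes; the meta file keeps "
        + n * checksumsize + " checksum bytes after its header");
  }
}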
 public void testGetMetaData() throws IOException {
   final SimulatedFSDataset fsdataset = getSimulatedFSDataset();
   ExtendedBlock b = new ExtendedBlock(bpid, 1, 5, 0);
   try {
     assertTrue(fsdataset.getMetaDataInputStream(b) == null);
     assertTrue("Expected an IO exception", false);
   } catch (IOException e) {
     // ok - as expected
   }
    addSomeBlocks(fsdataset); // only one block is needed, but this adds NUMBLOCKS
   b = new ExtendedBlock(bpid, 1, 0, 0);
   InputStream metaInput = fsdataset.getMetaDataInputStream(b);
   DataInputStream metaDataInput = new DataInputStream(metaInput);
   short version = metaDataInput.readShort();
   assertEquals(BlockMetadataHeader.VERSION, version);
   DataChecksum checksum = DataChecksum.newDataChecksum(metaDataInput);
   assertEquals(DataChecksum.CHECKSUM_NULL, checksum.getChecksumType());
   assertEquals(0, checksum.getChecksumSize());
 }
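The header decoded by this test is small: a 2-byte version followed by DataChecksum's 5-byte header (a 1-byte type and a 4-byte bytesPerChecksum). A standalone sketch that dumps those fields from a meta file using plain java.io; the field widths mirror the classes used above, and the file path argument is whatever block meta file you have on disk:

import java.io.DataInputStream;
import java.io.FileInputStream;
import java.io.IOException;

public class MetaHeaderDump {
  public static void main(String[] args) throws IOException {
    try (DataInputStream in = new DataInputStream(new FileInputStream(args[0]))) {
      short version = in.readShort();      // layout version
      byte type = in.readByte();           // 0 = NULL, 1 = CRC32, 2 = CRC32C
      int bytesPerChecksum = in.readInt(); // bytes covered by one checksum
      System.out.println("version=" + version + " type=" + type
          + " bytesPerChecksum=" + bytesPerChecksum);
    }
  }
}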
  /**
   * Test that when there is a failure replicating a block the temporary and meta files are cleaned
   * up and subsequent replication succeeds.
   */
  @Test
  public void testReplicationError() throws Exception {
    // create a file of replication factor of 1
    final Path fileName = new Path("/test.txt");
    final int fileLen = 1;
    DFSTestUtil.createFile(fs, fileName, fileLen, (short) 1, 1L);
    DFSTestUtil.waitReplication(fs, fileName, (short) 1);

    // get the block belonged to the created file
    LocatedBlocks blocks =
        NameNodeAdapter.getBlockLocations(
            cluster.getNameNode(), fileName.toString(), 0, (long) fileLen);
    assertEquals("Should only find 1 block", blocks.locatedBlockCount(), 1);
    LocatedBlock block = blocks.get(0);

    // bring up a second datanode
    cluster.startDataNodes(conf, 1, true, null, null);
    cluster.waitActive();
    final int sndNode = 1;
    DataNode datanode = cluster.getDataNodes().get(sndNode);

    // replicate the block to the second datanode
    InetSocketAddress target = datanode.getXferAddress();
    Socket s = new Socket(target.getAddress(), target.getPort());
    // write the header.
    DataOutputStream out = new DataOutputStream(s.getOutputStream());

    DataChecksum checksum = DataChecksum.newDataChecksum(DataChecksum.Type.CRC32, 512);
    new Sender(out)
        .writeBlock(
            block.getBlock(),
            StorageType.DEFAULT,
            BlockTokenSecretManager.DUMMY_TOKEN,
            "",
            new DatanodeInfo[0],
            new StorageType[0],
            null,
            BlockConstructionStage.PIPELINE_SETUP_CREATE,
            1,
            0L,
            0L,
            0L,
            checksum,
            CachingStrategy.newDefaultStrategy(),
            false);
    out.flush();

    // close the connection before sending the content of the block
    out.close();

    // the temporary block & meta files should be deleted
    String bpid = cluster.getNamesystem().getBlockPoolId();
    File storageDir = cluster.getInstanceStorageDir(sndNode, 0);
    File dir1 = MiniDFSCluster.getRbwDir(storageDir, bpid);
    storageDir = cluster.getInstanceStorageDir(sndNode, 1);
    File dir2 = MiniDFSCluster.getRbwDir(storageDir, bpid);
    while (dir1.listFiles().length != 0 || dir2.listFiles().length != 0) {
      Thread.sleep(100);
    }

    // then increase the file's replication factor
    fs.setReplication(fileName, (short) 2);
    // replication should succeed
     DFSTestUtil.waitReplication(fs, fileName, (short) 2);

    // clean up the file
    fs.delete(fileName, false);
  }
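One caveat: the while/sleep poll above never times out, so a regression in cleanup hangs the test rather than failing it. A bounded variant of the same poll (the 30-second deadline is an arbitrary choice):

// Bounded version of the cleanup poll; fails fast instead of hanging.
long deadline = System.currentTimeMillis() + 30_000; // arbitrary timeout
while (dir1.listFiles().length != 0 || dir2.listFiles().length != 0) {
  if (System.currentTimeMillis() > deadline) {
    fail("temporary block/meta files were not cleaned up");
  }
  Thread.sleep(100);
}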
  /**
   * Find out the number of bytes in the block that match its crc.
   *
   * <p>This algorithm assumes that data corruption caused by unexpected datanode shutdown occurs
   * only in the last crc chunk. So it checks only the last chunk.
   *
   * @param blockFile the block file
   * @param genStamp generation stamp of the block
   * @return the number of valid bytes
   */
  private long validateIntegrityAndSetLength(File blockFile, long genStamp) {
    DataInputStream checksumIn = null;
    InputStream blockIn = null;
    try {
      final File metaFile = FsDatasetUtil.getMetaFile(blockFile, genStamp);
      long blockFileLen = blockFile.length();
      long metaFileLen = metaFile.length();
      int crcHeaderLen = DataChecksum.getChecksumHeaderSize();
      if (!blockFile.exists()
          || blockFileLen == 0
          || !metaFile.exists()
          || metaFileLen < crcHeaderLen) {
        return 0;
      }
      checksumIn =
          new DataInputStream(
              new BufferedInputStream(
                  new FileInputStream(metaFile), HdfsConstants.IO_FILE_BUFFER_SIZE));

      // read and handle the common header here. For now just a version
      final DataChecksum checksum = BlockMetadataHeader.readDataChecksum(checksumIn, metaFile);
      int bytesPerChecksum = checksum.getBytesPerChecksum();
      int checksumSize = checksum.getChecksumSize();
      long numChunks =
          Math.min(
              (blockFileLen + bytesPerChecksum - 1) / bytesPerChecksum,
              (metaFileLen - crcHeaderLen) / checksumSize);
      if (numChunks == 0) {
        return 0;
      }
      IOUtils.skipFully(checksumIn, (numChunks - 1) * checksumSize);
      blockIn = new FileInputStream(blockFile);
      long lastChunkStartPos = (numChunks - 1) * bytesPerChecksum;
      IOUtils.skipFully(blockIn, lastChunkStartPos);
      int lastChunkSize = (int) Math.min(bytesPerChecksum, blockFileLen - lastChunkStartPos);
      byte[] buf = new byte[lastChunkSize + checksumSize];
      checksumIn.readFully(buf, lastChunkSize, checksumSize);
      IOUtils.readFully(blockIn, buf, 0, lastChunkSize);

      checksum.update(buf, 0, lastChunkSize);
      long validFileLength;
      if (checksum.compare(buf, lastChunkSize)) { // last chunk matches crc
        validFileLength = lastChunkStartPos + lastChunkSize;
      } else { // last chunk is corrupt
        validFileLength = lastChunkStartPos;
      }

      // truncate if extra bytes are present without CRC
      if (blockFile.length() > validFileLength) {
        RandomAccessFile blockRAF = new RandomAccessFile(blockFile, "rw");
        try {
          // truncate blockFile
          blockRAF.setLength(validFileLength);
        } finally {
          blockRAF.close();
        }
      }

      return validFileLength;
    } catch (IOException e) {
      FsDatasetImpl.LOG.warn(e);
      return 0;
    } finally {
      IOUtils.closeStream(checksumIn);
      IOUtils.closeStream(blockIn);
    }
  }
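The same arithmetic on concrete numbers: a 1300-byte block file, 512-byte chunks, 4-byte checksums, and a meta file holding three checksums after its header (taken here as 7 bytes, a 2-byte version plus the 5-byte checksum header; illustrative):

public class ValidLengthMath {
  public static void main(String[] args) {
    long blockFileLen = 1300;        // illustrative on-disk block length
    int bpc = 512, checksumSize = 4; // chunking as configured above
    int crcHeaderLen = 7;            // version (2) + checksum header (5)
    long metaFileLen = crcHeaderLen + 3 * checksumSize; // 3 stored checksums
    long numChunks = Math.min(
        (blockFileLen + bpc - 1) / bpc,               // 3 chunks of data
        (metaFileLen - crcHeaderLen) / checksumSize); // 3 stored checksums
    long lastChunkStartPos = (numChunks - 1) * bpc;   // 1024
    int lastChunkSize =
        (int) Math.min(bpc, blockFileLen - lastChunkStartPos); // 276
    System.out.println("valid length = " + (lastChunkStartPos + lastChunkSize)
        + " if the last crc matches, else " + lastChunkStartPos
        + " (and the block file is truncated to match)");
  }
}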
  /**
   * sendBlock() is used to read the block and its metadata, and to stream the data either to a
   * client or to another datanode.
   *
   * @param out stream to which the block is written to
   * @param baseStream optional. if non-null, <code>out</code> is assumed to be a wrapper over this
   *     stream. This enables optimizations for sending the data, e.g. {@link
   *     SocketOutputStream#transferToFully(FileChannel, long, int)}.
   * @param throttler for sending data.
   * @return total bytes read, including crc.
   */
  long sendBlock(DataOutputStream out, OutputStream baseStream, DataTransferThrottler throttler)
      throws IOException {
    if (out == null) {
      throw new IOException("out stream is null");
    }
    this.throttler = throttler;

    long initialOffset = offset;
    long totalRead = 0;
    OutputStream streamForSendChunks = out;

    final long startTime = ClientTraceLog.isInfoEnabled() ? System.nanoTime() : 0;
    try {
      try {
        checksum.writeHeader(out);
        if (chunkOffsetOK) {
          out.writeLong(offset);
        }
        out.flush();
      } catch (IOException e) { // socket error
        throw ioeToSocketException(e);
      }

      int maxChunksPerPacket;
      int pktSize = PacketHeader.PKT_HEADER_LEN;

      if (transferToAllowed
          && !verifyChecksum
          && baseStream instanceof SocketOutputStream
          && blockIn instanceof FileInputStream) {

        FileChannel fileChannel = ((FileInputStream) blockIn).getChannel();

        // blockInPosition also indicates sendChunks() uses transferTo.
        blockInPosition = fileChannel.position();
        streamForSendChunks = baseStream;

        // ensure a minimum buffer size.
        maxChunksPerPacket =
            (Math.max(BUFFER_SIZE, MIN_BUFFER_WITH_TRANSFERTO) + bytesPerChecksum - 1)
                / bytesPerChecksum;

        // allocate smaller buffer while using transferTo().
        pktSize += checksumSize * maxChunksPerPacket;
      } else {
        maxChunksPerPacket = Math.max(1, (BUFFER_SIZE + bytesPerChecksum - 1) / bytesPerChecksum);
        pktSize += (bytesPerChecksum + checksumSize) * maxChunksPerPacket;
      }

      ByteBuffer pktBuf = ByteBuffer.allocate(pktSize);

      while (endOffset > offset) {
        long len = sendChunks(pktBuf, maxChunksPerPacket, streamForSendChunks);
        offset += len;
        totalRead += len + ((len + bytesPerChecksum - 1) / bytesPerChecksum * checksumSize);
        seqno++;
      }
      try {
        // send an empty packet to mark the end of the block
        sendChunks(pktBuf, maxChunksPerPacket, streamForSendChunks);
        out.flush();
      } catch (IOException e) { // socket error
        throw ioeToSocketException(e);
      }

      sentEntireByteRange = true;
    } finally {
      if (clientTraceFmt != null) {
        final long endTime = System.nanoTime();
        ClientTraceLog.info(
            String.format(clientTraceFmt, totalRead, initialOffset, endTime - startTime));
      }
      close();
    }

    blockReadFully = initialOffset == 0 && offset >= replicaVisibleLength;

    return totalRead;
  }
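The two buffer-sizing branches above allocate very different packets. A sketch with placeholder constants (BUFFER_SIZE and the header length stand in for the class's real values) shows why the transferTo() path can use a much smaller buffer: data bytes go straight from the file channel to the socket and never pass through it.

public class PacketSizing {
  public static void main(String[] args) {
    int BUFFER_SIZE = 64 * 1024; // placeholder for the class constant
    int hdr = 25;                // placeholder for PacketHeader.PKT_HEADER_LEN
    int bpc = 512, checksumSize = 4;
    int maxChunks = (BUFFER_SIZE + bpc - 1) / bpc;          // 128 chunks
    int transferToPkt = hdr + checksumSize * maxChunks;     // checksums only
    int normalPkt = hdr + (bpc + checksumSize) * maxChunks; // + the data
    System.out.println("transferTo buffer: " + transferToPkt
        + " bytes vs normal buffer: " + normalPkt + " bytes");
  }
}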
  /**
   * Sends up to maxChunks chunks of data.
   *
   * <p>When blockInPosition is >= 0, assumes 'out' is a {@link SocketOutputStream} and tries {@link
   * SocketOutputStream#transferToFully(FileChannel, long, int)} to send data (and updates
   * blockInPosition).
   */
  private int sendChunks(ByteBuffer pkt, int maxChunks, OutputStream out) throws IOException {
    // Sends multiple chunks in one packet with a single write().

    int len = (int) Math.min(endOffset - offset, (((long) bytesPerChecksum) * ((long) maxChunks)));
    int numChunks = (len + bytesPerChecksum - 1) / bytesPerChecksum;
    int packetLen = len + numChunks * checksumSize + 4;
    boolean lastDataPacket = offset + len == endOffset && len > 0;
    pkt.clear();

    PacketHeader header = new PacketHeader(packetLen, offset, seqno, (len == 0), len);
    header.putInBuffer(pkt);

    int checksumOff = pkt.position();
    int checksumLen = numChunks * checksumSize;
    byte[] buf = pkt.array();

    if (checksumSize > 0 && checksumIn != null) {
      try {
        checksumIn.readFully(buf, checksumOff, checksumLen);
      } catch (IOException e) {
        LOG.warn(
            " Could not read or failed to veirfy checksum for data"
                + " at offset "
                + offset
                + " for block "
                + block
                + " got : "
                + StringUtils.stringifyException(e));
        IOUtils.closeStream(checksumIn);
        checksumIn = null;
        if (corruptChecksumOk) {
          if (checksumLen > 0) {
            // Zero out the checksum region of the packet so the reader sees
            // well-formed (if meaningless) checksums.
            Arrays.fill(buf, checksumOff, checksumOff + checksumLen, (byte) 0);
          }
        } else {
          throw e;
        }
      }

      // write in progress that we need to use to get last checksum
      if (lastDataPacket && lastChunkChecksum != null) {
        int start = checksumOff + checksumLen - checksumSize;
        byte[] updatedChecksum = lastChunkChecksum.getChecksum();

        if (updatedChecksum != null) {
          System.arraycopy(updatedChecksum, 0, buf, start, checksumSize);
        }
      }
    }

    int dataOff = checksumOff + checksumLen;

    if (blockInPosition < 0) {
      // normal transfer
      IOUtils.readFully(blockIn, buf, dataOff, len);

      if (verifyChecksum) {
        int dOff = dataOff;
        int cOff = checksumOff;
        int dLeft = len;

        for (int i = 0; i < numChunks; i++) {
          checksum.reset();
          int dLen = Math.min(dLeft, bytesPerChecksum);
          checksum.update(buf, dOff, dLen);
          if (!checksum.compare(buf, cOff)) {
            long failedPos = offset + len - dLeft;
            throw new ChecksumException("Checksum failed at " + failedPos, failedPos);
          }
          dLeft -= dLen;
          dOff += dLen;
          cOff += checksumSize;
        }
      }
      // writing is done below (mainly to handle IOException)
    }

    try {
      if (blockInPosition >= 0) {
        // use transferTo(). Checks on out and blockIn are already done.

        SocketOutputStream sockOut = (SocketOutputStream) out;
        // first write the packet
        sockOut.write(buf, 0, dataOff);
        // no need to flush, since we know out is not a buffered stream.

        sockOut.transferToFully(((FileInputStream) blockIn).getChannel(), blockInPosition, len);

        blockInPosition += len;
      } else {
        // normal transfer
        out.write(buf, 0, dataOff + len);
      }

    } catch (IOException e) {
      /* Exception while writing to the client. Connection closure from
       * the other end is mostly the case and we do not care much about
       * it. But other things can go wrong, especially in transferTo(),
       * which we do not want to ignore.
       *
       * The message parsing below should not be considered as a good
       * coding example. NEVER do it to drive program logic. NEVER.
       * It was done here because NIO throws an IOException for EPIPE.
       */
      String ioem = e.getMessage();
      // getMessage() may return null; guard before matching on the text.
      if (ioem == null
          || (!ioem.startsWith("Broken pipe") && !ioem.startsWith("Connection reset"))) {
        LOG.error("BlockSender.sendChunks() exception: ", e);
      }
      throw ioeToSocketException(e);
    }

    if (throttler != null) { // rebalancing so throttle
      throttler.throttle(packetLen);
    }

    return len;
  }
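For orientation, the packet assembled above has a fixed shape: the serialized header, then the checksum block, then (on the normal path only) the data itself; under transferTo() the data bytes bypass the buffer entirely. A small worked example of the length bookkeeping (the trailing + 4 is understood here as the length word counting itself; values illustrative):

//   [ header | checksum 0 .. checksum n-1 | data (len bytes) ]
//             ^checksumOff                 ^dataOff
int len = 1024, bpc = 512, checksumSize = 4;        // illustrative
int numChunks = (len + bpc - 1) / bpc;              // 2 chunks
int packetLen = len + numChunks * checksumSize + 4; // 1024 + 8 + 4 = 1036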
  BlockSender(
      Block block,
      long startOffset,
      long length,
      boolean corruptChecksumOk,
      boolean chunkOffsetOK,
      boolean verifyChecksum,
      DataNode datanode,
      String clientTraceFmt)
      throws IOException {
    try {
      this.block = block;
      synchronized (datanode.data) {
        this.replica = datanode.data.getReplica(block.getBlockId());
        if (replica == null) {
          throw new ReplicaNotFoundException(block);
        }
        this.replicaVisibleLength = replica.getVisibleLength();
      }
      long minEndOffset = startOffset + length;
      // if this is a write in progress
      ChunkChecksum chunkChecksum = null;
      if (replica instanceof ReplicaBeingWritten) {
        for (int i = 0; i < 30 && replica.getBytesOnDisk() < minEndOffset; i++) {
          try {
            Thread.sleep(100);
          } catch (InterruptedException ie) {
            throw new IOException(ie);
          }
        }

        long currentBytesOnDisk = replica.getBytesOnDisk();

        if (currentBytesOnDisk < minEndOffset) {
          throw new IOException(
              String.format(
                  "need %d bytes, but only %d bytes available", minEndOffset, currentBytesOnDisk));
        }

        ReplicaInPipeline rip = (ReplicaInPipeline) replica;
        chunkChecksum = rip.getLastChecksumAndDataLen();
      }

      if (replica.getGenerationStamp() < block.getGenerationStamp()) {
        throw new IOException(
            "replica.getGenerationStamp() < block.getGenerationStamp(), block="
                + block
                + ", replica="
                + replica);
      }
      if (replicaVisibleLength < 0) {
        throw new IOException(
            "The replica is not readable, block=" + block + ", replica=" + replica);
      }
      if (DataNode.LOG.isDebugEnabled()) {
        DataNode.LOG.debug("block=" + block + ", replica=" + replica);
      }

      this.chunkOffsetOK = chunkOffsetOK;
      this.corruptChecksumOk = corruptChecksumOk;
      this.verifyChecksum = verifyChecksum;

      // transferToFully() fails on 32 bit platforms for block sizes >= 2GB,
      // use normal transfer in those cases
      this.transferToAllowed =
          datanode.transferToAllowed && (!is32Bit || length < (long) Integer.MAX_VALUE);
      this.clientTraceFmt = clientTraceFmt;

      if (!corruptChecksumOk || datanode.data.metaFileExists(block)) {
        checksumIn =
            new DataInputStream(
                new BufferedInputStream(datanode.data.getMetaDataInputStream(block), BUFFER_SIZE));

        // read and handle the common header here. For now just a version
        BlockMetadataHeader header = BlockMetadataHeader.readHeader(checksumIn);
        short version = header.getVersion();

        if (version != FSDataset.METADATA_VERSION) {
          LOG.warn(
              "Wrong version (" + version + ") for metadata file for " + block + " ignoring ...");
        }
        checksum = header.getChecksum();
      } else {
        LOG.warn("Could not find metadata file for " + block);
        // This only decides the buffer size. Use BUFFER_SIZE?
        checksum = DataChecksum.newDataChecksum(DataChecksum.CHECKSUM_NULL, 16 * 1024);
      }

      /* If bytesPerChecksum is very large, then the metadata file
       * is mostly corrupted. For now just truncate bytesPerChecksum to
       * the block length.
       */
      bytesPerChecksum = checksum.getBytesPerChecksum();
      if (bytesPerChecksum > 10 * 1024 * 1024 && bytesPerChecksum > replicaVisibleLength) {
        checksum =
            DataChecksum.newDataChecksum(
                checksum.getChecksumType(), Math.max((int) replicaVisibleLength, 10 * 1024 * 1024));
        bytesPerChecksum = checksum.getBytesPerChecksum();
      }
      checksumSize = checksum.getChecksumSize();

      if (length < 0) {
        length = replicaVisibleLength;
      }

      // end is either last byte on disk or the length for which we have a
      // checksum
      if (chunkChecksum != null) {
        endOffset = chunkChecksum.getDataLength();
      } else {
        endOffset = replica.getBytesOnDisk();
      }

      if (startOffset < 0 || startOffset > endOffset || (length + startOffset) > endOffset) {
        String msg =
            " Offset "
                + startOffset
                + " and length "
                + length
                + " don't match block "
                + block
                + " ( blockLen "
                + endOffset
                + " )";
        LOG.warn(datanode.dnRegistration + ":sendBlock() : " + msg);
        throw new IOException(msg);
      }

      offset = (startOffset - (startOffset % bytesPerChecksum));
      if (length >= 0) {
        // Make sure endOffset points to end of a checksumed chunk.
        long tmpLen = startOffset + length;
        if (tmpLen % bytesPerChecksum != 0) {
          tmpLen += (bytesPerChecksum - tmpLen % bytesPerChecksum);
        }
        if (tmpLen < endOffset) {
          // will use on-disk checksum here since the end is a stable chunk
          endOffset = tmpLen;
        } else if (chunkChecksum != null) {
          // in last chunk which is changing. flag that we need to use in-memory
          // checksum
          this.lastChunkChecksum = chunkChecksum;
        }
      }

      // seek to the right offsets
      if (offset > 0) {
        long checksumSkip = (offset / bytesPerChecksum) * checksumSize;
        // note blockIn is positioned at the right offset when created below
        if (checksumSkip > 0) {
          // Should we use seek() for checksum file as well?
          IOUtils.skipFully(checksumIn, checksumSkip);
        }
      }
      seqno = 0;

      if (DataNode.LOG.isDebugEnabled()) {
        DataNode.LOG.debug("replica=" + replica);
      }

      blockIn = datanode.data.getBlockInputStream(block, offset); // seek to offset
    } catch (IOException ioe) {
      IOUtils.closeStream(this);
      IOUtils.closeStream(blockIn);
      throw ioe;
    }
  }
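The rounding near the end of the constructor is easiest to see with numbers: a request for bytes [700, 800) of a block with 512-byte chunks is widened to whole chunks, so every byte sent is covered by a checksum the reader can verify:

public class OffsetAlignment {
  public static void main(String[] args) {
    long startOffset = 700, length = 100; // illustrative client request
    int bpc = 512;
    long offset = startOffset - (startOffset % bpc); // 512: round down
    long tmpLen = startOffset + length;              // 800
    if (tmpLen % bpc != 0) {
      tmpLen += bpc - tmpLen % bpc;                  // 1024: round up
    }
    System.out.println("stream [" + offset + ", " + tmpLen
        + "); the reader discards bytes " + offset + ".." + (startOffset - 1));
  }
}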