int addSomeBlocks(SimulatedFSDataset fsdataset, int startingBlockId)
    throws IOException {
  int bytesAdded = 0;
  for (int i = startingBlockId; i < startingBlockId + NUMBLOCKS; ++i) {
    ExtendedBlock b = new ExtendedBlock(bpid, i, 0, 0);
    // We pass the expected len as zero - fsdataset should use the size of the
    // actual data written.
    ReplicaInPipelineInterface bInfo =
        fsdataset.createRbw(StorageType.DEFAULT, b, false).getReplica();
    ReplicaOutputStreams out = bInfo.createStreams(true,
        DataChecksum.newDataChecksum(DataChecksum.Type.CRC32, 512));
    try {
      OutputStream dataOut = out.getDataOut();
      assertEquals(0, fsdataset.getLength(b));
      for (int j = 1; j <= blockIdToLen(i); ++j) {
        dataOut.write(j);
        assertEquals(j, bInfo.getBytesOnDisk()); // correct length even as we write
        bytesAdded++;
      }
    } finally {
      out.close();
    }
    b.setNumBytes(blockIdToLen(i));
    fsdataset.finalizeBlock(b);
    assertEquals(blockIdToLen(i), fsdataset.getLength(b));
  }
  return bytesAdded;
}
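// A minimal sketch of how this helper might be driven from a test. NUMBLOCKS,
// bpid and getSimulatedFSDataset() are assumed to be defined by the enclosing
// test class; blockIdToLen() below is a hypothetical stand-in for the real
// per-block length function, shown only to make the helper's contract concrete.
static final int NUMBLOCKS = 10;

int blockIdToLen(long blockId) {
  // hypothetical: block i holds i * 10 bytes
  return (int) blockId * 10;
}

@Test
public void testAddSomeBlocks() throws IOException {
  final SimulatedFSDataset fsdataset = getSimulatedFSDataset();
  int bytesAdded = addSomeBlocks(fsdataset, 1);
  // the helper returns the total number of data bytes it wrote,
  // so it must equal the sum of the per-block lengths
  int expected = 0;
  for (int i = 1; i < 1 + NUMBLOCKS; i++) {
    expected += blockIdToLen(i);
  }
  assertEquals(expected, bytesAdded);
}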
private static void truncateBlock(File blockFile, File metaFile,
    long oldlen, long newlen) throws IOException {
  if (newlen == oldlen) {
    return;
  }
  if (newlen > oldlen) {
    throw new IOException("Cannot truncate block from oldlen (=" + oldlen
        + ") to newlen (=" + newlen + ")");
  }

  DataChecksum dcs = BlockMetadataHeader.readHeader(metaFile).getChecksum();
  int checksumsize = dcs.getChecksumSize();
  int bpc = dcs.getBytesPerChecksum();
  long n = (newlen - 1) / bpc + 1;
  long newmetalen = BlockMetadataHeader.getHeaderSize() + n * checksumsize;
  long lastchunkoffset = (n - 1) * bpc;
  int lastchunksize = (int) (newlen - lastchunkoffset);
  byte[] b = new byte[Math.max(lastchunksize, checksumsize)];

  RandomAccessFile blockRAF = new RandomAccessFile(blockFile, "rw");
  try {
    // truncate blockFile
    blockRAF.setLength(newlen);

    // read last chunk
    blockRAF.seek(lastchunkoffset);
    blockRAF.readFully(b, 0, lastchunksize);
  } finally {
    blockRAF.close();
  }

  // compute checksum
  dcs.update(b, 0, lastchunksize);
  dcs.writeValue(b, 0, false);

  // update metaFile
  RandomAccessFile metaRAF = new RandomAccessFile(metaFile, "rw");
  try {
    metaRAF.setLength(newmetalen);
    metaRAF.seek(newmetalen - checksumsize);
    metaRAF.write(b, 0, checksumsize);
  } finally {
    metaRAF.close();
  }
}
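// A worked example of the truncation arithmetic above, under assumed values
// (bytesPerChecksum = 512, CRC32 checksumSize = 4, and a 7-byte metadata
// header: 2-byte version plus 5-byte checksum header). This is only an
// illustrative sketch, not datanode code.
public class TruncateBlockMath {
  public static void main(String[] args) {
    int bpc = 512, checksumsize = 4, headerSize = 7;
    long newlen = 1000;
    long n = (newlen - 1) / bpc + 1;                       // 2 chunks cover 1000 bytes
    long newmetalen = headerSize + n * checksumsize;       // 7 + 2*4 = 15 metadata bytes
    long lastchunkoffset = (n - 1) * bpc;                  // last chunk starts at byte 512
    int lastchunksize = (int) (newlen - lastchunkoffset);  // 488 bytes must be re-checksummed
    System.out.println(n + ", " + newmetalen + ", " + lastchunkoffset + ", " + lastchunksize);
  }
}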
public void testGetMetaData() throws IOException {
  final SimulatedFSDataset fsdataset = getSimulatedFSDataset();
  ExtendedBlock b = new ExtendedBlock(bpid, 1, 5, 0);
  try {
    // the block does not exist yet, so asking for its metadata must fail
    fsdataset.getMetaDataInputStream(b);
    fail("Expected an IOException for a missing block");
  } catch (IOException e) {
    // ok - as expected
  }

  addSomeBlocks(fsdataset); // Only need to add one but ....
  b = new ExtendedBlock(bpid, 1, 0, 0);
  InputStream metaInput = fsdataset.getMetaDataInputStream(b);
  DataInputStream metaDataInput = new DataInputStream(metaInput);
  short version = metaDataInput.readShort();
  assertEquals(BlockMetadataHeader.VERSION, version);
  DataChecksum checksum = DataChecksum.newDataChecksum(metaDataInput);
  assertEquals(DataChecksum.CHECKSUM_NULL, checksum.getChecksumType());
  assertEquals(0, checksum.getChecksumSize());
}
/**
 * Test that when there is a failure replicating a block the temporary and meta files are cleaned
 * up and subsequent replication succeeds.
 */
@Test
public void testReplicationError() throws Exception {
  // create a file with a replication factor of 1
  final Path fileName = new Path("/test.txt");
  final int fileLen = 1;
  DFSTestUtil.createFile(fs, fileName, 1, (short) 1, 1L);
  DFSTestUtil.waitReplication(fs, fileName, (short) 1);

  // get the block belonging to the created file
  LocatedBlocks blocks = NameNodeAdapter.getBlockLocations(
      cluster.getNameNode(), fileName.toString(), 0, (long) fileLen);
  assertEquals("Should only find 1 block", 1, blocks.locatedBlockCount());
  LocatedBlock block = blocks.get(0);

  // bring up a second datanode
  cluster.startDataNodes(conf, 1, true, null, null);
  cluster.waitActive();
  final int sndNode = 1;
  DataNode datanode = cluster.getDataNodes().get(sndNode);

  // replicate the block to the second datanode
  InetSocketAddress target = datanode.getXferAddress();
  Socket s = new Socket(target.getAddress(), target.getPort());

  // write the transfer header
  DataOutputStream out = new DataOutputStream(s.getOutputStream());
  DataChecksum checksum = DataChecksum.newDataChecksum(DataChecksum.Type.CRC32, 512);
  new Sender(out).writeBlock(
      block.getBlock(),
      StorageType.DEFAULT,
      BlockTokenSecretManager.DUMMY_TOKEN,
      "",
      new DatanodeInfo[0],
      new StorageType[0],
      null,
      BlockConstructionStage.PIPELINE_SETUP_CREATE,
      1, 0L, 0L, 0L,
      checksum,
      CachingStrategy.newDefaultStrategy(),
      false);
  out.flush();

  // close the connection before sending the content of the block
  out.close();

  // the temporary block & meta files should be deleted
  String bpid = cluster.getNamesystem().getBlockPoolId();
  File storageDir = cluster.getInstanceStorageDir(sndNode, 0);
  File dir1 = MiniDFSCluster.getRbwDir(storageDir, bpid);
  storageDir = cluster.getInstanceStorageDir(sndNode, 1);
  File dir2 = MiniDFSCluster.getRbwDir(storageDir, bpid);
  while (dir1.listFiles().length != 0 || dir2.listFiles().length != 0) {
    Thread.sleep(100);
  }

  // then increase the file's replication factor
  fs.setReplication(fileName, (short) 2);
  // replication should succeed
  DFSTestUtil.waitReplication(fs, fileName, (short) 2);

  // clean up the file
  fs.delete(fileName, false);
}
/**
 * Find out the number of bytes in the block that match its crc.
 *
 * <p>This algorithm assumes that data corruption caused by unexpected datanode shutdown occurs
 * only in the last crc chunk. So it checks only the last chunk.
 *
 * @param blockFile the block file
 * @param genStamp generation stamp of the block
 * @return the number of valid bytes
 */
private long validateIntegrityAndSetLength(File blockFile, long genStamp) {
  DataInputStream checksumIn = null;
  InputStream blockIn = null;
  try {
    final File metaFile = FsDatasetUtil.getMetaFile(blockFile, genStamp);
    long blockFileLen = blockFile.length();
    long metaFileLen = metaFile.length();
    int crcHeaderLen = DataChecksum.getChecksumHeaderSize();
    if (!blockFile.exists() || blockFileLen == 0
        || !metaFile.exists() || metaFileLen < crcHeaderLen) {
      return 0;
    }
    checksumIn = new DataInputStream(new BufferedInputStream(
        new FileInputStream(metaFile), HdfsConstants.IO_FILE_BUFFER_SIZE));

    // read and handle the common header here. For now just a version
    final DataChecksum checksum = BlockMetadataHeader.readDataChecksum(checksumIn, metaFile);
    int bytesPerChecksum = checksum.getBytesPerChecksum();
    int checksumSize = checksum.getChecksumSize();
    long numChunks = Math.min(
        (blockFileLen + bytesPerChecksum - 1) / bytesPerChecksum,
        (metaFileLen - crcHeaderLen) / checksumSize);
    if (numChunks == 0) {
      return 0;
    }
    IOUtils.skipFully(checksumIn, (numChunks - 1) * checksumSize);
    blockIn = new FileInputStream(blockFile);
    long lastChunkStartPos = (numChunks - 1) * bytesPerChecksum;
    IOUtils.skipFully(blockIn, lastChunkStartPos);
    int lastChunkSize = (int) Math.min(bytesPerChecksum, blockFileLen - lastChunkStartPos);
    byte[] buf = new byte[lastChunkSize + checksumSize];
    checksumIn.readFully(buf, lastChunkSize, checksumSize);
    IOUtils.readFully(blockIn, buf, 0, lastChunkSize);
    checksum.update(buf, 0, lastChunkSize);
    long validFileLength;
    if (checksum.compare(buf, lastChunkSize)) { // last chunk matches crc
      validFileLength = lastChunkStartPos + lastChunkSize;
    } else { // last chunk is corrupt
      validFileLength = lastChunkStartPos;
    }

    // truncate if extra bytes are present without CRC
    if (blockFile.length() > validFileLength) {
      RandomAccessFile blockRAF = new RandomAccessFile(blockFile, "rw");
      try {
        // truncate blockFile
        blockRAF.setLength(validFileLength);
      } finally {
        blockRAF.close();
      }
    }

    return validFileLength;
  } catch (IOException e) {
    FsDatasetImpl.LOG.warn(e);
    return 0;
  } finally {
    IOUtils.closeStream(checksumIn);
    IOUtils.closeStream(blockIn);
  }
}
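// A worked example of the chunk-count bound above, under assumed values
// (bytesPerChecksum = 512, CRC32 checksumSize = 4, crcHeaderLen = 5 as
// returned by DataChecksum.getChecksumHeaderSize()). Illustrative sketch only.
public class ValidLengthMath {
  public static void main(String[] args) {
    long blockFileLen = 1500;  // data on disk
    long metaFileLen = 15;     // header plus two 4-byte checksums
    int bytesPerChecksum = 512, checksumSize = 4, crcHeaderLen = 5;
    long byData = (blockFileLen + bytesPerChecksum - 1) / bytesPerChecksum; // 3 chunks of data
    long byCrc = (metaFileLen - crcHeaderLen) / checksumSize;               // but only 2 checksums
    long numChunks = Math.min(byData, byCrc);                               // -> 2
    // At most 2 * 512 = 1024 bytes can be validated; the unchecksummed
    // 476-byte tail is truncated away by the method above.
    System.out.println(numChunks);
  }
}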
/**
 * sendBlock() is used to read a block and its metadata and stream the data to either a client or
 * to another datanode.
 *
 * @param out stream to which the block is written
 * @param baseStream optional. if non-null, <code>out</code> is assumed to be a wrapper over this
 *     stream. This enables optimizations for sending the data, e.g. {@link
 *     SocketOutputStream#transferToFully(FileChannel, long, int)}.
 * @param throttler for sending data.
 * @return total bytes read, including crc.
 */
long sendBlock(DataOutputStream out, OutputStream baseStream, DataTransferThrottler throttler)
    throws IOException {
  if (out == null) {
    throw new IOException("out stream is null");
  }
  this.throttler = throttler;

  long initialOffset = offset;
  long totalRead = 0;
  OutputStream streamForSendChunks = out;
  final long startTime = ClientTraceLog.isInfoEnabled() ? System.nanoTime() : 0;
  try {
    try {
      checksum.writeHeader(out);
      if (chunkOffsetOK) {
        out.writeLong(offset);
      }
      out.flush();
    } catch (IOException e) { // socket error
      throw ioeToSocketException(e);
    }

    int maxChunksPerPacket;
    int pktSize = PacketHeader.PKT_HEADER_LEN;
    if (transferToAllowed && !verifyChecksum
        && baseStream instanceof SocketOutputStream
        && blockIn instanceof FileInputStream) {
      FileChannel fileChannel = ((FileInputStream) blockIn).getChannel();

      // blockInPosition also indicates sendChunks() uses transferTo.
      blockInPosition = fileChannel.position();
      streamForSendChunks = baseStream;

      // ensure a minimum buffer size.
      maxChunksPerPacket = (Math.max(BUFFER_SIZE, MIN_BUFFER_WITH_TRANSFERTO)
          + bytesPerChecksum - 1) / bytesPerChecksum;

      // allocate a smaller buffer while using transferTo().
      pktSize += checksumSize * maxChunksPerPacket;
    } else {
      maxChunksPerPacket = Math.max(1, (BUFFER_SIZE + bytesPerChecksum - 1) / bytesPerChecksum);
      pktSize += (bytesPerChecksum + checksumSize) * maxChunksPerPacket;
    }

    ByteBuffer pktBuf = ByteBuffer.allocate(pktSize);

    while (endOffset > offset) {
      long len = sendChunks(pktBuf, maxChunksPerPacket, streamForSendChunks);
      offset += len;
      totalRead += len + ((len + bytesPerChecksum - 1) / bytesPerChecksum * checksumSize);
      seqno++;
    }
    try {
      // send an empty packet to mark the end of the block
      sendChunks(pktBuf, maxChunksPerPacket, streamForSendChunks);
      out.flush();
    } catch (IOException e) { // socket error
      throw ioeToSocketException(e);
    }

    sentEntireByteRange = true;
  } finally {
    if (clientTraceFmt != null) {
      final long endTime = System.nanoTime();
      ClientTraceLog.info(
          String.format(clientTraceFmt, totalRead, initialOffset, endTime - startTime));
    }
    close();
  }

  blockReadFully = initialOffset == 0 && offset >= replicaVisibleLength;

  return totalRead;
}
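// A sketch of the packet-sizing arithmetic above, under assumed values
// (BUFFER_SIZE = 64 KB, MIN_BUFFER_WITH_TRANSFERTO = 64 KB, bytesPerChecksum
// = 512, CRC32 checksumSize = 4; HDR stands in for PacketHeader.PKT_HEADER_LEN,
// and 25 is only an illustrative value). The real values come from configuration.
public class PacketSizing {
  public static void main(String[] args) {
    final int BUFFER_SIZE = 64 * 1024, MIN_BUFFER_WITH_TRANSFERTO = 64 * 1024;
    final int bytesPerChecksum = 512, checksumSize = 4, HDR = 25;
    int maxChunksPerPacket = (Math.max(BUFFER_SIZE, MIN_BUFFER_WITH_TRANSFERTO)
        + bytesPerChecksum - 1) / bytesPerChecksum;                 // 128 chunks per packet
    // transferTo() path: the packet buffer carries only checksums,
    // the data itself goes straight from the file channel to the socket
    int pktTransferTo = HDR + checksumSize * maxChunksPerPacket;    // 25 + 512
    // normal path: the packet buffer carries both data and checksums
    int pktNormal = HDR + (bytesPerChecksum + checksumSize) * maxChunksPerPacket; // 25 + 66048
    System.out.println(pktTransferTo + ", " + pktNormal);
  }
}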
/**
 * Sends up to maxChunks chunks of data.
 *
 * <p>When blockInPosition is >= 0, assumes 'out' is a {@link SocketOutputStream} and tries {@link
 * SocketOutputStream#transferToFully(FileChannel, long, int)} to send data (and updates
 * blockInPosition).
 */
private int sendChunks(ByteBuffer pkt, int maxChunks, OutputStream out) throws IOException {
  // Sends multiple chunks in one packet with a single write().
  int len = (int) Math.min(endOffset - offset, (((long) bytesPerChecksum) * ((long) maxChunks)));
  int numChunks = (len + bytesPerChecksum - 1) / bytesPerChecksum;
  int packetLen = len + numChunks * checksumSize + 4;
  boolean lastDataPacket = offset + len == endOffset && len > 0;
  pkt.clear();

  PacketHeader header = new PacketHeader(packetLen, offset, seqno, (len == 0), len);
  header.putInBuffer(pkt);

  int checksumOff = pkt.position();
  int checksumLen = numChunks * checksumSize;
  byte[] buf = pkt.array();

  if (checksumSize > 0 && checksumIn != null) {
    try {
      checksumIn.readFully(buf, checksumOff, checksumLen);
    } catch (IOException e) {
      LOG.warn(" Could not read or failed to verify checksum for data"
          + " at offset " + offset + " for block " + block + " got : "
          + StringUtils.stringifyException(e));
      IOUtils.closeStream(checksumIn);
      checksumIn = null;
      if (corruptChecksumOk) {
        if (checksumLen > 0) {
          // Just fill the checksum region of the packet with zeros.
          Arrays.fill(buf, checksumOff, checksumOff + checksumLen, (byte) 0);
        }
      } else {
        throw e;
      }
    }

    // for a write that is still in progress, overwrite the last checksum in
    // the packet with the up-to-date in-memory value
    if (lastDataPacket && lastChunkChecksum != null) {
      int start = checksumOff + checksumLen - checksumSize;
      byte[] updatedChecksum = lastChunkChecksum.getChecksum();

      if (updatedChecksum != null) {
        System.arraycopy(updatedChecksum, 0, buf, start, checksumSize);
      }
    }
  }

  int dataOff = checksumOff + checksumLen;

  if (blockInPosition < 0) {
    // normal transfer
    IOUtils.readFully(blockIn, buf, dataOff, len);

    if (verifyChecksum) {
      int dOff = dataOff;
      int cOff = checksumOff;
      int dLeft = len;

      for (int i = 0; i < numChunks; i++) {
        checksum.reset();
        int dLen = Math.min(dLeft, bytesPerChecksum);
        checksum.update(buf, dOff, dLen);
        if (!checksum.compare(buf, cOff)) {
          long failedPos = offset + len - dLeft;
          throw new ChecksumException("Checksum failed at " + failedPos, failedPos);
        }
        dLeft -= dLen;
        dOff += dLen;
        cOff += checksumSize;
      }
    }
    // writing is done below (mainly to handle IOException)
  }

  try {
    if (blockInPosition >= 0) {
      // use transferTo(). Checks on out and blockIn are already done.
      SocketOutputStream sockOut = (SocketOutputStream) out;
      // first write the packet header and checksums
      sockOut.write(buf, 0, dataOff);

      // no need to flush, since we know out is not a buffered stream.
      sockOut.transferToFully(((FileInputStream) blockIn).getChannel(), blockInPosition, len);

      blockInPosition += len;
    } else {
      // normal transfer
      out.write(buf, 0, dataOff + len);
    }
  } catch (IOException e) {
    /* Exception while writing to the client. Connection closure from
     * the other end is mostly the case and we do not care much about
     * it. But other things can go wrong, especially in transferTo(),
     * which we do not want to ignore.
     *
     * The message parsing below should not be considered as a good
     * coding example. NEVER do it to drive program logic. NEVER.
     * It was done here because NIO throws an IOException for EPIPE.
     */
    String ioem = e.getMessage();
    if (!ioem.startsWith("Broken pipe") && !ioem.startsWith("Connection reset")) {
      LOG.error("BlockSender.sendChunks() exception: ", e);
    }
    throw ioeToSocketException(e);
  }

  if (throttler != null) { // rebalancing, so throttle
    throttler.throttle(packetLen);
  }

  return len;
}
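// A sketch of the packet layout built by sendChunks() above, under assumed
// values (bytesPerChecksum = 512, CRC32 checksumSize = 4, maxChunks = 128,
// and enough bytes left before endOffset for a full packet). Illustrative only.
public class PacketLayout {
  public static void main(String[] args) {
    int bytesPerChecksum = 512, checksumSize = 4, maxChunks = 128;
    int len = bytesPerChecksum * maxChunks;                          // 65536 data bytes
    int numChunks = (len + bytesPerChecksum - 1) / bytesPerChecksum; // 128 chunks
    // packetLen counts the payload plus a 4-byte length field, and the
    // buffer is laid out as [ header | 128 * 4 checksum bytes | data ]
    int packetLen = len + numChunks * checksumSize + 4;              // 66052
    System.out.println(numChunks + " chunks, packetLen = " + packetLen);
  }
}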
BlockSender(
    Block block,
    long startOffset,
    long length,
    boolean corruptChecksumOk,
    boolean chunkOffsetOK,
    boolean verifyChecksum,
    DataNode datanode,
    String clientTraceFmt)
    throws IOException {
  try {
    this.block = block;
    synchronized (datanode.data) {
      this.replica = datanode.data.getReplica(block.getBlockId());
      if (replica == null) {
        throw new ReplicaNotFoundException(block);
      }
      this.replicaVisibleLength = replica.getVisibleLength();
    }
    long minEndOffset = startOffset + length;
    // if this is a write in progress
    ChunkChecksum chunkChecksum = null;
    if (replica instanceof ReplicaBeingWritten) {
      for (int i = 0; i < 30 && replica.getBytesOnDisk() < minEndOffset; i++) {
        try {
          Thread.sleep(100);
        } catch (InterruptedException ie) {
          throw new IOException(ie);
        }
      }

      long currentBytesOnDisk = replica.getBytesOnDisk();

      if (currentBytesOnDisk < minEndOffset) {
        throw new IOException(String.format(
            "need %d bytes, but only %d bytes available", minEndOffset, currentBytesOnDisk));
      }

      ReplicaInPipeline rip = (ReplicaInPipeline) replica;
      chunkChecksum = rip.getLastChecksumAndDataLen();
    }

    if (replica.getGenerationStamp() < block.getGenerationStamp()) {
      throw new IOException("replica.getGenerationStamp() < block.getGenerationStamp(), block="
          + block + ", replica=" + replica);
    }
    if (replicaVisibleLength < 0) {
      throw new IOException("The replica is not readable, block=" + block
          + ", replica=" + replica);
    }
    if (DataNode.LOG.isDebugEnabled()) {
      DataNode.LOG.debug("block=" + block + ", replica=" + replica);
    }

    this.chunkOffsetOK = chunkOffsetOK;
    this.corruptChecksumOk = corruptChecksumOk;
    this.verifyChecksum = verifyChecksum;

    // transferToFully() fails on 32 bit platforms for block sizes >= 2GB,
    // use normal transfer in those cases
    this.transferToAllowed =
        datanode.transferToAllowed && (!is32Bit || length < (long) Integer.MAX_VALUE);
    this.clientTraceFmt = clientTraceFmt;

    if (!corruptChecksumOk || datanode.data.metaFileExists(block)) {
      checksumIn = new DataInputStream(new BufferedInputStream(
          datanode.data.getMetaDataInputStream(block), BUFFER_SIZE));

      // read and handle the common header here. For now just a version
      BlockMetadataHeader header = BlockMetadataHeader.readHeader(checksumIn);
      short version = header.getVersion();

      if (version != FSDataset.METADATA_VERSION) {
        LOG.warn("Wrong version (" + version + ") for metadata file for " + block
            + " ignoring ...");
      }
      checksum = header.getChecksum();
    } else {
      LOG.warn("Could not find metadata file for " + block);
      // This only decides the buffer size. Use BUFFER_SIZE?
      checksum = DataChecksum.newDataChecksum(DataChecksum.CHECKSUM_NULL, 16 * 1024);
    }

    /* If bytesPerChecksum is very large, then the metadata file
     * is mostly corrupted. For now just truncate bytesPerChecksum to
     * blockLength.
     */
    bytesPerChecksum = checksum.getBytesPerChecksum();
    if (bytesPerChecksum > 10 * 1024 * 1024 && bytesPerChecksum > replicaVisibleLength) {
      checksum = DataChecksum.newDataChecksum(
          checksum.getChecksumType(), Math.max((int) replicaVisibleLength, 10 * 1024 * 1024));
      bytesPerChecksum = checksum.getBytesPerChecksum();
    }
    checksumSize = checksum.getChecksumSize();

    if (length < 0) {
      length = replicaVisibleLength;
    }

    // end is either the last byte on disk or the length for which we have a
    // checksum
    if (chunkChecksum != null) {
      endOffset = chunkChecksum.getDataLength();
    } else {
      endOffset = replica.getBytesOnDisk();
    }

    if (startOffset < 0 || startOffset > endOffset || (length + startOffset) > endOffset) {
      String msg = " Offset " + startOffset + " and length " + length + " don't match block "
          + block + " ( blockLen " + endOffset + " )";
      LOG.warn(datanode.dnRegistration + ":sendBlock() : " + msg);
      throw new IOException(msg);
    }

    offset = (startOffset - (startOffset % bytesPerChecksum));
    if (length >= 0) {
      // Make sure endOffset points to the end of a checksummed chunk.
      long tmpLen = startOffset + length;
      if (tmpLen % bytesPerChecksum != 0) {
        tmpLen += (bytesPerChecksum - tmpLen % bytesPerChecksum);
      }
      if (tmpLen < endOffset) {
        // will use the on-disk checksum here since the end is a stable chunk
        endOffset = tmpLen;
      } else if (chunkChecksum != null) {
        // in the last chunk, which is changing; flag that we need to use the
        // in-memory checksum
        this.lastChunkChecksum = chunkChecksum;
      }
    }

    // seek to the right offsets
    if (offset > 0) {
      long checksumSkip = (offset / bytesPerChecksum) * checksumSize;
      // note blockInStream is seeked when created below
      if (checksumSkip > 0) {
        // Should we use seek() for the checksum file as well?
        IOUtils.skipFully(checksumIn, checksumSkip);
      }
    }
    seqno = 0;

    if (DataNode.LOG.isDebugEnabled()) {
      DataNode.LOG.debug("replica=" + replica);
    }

    blockIn = datanode.data.getBlockInputStream(block, offset); // seek to offset
  } catch (IOException ioe) {
    IOUtils.closeStream(this);
    IOUtils.closeStream(blockIn);
    throw ioe;
  }
}
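// A sketch of the chunk-alignment arithmetic in the constructor above, under
// assumed values (bytesPerChecksum = 512, a request for bytes [700, 1700),
// 4096 bytes on disk). Illustrative only.
public class ChunkAlignment {
  public static void main(String[] args) {
    int bytesPerChecksum = 512;
    long startOffset = 700, length = 1000, endOffset = 4096;
    // reads start on a chunk boundary so every sent chunk can be verified
    long offset = startOffset - (startOffset % bytesPerChecksum);  // 512
    long tmpLen = startOffset + length;                            // 1700
    if (tmpLen % bytesPerChecksum != 0) {
      tmpLen += bytesPerChecksum - tmpLen % bytesPerChecksum;      // round up to 2048
    }
    if (tmpLen < endOffset) {
      endOffset = tmpLen;  // send whole chunks [512, 2048) covering the request
    }
    System.out.println(offset + " .. " + endOffset);
  }
}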