/** * Test if {@link FSNamesystem#handleHeartbeat} correctly selects data node targets for block * recovery. */ @Test public void testHeartbeatBlockRecovery() throws Exception { final Configuration conf = new HdfsConfiguration(); final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build(); try { cluster.waitActive(); final FSNamesystem namesystem = cluster.getNamesystem(); final HeartbeatManager hm = namesystem.getBlockManager().getDatanodeManager().getHeartbeatManager(); final String poolId = namesystem.getBlockPoolId(); final DatanodeRegistration nodeReg1 = DataNodeTestUtils.getDNRegistrationForBP(cluster.getDataNodes().get(0), poolId); final DatanodeDescriptor dd1 = NameNodeAdapter.getDatanode(namesystem, nodeReg1); dd1.updateStorage(new DatanodeStorage(DatanodeStorage.generateUuid())); final DatanodeRegistration nodeReg2 = DataNodeTestUtils.getDNRegistrationForBP(cluster.getDataNodes().get(1), poolId); final DatanodeDescriptor dd2 = NameNodeAdapter.getDatanode(namesystem, nodeReg2); dd2.updateStorage(new DatanodeStorage(DatanodeStorage.generateUuid())); final DatanodeRegistration nodeReg3 = DataNodeTestUtils.getDNRegistrationForBP(cluster.getDataNodes().get(2), poolId); final DatanodeDescriptor dd3 = NameNodeAdapter.getDatanode(namesystem, nodeReg3); dd3.updateStorage(new DatanodeStorage(DatanodeStorage.generateUuid())); try { namesystem.writeLock(); synchronized (hm) { NameNodeAdapter.sendHeartBeat(nodeReg1, dd1, namesystem); NameNodeAdapter.sendHeartBeat(nodeReg2, dd2, namesystem); NameNodeAdapter.sendHeartBeat(nodeReg3, dd3, namesystem); // Test with all alive nodes. DFSTestUtil.resetLastUpdatesWithOffset(dd1, 0); DFSTestUtil.resetLastUpdatesWithOffset(dd2, 0); DFSTestUtil.resetLastUpdatesWithOffset(dd3, 0); final DatanodeStorageInfo[] storages = { dd1.getStorageInfos()[0], dd2.getStorageInfos()[0], dd3.getStorageInfos()[0] }; BlockInfo blockInfo = new BlockInfoContiguous( new Block(0, 0, GenerationStamp.LAST_RESERVED_STAMP), (short) 3); blockInfo.convertToBlockUnderConstruction(BlockUCState.UNDER_RECOVERY, storages); dd1.addBlockToBeRecovered(blockInfo); DatanodeCommand[] cmds = NameNodeAdapter.sendHeartBeat(nodeReg1, dd1, namesystem).getCommands(); assertEquals(1, cmds.length); assertEquals(DatanodeProtocol.DNA_RECOVERBLOCK, cmds[0].getAction()); BlockRecoveryCommand recoveryCommand = (BlockRecoveryCommand) cmds[0]; assertEquals(1, recoveryCommand.getRecoveringBlocks().size()); DatanodeInfo[] recoveringNodes = recoveryCommand .getRecoveringBlocks() .toArray(new BlockRecoveryCommand.RecoveringBlock[0])[0] .getLocations(); assertEquals(3, recoveringNodes.length); assertEquals(recoveringNodes[0], dd1); assertEquals(recoveringNodes[1], dd2); assertEquals(recoveringNodes[2], dd3); // Test with one stale node. DFSTestUtil.resetLastUpdatesWithOffset(dd1, 0); // More than the default stale interval of 30 seconds. DFSTestUtil.resetLastUpdatesWithOffset(dd2, -40 * 1000); DFSTestUtil.resetLastUpdatesWithOffset(dd3, 0); blockInfo = new BlockInfoContiguous( new Block(0, 0, GenerationStamp.LAST_RESERVED_STAMP), (short) 3); blockInfo.convertToBlockUnderConstruction(BlockUCState.UNDER_RECOVERY, storages); dd1.addBlockToBeRecovered(blockInfo); cmds = NameNodeAdapter.sendHeartBeat(nodeReg1, dd1, namesystem).getCommands(); assertEquals(1, cmds.length); assertEquals(DatanodeProtocol.DNA_RECOVERBLOCK, cmds[0].getAction()); recoveryCommand = (BlockRecoveryCommand) cmds[0]; assertEquals(1, recoveryCommand.getRecoveringBlocks().size()); recoveringNodes = recoveryCommand .getRecoveringBlocks() .toArray(new BlockRecoveryCommand.RecoveringBlock[0])[0] .getLocations(); assertEquals(2, recoveringNodes.length); // dd2 is skipped. assertEquals(recoveringNodes[0], dd1); assertEquals(recoveringNodes[1], dd3); // Test with all stale node. DFSTestUtil.resetLastUpdatesWithOffset(dd1, -60 * 1000); // More than the default stale interval of 30 seconds. DFSTestUtil.resetLastUpdatesWithOffset(dd2, -40 * 1000); DFSTestUtil.resetLastUpdatesWithOffset(dd3, -80 * 1000); blockInfo = new BlockInfoContiguous( new Block(0, 0, GenerationStamp.LAST_RESERVED_STAMP), (short) 3); blockInfo.convertToBlockUnderConstruction(BlockUCState.UNDER_RECOVERY, storages); dd1.addBlockToBeRecovered(blockInfo); cmds = NameNodeAdapter.sendHeartBeat(nodeReg1, dd1, namesystem).getCommands(); assertEquals(1, cmds.length); assertEquals(DatanodeProtocol.DNA_RECOVERBLOCK, cmds[0].getAction()); recoveryCommand = (BlockRecoveryCommand) cmds[0]; assertEquals(1, recoveryCommand.getRecoveringBlocks().size()); recoveringNodes = recoveryCommand .getRecoveringBlocks() .toArray(new BlockRecoveryCommand.RecoveringBlock[0])[0] .getLocations(); // Only dd1 is included since it heart beated and hence its not stale // when the list of recovery blocks is constructed. assertEquals(3, recoveringNodes.length); assertEquals(recoveringNodes[0], dd1); assertEquals(recoveringNodes[1], dd2); assertEquals(recoveringNodes[2], dd3); } } finally { namesystem.writeUnlock(); } } finally { cluster.shutdown(); } }
/** * Test if {@link FSNamesystem#handleHeartbeat} can pick up replication and/or invalidate requests * and observes the max limit */ @Test public void testHeartbeat() throws Exception { final Configuration conf = new HdfsConfiguration(); final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build(); try { cluster.waitActive(); final FSNamesystem namesystem = cluster.getNamesystem(); final HeartbeatManager hm = namesystem.getBlockManager().getDatanodeManager().getHeartbeatManager(); final String poolId = namesystem.getBlockPoolId(); final DatanodeRegistration nodeReg = DataNodeTestUtils.getDNRegistrationForBP(cluster.getDataNodes().get(0), poolId); final DatanodeDescriptor dd = NameNodeAdapter.getDatanode(namesystem, nodeReg); final String storageID = DatanodeStorage.generateUuid(); dd.updateStorage(new DatanodeStorage(storageID)); final int REMAINING_BLOCKS = 1; final int MAX_REPLICATE_LIMIT = conf.getInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_MAX_STREAMS_KEY, 2); final int MAX_INVALIDATE_LIMIT = DFSConfigKeys.DFS_BLOCK_INVALIDATE_LIMIT_DEFAULT; final int MAX_INVALIDATE_BLOCKS = 2 * MAX_INVALIDATE_LIMIT + REMAINING_BLOCKS; final int MAX_REPLICATE_BLOCKS = 2 * MAX_REPLICATE_LIMIT + REMAINING_BLOCKS; final DatanodeStorageInfo[] ONE_TARGET = {dd.getStorageInfo(storageID)}; try { namesystem.writeLock(); synchronized (hm) { for (int i = 0; i < MAX_REPLICATE_BLOCKS; i++) { dd.addBlockToBeReplicated( new Block(i, 0, GenerationStamp.LAST_RESERVED_STAMP), ONE_TARGET); } DatanodeCommand[] cmds = NameNodeAdapter.sendHeartBeat(nodeReg, dd, namesystem).getCommands(); assertEquals(1, cmds.length); assertEquals(DatanodeProtocol.DNA_TRANSFER, cmds[0].getAction()); assertEquals(MAX_REPLICATE_LIMIT, ((BlockCommand) cmds[0]).getBlocks().length); ArrayList<Block> blockList = new ArrayList<Block>(MAX_INVALIDATE_BLOCKS); for (int i = 0; i < MAX_INVALIDATE_BLOCKS; i++) { blockList.add(new Block(i, 0, GenerationStamp.LAST_RESERVED_STAMP)); } dd.addBlocksToBeInvalidated(blockList); cmds = NameNodeAdapter.sendHeartBeat(nodeReg, dd, namesystem).getCommands(); assertEquals(2, cmds.length); assertEquals(DatanodeProtocol.DNA_TRANSFER, cmds[0].getAction()); assertEquals(MAX_REPLICATE_LIMIT, ((BlockCommand) cmds[0]).getBlocks().length); assertEquals(DatanodeProtocol.DNA_INVALIDATE, cmds[1].getAction()); assertEquals(MAX_INVALIDATE_LIMIT, ((BlockCommand) cmds[1]).getBlocks().length); cmds = NameNodeAdapter.sendHeartBeat(nodeReg, dd, namesystem).getCommands(); assertEquals(2, cmds.length); assertEquals(DatanodeProtocol.DNA_TRANSFER, cmds[0].getAction()); assertEquals(REMAINING_BLOCKS, ((BlockCommand) cmds[0]).getBlocks().length); assertEquals(DatanodeProtocol.DNA_INVALIDATE, cmds[1].getAction()); assertEquals(MAX_INVALIDATE_LIMIT, ((BlockCommand) cmds[1]).getBlocks().length); cmds = NameNodeAdapter.sendHeartBeat(nodeReg, dd, namesystem).getCommands(); assertEquals(1, cmds.length); assertEquals(DatanodeProtocol.DNA_INVALIDATE, cmds[0].getAction()); assertEquals(REMAINING_BLOCKS, ((BlockCommand) cmds[0]).getBlocks().length); cmds = NameNodeAdapter.sendHeartBeat(nodeReg, dd, namesystem).getCommands(); assertEquals(0, cmds.length); } } finally { namesystem.writeUnlock(); } } finally { cluster.shutdown(); } }
/** * Test the case that a replica is reported corrupt while it is not in blocksMap. Make sure that * ArrayIndexOutOfBounds does not thrown. See Hadoop-4351. * * <p>TODO HOPS This test fails as it tries to remove a non-existing replica. Calling * findAndMarkBlockAsCorrupt from a DataNode that does not store any replica for this specific * block will lead to a tuple did not exist exception. The reason for this is that * BlockManager.removeStoredBlock is called with a node that does not store a replica and hence * the delete will not be able to succeed during commit. */ @Test public void testArrayOutOfBoundsException() throws Exception { MiniDFSCluster cluster = null; try { Configuration conf = new HdfsConfiguration(); cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).build(); cluster.waitActive(); FileSystem fs = cluster.getFileSystem(); final Path FILE_PATH = new Path("/tmp.txt"); final long FILE_LEN = 1L; DFSTestUtil.createFile(fs, FILE_PATH, FILE_LEN, (short) 2, 1L); // get the block final String bpid = cluster.getNamesystem().getBlockPoolId(); File storageDir = cluster.getInstanceStorageDir(0, 0); File dataDir = MiniDFSCluster.getFinalizedDir(storageDir, bpid); assertTrue("Data directory does not exist", dataDir.exists()); ExtendedBlock blk = getBlock(bpid, dataDir); if (blk == null) { storageDir = cluster.getInstanceStorageDir(0, 1); dataDir = MiniDFSCluster.getFinalizedDir(storageDir, bpid); blk = getBlock(bpid, dataDir); } assertFalse( "Data directory does not contain any blocks or there was an " + "IO error", blk == null); // start a third datanode cluster.startDataNodes(conf, 1, true, null, null); ArrayList<DataNode> datanodes = cluster.getDataNodes(); assertEquals(datanodes.size(), 3); DataNode dataNode = datanodes.get(2); // report corrupted block by the third datanode DatanodeRegistration dnR = DataNodeTestUtils.getDNRegistrationForBP(dataNode, blk.getBlockPoolId()); // Get the storage id of one of the storages on the datanode String storageId = cluster .getNamesystem() .getBlockManager() .getDatanodeManager() .getDatanode(dataNode.getDatanodeId()) .getStorageInfos()[0] .getStorageID(); cluster .getNamesystem() .getBlockManager() .findAndMarkBlockAsCorrupt(blk, new DatanodeInfo(dnR), storageId, "some test reason"); // open the file fs.open(FILE_PATH); // clean up fs.delete(FILE_PATH, false); } finally { if (cluster != null) { cluster.shutdown(); } } }