/**
   * Test if {@link FSNamesystem#handleHeartbeat} correctly selects data node targets for block
   * recovery.
   */
  @Test
  public void testHeartbeatBlockRecovery() throws Exception {
    final Configuration conf = new HdfsConfiguration();
    final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
    try {
      cluster.waitActive();
      final FSNamesystem namesystem = cluster.getNamesystem();
      final HeartbeatManager hm =
          namesystem.getBlockManager().getDatanodeManager().getHeartbeatManager();
      final String poolId = namesystem.getBlockPoolId();
      final DatanodeRegistration nodeReg1 =
          DataNodeTestUtils.getDNRegistrationForBP(cluster.getDataNodes().get(0), poolId);
      final DatanodeDescriptor dd1 = NameNodeAdapter.getDatanode(namesystem, nodeReg1);
      dd1.updateStorage(new DatanodeStorage(DatanodeStorage.generateUuid()));
      final DatanodeRegistration nodeReg2 =
          DataNodeTestUtils.getDNRegistrationForBP(cluster.getDataNodes().get(1), poolId);
      final DatanodeDescriptor dd2 = NameNodeAdapter.getDatanode(namesystem, nodeReg2);
      dd2.updateStorage(new DatanodeStorage(DatanodeStorage.generateUuid()));
      final DatanodeRegistration nodeReg3 =
          DataNodeTestUtils.getDNRegistrationForBP(cluster.getDataNodes().get(2), poolId);
      final DatanodeDescriptor dd3 = NameNodeAdapter.getDatanode(namesystem, nodeReg3);
      dd3.updateStorage(new DatanodeStorage(DatanodeStorage.generateUuid()));

      try {
        namesystem.writeLock();
        synchronized (hm) {
          NameNodeAdapter.sendHeartBeat(nodeReg1, dd1, namesystem);
          NameNodeAdapter.sendHeartBeat(nodeReg2, dd2, namesystem);
          NameNodeAdapter.sendHeartBeat(nodeReg3, dd3, namesystem);

          // Test with all alive nodes.
          DFSTestUtil.resetLastUpdatesWithOffset(dd1, 0);
          DFSTestUtil.resetLastUpdatesWithOffset(dd2, 0);
          DFSTestUtil.resetLastUpdatesWithOffset(dd3, 0);
          final DatanodeStorageInfo[] storages = {
            dd1.getStorageInfos()[0], dd2.getStorageInfos()[0], dd3.getStorageInfos()[0]
          };
          BlockInfo blockInfo =
              new BlockInfoContiguous(
                  new Block(0, 0, GenerationStamp.LAST_RESERVED_STAMP), (short) 3);
          blockInfo.convertToBlockUnderConstruction(BlockUCState.UNDER_RECOVERY, storages);
          dd1.addBlockToBeRecovered(blockInfo);
          DatanodeCommand[] cmds =
              NameNodeAdapter.sendHeartBeat(nodeReg1, dd1, namesystem).getCommands();
          assertEquals(1, cmds.length);
          assertEquals(DatanodeProtocol.DNA_RECOVERBLOCK, cmds[0].getAction());
          BlockRecoveryCommand recoveryCommand = (BlockRecoveryCommand) cmds[0];
          assertEquals(1, recoveryCommand.getRecoveringBlocks().size());
          DatanodeInfo[] recoveringNodes =
              recoveryCommand
                  .getRecoveringBlocks()
                  .toArray(new BlockRecoveryCommand.RecoveringBlock[0])[0]
                  .getLocations();
          assertEquals(3, recoveringNodes.length);
          assertEquals(recoveringNodes[0], dd1);
          assertEquals(recoveringNodes[1], dd2);
          assertEquals(recoveringNodes[2], dd3);

          // Test with one stale node.
          DFSTestUtil.resetLastUpdatesWithOffset(dd1, 0);
          // More than the default stale interval of 30 seconds.
          DFSTestUtil.resetLastUpdatesWithOffset(dd2, -40 * 1000);
          DFSTestUtil.resetLastUpdatesWithOffset(dd3, 0);
          blockInfo =
              new BlockInfoContiguous(
                  new Block(0, 0, GenerationStamp.LAST_RESERVED_STAMP), (short) 3);
          blockInfo.convertToBlockUnderConstruction(BlockUCState.UNDER_RECOVERY, storages);
          dd1.addBlockToBeRecovered(blockInfo);
          cmds = NameNodeAdapter.sendHeartBeat(nodeReg1, dd1, namesystem).getCommands();
          assertEquals(1, cmds.length);
          assertEquals(DatanodeProtocol.DNA_RECOVERBLOCK, cmds[0].getAction());
          recoveryCommand = (BlockRecoveryCommand) cmds[0];
          assertEquals(1, recoveryCommand.getRecoveringBlocks().size());
          recoveringNodes =
              recoveryCommand
                  .getRecoveringBlocks()
                  .toArray(new BlockRecoveryCommand.RecoveringBlock[0])[0]
                  .getLocations();
          assertEquals(2, recoveringNodes.length);
          // dd2 is skipped.
          assertEquals(recoveringNodes[0], dd1);
          assertEquals(recoveringNodes[1], dd3);

          // Test with all stale node.
          DFSTestUtil.resetLastUpdatesWithOffset(dd1, -60 * 1000);
          // More than the default stale interval of 30 seconds.
          DFSTestUtil.resetLastUpdatesWithOffset(dd2, -40 * 1000);
          DFSTestUtil.resetLastUpdatesWithOffset(dd3, -80 * 1000);
          blockInfo =
              new BlockInfoContiguous(
                  new Block(0, 0, GenerationStamp.LAST_RESERVED_STAMP), (short) 3);
          blockInfo.convertToBlockUnderConstruction(BlockUCState.UNDER_RECOVERY, storages);
          dd1.addBlockToBeRecovered(blockInfo);
          cmds = NameNodeAdapter.sendHeartBeat(nodeReg1, dd1, namesystem).getCommands();
          assertEquals(1, cmds.length);
          assertEquals(DatanodeProtocol.DNA_RECOVERBLOCK, cmds[0].getAction());
          recoveryCommand = (BlockRecoveryCommand) cmds[0];
          assertEquals(1, recoveryCommand.getRecoveringBlocks().size());
          recoveringNodes =
              recoveryCommand
                  .getRecoveringBlocks()
                  .toArray(new BlockRecoveryCommand.RecoveringBlock[0])[0]
                  .getLocations();
          // Only dd1 is included since it heart beated and hence its not stale
          // when the list of recovery blocks is constructed.
          assertEquals(3, recoveringNodes.length);
          assertEquals(recoveringNodes[0], dd1);
          assertEquals(recoveringNodes[1], dd2);
          assertEquals(recoveringNodes[2], dd3);
        }
      } finally {
        namesystem.writeUnlock();
      }
    } finally {
      cluster.shutdown();
    }
  }
  /**
   * Test if {@link FSNamesystem#handleHeartbeat} can pick up replication and/or invalidate requests
   * and observes the max limit
   */
  @Test
  public void testHeartbeat() throws Exception {
    final Configuration conf = new HdfsConfiguration();
    final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build();
    try {
      cluster.waitActive();
      final FSNamesystem namesystem = cluster.getNamesystem();
      final HeartbeatManager hm =
          namesystem.getBlockManager().getDatanodeManager().getHeartbeatManager();
      final String poolId = namesystem.getBlockPoolId();
      final DatanodeRegistration nodeReg =
          DataNodeTestUtils.getDNRegistrationForBP(cluster.getDataNodes().get(0), poolId);
      final DatanodeDescriptor dd = NameNodeAdapter.getDatanode(namesystem, nodeReg);
      final String storageID = DatanodeStorage.generateUuid();
      dd.updateStorage(new DatanodeStorage(storageID));

      final int REMAINING_BLOCKS = 1;
      final int MAX_REPLICATE_LIMIT =
          conf.getInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_MAX_STREAMS_KEY, 2);
      final int MAX_INVALIDATE_LIMIT = DFSConfigKeys.DFS_BLOCK_INVALIDATE_LIMIT_DEFAULT;
      final int MAX_INVALIDATE_BLOCKS = 2 * MAX_INVALIDATE_LIMIT + REMAINING_BLOCKS;
      final int MAX_REPLICATE_BLOCKS = 2 * MAX_REPLICATE_LIMIT + REMAINING_BLOCKS;
      final DatanodeStorageInfo[] ONE_TARGET = {dd.getStorageInfo(storageID)};

      try {
        namesystem.writeLock();
        synchronized (hm) {
          for (int i = 0; i < MAX_REPLICATE_BLOCKS; i++) {
            dd.addBlockToBeReplicated(
                new Block(i, 0, GenerationStamp.LAST_RESERVED_STAMP), ONE_TARGET);
          }
          DatanodeCommand[] cmds =
              NameNodeAdapter.sendHeartBeat(nodeReg, dd, namesystem).getCommands();
          assertEquals(1, cmds.length);
          assertEquals(DatanodeProtocol.DNA_TRANSFER, cmds[0].getAction());
          assertEquals(MAX_REPLICATE_LIMIT, ((BlockCommand) cmds[0]).getBlocks().length);

          ArrayList<Block> blockList = new ArrayList<Block>(MAX_INVALIDATE_BLOCKS);
          for (int i = 0; i < MAX_INVALIDATE_BLOCKS; i++) {
            blockList.add(new Block(i, 0, GenerationStamp.LAST_RESERVED_STAMP));
          }
          dd.addBlocksToBeInvalidated(blockList);
          cmds = NameNodeAdapter.sendHeartBeat(nodeReg, dd, namesystem).getCommands();
          assertEquals(2, cmds.length);
          assertEquals(DatanodeProtocol.DNA_TRANSFER, cmds[0].getAction());
          assertEquals(MAX_REPLICATE_LIMIT, ((BlockCommand) cmds[0]).getBlocks().length);
          assertEquals(DatanodeProtocol.DNA_INVALIDATE, cmds[1].getAction());
          assertEquals(MAX_INVALIDATE_LIMIT, ((BlockCommand) cmds[1]).getBlocks().length);

          cmds = NameNodeAdapter.sendHeartBeat(nodeReg, dd, namesystem).getCommands();
          assertEquals(2, cmds.length);
          assertEquals(DatanodeProtocol.DNA_TRANSFER, cmds[0].getAction());
          assertEquals(REMAINING_BLOCKS, ((BlockCommand) cmds[0]).getBlocks().length);
          assertEquals(DatanodeProtocol.DNA_INVALIDATE, cmds[1].getAction());
          assertEquals(MAX_INVALIDATE_LIMIT, ((BlockCommand) cmds[1]).getBlocks().length);

          cmds = NameNodeAdapter.sendHeartBeat(nodeReg, dd, namesystem).getCommands();
          assertEquals(1, cmds.length);
          assertEquals(DatanodeProtocol.DNA_INVALIDATE, cmds[0].getAction());
          assertEquals(REMAINING_BLOCKS, ((BlockCommand) cmds[0]).getBlocks().length);

          cmds = NameNodeAdapter.sendHeartBeat(nodeReg, dd, namesystem).getCommands();
          assertEquals(0, cmds.length);
        }
      } finally {
        namesystem.writeUnlock();
      }
    } finally {
      cluster.shutdown();
    }
  }
Exemplo n.º 3
0
  /**
   * Test the case that a replica is reported corrupt while it is not in blocksMap. Make sure that
   * ArrayIndexOutOfBounds does not thrown. See Hadoop-4351.
   *
   * <p>TODO HOPS This test fails as it tries to remove a non-existing replica. Calling
   * findAndMarkBlockAsCorrupt from a DataNode that does not store any replica for this specific
   * block will lead to a tuple did not exist exception. The reason for this is that
   * BlockManager.removeStoredBlock is called with a node that does not store a replica and hence
   * the delete will not be able to succeed during commit.
   */
  @Test
  public void testArrayOutOfBoundsException() throws Exception {
    MiniDFSCluster cluster = null;
    try {
      Configuration conf = new HdfsConfiguration();
      cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).build();
      cluster.waitActive();

      FileSystem fs = cluster.getFileSystem();
      final Path FILE_PATH = new Path("/tmp.txt");
      final long FILE_LEN = 1L;
      DFSTestUtil.createFile(fs, FILE_PATH, FILE_LEN, (short) 2, 1L);

      // get the block
      final String bpid = cluster.getNamesystem().getBlockPoolId();
      File storageDir = cluster.getInstanceStorageDir(0, 0);
      File dataDir = MiniDFSCluster.getFinalizedDir(storageDir, bpid);
      assertTrue("Data directory does not exist", dataDir.exists());
      ExtendedBlock blk = getBlock(bpid, dataDir);
      if (blk == null) {
        storageDir = cluster.getInstanceStorageDir(0, 1);
        dataDir = MiniDFSCluster.getFinalizedDir(storageDir, bpid);
        blk = getBlock(bpid, dataDir);
      }
      assertFalse(
          "Data directory does not contain any blocks or there was an " + "IO error", blk == null);

      // start a third datanode
      cluster.startDataNodes(conf, 1, true, null, null);
      ArrayList<DataNode> datanodes = cluster.getDataNodes();
      assertEquals(datanodes.size(), 3);
      DataNode dataNode = datanodes.get(2);

      // report corrupted block by the third datanode
      DatanodeRegistration dnR =
          DataNodeTestUtils.getDNRegistrationForBP(dataNode, blk.getBlockPoolId());

      // Get the storage id of one of the storages on the datanode
      String storageId =
          cluster
              .getNamesystem()
              .getBlockManager()
              .getDatanodeManager()
              .getDatanode(dataNode.getDatanodeId())
              .getStorageInfos()[0]
              .getStorageID();

      cluster
          .getNamesystem()
          .getBlockManager()
          .findAndMarkBlockAsCorrupt(blk, new DatanodeInfo(dnR), storageId, "some test reason");

      // open the file
      fs.open(FILE_PATH);

      // clean up
      fs.delete(FILE_PATH, false);
    } finally {
      if (cluster != null) {
        cluster.shutdown();
      }
    }
  }