protected final boolean verifyDeletedBlocks(LocatedBlocks locatedBlocks)
      throws IOException, InterruptedException {

    LOG.info("Verifying replica has no saved copy after deletion.");
    triggerBlockReport();

    while (DataNodeTestUtils.getPendingAsyncDeletions(cluster.getDataNodes().get(0)) > 0L) {
      Thread.sleep(1000);
    }

    final String bpid = cluster.getNamesystem().getBlockPoolId();
    List<? extends FsVolumeSpi> volumes = cluster.getDataNodes().get(0).getFSDataset().getVolumes();

    // Make sure the deleted replica does not have a copy in either the finalized dir of
    // a transient volume or the lazyPersist dir of a non-transient volume
    for (FsVolumeSpi v : volumes) {
      FsVolumeImpl volume = (FsVolumeImpl) v;
      File targetDir =
          (v.isTransientStorage())
              ? volume.getBlockPoolSlice(bpid).getFinalizedDir()
              : volume.getBlockPoolSlice(bpid).getLazypersistDir();
      if (!verifyBlockDeletedFromDir(targetDir, locatedBlocks)) {
        return false;
      }
    }
    return true;
  }
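  /**
   * Hedged sketch of the verifyBlockDeletedFromDir() helper referenced above; the real helper is
   * defined elsewhere in this class, so the exact behavior here is an assumption. It checks that
   * no located block still has a block file or meta file under the given directory, using
   * DatanodeUtil.idToBlockDir() the same way ensureLazyPersistBlocksAreSaved() below does.
   */
  protected final boolean verifyBlockDeletedFromDir(File dir, LocatedBlocks locatedBlocks) {
    for (LocatedBlock lb : locatedBlocks.getLocatedBlocks()) {
      File targetDir = DatanodeUtil.idToBlockDir(dir, lb.getBlock().getBlockId());

      File blockFile = new File(targetDir, lb.getBlock().getBlockName());
      if (blockFile.exists()) {
        LOG.warn("blockFile: " + blockFile.getAbsolutePath() + " exists after deletion.");
        return false;
      }
      // The meta file name is assumed to follow DatanodeUtil.getMetaName(blockName, genStamp).
      File metaFile = new File(targetDir,
          DatanodeUtil.getMetaName(lb.getBlock().getBlockName(),
              lb.getBlock().getGenerationStamp()));
      if (metaFile.exists()) {
        LOG.warn("metaFile: " + metaFile.getAbsolutePath() + " exists after deletion.");
        return false;
      }
    }
    return true;
  }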
  /**
   * Set up a {@link MiniDFSCluster}. Create a block with both {@link State#NORMAL} and {@link
   * State#READ_ONLY_SHARED} replicas.
   */
  @Before
  public void setup() throws IOException, InterruptedException {
    conf = new HdfsConfiguration();
    SimulatedFSDataset.setFactory(conf);

    Configuration[] overlays = new Configuration[NUM_DATANODES];
    for (int i = 0; i < overlays.length; i++) {
      overlays[i] = new Configuration();
      if (i == RO_NODE_INDEX) {
        // Only the read-only node gets the READ_ONLY_SHARED simulated storage state.
        overlays[i].setEnum(SimulatedFSDataset.CONFIG_PROPERTY_STATE, READ_ONLY_SHARED);
      }
    }

    cluster =
        new MiniDFSCluster.Builder(conf)
            .numDataNodes(NUM_DATANODES)
            .dataNodeConfOverlays(overlays)
            .build();
    fs = cluster.getFileSystem();
    blockManager = cluster.getNameNode().getNamesystem().getBlockManager();
    datanodeManager = blockManager.getDatanodeManager();
    client =
        new DFSClient(
            new InetSocketAddress("localhost", cluster.getNameNodePort()),
            cluster.getConfiguration(0));

    for (int i = 0; i < NUM_DATANODES; i++) {
      DataNode dataNode = cluster.getDataNodes().get(i);
      validateStorageState(
          BlockManagerTestUtil.getStorageReportsForDatanode(
              datanodeManager.getDatanode(dataNode.getDatanodeId())),
          i == RO_NODE_INDEX ? READ_ONLY_SHARED : NORMAL);
    }

    // Create a 1 block file
    DFSTestUtil.createFile(fs, PATH, BLOCK_SIZE, BLOCK_SIZE, BLOCK_SIZE, (short) 1, seed);

    LocatedBlock locatedBlock = getLocatedBlock();
    extendedBlock = locatedBlock.getBlock();
    block = extendedBlock.getLocalBlock();

    assertThat(locatedBlock.getLocations().length, is(1));
    normalDataNode = locatedBlock.getLocations()[0];
    readOnlyDataNode =
        datanodeManager.getDatanode(cluster.getDataNodes().get(RO_NODE_INDEX).getDatanodeId());
    assertThat(normalDataNode, is(not(readOnlyDataNode)));

    validateNumberReplicas(1);

    // Inject the block into the datanode with READ_ONLY_SHARED storage
    cluster.injectBlocks(0, RO_NODE_INDEX, Collections.singleton(block));

    // There should now be 2 *locations* for the block
    // Must wait until the NameNode has processed the block report for the injected blocks
    waitForLocations(2);
  }
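  /**
   * Hedged sketches of the getLocatedBlock() and waitForLocations() helpers used in setup()
   * above; the real helpers are defined elsewhere in this class, so the polling scheme and exact
   * assertions are assumptions. They rely only on DFSClient#getLocatedBlocks().
   */
  private LocatedBlock getLocatedBlock() throws IOException {
    LocatedBlocks locatedBlocks = client.getLocatedBlocks(PATH.toString(), 0, BLOCK_SIZE);
    assertThat(locatedBlocks.getLocatedBlocks().size(), is(1));
    return locatedBlocks.getLocatedBlocks().get(0);
  }

  private void waitForLocations(int locations) throws IOException, InterruptedException {
    // Poll until the NameNode reports the expected number of locations for the single block.
    while (getLocatedBlock().getLocations().length != locations) {
      Thread.sleep(1000);
    }
  }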
  /* Start balancer and check if the cluster is balanced after the run */
  private void runBalancer(long totalUsedSpace, long totalCapacity) throws Exception {
    waitForHeartBeat(totalUsedSpace, totalCapacity);

    // start rebalancing
    balancer = new Balancer(CONF);
    balancer.run(new String[0]);

    waitForHeartBeat(totalUsedSpace, totalCapacity);
    boolean balanced;
    do {
      DatanodeInfo[] datanodeReport = client.getDatanodeReport(DatanodeReportType.ALL);
      assertEquals(datanodeReport.length, cluster.getDataNodes().size());
      balanced = true;
      double avgUtilization = ((double) totalUsedSpace) / totalCapacity * 100;
      for (DatanodeInfo datanode : datanodeReport) {
        if (Math.abs(
                avgUtilization - ((double) datanode.getDfsUsed()) / datanode.getCapacity() * 100)
            > 10) {
          balanced = false;
          try {
            Thread.sleep(100);
          } catch (InterruptedException ignored) {
          }
          break;
        }
      }
    } while (!balanced);
  }
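  /*
   * Hedged sketch of the waitForHeartBeat() helper used by runBalancer() above; the real helper
   * is defined elsewhere in this class. It assumes that client is a ClientProtocol handle and
   * that getStats() reports capacity at index 0 and used space at index 1, as in the older
   * balancer tests; treat both as assumptions rather than the original implementation.
   */
  private void waitForHeartBeat(long expectedUsedSpace, long expectedTotalSpace)
      throws IOException {
    long[] status = client.getStats();
    while (status[0] != expectedTotalSpace || status[1] != expectedUsedSpace) {
      try {
        Thread.sleep(100L);
      } catch (InterruptedException ignored) {
      }
      status = client.getStats();
    }
  }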
  /** Test that the DN does not shut down as long as failed volumes are being hot swapped out. */
  @Test
  public void testVolumeFailureRecoveredByHotSwappingVolume()
      throws InterruptedException, ReconfigurationException, IOException {
    final File dn0Vol1 = new File(dataDir, "data" + (2 * 0 + 1));
    final File dn0Vol2 = new File(dataDir, "data" + (2 * 0 + 2));
    final DataNode dn0 = cluster.getDataNodes().get(0);
    final String oldDataDirs = dn0.getConf().get(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY);

    // Fail dn0Vol1 first.
    DataNodeTestUtils.injectDataDirFailure(dn0Vol1);
    checkDiskErrorSync(dn0);

    // Hot swap out the failure volume.
    String dataDirs = dn0Vol2.getPath();
    dn0.reconfigurePropertyImpl(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY, dataDirs);

    // Fix failure volume dn0Vol1 and remount it back.
    DataNodeTestUtils.restoreDataDirFromFailure(dn0Vol1);
    dn0.reconfigurePropertyImpl(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY, oldDataDirs);

    // Fail dn0Vol2. Now since dn0Vol1 has been fixed, DN0 has sufficient
    // resources, thus it should keep running.
    DataNodeTestUtils.injectDataDirFailure(dn0Vol2);
    checkDiskErrorSync(dn0);
    assertTrue(dn0.shouldRun());
  }
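  /**
   * Hedged sketch of the checkDiskErrorSync() helper used by the volume-failure tests above and
   * below; the real helper is defined elsewhere in this class. It relies only on
   * DataNode#checkDiskErrorAsync() and DataNode#getLastDiskErrorCheck(), the same calls that
   * testcheckDiskError() below exercises, but the polling scheme itself is an assumption.
   */
  private static void checkDiskErrorSync(DataNode dn) throws InterruptedException {
    final long lastDiskErrorCheck = dn.getLastDiskErrorCheck();
    dn.checkDiskErrorAsync();
    // Wait for the async disk checker to run and record a new check time.
    while (dn.getLastDiskErrorCheck() == lastDiskErrorCheck) {
      Thread.sleep(100);
    }
  }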
  /**
   * Make sure at least one non-transient volume has a saved copy of the replica. A polling loop
   * is used to ensure the async lazy persist tasks are completely done before verification. The
   * caller of ensureLazyPersistBlocksAreSaved expects either a successful pass or a timeout
   * failure.
   */
  protected final void ensureLazyPersistBlocksAreSaved(LocatedBlocks locatedBlocks)
      throws IOException, InterruptedException {
    final String bpid = cluster.getNamesystem().getBlockPoolId();
    List<? extends FsVolumeSpi> volumes = cluster.getDataNodes().get(0).getFSDataset().getVolumes();
    final Set<Long> persistedBlockIds = new HashSet<Long>();

    while (persistedBlockIds.size() < locatedBlocks.getLocatedBlocks().size()) {
      // Take 1 second sleep before each verification iteration
      Thread.sleep(1000);

      for (LocatedBlock lb : locatedBlocks.getLocatedBlocks()) {
        for (FsVolumeSpi v : volumes) {
          if (v.isTransientStorage()) {
            continue;
          }

          FsVolumeImpl volume = (FsVolumeImpl) v;
          File lazyPersistDir = volume.getBlockPoolSlice(bpid).getLazypersistDir();

          long blockId = lb.getBlock().getBlockId();
          File targetDir = DatanodeUtil.idToBlockDir(lazyPersistDir, blockId);
          File blockFile = new File(targetDir, lb.getBlock().getBlockName());
          if (blockFile.exists()) {
            // Found a persisted copy for this block and added to the Set
            persistedBlockIds.add(blockId);
          }
        }
      }
    }

    // We should have found a persisted copy for each located block.
    assertThat(persistedBlockIds.size(), is(locatedBlocks.getLocatedBlocks().size()));
  }
 /**
  * Checks whether {@link DataNode#checkDiskErrorAsync()} is actually called. Before the disk
  * checking code was refactored, this method was never invoked.
  *
  * @throws IOException on cluster startup failure
  * @throws InterruptedException if the sleep is interrupted
  */
 @Test
 public void testcheckDiskError() throws IOException, InterruptedException {
   if (cluster.getDataNodes().size() <= 0) {
     cluster.startDataNodes(conf, 1, true, null, null);
     cluster.waitActive();
   }
   DataNode dataNode = cluster.getDataNodes().get(0);
   long slackTime = dataNode.checkDiskErrorInterval / 2;
   // checking for disk error
   dataNode.checkDiskErrorAsync();
   Thread.sleep(dataNode.checkDiskErrorInterval);
   long lastDiskErrorCheck = dataNode.getLastDiskErrorCheck();
   assertTrue(
       "Disk Error check is not performed within  " + dataNode.checkDiskErrorInterval + "  ms",
       ((Time.monotonicNow() - lastDiskErrorCheck)
           < (dataNode.checkDiskErrorInterval + slackTime)));
 }
 /** Test BPService Thread Exit */
 @Test
 public void testBPServiceExit() throws Exception {
   DataNode dn = cluster.getDataNodes().get(0);
   stopBPServiceThreads(1, dn);
   assertTrue("DataNode should not exit", dn.isDatanodeUp());
   stopBPServiceThreads(2, dn);
   assertFalse("DataNode should exit", dn.isDatanodeUp());
 }
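 /**
  * Hedged sketch of the stopBPServiceThreads() helper used by testBPServiceExit() above; the
  * real helper is defined elsewhere in this class. It assumes DataNode#getAllBpOs() exposes the
  * per-block-pool BPOfferService instances and that BPOfferService#stop() shuts one down, so
  * treat the names and signature as assumptions.
  */
 private void stopBPServiceThreads(int numStopThreads, DataNode dn) throws Exception {
   BPOfferService[] bpoList = dn.getAllBpOs();
   // Stop the requested number of block-pool service threads, then give them time to exit.
   for (int i = 0; i < numStopThreads && i < bpoList.length; i++) {
     bpoList[i].stop();
   }
   Thread.sleep(1000);
 }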
  /**
   * Tests that the APPEND operation can handle token expiration when the pipeline needs to be
   * re-established.
   */
  @Test
  public void testAppend() throws Exception {
    MiniDFSCluster cluster = null;
    int numDataNodes = 2;
    Configuration conf = getConf(numDataNodes);

    try {
      cluster = new MiniDFSCluster.Builder(conf).numDataNodes(numDataNodes).build();
      cluster.waitActive();
      assertEquals(numDataNodes, cluster.getDataNodes().size());

      final NameNode nn = cluster.getNameNode();
      final BlockManager bm = nn.getNamesystem().getBlockManager();
      final BlockTokenSecretManager sm = bm.getBlockTokenSecretManager();

      // set a short token lifetime (1 second)
      SecurityTestUtil.setBlockTokenLifetime(sm, 1000L);
      Path fileToAppend = new Path(FILE_TO_APPEND);
      FileSystem fs = cluster.getFileSystem();
      byte[] expected = generateBytes(FILE_SIZE);
      // write a one-byte file
      FSDataOutputStream stm = writeFile(fs, fileToAppend, (short) numDataNodes, BLOCK_SIZE);
      stm.write(expected, 0, 1);
      stm.close();
      // open the file again for append
      stm = fs.append(fileToAppend);
      int mid = expected.length - 1;
      stm.write(expected, 1, mid - 1);
      stm.hflush();

      /*
       * wait till token used in stm expires
       */
      Token<BlockTokenIdentifier> token = DFSTestUtil.getBlockToken(stm);
      while (!SecurityTestUtil.isBlockTokenExpired(token)) {
        try {
          Thread.sleep(10);
        } catch (InterruptedException ignored) {
        }
      }

      // remove a datanode to force re-establishing pipeline
      cluster.stopDataNode(0);
      // append the rest of the file
      stm.write(expected, mid, expected.length - mid);
      stm.close();
      // check if append is successful
      FSDataInputStream in5 = fs.open(fileToAppend);
      assertTrue(checkFile1(in5, expected));
    } finally {
      if (cluster != null) {
        cluster.shutdown();
      }
    }
  }
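  /*
   * Hedged sketch of the writeFile() helper used by testAppend() above; the real helper is
   * defined elsewhere in this class, so the buffer size and overwrite flag are assumptions.
   * It simply opens an output stream with the requested replication factor and block size.
   */
  static FSDataOutputStream writeFile(FileSystem fileSys, Path name, short repl, long blockSize)
      throws IOException {
    return fileSys.create(name, true, 4096, repl, blockSize);
  }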
 private int getTrueReplication(MiniDFSCluster cluster, ExtendedBlock block) throws IOException {
   int count = 0;
   for (DataNode dn : cluster.getDataNodes()) {
     if (DataNodeTestUtils.getFSDataset(dn)
             .getStoredBlock(block.getBlockPoolId(), block.getBlockId())
         != null) {
       count++;
     }
   }
   return count;
 }
  /**
   * Test whether we can delay the deletion of unknown blocks in DataNode's first several block
   * reports.
   */
  @Test
  public void testPendingDeleteUnknownBlocks() throws Exception {
    final int fileNum = 5; // 5 files
    final Path[] files = new Path[fileNum];
    final DataNodeProperties[] dnprops = new DataNodeProperties[REPLICATION];
    // create a group of files, each file contains 1 block
    for (int i = 0; i < fileNum; i++) {
      files[i] = new Path("/file" + i);
      DFSTestUtil.createFile(dfs, files[i], BLOCKSIZE, REPLICATION, i);
    }
    // wait until all DataNodes have replicas
    waitForReplication();
    for (int i = REPLICATION - 1; i >= 0; i--) {
      dnprops[i] = cluster.stopDataNode(i);
    }
    Thread.sleep(2000);
    // delete 2 files, we still have 3 files remaining so that we can cover
    // every DN storage
    for (int i = 0; i < 2; i++) {
      dfs.delete(files[i], true);
    }

    // restart NameNode
    cluster.restartNameNode(false);
    InvalidateBlocks invalidateBlocks =
        (InvalidateBlocks)
            Whitebox.getInternalState(
                cluster.getNamesystem().getBlockManager(), "invalidateBlocks");
    InvalidateBlocks mockIb = Mockito.spy(invalidateBlocks);
    Mockito.doReturn(1L).when(mockIb).getInvalidationDelay();
    Whitebox.setInternalState(
        cluster.getNamesystem().getBlockManager(), "invalidateBlocks", mockIb);

    Assert.assertEquals(0L, cluster.getNamesystem().getPendingDeletionBlocks());
    // restart DataNodes
    for (int i = 0; i < REPLICATION; i++) {
      cluster.restartDataNode(dnprops[i], true);
    }
    cluster.waitActive();

    for (int i = 0; i < REPLICATION; i++) {
      DataNodeTestUtils.triggerBlockReport(cluster.getDataNodes().get(i));
    }
    Thread.sleep(2000);
    // make sure we have received block reports by checking the total block #
    Assert.assertEquals(3, cluster.getNamesystem().getBlocksTotal());
    Assert.assertEquals(4, cluster.getNamesystem().getPendingDeletionBlocks());

    cluster.restartNameNode(true);
    Thread.sleep(6000);
    Assert.assertEquals(3, cluster.getNamesystem().getBlocksTotal());
    Assert.assertEquals(0, cluster.getNamesystem().getPendingDeletionBlocks());
  }
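  /**
   * Hedged sketch of the waitForReplication() helper called in testPendingDeleteUnknownBlocks()
   * above; the real helper is defined elsewhere in this class. Polling the NameNode's
   * under-replicated block count is an assumption rather than the original implementation.
   */
  private void waitForReplication() throws Exception {
    while (cluster.getNamesystem().getUnderReplicatedBlocks() > 0) {
      Thread.sleep(1000);
    }
  }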
  /** Test that DataStorage and BlockPoolSliceStorage remove the failed volume after failure. */
  @Test(timeout = 150000)
  public void testFailedVolumeBeingRemovedFromDataNode()
      throws InterruptedException, IOException, TimeoutException {
    // The test uses DataNodeTestUtils#injectDataDirFailure() to simulate
    // volume failures which is currently not supported on Windows.
    assumeTrue(!Path.WINDOWS);

    Path file1 = new Path("/test1");
    DFSTestUtil.createFile(fs, file1, 1024, (short) 2, 1L);
    DFSTestUtil.waitReplication(fs, file1, (short) 2);

    File dn0Vol1 = new File(dataDir, "data" + (2 * 0 + 1));
    DataNodeTestUtils.injectDataDirFailure(dn0Vol1);
    DataNode dn0 = cluster.getDataNodes().get(0);
    checkDiskErrorSync(dn0);

    // Verify dn0Vol1 has been completely removed from DN0.
    // 1. dn0Vol1 is removed from DataStorage.
    DataStorage storage = dn0.getStorage();
    assertEquals(1, storage.getNumStorageDirs());
    for (int i = 0; i < storage.getNumStorageDirs(); i++) {
      Storage.StorageDirectory sd = storage.getStorageDir(i);
      assertFalse(sd.getRoot().getAbsolutePath().startsWith(dn0Vol1.getAbsolutePath()));
    }
    final String bpid = cluster.getNamesystem().getBlockPoolId();
    BlockPoolSliceStorage bpsStorage = storage.getBPStorage(bpid);
    assertEquals(1, bpsStorage.getNumStorageDirs());
    for (int i = 0; i < bpsStorage.getNumStorageDirs(); i++) {
      Storage.StorageDirectory sd = bpsStorage.getStorageDir(i);
      assertFalse(sd.getRoot().getAbsolutePath().startsWith(dn0Vol1.getAbsolutePath()));
    }

    // 2. dn0Vol1 is removed from FsDataset
    FsDatasetSpi<? extends FsVolumeSpi> data = dn0.getFSDataset();
    try (FsDatasetSpi.FsVolumeReferences vols = data.getFsVolumeReferences()) {
      for (FsVolumeSpi volume : vols) {
        assertNotEquals(
            new File(volume.getBasePath()).getAbsoluteFile(), dn0Vol1.getAbsoluteFile());
      }
    }

    // 3. all blocks on dn0Vol1 have been removed.
    for (ReplicaInfo replica : FsDatasetTestUtil.getReplicas(data, bpid)) {
      assertNotNull(replica.getVolume());
      assertNotEquals(
          new File(replica.getVolume().getBasePath()).getAbsoluteFile(), dn0Vol1.getAbsoluteFile());
    }

    // 4. dn0Vol1 is not in DN0's configuration and dataDirs anymore.
    String[] dataDirStrs = dn0.getConf().get(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY).split(",");
    assertEquals(1, dataDirStrs.length);
    assertFalse(dataDirStrs[0].contains(dn0Vol1.getAbsolutePath()));
  }
  /**
   * Another regression test for HDFS-2742. This tests the following sequence:
   * - DN does a block report while the file is open. This BR contains the block in RBW state.
   * - The block report is delayed in reaching the standby.
   * - The file is closed.
   * - The standby processes the OP_ADD and OP_CLOSE operations before the RBW block report
   *   arrives.
   * - The standby should not mark the block as corrupt.
   */
  @Test
  public void testRBWReportArrivesAfterEdits() throws Exception {
    final CountDownLatch brFinished = new CountDownLatch(1);
    DelayAnswer delayer =
        new GenericTestUtils.DelayAnswer(LOG) {
          @Override
          protected Object passThrough(InvocationOnMock invocation) throws Throwable {
            try {
              return super.passThrough(invocation);
            } finally {
              // inform the test that our block report went through.
              brFinished.countDown();
            }
          }
        };

    FSDataOutputStream out = fs.create(TEST_FILE_PATH);
    try {
      AppendTestUtil.write(out, 0, 10);
      out.hflush();

      DataNode dn = cluster.getDataNodes().get(0);
      DatanodeProtocolClientSideTranslatorPB spy = DataNodeTestUtils.spyOnBposToNN(dn, nn2);

      Mockito.doAnswer(delayer)
          .when(spy)
          .blockReport(
              Mockito.<DatanodeRegistration>anyObject(),
              Mockito.anyString(),
              Mockito.<StorageBlockReport[]>anyObject());
      dn.scheduleAllBlockReport(0);
      delayer.waitForCall();

    } finally {
      IOUtils.closeStream(out);
    }

    cluster.transitionToStandby(0);
    cluster.transitionToActive(1);

    delayer.proceed();
    brFinished.await();

    // Verify that no replicas are marked corrupt, and that the
    // file is readable from the failed-over standby.
    BlockManagerTestUtil.updateState(nn1.getNamesystem().getBlockManager());
    BlockManagerTestUtil.updateState(nn2.getNamesystem().getBlockManager());
    assertEquals(0, nn1.getNamesystem().getCorruptReplicaBlocks());
    assertEquals(0, nn2.getNamesystem().getCorruptReplicaBlocks());

    DFSTestUtil.readFile(fs, TEST_FILE_PATH);
  }
 @Test
 public void testSendOOBToPeers() throws Exception {
   DataNode dn = cluster.getDataNodes().get(0);
   DataXceiverServer spyXserver = Mockito.spy(dn.getXferServer());
   NullPointerException npe = new NullPointerException();
   Mockito.doThrow(npe).when(spyXserver).sendOOBToPeers();
   dn.xserver = spyXserver;
   try {
     dn.shutdown();
   } catch (Exception e) {
     fail("DataNode shutdown should not have thrown exception " + e);
   }
 }
  /**
   * If ramDiskStorageLimit is >=0, then RAM_DISK capacity is artificially capped. If
   * ramDiskStorageLimit < 0 then it is ignored.
   */
  protected final void startUpCluster(
      final int numDataNodes,
      final StorageType[] storageTypes,
      final long ramDiskStorageLimit,
      final boolean useSCR)
      throws IOException {

    Configuration conf = new Configuration();
    conf.setLong(DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
    conf.setInt(
        DFS_NAMENODE_LAZY_PERSIST_FILE_SCRUB_INTERVAL_SEC, LAZY_WRITE_FILE_SCRUBBER_INTERVAL_SEC);
    conf.setLong(DFS_HEARTBEAT_INTERVAL_KEY, HEARTBEAT_INTERVAL_SEC);
    conf.setInt(DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY, HEARTBEAT_RECHECK_INTERVAL_MSEC);
    conf.setInt(DFS_DATANODE_LAZY_WRITER_INTERVAL_SEC, LAZY_WRITER_INTERVAL_SEC);

    if (useSCR) {
      conf.setBoolean(DFS_CLIENT_READ_SHORTCIRCUIT_KEY, useSCR);
      conf.set(DFS_CLIENT_CONTEXT, UUID.randomUUID().toString());
      sockDir = new TemporarySocketDirectory();
      conf.set(
          DFS_DOMAIN_SOCKET_PATH_KEY,
          new File(sockDir.getDir(), this.getClass().getSimpleName() + "._PORT.sock")
              .getAbsolutePath());
      conf.set(
          DFS_BLOCK_LOCAL_PATH_ACCESS_USER_KEY,
          UserGroupInformation.getCurrentUser().getShortUserName());
    }

    cluster =
        new MiniDFSCluster.Builder(conf)
            .numDataNodes(numDataNodes)
            .storageTypes(
                storageTypes != null ? storageTypes : new StorageType[] {DEFAULT, DEFAULT})
            .build();
    fs = cluster.getFileSystem();
    client = fs.getClient();

    // Artificially cap the storage capacity of the RAM_DISK volume.
    if (ramDiskStorageLimit >= 0) {
      List<? extends FsVolumeSpi> volumes =
          cluster.getDataNodes().get(0).getFSDataset().getVolumes();

      for (FsVolumeSpi volume : volumes) {
        if (volume.getStorageType() == RAM_DISK) {
          ((FsVolumeImpl) volume).setCapacityForTesting(ramDiskStorageLimit);
        }
      }
    }

    LOG.info("Cluster startup complete");
  }
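  // Illustrative (hypothetical) usage of startUpCluster() above: one DataNode with a RAM_DISK
  // volume capped at a few blocks and short-circuit reads disabled. The argument values are
  // examples only, not taken from the original tests.
  //
  //   startUpCluster(1, new StorageType[] {RAM_DISK, DEFAULT}, 3 * BLOCK_SIZE, false);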
  /**
   * Test that, when a block is re-opened for append, the related datanode messages are correctly
   * queued by the SBN because they have future states and genstamps.
   */
  @Test
  public void testQueueingWithAppend() throws Exception {
    int numQueued = 0;
    int numDN = cluster.getDataNodes().size();

    FSDataOutputStream out = fs.create(TEST_FILE_PATH);
    try {
      AppendTestUtil.write(out, 0, 10);
      out.hflush();

      // Opening the file will report RBW replicas, but will be
      // queued on the StandbyNode.
      numQueued += numDN; // RBW messages
    } finally {
      IOUtils.closeStream(out);
      numQueued += numDN; // blockReceived messages
    }

    cluster.triggerBlockReports();
    numQueued += numDN;

    try {
      out = fs.append(TEST_FILE_PATH);
      AppendTestUtil.write(out, 10, 10);
      // RBW replicas once it's opened for append
      numQueued += numDN;

    } finally {
      IOUtils.closeStream(out);
      numQueued += numDN; // blockReceived
    }

    cluster.triggerBlockReports();
    numQueued += numDN;

    assertEquals(
        numQueued, cluster.getNameNode(1).getNamesystem().getPendingDataNodeMessageCount());

    cluster.transitionToStandby(0);
    cluster.transitionToActive(1);

    // Verify that no replicas are marked corrupt, and that the
    // file is readable from the failed-over standby.
    BlockManagerTestUtil.updateState(nn1.getNamesystem().getBlockManager());
    BlockManagerTestUtil.updateState(nn2.getNamesystem().getBlockManager());
    assertEquals(0, nn1.getNamesystem().getCorruptReplicaBlocks());
    assertEquals(0, nn2.getNamesystem().getCorruptReplicaBlocks());

    AppendTestUtil.check(fs, TEST_FILE_PATH, 20);
  }
  /**
   * Test that the DataNode stops when the number of failed volumes exceeds
   * dfs.datanode.failed.volumes.tolerated.
   */
  @Test(timeout = 10000)
  public void testDataNodeShutdownAfterNumFailedVolumeExceedsTolerated()
      throws InterruptedException, IOException {
    // make both data directories to fail on dn0
    final File dn0Vol1 = new File(dataDir, "data" + (2 * 0 + 1));
    final File dn0Vol2 = new File(dataDir, "data" + (2 * 0 + 2));
    DataNodeTestUtils.injectDataDirFailure(dn0Vol1, dn0Vol2);
    DataNode dn0 = cluster.getDataNodes().get(0);
    checkDiskErrorSync(dn0);

    // DN0 should stop after the number of failed disks exceeds the tolerated
    // value (1).
    assertFalse(dn0.shouldRun());
  }
  @Test
  public void testRead() throws Exception {
    MiniDFSCluster cluster = null;
    int numDataNodes = 2;
    Configuration conf = getConf(numDataNodes);

    try {
      cluster = new MiniDFSCluster.Builder(conf).numDataNodes(numDataNodes).build();
      cluster.waitActive();
      assertEquals(numDataNodes, cluster.getDataNodes().size());
      doTestRead(conf, cluster, false);
    } finally {
      if (cluster != null) {
        cluster.shutdown();
      }
    }
  }
  // test writeToRbw
  @Test
  public void testWriteToRbw() throws Exception {
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(new HdfsConfiguration()).build();
    try {
      cluster.waitActive();
      DataNode dn = cluster.getDataNodes().get(0);
      FsDatasetImpl dataSet = (FsDatasetImpl) DataNodeTestUtils.getFSDataset(dn);

      // set up replicasMap
      String bpid = cluster.getNamesystem().getBlockPoolId();
      ExtendedBlock[] blocks = setup(bpid, dataSet);

      // test writeToRbw
      testWriteToRbw(dataSet, blocks);
    } finally {
      cluster.shutdown();
    }
  }
  /** Check that the permissions of the local DN directories are as expected. */
  @Test
  public void testLocalDirs() throws Exception {
    Configuration conf = new Configuration();
    final String permStr = conf.get(DFSConfigKeys.DFS_DATANODE_DATA_DIR_PERMISSION_KEY);
    FsPermission expected = new FsPermission(permStr);

    // Check permissions on directories in 'dfs.datanode.data.dir'
    FileSystem localFS = FileSystem.getLocal(conf);
    for (DataNode dn : cluster.getDataNodes()) {
      for (FsVolumeSpi v : dn.getFSDataset().getVolumes()) {
        String dir = v.getBasePath();
        Path dataDir = new Path(dir);
        FsPermission actual = localFS.getFileStatus(dataDir).getPermission();
        assertEquals(
            "Permission for dir: " + dataDir + ", is " + actual + ", while expected is " + expected,
            expected,
            actual);
      }
    }
  }
  /** Test to check that a DN goes down when all its volumes have failed. */
  @Test
  public void testShutdown() throws Exception {
    if (System.getProperty("os.name").startsWith("Windows")) {
      /**
       * This test depends on the OS not allowing file creation in a directory the user has no
       * write permission for. Apparently that is not the case on Windows (at least under
       * Cygwin), and possibly AIX, so the test is disabled on Windows.
       */
      return;
    }
    // Bring up two more datanodes
    cluster.startDataNodes(conf, 2, true, null, null);
    cluster.waitActive();
    final int dnIndex = 0;
    String bpid = cluster.getNamesystem().getBlockPoolId();
    File storageDir = cluster.getInstanceStorageDir(dnIndex, 0);
    File dir1 = MiniDFSCluster.getRbwDir(storageDir, bpid);
    storageDir = cluster.getInstanceStorageDir(dnIndex, 1);
    File dir2 = MiniDFSCluster.getRbwDir(storageDir, bpid);
    try {
      // make the data directories of the first datanode read-only
      assertTrue("Couldn't chmod local vol", dir1.setReadOnly());
      assertTrue("Couldn't chmod local vol", dir2.setReadOnly());

      // create files and make sure that first datanode will be down
      DataNode dn = cluster.getDataNodes().get(dnIndex);
      for (int i = 0; dn.isDatanodeUp(); i++) {
        Path fileName = new Path("/test.txt" + i);
        DFSTestUtil.createFile(fs, fileName, 1024, (short) 2, 1L);
        DFSTestUtil.waitReplication(fs, fileName, (short) 2);
        fs.delete(fileName, true);
      }
    } finally {
      // restore its old permission
      FileUtil.setWritable(dir1, true);
      FileUtil.setWritable(dir2, true);
    }
  }
  /** Test that changing the number of volumes does not impact the disk failure tolerance. */
  @Test
  public void testTolerateVolumeFailuresAfterAddingMoreVolumes()
      throws InterruptedException, ReconfigurationException, IOException {
    final File dn0Vol1 = new File(dataDir, "data" + (2 * 0 + 1));
    final File dn0Vol2 = new File(dataDir, "data" + (2 * 0 + 2));
    final File dn0VolNew = new File(dataDir, "data_new");
    final DataNode dn0 = cluster.getDataNodes().get(0);
    final String oldDataDirs = dn0.getConf().get(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY);

    // Add a new volume to DN0
    dn0.reconfigurePropertyImpl(
        DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY, oldDataDirs + "," + dn0VolNew.getAbsolutePath());

    // Fail dn0Vol1 first and hot swap it.
    DataNodeTestUtils.injectDataDirFailure(dn0Vol1);
    checkDiskErrorSync(dn0);
    assertTrue(dn0.shouldRun());

    // Fail dn0Vol2, now dn0 should stop, because we only tolerate 1 disk failure.
    DataNodeTestUtils.injectDataDirFailure(dn0Vol2);
    checkDiskErrorSync(dn0);
    assertFalse(dn0.shouldRun());
  }
 protected final void triggerBlockReport() throws IOException, InterruptedException {
   // Trigger block report to NN
   DataNodeTestUtils.triggerBlockReport(cluster.getDataNodes().get(0));
   Thread.sleep(10 * 1000);
 }
  /**
   * Test if {@link FSNamesystem#handleHeartbeat} can pick up replication and/or invalidation
   * requests and observe the max limit.
   */
  @Test
  public void testHeartbeat() throws Exception {
    final Configuration conf = new HdfsConfiguration();
    final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build();
    try {
      cluster.waitActive();
      final FSNamesystem namesystem = cluster.getNamesystem();
      final HeartbeatManager hm =
          namesystem.getBlockManager().getDatanodeManager().getHeartbeatManager();
      final String poolId = namesystem.getBlockPoolId();
      final DatanodeRegistration nodeReg =
          DataNodeTestUtils.getDNRegistrationForBP(cluster.getDataNodes().get(0), poolId);
      final DatanodeDescriptor dd = NameNodeAdapter.getDatanode(namesystem, nodeReg);
      final String storageID = DatanodeStorage.generateUuid();
      dd.updateStorage(new DatanodeStorage(storageID));

      final int REMAINING_BLOCKS = 1;
      final int MAX_REPLICATE_LIMIT =
          conf.getInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_MAX_STREAMS_KEY, 2);
      final int MAX_INVALIDATE_LIMIT = DFSConfigKeys.DFS_BLOCK_INVALIDATE_LIMIT_DEFAULT;
      final int MAX_INVALIDATE_BLOCKS = 2 * MAX_INVALIDATE_LIMIT + REMAINING_BLOCKS;
      final int MAX_REPLICATE_BLOCKS = 2 * MAX_REPLICATE_LIMIT + REMAINING_BLOCKS;
      final DatanodeStorageInfo[] ONE_TARGET = {dd.getStorageInfo(storageID)};

      try {
        namesystem.writeLock();
        synchronized (hm) {
          for (int i = 0; i < MAX_REPLICATE_BLOCKS; i++) {
            dd.addBlockToBeReplicated(
                new Block(i, 0, GenerationStamp.LAST_RESERVED_STAMP), ONE_TARGET);
          }
          DatanodeCommand[] cmds =
              NameNodeAdapter.sendHeartBeat(nodeReg, dd, namesystem).getCommands();
          assertEquals(1, cmds.length);
          assertEquals(DatanodeProtocol.DNA_TRANSFER, cmds[0].getAction());
          assertEquals(MAX_REPLICATE_LIMIT, ((BlockCommand) cmds[0]).getBlocks().length);

          ArrayList<Block> blockList = new ArrayList<Block>(MAX_INVALIDATE_BLOCKS);
          for (int i = 0; i < MAX_INVALIDATE_BLOCKS; i++) {
            blockList.add(new Block(i, 0, GenerationStamp.LAST_RESERVED_STAMP));
          }
          dd.addBlocksToBeInvalidated(blockList);
          cmds = NameNodeAdapter.sendHeartBeat(nodeReg, dd, namesystem).getCommands();
          assertEquals(2, cmds.length);
          assertEquals(DatanodeProtocol.DNA_TRANSFER, cmds[0].getAction());
          assertEquals(MAX_REPLICATE_LIMIT, ((BlockCommand) cmds[0]).getBlocks().length);
          assertEquals(DatanodeProtocol.DNA_INVALIDATE, cmds[1].getAction());
          assertEquals(MAX_INVALIDATE_LIMIT, ((BlockCommand) cmds[1]).getBlocks().length);

          cmds = NameNodeAdapter.sendHeartBeat(nodeReg, dd, namesystem).getCommands();
          assertEquals(2, cmds.length);
          assertEquals(DatanodeProtocol.DNA_TRANSFER, cmds[0].getAction());
          assertEquals(REMAINING_BLOCKS, ((BlockCommand) cmds[0]).getBlocks().length);
          assertEquals(DatanodeProtocol.DNA_INVALIDATE, cmds[1].getAction());
          assertEquals(MAX_INVALIDATE_LIMIT, ((BlockCommand) cmds[1]).getBlocks().length);

          cmds = NameNodeAdapter.sendHeartBeat(nodeReg, dd, namesystem).getCommands();
          assertEquals(1, cmds.length);
          assertEquals(DatanodeProtocol.DNA_INVALIDATE, cmds[0].getAction());
          assertEquals(REMAINING_BLOCKS, ((BlockCommand) cmds[0]).getBlocks().length);

          cmds = NameNodeAdapter.sendHeartBeat(nodeReg, dd, namesystem).getCommands();
          assertEquals(0, cmds.length);
        }
      } finally {
        namesystem.writeUnlock();
      }
    } finally {
      cluster.shutdown();
    }
  }
  /**
   * Test if {@link FSNamesystem#handleHeartbeat} correctly selects data node targets for block
   * recovery.
   */
  @Test
  public void testHeartbeatBlockRecovery() throws Exception {
    final Configuration conf = new HdfsConfiguration();
    final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
    try {
      cluster.waitActive();
      final FSNamesystem namesystem = cluster.getNamesystem();
      final HeartbeatManager hm =
          namesystem.getBlockManager().getDatanodeManager().getHeartbeatManager();
      final String poolId = namesystem.getBlockPoolId();
      final DatanodeRegistration nodeReg1 =
          DataNodeTestUtils.getDNRegistrationForBP(cluster.getDataNodes().get(0), poolId);
      final DatanodeDescriptor dd1 = NameNodeAdapter.getDatanode(namesystem, nodeReg1);
      dd1.updateStorage(new DatanodeStorage(DatanodeStorage.generateUuid()));
      final DatanodeRegistration nodeReg2 =
          DataNodeTestUtils.getDNRegistrationForBP(cluster.getDataNodes().get(1), poolId);
      final DatanodeDescriptor dd2 = NameNodeAdapter.getDatanode(namesystem, nodeReg2);
      dd2.updateStorage(new DatanodeStorage(DatanodeStorage.generateUuid()));
      final DatanodeRegistration nodeReg3 =
          DataNodeTestUtils.getDNRegistrationForBP(cluster.getDataNodes().get(2), poolId);
      final DatanodeDescriptor dd3 = NameNodeAdapter.getDatanode(namesystem, nodeReg3);
      dd3.updateStorage(new DatanodeStorage(DatanodeStorage.generateUuid()));

      try {
        namesystem.writeLock();
        synchronized (hm) {
          NameNodeAdapter.sendHeartBeat(nodeReg1, dd1, namesystem);
          NameNodeAdapter.sendHeartBeat(nodeReg2, dd2, namesystem);
          NameNodeAdapter.sendHeartBeat(nodeReg3, dd3, namesystem);

          // Test with all alive nodes.
          DFSTestUtil.resetLastUpdatesWithOffset(dd1, 0);
          DFSTestUtil.resetLastUpdatesWithOffset(dd2, 0);
          DFSTestUtil.resetLastUpdatesWithOffset(dd3, 0);
          final DatanodeStorageInfo[] storages = {
            dd1.getStorageInfos()[0], dd2.getStorageInfos()[0], dd3.getStorageInfos()[0]
          };
          BlockInfo blockInfo =
              new BlockInfoContiguous(
                  new Block(0, 0, GenerationStamp.LAST_RESERVED_STAMP), (short) 3);
          blockInfo.convertToBlockUnderConstruction(BlockUCState.UNDER_RECOVERY, storages);
          dd1.addBlockToBeRecovered(blockInfo);
          DatanodeCommand[] cmds =
              NameNodeAdapter.sendHeartBeat(nodeReg1, dd1, namesystem).getCommands();
          assertEquals(1, cmds.length);
          assertEquals(DatanodeProtocol.DNA_RECOVERBLOCK, cmds[0].getAction());
          BlockRecoveryCommand recoveryCommand = (BlockRecoveryCommand) cmds[0];
          assertEquals(1, recoveryCommand.getRecoveringBlocks().size());
          DatanodeInfo[] recoveringNodes =
              recoveryCommand
                  .getRecoveringBlocks()
                  .toArray(new BlockRecoveryCommand.RecoveringBlock[0])[0]
                  .getLocations();
          assertEquals(3, recoveringNodes.length);
          assertEquals(recoveringNodes[0], dd1);
          assertEquals(recoveringNodes[1], dd2);
          assertEquals(recoveringNodes[2], dd3);

          // Test with one stale node.
          DFSTestUtil.resetLastUpdatesWithOffset(dd1, 0);
          // More than the default stale interval of 30 seconds.
          DFSTestUtil.resetLastUpdatesWithOffset(dd2, -40 * 1000);
          DFSTestUtil.resetLastUpdatesWithOffset(dd3, 0);
          blockInfo =
              new BlockInfoContiguous(
                  new Block(0, 0, GenerationStamp.LAST_RESERVED_STAMP), (short) 3);
          blockInfo.convertToBlockUnderConstruction(BlockUCState.UNDER_RECOVERY, storages);
          dd1.addBlockToBeRecovered(blockInfo);
          cmds = NameNodeAdapter.sendHeartBeat(nodeReg1, dd1, namesystem).getCommands();
          assertEquals(1, cmds.length);
          assertEquals(DatanodeProtocol.DNA_RECOVERBLOCK, cmds[0].getAction());
          recoveryCommand = (BlockRecoveryCommand) cmds[0];
          assertEquals(1, recoveryCommand.getRecoveringBlocks().size());
          recoveringNodes =
              recoveryCommand
                  .getRecoveringBlocks()
                  .toArray(new BlockRecoveryCommand.RecoveringBlock[0])[0]
                  .getLocations();
          assertEquals(2, recoveringNodes.length);
          // dd2 is skipped.
          assertEquals(recoveringNodes[0], dd1);
          assertEquals(recoveringNodes[1], dd3);

          // Test with all stale node.
          DFSTestUtil.resetLastUpdatesWithOffset(dd1, -60 * 1000);
          // More than the default stale interval of 30 seconds.
          DFSTestUtil.resetLastUpdatesWithOffset(dd2, -40 * 1000);
          DFSTestUtil.resetLastUpdatesWithOffset(dd3, -80 * 1000);
          blockInfo =
              new BlockInfoContiguous(
                  new Block(0, 0, GenerationStamp.LAST_RESERVED_STAMP), (short) 3);
          blockInfo.convertToBlockUnderConstruction(BlockUCState.UNDER_RECOVERY, storages);
          dd1.addBlockToBeRecovered(blockInfo);
          cmds = NameNodeAdapter.sendHeartBeat(nodeReg1, dd1, namesystem).getCommands();
          assertEquals(1, cmds.length);
          assertEquals(DatanodeProtocol.DNA_RECOVERBLOCK, cmds[0].getAction());
          recoveryCommand = (BlockRecoveryCommand) cmds[0];
          assertEquals(1, recoveryCommand.getRecoveringBlocks().size());
          recoveringNodes =
              recoveryCommand
                  .getRecoveringBlocks()
                  .toArray(new BlockRecoveryCommand.RecoveringBlock[0])[0]
                  .getLocations();
          // dd1 is the only non-stale node (it has just heartbeated), so fewer than two
          // live recovery locations are available and the NameNode falls back to including
          // every expected location, stale or not.
          assertEquals(3, recoveringNodes.length);
          assertEquals(recoveringNodes[0], dd1);
          assertEquals(recoveringNodes[1], dd2);
          assertEquals(recoveringNodes[2], dd3);
        }
      } finally {
        namesystem.writeUnlock();
      }
    } finally {
      cluster.shutdown();
    }
  }
  /*
   * Verify the number of blocks and files are correct after volume failure,
   * and that we can replicate to both datanodes even after a single volume
   * failure if the configuration parameter allows this.
   */
  @Test
  public void testVolumeFailure() throws Exception {
    System.out.println("Data dir: is " + dataDir.getPath());

    // Data dir structure is dataDir/data[1-4]/[current,tmp...]
    // data1,2 is for datanode 1; data3,4 for datanode 2
    String filename = "/test.txt";
    Path filePath = new Path(filename);

    // we use only a small number of blocks to avoid creating subdirs in the data dir.
    int filesize = block_size * blocks_num;
    DFSTestUtil.createFile(fs, filePath, filesize, repl, 1L);
    DFSTestUtil.waitReplication(fs, filePath, repl);
    System.out.println("file " + filename + "(size " + filesize + ") is created and replicated");

    // fail the volume
    // delete/make non-writable one of the directories (failed volume)
    data_fail = new File(dataDir, "data3");
    failedDir = MiniDFSCluster.getFinalizedDir(data_fail, cluster.getNamesystem().getBlockPoolId());
    if (failedDir.exists()
        &&
        // !FileUtil.fullyDelete(failedDir)
        !deteteBlocks(failedDir)) {
      throw new IOException("Could not delete hdfs directory '" + failedDir + "'");
    }
    data_fail.setReadOnly();
    failedDir.setReadOnly();
    System.out.println("Deleteing " + failedDir.getPath() + "; exist=" + failedDir.exists());

    // access all the blocks on the "failed" DataNode,
    // we need to make sure that the "failed" volume is being accessed -
    // and that will cause failure, blocks removal, "emergency" block report
    triggerFailure(filename, filesize);

    // make sure a block report is sent
    DataNode dn = cluster.getDataNodes().get(1); // corresponds to dir data3
    String bpid = cluster.getNamesystem().getBlockPoolId();
    DatanodeRegistration dnR = dn.getDNRegistrationForBP(bpid);

    Map<DatanodeStorage, BlockListAsLongs> perVolumeBlockLists =
        dn.getFSDataset().getBlockReports(bpid);

    // Send block report
    StorageBlockReport[] reports = new StorageBlockReport[perVolumeBlockLists.size()];

    int reportIndex = 0;
    for (Map.Entry<DatanodeStorage, BlockListAsLongs> kvPair : perVolumeBlockLists.entrySet()) {
      DatanodeStorage dnStorage = kvPair.getKey();
      BlockListAsLongs blockList = kvPair.getValue();
      reports[reportIndex++] = new StorageBlockReport(dnStorage, blockList);
    }

    cluster.getNameNodeRpc().blockReport(dnR, bpid, reports, null);

    // verify number of blocks and files...
    verify(filename, filesize);

    // create another file (with one volume failed).
    System.out.println("creating file test1.txt");
    Path fileName1 = new Path("/test1.txt");
    DFSTestUtil.createFile(fs, fileName1, filesize, repl, 1L);

    // should be able to replicate to both nodes (2 DN, repl=2)
    DFSTestUtil.waitReplication(fs, fileName1, repl);
    System.out.println("file " + fileName1.getName() + " is created and replicated");
  }
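  /**
   * Hedged sketch of the triggerFailure() helper used by testVolumeFailure() above; the real
   * helper accesses each block replica directly on the DataNode. As a simpler stand-in (an
   * assumption, not the original implementation), this version just re-reads the whole file so
   * that the blocks on the failed volume must be touched.
   */
  private void triggerFailure(String path, long size) throws IOException {
    byte[] buf = new byte[1024];
    try (FSDataInputStream in = fs.open(new Path(path))) {
      long read = 0;
      int n;
      while (read < size && (n = in.read(buf)) > 0) {
        read += n;
      }
    }
  }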
  /**
   * Test that when there is a failure replicating a block the temporary and meta files are cleaned
   * up and subsequent replication succeeds.
   */
  @Test
  public void testReplicationError() throws Exception {
    // create a file of replication factor of 1
    final Path fileName = new Path("/test.txt");
    final int fileLen = 1;
    DFSTestUtil.createFile(fs, fileName, 1, (short) 1, 1L);
    DFSTestUtil.waitReplication(fs, fileName, (short) 1);

    // get the block belonging to the created file
    LocatedBlocks blocks =
        NameNodeAdapter.getBlockLocations(
            cluster.getNameNode(), fileName.toString(), 0, (long) fileLen);
    assertEquals("Should only find 1 block", blocks.locatedBlockCount(), 1);
    LocatedBlock block = blocks.get(0);

    // bring up a second datanode
    cluster.startDataNodes(conf, 1, true, null, null);
    cluster.waitActive();
    final int sndNode = 1;
    DataNode datanode = cluster.getDataNodes().get(sndNode);

    // replicate the block to the second datanode
    InetSocketAddress target = datanode.getXferAddress();
    Socket s = new Socket(target.getAddress(), target.getPort());
    // write the header.
    DataOutputStream out = new DataOutputStream(s.getOutputStream());

    DataChecksum checksum = DataChecksum.newDataChecksum(DataChecksum.Type.CRC32, 512);
    new Sender(out)
        .writeBlock(
            block.getBlock(),
            StorageType.DEFAULT,
            BlockTokenSecretManager.DUMMY_TOKEN,
            "",
            new DatanodeInfo[0],
            new StorageType[0],
            null,
            BlockConstructionStage.PIPELINE_SETUP_CREATE,
            1,
            0L,
            0L,
            0L,
            checksum,
            CachingStrategy.newDefaultStrategy(),
            false);
    out.flush();

    // close the connection before sending the content of the block
    out.close();

    // the temporary block & meta files should be deleted
    String bpid = cluster.getNamesystem().getBlockPoolId();
    File storageDir = cluster.getInstanceStorageDir(sndNode, 0);
    File dir1 = MiniDFSCluster.getRbwDir(storageDir, bpid);
    storageDir = cluster.getInstanceStorageDir(sndNode, 1);
    File dir2 = MiniDFSCluster.getRbwDir(storageDir, bpid);
    while (dir1.listFiles().length != 0 || dir2.listFiles().length != 0) {
      Thread.sleep(100);
    }

    // then increase the file's replication factor
    fs.setReplication(fileName, (short) 2);
    // replication should succeed
    DFSTestUtil.waitReplication(fs, fileName, (short) 2);

    // clean up the file
    fs.delete(fileName, false);
  }
  /**
   * Test that individual volume failures do not cause DNs to fail, that the failure of all
   * volumes on a single datanode does cause it to fail, and that the capacities and liveness
   * are adjusted correctly in the NN.
   */
  @Test
  public void testSuccessiveVolumeFailures() throws Exception {
    assumeTrue(!System.getProperty("os.name").startsWith("Windows"));

    // Bring up two more datanodes
    cluster.startDataNodes(conf, 2, true, null, null);
    cluster.waitActive();

    /*
     * Calculate the total capacity of all the datanodes. Sleep for
     * three seconds to be sure the datanodes have had a chance to
     * heartbeat their capacities.
     */
    Thread.sleep(WAIT_FOR_HEARTBEATS);
    final DatanodeManager dm = cluster.getNamesystem().getBlockManager().getDatanodeManager();

    final long origCapacity = DFSTestUtil.getLiveDatanodeCapacity(dm);
    long dnCapacity = DFSTestUtil.getDatanodeCapacity(dm, 0);

    File dn1Vol1 = new File(dataDir, "data" + (2 * 0 + 1));
    File dn2Vol1 = new File(dataDir, "data" + (2 * 1 + 1));
    File dn3Vol1 = new File(dataDir, "data" + (2 * 2 + 1));
    File dn3Vol2 = new File(dataDir, "data" + (2 * 2 + 2));

    /*
     * Make the 1st volume directories on the first two datanodes
     * non-accessible.  We don't make all three 1st volume directories
     * readonly since that would cause the entire pipeline to
     * fail. The client does not retry failed nodes even though
     * perhaps they could succeed because just a single volume failed.
     */
    assertTrue("Couldn't chmod local vol", FileUtil.setExecutable(dn1Vol1, false));
    assertTrue("Couldn't chmod local vol", FileUtil.setExecutable(dn2Vol1, false));

    /*
     * Create file1 and wait for 3 replicas (ie all DNs can still
     * store a block).  Then assert that all DNs are up, despite the
     * volume failures.
     */
    Path file1 = new Path("/test1");
    DFSTestUtil.createFile(fs, file1, 1024, (short) 3, 1L);
    DFSTestUtil.waitReplication(fs, file1, (short) 3);
    ArrayList<DataNode> dns = cluster.getDataNodes();
    assertTrue("DN1 should be up", dns.get(0).isDatanodeUp());
    assertTrue("DN2 should be up", dns.get(1).isDatanodeUp());
    assertTrue("DN3 should be up", dns.get(2).isDatanodeUp());

    /*
     * The metrics should confirm the volume failures.
     */
    assertCounter("VolumeFailures", 1L, getMetrics(dns.get(0).getMetrics().name()));
    assertCounter("VolumeFailures", 1L, getMetrics(dns.get(1).getMetrics().name()));
    assertCounter("VolumeFailures", 0L, getMetrics(dns.get(2).getMetrics().name()));

    // Ensure we wait a sufficient amount of time
    assert (WAIT_FOR_HEARTBEATS * 10) > WAIT_FOR_DEATH;

    // Eventually the NN should report two volume failures
    DFSTestUtil.waitForDatanodeStatus(
        dm, 3, 0, 2, origCapacity - (1 * dnCapacity), WAIT_FOR_HEARTBEATS);

    /*
     * Now fail a volume on the third datanode. We should be able to get
     * three replicas since we've already identified the other failures.
     */
    assertTrue("Couldn't chmod local vol", FileUtil.setExecutable(dn3Vol1, false));
    Path file2 = new Path("/test2");
    DFSTestUtil.createFile(fs, file2, 1024, (short) 3, 1L);
    DFSTestUtil.waitReplication(fs, file2, (short) 3);
    assertTrue("DN3 should still be up", dns.get(2).isDatanodeUp());
    assertCounter("VolumeFailures", 1L, getMetrics(dns.get(2).getMetrics().name()));

    ArrayList<DatanodeDescriptor> live = new ArrayList<DatanodeDescriptor>();
    ArrayList<DatanodeDescriptor> dead = new ArrayList<DatanodeDescriptor>();
    dm.fetchDatanodes(live, dead, false);
    live.clear();
    dead.clear();
    dm.fetchDatanodes(live, dead, false);
    assertEquals("DN3 should have 1 failed volume", 1, live.get(2).getVolumeFailures());

    /*
     * Once the datanodes have a chance to heartbeat their new capacity the
     * total capacity should be down by three volumes (assuming the host
     * did not grow or shrink the data volume while the test was running).
     */
    dnCapacity = DFSTestUtil.getDatanodeCapacity(dm, 0);
    DFSTestUtil.waitForDatanodeStatus(
        dm, 3, 0, 3, origCapacity - (3 * dnCapacity), WAIT_FOR_HEARTBEATS);

    /*
     * Now fail the 2nd volume on the 3rd datanode. All its volumes
     * are now failed and so it should report two volume failures
     * and that it's no longer up. Only wait for two replicas since
     * we'll never get a third.
     */
    assertTrue("Couldn't chmod local vol", FileUtil.setExecutable(dn3Vol2, false));
    Path file3 = new Path("/test3");
    DFSTestUtil.createFile(fs, file3, 1024, (short) 3, 1L);
    DFSTestUtil.waitReplication(fs, file3, (short) 2);

    // The DN should consider itself dead
    DFSTestUtil.waitForDatanodeDeath(dns.get(2));

    // And report two failed volumes
    assertCounter("VolumeFailures", 2L, getMetrics(dns.get(2).getMetrics().name()));

    // The NN considers the DN dead
    DFSTestUtil.waitForDatanodeStatus(
        dm, 2, 1, 2, origCapacity - (4 * dnCapacity), WAIT_FOR_HEARTBEATS);

    /*
     * The datanode never tries to restore the failed volume, even if
     * it's subsequently repaired, but it should see this volume on
     * restart, so file creation should be able to succeed after
     * restoring the data directories and restarting the datanodes.
     */
    assertTrue("Couldn't chmod local vol", FileUtil.setExecutable(dn1Vol1, true));
    assertTrue("Couldn't chmod local vol", FileUtil.setExecutable(dn2Vol1, true));
    assertTrue("Couldn't chmod local vol", FileUtil.setExecutable(dn3Vol1, true));
    assertTrue("Couldn't chmod local vol", FileUtil.setExecutable(dn3Vol2, true));
    cluster.restartDataNodes();
    cluster.waitActive();
    Path file4 = new Path("/test4");
    DFSTestUtil.createFile(fs, file4, 1024, (short) 3, 1L);
    DFSTestUtil.waitReplication(fs, file4, (short) 3);

    /*
     * Eventually the capacity should be restored to its original value,
     * and the volume failure count should be reported as zero by
     * both the metrics and the NN.
     */
    DFSTestUtil.waitForDatanodeStatus(dm, 3, 0, 0, origCapacity, WAIT_FOR_HEARTBEATS);
  }
  protected void doTestRead(Configuration conf, MiniDFSCluster cluster, boolean isStriped)
      throws Exception {
    final int numDataNodes = cluster.getDataNodes().size();
    final NameNode nn = cluster.getNameNode();
    final NamenodeProtocols nnProto = nn.getRpcServer();
    final BlockManager bm = nn.getNamesystem().getBlockManager();
    final BlockTokenSecretManager sm = bm.getBlockTokenSecretManager();

    // set a short token lifetime (1 second) initially
    SecurityTestUtil.setBlockTokenLifetime(sm, 1000L);

    Path fileToRead = new Path(FILE_TO_READ);
    FileSystem fs = cluster.getFileSystem();
    byte[] expected = generateBytes(FILE_SIZE);
    createFile(fs, fileToRead, expected);

    /*
     * setup for testing expiration handling of cached tokens
     */

    // read using blockSeekTo(). Acquired tokens are cached in in1
    FSDataInputStream in1 = fs.open(fileToRead);
    assertTrue(checkFile1(in1, expected));
    // read using blockSeekTo(). Acquired tokens are cached in in2
    FSDataInputStream in2 = fs.open(fileToRead);
    assertTrue(checkFile1(in2, expected));
    // read using fetchBlockByteRange(). Acquired tokens are cached in in3
    FSDataInputStream in3 = fs.open(fileToRead);
    assertTrue(checkFile2(in3, expected));

    /*
     * testing READ interface on DN using a BlockReader
     */
    DFSClient client = null;
    try {
      client = new DFSClient(new InetSocketAddress("localhost", cluster.getNameNodePort()), conf);
    } finally {
      if (client != null) client.close();
    }
    List<LocatedBlock> locatedBlocks =
        nnProto.getBlockLocations(FILE_TO_READ, 0, FILE_SIZE).getLocatedBlocks();
    LocatedBlock lblock = locatedBlocks.get(0); // first block
    // verify token is not expired
    assertFalse(isBlockTokenExpired(lblock));
    // read with valid token, should succeed
    tryRead(conf, lblock, true);

    /*
     * wait till the block token and all cached tokens in in1, in2 and in3 expire
     */

    while (!isBlockTokenExpired(lblock)) {
      try {
        Thread.sleep(10);
      } catch (InterruptedException ignored) {
      }
    }

    /*
     * continue testing READ interface on DN using a BlockReader
     */

    // verify token is expired
    assertTrue(isBlockTokenExpired(lblock));
    // read should fail
    tryRead(conf, lblock, false);
    // use a valid new token
    bm.setBlockToken(lblock, BlockTokenIdentifier.AccessMode.READ);
    // read should succeed
    tryRead(conf, lblock, true);
    // use a token with wrong blockID
    long rightId = lblock.getBlock().getBlockId();
    long wrongId = rightId + 1;
    lblock.getBlock().setBlockId(wrongId);
    bm.setBlockToken(lblock, BlockTokenIdentifier.AccessMode.READ);
    lblock.getBlock().setBlockId(rightId);
    // read should fail
    tryRead(conf, lblock, false);
    // use a token with wrong access modes
    bm.setBlockToken(lblock, BlockTokenIdentifier.AccessMode.WRITE);
    // read should fail
    tryRead(conf, lblock, false);

    // set a long token lifetime for future tokens
    SecurityTestUtil.setBlockTokenLifetime(sm, 600 * 1000L);

    /*
     * testing that when cached tokens are expired, DFSClient will re-fetch
     * tokens transparently for READ.
     */

    // confirm all tokens cached in in1 are expired by now
    List<LocatedBlock> lblocks = DFSTestUtil.getAllBlocks(in1);
    for (LocatedBlock blk : lblocks) {
      assertTrue(isBlockTokenExpired(blk));
    }
    // verify blockSeekTo() is able to re-fetch token transparently
    in1.seek(0);
    assertTrue(checkFile1(in1, expected));

    // confirm all tokens cached in in2 are expired by now
    List<LocatedBlock> lblocks2 = DFSTestUtil.getAllBlocks(in2);
    for (LocatedBlock blk : lblocks2) {
      assertTrue(isBlockTokenExpired(blk));
    }
    // verify blockSeekTo() is able to re-fetch token transparently (testing
    // via another interface method)
    if (isStriped) {
      // striped block doesn't support seekToNewSource
      in2.seek(0);
    } else {
      assertTrue(in2.seekToNewSource(0));
    }
    assertTrue(checkFile1(in2, expected));

    // confirm all tokens cached in in3 are expired by now
    List<LocatedBlock> lblocks3 = DFSTestUtil.getAllBlocks(in3);
    for (LocatedBlock blk : lblocks3) {
      assertTrue(isBlockTokenExpired(blk));
    }
    // verify fetchBlockByteRange() is able to re-fetch token transparently
    assertTrue(checkFile2(in3, expected));

    /*
     * testing that after datanodes are restarted on the same ports, cached
     * tokens should still work and there is no need to fetch new tokens from
     * namenode. This test should run while namenode is down (to make sure no
     * new tokens can be fetched from namenode).
     */

    // restart datanodes on the same ports that they currently use
    assertTrue(cluster.restartDataNodes(true));
    cluster.waitActive();
    assertEquals(numDataNodes, cluster.getDataNodes().size());
    cluster.shutdownNameNode(0);

    // confirm tokens cached in in1 are still valid
    lblocks = DFSTestUtil.getAllBlocks(in1);
    for (LocatedBlock blk : lblocks) {
      assertFalse(isBlockTokenExpired(blk));
    }
    // verify blockSeekTo() still works (forced to use cached tokens)
    in1.seek(0);
    assertTrue(checkFile1(in1, expected));

    // confirm tokens cached in in2 are still valid
    lblocks2 = DFSTestUtil.getAllBlocks(in2);
    for (LocatedBlock blk : lblocks2) {
      assertFalse(isBlockTokenExpired(blk));
    }

    // verify blockSeekTo() still works (forced to use cached tokens)
    if (isStriped) {
      in2.seek(0);
    } else {
      in2.seekToNewSource(0);
    }
    assertTrue(checkFile1(in2, expected));

    // confirm tokens cached in in3 are still valid
    lblocks3 = DFSTestUtil.getAllBlocks(in3);
    for (LocatedBlock blk : lblocks3) {
      assertFalse(isBlockTokenExpired(blk));
    }
    // verify fetchBlockByteRange() still works (forced to use cached tokens)
    assertTrue(checkFile2(in3, expected));

    /*
     * testing that when namenode is restarted, cached tokens should still
     * work and there is no need to fetch new tokens from namenode. Like the
     * previous test, this test should also run while namenode is down. The
     * setup for this test depends on the previous test.
     */

    // restart the namenode and then shut it down for test
    cluster.restartNameNode(0);
    cluster.shutdownNameNode(0);

    // verify blockSeekTo() still works (forced to use cached tokens)
    in1.seek(0);
    assertTrue(checkFile1(in1, expected));
    // verify again blockSeekTo() still works (forced to use cached tokens)
    if (isStriped) {
      in2.seek(0);
    } else {
      in2.seekToNewSource(0);
    }
    assertTrue(checkFile1(in2, expected));

    // verify fetchBlockByteRange() still works (forced to use cached tokens)
    assertTrue(checkFile2(in3, expected));

    /*
     * testing that after both namenode and datanodes got restarted (namenode
     * first, followed by datanodes), DFSClient can't access DN without
     * re-fetching tokens and is able to re-fetch tokens transparently. The
     * setup of this test depends on the previous test.
     */

    // restore the cluster and restart the datanodes for test
    cluster.restartNameNode(0);
    assertTrue(cluster.restartDataNodes(true));
    cluster.waitActive();
    assertEquals(numDataNodes, cluster.getDataNodes().size());

    // shutdown namenode so that DFSClient can't get new tokens from namenode
    cluster.shutdownNameNode(0);

    // verify blockSeekTo() fails (cached tokens become invalid)
    in1.seek(0);
    assertFalse(checkFile1(in1, expected));
    // verify fetchBlockByteRange() fails (cached tokens become invalid)
    assertFalse(checkFile2(in3, expected));

    // restart the namenode to allow DFSClient to re-fetch tokens
    cluster.restartNameNode(0);
    // verify blockSeekTo() works again (by transparently re-fetching
    // tokens from namenode)
    in1.seek(0);
    assertTrue(checkFile1(in1, expected));
    if (isStriped) {
      in2.seek(0);
    } else {
      in2.seekToNewSource(0);
    }
    assertTrue(checkFile1(in2, expected));
    // verify fetchBlockByteRange() works again (by transparently
    // re-fetching tokens from namenode)
    assertTrue(checkFile2(in3, expected));

    /*
     * testing that when datanodes are restarted on different ports, DFSClient
     * is able to re-fetch tokens transparently to connect to them
     */

    // restart datanodes on newly assigned ports
    assertTrue(cluster.restartDataNodes(false));
    cluster.waitActive();
    assertEquals(numDataNodes, cluster.getDataNodes().size());
    // verify blockSeekTo() is able to re-fetch token transparently
    in1.seek(0);
    assertTrue(checkFile1(in1, expected));
    // verify blockSeekTo() is able to re-fetch token transparently
    if (isStriped) {
      in2.seek(0);
    } else {
      in2.seekToNewSource(0);
    }
    assertTrue(checkFile1(in2, expected));
    // verify fetchBlockByteRange() is able to re-fetch token transparently
    assertTrue(checkFile2(in3, expected));
  }
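  /*
   * Hedged sketch of the isBlockTokenExpired() helper used throughout doTestRead() above; the
   * real helper is defined elsewhere (and may differ for striped blocks). It is assumed to
   * delegate to SecurityTestUtil with the located block's access token, mirroring how
   * testAppend() above checks the token taken from the output stream.
   */
  protected boolean isBlockTokenExpired(LocatedBlock lb) throws IOException {
    return SecurityTestUtil.isBlockTokenExpired(lb.getBlockToken());
  }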