protected final boolean verifyDeletedBlocks(LocatedBlocks locatedBlocks)
    throws IOException, InterruptedException {
  LOG.info("Verifying replica has no saved copy after deletion.");
  triggerBlockReport();

  while (DataNodeTestUtils.getPendingAsyncDeletions(cluster.getDataNodes().get(0)) > 0L) {
    Thread.sleep(1000);
  }

  final String bpid = cluster.getNamesystem().getBlockPoolId();
  List<? extends FsVolumeSpi> volumes =
      cluster.getDataNodes().get(0).getFSDataset().getVolumes();

  // Make sure the deleted replica does not have a copy in either the finalized
  // dir of a transient volume or the lazyPersist dir of a non-transient volume.
  for (FsVolumeSpi v : volumes) {
    FsVolumeImpl volume = (FsVolumeImpl) v;
    File targetDir = v.isTransientStorage()
        ? volume.getBlockPoolSlice(bpid).getFinalizedDir()
        : volume.getBlockPoolSlice(bpid).getLazypersistDir();
    if (!verifyBlockDeletedFromDir(targetDir, locatedBlocks)) {
      return false;
    }
  }
  return true;
}
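The helper verifyBlockDeletedFromDir referenced above is not shown in this section. The following is a minimal sketch of what such a helper might look like, reusing the DatanodeUtil.idToBlockDir layout and block-file naming that ensureLazyPersistBlocksAreSaved below relies on; it is illustrative, not the exact implementation.

// Hypothetical sketch: returns false if a copy of any of the given blocks
// still exists under the supplied directory.
protected final boolean verifyBlockDeletedFromDir(File dir, LocatedBlocks locatedBlocks) {
  for (LocatedBlock lb : locatedBlocks.getLocatedBlocks()) {
    File targetDir = DatanodeUtil.idToBlockDir(dir, lb.getBlock().getBlockId());
    File blockFile = new File(targetDir, lb.getBlock().getBlockName());
    if (blockFile.exists()) {
      LOG.warn("Block file " + blockFile.getAbsolutePath() + " still exists after deletion.");
      return false;
    }
    // A corresponding check of the block's meta file could be added here as well.
  }
  return true;
}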
/** * Setup a {@link MiniDFSCluster}. Create a block with both {@link State#NORMAL} and {@link * State#READ_ONLY_SHARED} replicas. */ @Before public void setup() throws IOException, InterruptedException { conf = new HdfsConfiguration(); SimulatedFSDataset.setFactory(conf); Configuration[] overlays = new Configuration[NUM_DATANODES]; for (int i = 0; i < overlays.length; i++) { overlays[i] = new Configuration(); if (i == RO_NODE_INDEX) { overlays[i].setEnum( SimulatedFSDataset.CONFIG_PROPERTY_STATE, i == RO_NODE_INDEX ? READ_ONLY_SHARED : NORMAL); } } cluster = new MiniDFSCluster.Builder(conf) .numDataNodes(NUM_DATANODES) .dataNodeConfOverlays(overlays) .build(); fs = cluster.getFileSystem(); blockManager = cluster.getNameNode().getNamesystem().getBlockManager(); datanodeManager = blockManager.getDatanodeManager(); client = new DFSClient( new InetSocketAddress("localhost", cluster.getNameNodePort()), cluster.getConfiguration(0)); for (int i = 0; i < NUM_DATANODES; i++) { DataNode dataNode = cluster.getDataNodes().get(i); validateStorageState( BlockManagerTestUtil.getStorageReportsForDatanode( datanodeManager.getDatanode(dataNode.getDatanodeId())), i == RO_NODE_INDEX ? READ_ONLY_SHARED : NORMAL); } // Create a 1 block file DFSTestUtil.createFile(fs, PATH, BLOCK_SIZE, BLOCK_SIZE, BLOCK_SIZE, (short) 1, seed); LocatedBlock locatedBlock = getLocatedBlock(); extendedBlock = locatedBlock.getBlock(); block = extendedBlock.getLocalBlock(); assertThat(locatedBlock.getLocations().length, is(1)); normalDataNode = locatedBlock.getLocations()[0]; readOnlyDataNode = datanodeManager.getDatanode(cluster.getDataNodes().get(RO_NODE_INDEX).getDatanodeId()); assertThat(normalDataNode, is(not(readOnlyDataNode))); validateNumberReplicas(1); // Inject the block into the datanode with READ_ONLY_SHARED storage cluster.injectBlocks(0, RO_NODE_INDEX, Collections.singleton(block)); // There should now be 2 *locations* for the block // Must wait until the NameNode has processed the block report for the injected blocks waitForLocations(2); }
/* Start the balancer and check that the cluster is balanced after the run. */
private void runBalancer(long totalUsedSpace, long totalCapacity) throws Exception {
  waitForHeartBeat(totalUsedSpace, totalCapacity);

  // start rebalancing
  balancer = new Balancer(CONF);
  balancer.run(new String[0]);

  waitForHeartBeat(totalUsedSpace, totalCapacity);
  boolean balanced;
  do {
    DatanodeInfo[] datanodeReport = client.getDatanodeReport(DatanodeReportType.ALL);
    assertEquals(cluster.getDataNodes().size(), datanodeReport.length);
    balanced = true;
    double avgUtilization = ((double) totalUsedSpace) / totalCapacity * 100;
    for (DatanodeInfo datanode : datanodeReport) {
      // A node is considered unbalanced if its utilization deviates from the
      // cluster average by more than 10 percentage points.
      if (Math.abs(avgUtilization
          - ((double) datanode.getDfsUsed()) / datanode.getCapacity() * 100) > 10) {
        balanced = false;
        try {
          Thread.sleep(100);
        } catch (InterruptedException ignored) {
        }
        break;
      }
    }
  } while (!balanced);
}
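The waitForHeartBeat helper called at the start and end of runBalancer is not defined in this section. A plausible sketch, under the assumption that it simply polls the datanode report until the NameNode's view of total used space and capacity matches the expected values, is shown below; the exact helper may differ.

// Hypothetical sketch of waitForHeartBeat: poll the datanode report until the
// cluster-wide used space and capacity reported to the NameNode reach the
// expected totals (the enclosing test's timeout bounds the wait).
private void waitForHeartBeat(long expectedUsedSpace, long expectedTotalSpace)
    throws IOException, InterruptedException {
  long usedSpace;
  long totalSpace;
  do {
    usedSpace = 0;
    totalSpace = 0;
    for (DatanodeInfo datanode : client.getDatanodeReport(DatanodeReportType.ALL)) {
      usedSpace += datanode.getDfsUsed();
      totalSpace += datanode.getCapacity();
    }
    if (usedSpace != expectedUsedSpace || totalSpace != expectedTotalSpace) {
      Thread.sleep(100);
    }
  } while (usedSpace != expectedUsedSpace || totalSpace != expectedTotalSpace);
}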
/** Test that the DN does not shut down as long as failed volumes are hot-swapped out. */
@Test
public void testVolumeFailureRecoveredByHotSwappingVolume()
    throws InterruptedException, ReconfigurationException, IOException {
  final File dn0Vol1 = new File(dataDir, "data" + (2 * 0 + 1));
  final File dn0Vol2 = new File(dataDir, "data" + (2 * 0 + 2));
  final DataNode dn0 = cluster.getDataNodes().get(0);
  final String oldDataDirs = dn0.getConf().get(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY);

  // Fail dn0Vol1 first.
  DataNodeTestUtils.injectDataDirFailure(dn0Vol1);
  checkDiskErrorSync(dn0);

  // Hot swap out the failed volume.
  String dataDirs = dn0Vol2.getPath();
  dn0.reconfigurePropertyImpl(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY, dataDirs);

  // Fix the failed volume dn0Vol1 and remount it.
  DataNodeTestUtils.restoreDataDirFromFailure(dn0Vol1);
  dn0.reconfigurePropertyImpl(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY, oldDataDirs);

  // Fail dn0Vol2. Since dn0Vol1 has been fixed, DN0 has sufficient
  // resources and should keep running.
  DataNodeTestUtils.injectDataDirFailure(dn0Vol2);
  checkDiskErrorSync(dn0);
  assertTrue(dn0.shouldRun());
}
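checkDiskErrorSync is used here and in the other volume-failure tests but is not defined in this section. A rough sketch, built only from the checkDiskErrorAsync() and getLastDiskErrorCheck() calls that appear in testcheckDiskError below, might look like the following; it is an assumption about the helper, not its actual definition.

// Illustrative sketch: kick off an asynchronous disk check and wait until the
// DataNode records a new "last disk error check" timestamp, i.e. the check has
// actually run and had a chance to detect failed volumes.
private static void checkDiskErrorSync(DataNode dn) throws InterruptedException {
  final long lastDiskErrorCheck = dn.getLastDiskErrorCheck();
  dn.checkDiskErrorAsync();
  // Poll for up to roughly 10 seconds for the checker thread to finish.
  int retries = 100;
  while (retries > 0 && dn.getLastDiskErrorCheck() == lastDiskErrorCheck) {
    Thread.sleep(100);
    retries--;
  }
  assertTrue("Disk error check did not complete in time", retries > 0);
}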
/** * Make sure at least one non-transient volume has a saved copy of the replica. An infinite loop * is used to ensure the async lazy persist tasks are completely done before verification. Caller * of ensureLazyPersistBlocksAreSaved expects either a successful pass or timeout failure. */ protected final void ensureLazyPersistBlocksAreSaved(LocatedBlocks locatedBlocks) throws IOException, InterruptedException { final String bpid = cluster.getNamesystem().getBlockPoolId(); List<? extends FsVolumeSpi> volumes = cluster.getDataNodes().get(0).getFSDataset().getVolumes(); final Set<Long> persistedBlockIds = new HashSet<Long>(); while (persistedBlockIds.size() < locatedBlocks.getLocatedBlocks().size()) { // Take 1 second sleep before each verification iteration Thread.sleep(1000); for (LocatedBlock lb : locatedBlocks.getLocatedBlocks()) { for (FsVolumeSpi v : volumes) { if (v.isTransientStorage()) { continue; } FsVolumeImpl volume = (FsVolumeImpl) v; File lazyPersistDir = volume.getBlockPoolSlice(bpid).getLazypersistDir(); long blockId = lb.getBlock().getBlockId(); File targetDir = DatanodeUtil.idToBlockDir(lazyPersistDir, blockId); File blockFile = new File(targetDir, lb.getBlock().getBlockName()); if (blockFile.exists()) { // Found a persisted copy for this block and added to the Set persistedBlockIds.add(blockId); } } } } // We should have found a persisted copy for each located block. assertThat(persistedBlockIds.size(), is(locatedBlocks.getLocatedBlocks().size())); }
/**
 * Checks that {@link DataNode#checkDiskErrorAsync()} actually triggers a disk error check.
 * Before the code was refactored, this method was not being called.
 *
 * @throws IOException on cluster setup failure
 * @throws InterruptedException if the sleep is interrupted
 */
@Test
public void testcheckDiskError() throws IOException, InterruptedException {
  if (cluster.getDataNodes().size() <= 0) {
    cluster.startDataNodes(conf, 1, true, null, null);
    cluster.waitActive();
  }
  DataNode dataNode = cluster.getDataNodes().get(0);
  long slackTime = dataNode.checkDiskErrorInterval / 2;
  // check for disk error
  dataNode.checkDiskErrorAsync();
  Thread.sleep(dataNode.checkDiskErrorInterval);
  long lastDiskErrorCheck = dataNode.getLastDiskErrorCheck();
  assertTrue(
      "Disk error check was not performed within " + dataNode.checkDiskErrorInterval + " ms",
      (Time.monotonicNow() - lastDiskErrorCheck) < (dataNode.checkDiskErrorInterval + slackTime));
}
/** Test BPService Thread Exit */ @Test public void testBPServiceExit() throws Exception { DataNode dn = cluster.getDataNodes().get(0); stopBPServiceThreads(1, dn); assertTrue("DataNode should not exit", dn.isDatanodeUp()); stopBPServiceThreads(2, dn); assertFalse("DataNode should exit", dn.isDatanodeUp()); }
/** * testing that APPEND operation can handle token expiration when re-establishing pipeline is * needed */ @Test public void testAppend() throws Exception { MiniDFSCluster cluster = null; int numDataNodes = 2; Configuration conf = getConf(numDataNodes); try { cluster = new MiniDFSCluster.Builder(conf).numDataNodes(numDataNodes).build(); cluster.waitActive(); assertEquals(numDataNodes, cluster.getDataNodes().size()); final NameNode nn = cluster.getNameNode(); final BlockManager bm = nn.getNamesystem().getBlockManager(); final BlockTokenSecretManager sm = bm.getBlockTokenSecretManager(); // set a short token lifetime (1 second) SecurityTestUtil.setBlockTokenLifetime(sm, 1000L); Path fileToAppend = new Path(FILE_TO_APPEND); FileSystem fs = cluster.getFileSystem(); byte[] expected = generateBytes(FILE_SIZE); // write a one-byte file FSDataOutputStream stm = writeFile(fs, fileToAppend, (short) numDataNodes, BLOCK_SIZE); stm.write(expected, 0, 1); stm.close(); // open the file again for append stm = fs.append(fileToAppend); int mid = expected.length - 1; stm.write(expected, 1, mid - 1); stm.hflush(); /* * wait till token used in stm expires */ Token<BlockTokenIdentifier> token = DFSTestUtil.getBlockToken(stm); while (!SecurityTestUtil.isBlockTokenExpired(token)) { try { Thread.sleep(10); } catch (InterruptedException ignored) { } } // remove a datanode to force re-establishing pipeline cluster.stopDataNode(0); // append the rest of the file stm.write(expected, mid, expected.length - mid); stm.close(); // check if append is successful FSDataInputStream in5 = fs.open(fileToAppend); assertTrue(checkFile1(in5, expected)); } finally { if (cluster != null) { cluster.shutdown(); } } }
private int getTrueReplication(MiniDFSCluster cluster, ExtendedBlock block) throws IOException { int count = 0; for (DataNode dn : cluster.getDataNodes()) { if (DataNodeTestUtils.getFSDataset(dn) .getStoredBlock(block.getBlockPoolId(), block.getBlockId()) != null) { count++; } } return count; }
/** * Test whether we can delay the deletion of unknown blocks in DataNode's first several block * reports. */ @Test public void testPendingDeleteUnknownBlocks() throws Exception { final int fileNum = 5; // 5 files final Path[] files = new Path[fileNum]; final DataNodeProperties[] dnprops = new DataNodeProperties[REPLICATION]; // create a group of files, each file contains 1 block for (int i = 0; i < fileNum; i++) { files[i] = new Path("/file" + i); DFSTestUtil.createFile(dfs, files[i], BLOCKSIZE, REPLICATION, i); } // wait until all DataNodes have replicas waitForReplication(); for (int i = REPLICATION - 1; i >= 0; i--) { dnprops[i] = cluster.stopDataNode(i); } Thread.sleep(2000); // delete 2 files, we still have 3 files remaining so that we can cover // every DN storage for (int i = 0; i < 2; i++) { dfs.delete(files[i], true); } // restart NameNode cluster.restartNameNode(false); InvalidateBlocks invalidateBlocks = (InvalidateBlocks) Whitebox.getInternalState( cluster.getNamesystem().getBlockManager(), "invalidateBlocks"); InvalidateBlocks mockIb = Mockito.spy(invalidateBlocks); Mockito.doReturn(1L).when(mockIb).getInvalidationDelay(); Whitebox.setInternalState( cluster.getNamesystem().getBlockManager(), "invalidateBlocks", mockIb); Assert.assertEquals(0L, cluster.getNamesystem().getPendingDeletionBlocks()); // restart DataNodes for (int i = 0; i < REPLICATION; i++) { cluster.restartDataNode(dnprops[i], true); } cluster.waitActive(); for (int i = 0; i < REPLICATION; i++) { DataNodeTestUtils.triggerBlockReport(cluster.getDataNodes().get(i)); } Thread.sleep(2000); // make sure we have received block reports by checking the total block # Assert.assertEquals(3, cluster.getNamesystem().getBlocksTotal()); Assert.assertEquals(4, cluster.getNamesystem().getPendingDeletionBlocks()); cluster.restartNameNode(true); Thread.sleep(6000); Assert.assertEquals(3, cluster.getNamesystem().getBlocksTotal()); Assert.assertEquals(0, cluster.getNamesystem().getPendingDeletionBlocks()); }
/** Test that DataStorage and BlockPoolSliceStorage remove the failed volume after failure. */ @Test(timeout = 150000) public void testFailedVolumeBeingRemovedFromDataNode() throws InterruptedException, IOException, TimeoutException { // The test uses DataNodeTestUtils#injectDataDirFailure() to simulate // volume failures which is currently not supported on Windows. assumeTrue(!Path.WINDOWS); Path file1 = new Path("/test1"); DFSTestUtil.createFile(fs, file1, 1024, (short) 2, 1L); DFSTestUtil.waitReplication(fs, file1, (short) 2); File dn0Vol1 = new File(dataDir, "data" + (2 * 0 + 1)); DataNodeTestUtils.injectDataDirFailure(dn0Vol1); DataNode dn0 = cluster.getDataNodes().get(0); checkDiskErrorSync(dn0); // Verify dn0Vol1 has been completely removed from DN0. // 1. dn0Vol1 is removed from DataStorage. DataStorage storage = dn0.getStorage(); assertEquals(1, storage.getNumStorageDirs()); for (int i = 0; i < storage.getNumStorageDirs(); i++) { Storage.StorageDirectory sd = storage.getStorageDir(i); assertFalse(sd.getRoot().getAbsolutePath().startsWith(dn0Vol1.getAbsolutePath())); } final String bpid = cluster.getNamesystem().getBlockPoolId(); BlockPoolSliceStorage bpsStorage = storage.getBPStorage(bpid); assertEquals(1, bpsStorage.getNumStorageDirs()); for (int i = 0; i < bpsStorage.getNumStorageDirs(); i++) { Storage.StorageDirectory sd = bpsStorage.getStorageDir(i); assertFalse(sd.getRoot().getAbsolutePath().startsWith(dn0Vol1.getAbsolutePath())); } // 2. dn0Vol1 is removed from FsDataset FsDatasetSpi<? extends FsVolumeSpi> data = dn0.getFSDataset(); try (FsDatasetSpi.FsVolumeReferences vols = data.getFsVolumeReferences()) { for (FsVolumeSpi volume : vols) { assertNotEquals( new File(volume.getBasePath()).getAbsoluteFile(), dn0Vol1.getAbsoluteFile()); } } // 3. all blocks on dn0Vol1 have been removed. for (ReplicaInfo replica : FsDatasetTestUtil.getReplicas(data, bpid)) { assertNotNull(replica.getVolume()); assertNotEquals( new File(replica.getVolume().getBasePath()).getAbsoluteFile(), dn0Vol1.getAbsoluteFile()); } // 4. dn0Vol1 is not in DN0's configuration and dataDirs anymore. String[] dataDirStrs = dn0.getConf().get(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY).split(","); assertEquals(1, dataDirStrs.length); assertFalse(dataDirStrs[0].contains(dn0Vol1.getAbsolutePath())); }
/** * Another regression test for HDFS-2742. This tests the following sequence: - DN does a block * report while file is open. This BR contains the block in RBW state. - The block report is * delayed in reaching the standby. - The file is closed. - The standby processes the OP_ADD and * OP_CLOSE operations before the RBW block report arrives. - The standby should not mark the * block as corrupt. */ @Test public void testRBWReportArrivesAfterEdits() throws Exception { final CountDownLatch brFinished = new CountDownLatch(1); DelayAnswer delayer = new GenericTestUtils.DelayAnswer(LOG) { @Override protected Object passThrough(InvocationOnMock invocation) throws Throwable { try { return super.passThrough(invocation); } finally { // inform the test that our block report went through. brFinished.countDown(); } } }; FSDataOutputStream out = fs.create(TEST_FILE_PATH); try { AppendTestUtil.write(out, 0, 10); out.hflush(); DataNode dn = cluster.getDataNodes().get(0); DatanodeProtocolClientSideTranslatorPB spy = DataNodeTestUtils.spyOnBposToNN(dn, nn2); Mockito.doAnswer(delayer) .when(spy) .blockReport( Mockito.<DatanodeRegistration>anyObject(), Mockito.anyString(), Mockito.<StorageBlockReport[]>anyObject()); dn.scheduleAllBlockReport(0); delayer.waitForCall(); } finally { IOUtils.closeStream(out); } cluster.transitionToStandby(0); cluster.transitionToActive(1); delayer.proceed(); brFinished.await(); // Verify that no replicas are marked corrupt, and that the // file is readable from the failed-over standby. BlockManagerTestUtil.updateState(nn1.getNamesystem().getBlockManager()); BlockManagerTestUtil.updateState(nn2.getNamesystem().getBlockManager()); assertEquals(0, nn1.getNamesystem().getCorruptReplicaBlocks()); assertEquals(0, nn2.getNamesystem().getCorruptReplicaBlocks()); DFSTestUtil.readFile(fs, TEST_FILE_PATH); }
@Test public void testSendOOBToPeers() throws Exception { DataNode dn = cluster.getDataNodes().get(0); DataXceiverServer spyXserver = Mockito.spy(dn.getXferServer()); NullPointerException npe = new NullPointerException(); Mockito.doThrow(npe).when(spyXserver).sendOOBToPeers(); dn.xserver = spyXserver; try { dn.shutdown(); } catch (Exception e) { fail("DataNode shutdown should not have thrown exception " + e); } }
/** * If ramDiskStorageLimit is >=0, then RAM_DISK capacity is artificially capped. If * ramDiskStorageLimit < 0 then it is ignored. */ protected final void startUpCluster( final int numDataNodes, final StorageType[] storageTypes, final long ramDiskStorageLimit, final boolean useSCR) throws IOException { Configuration conf = new Configuration(); conf.setLong(DFS_BLOCK_SIZE_KEY, BLOCK_SIZE); conf.setInt( DFS_NAMENODE_LAZY_PERSIST_FILE_SCRUB_INTERVAL_SEC, LAZY_WRITE_FILE_SCRUBBER_INTERVAL_SEC); conf.setLong(DFS_HEARTBEAT_INTERVAL_KEY, HEARTBEAT_INTERVAL_SEC); conf.setInt(DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY, HEARTBEAT_RECHECK_INTERVAL_MSEC); conf.setInt(DFS_DATANODE_LAZY_WRITER_INTERVAL_SEC, LAZY_WRITER_INTERVAL_SEC); if (useSCR) { conf.setBoolean(DFS_CLIENT_READ_SHORTCIRCUIT_KEY, useSCR); conf.set(DFS_CLIENT_CONTEXT, UUID.randomUUID().toString()); sockDir = new TemporarySocketDirectory(); conf.set( DFS_DOMAIN_SOCKET_PATH_KEY, new File(sockDir.getDir(), this.getClass().getSimpleName() + "._PORT.sock") .getAbsolutePath()); conf.set( DFS_BLOCK_LOCAL_PATH_ACCESS_USER_KEY, UserGroupInformation.getCurrentUser().getShortUserName()); } cluster = new MiniDFSCluster.Builder(conf) .numDataNodes(numDataNodes) .storageTypes( storageTypes != null ? storageTypes : new StorageType[] {DEFAULT, DEFAULT}) .build(); fs = cluster.getFileSystem(); client = fs.getClient(); // Artificially cap the storage capacity of the RAM_DISK volume. if (ramDiskStorageLimit >= 0) { List<? extends FsVolumeSpi> volumes = cluster.getDataNodes().get(0).getFSDataset().getVolumes(); for (FsVolumeSpi volume : volumes) { if (volume.getStorageType() == RAM_DISK) { ((FsVolumeImpl) volume).setCapacityForTesting(ramDiskStorageLimit); } } } LOG.info("Cluster startup complete"); }
/** * Test that, when a block is re-opened for append, the related datanode messages are correctly * queued by the SBN because they have future states and genstamps. */ @Test public void testQueueingWithAppend() throws Exception { int numQueued = 0; int numDN = cluster.getDataNodes().size(); FSDataOutputStream out = fs.create(TEST_FILE_PATH); try { AppendTestUtil.write(out, 0, 10); out.hflush(); // Opening the file will report RBW replicas, but will be // queued on the StandbyNode. numQueued += numDN; // RBW messages } finally { IOUtils.closeStream(out); numQueued += numDN; // blockReceived messages } cluster.triggerBlockReports(); numQueued += numDN; try { out = fs.append(TEST_FILE_PATH); AppendTestUtil.write(out, 10, 10); // RBW replicas once it's opened for append numQueued += numDN; } finally { IOUtils.closeStream(out); numQueued += numDN; // blockReceived } cluster.triggerBlockReports(); numQueued += numDN; assertEquals( numQueued, cluster.getNameNode(1).getNamesystem().getPendingDataNodeMessageCount()); cluster.transitionToStandby(0); cluster.transitionToActive(1); // Verify that no replicas are marked corrupt, and that the // file is readable from the failed-over standby. BlockManagerTestUtil.updateState(nn1.getNamesystem().getBlockManager()); BlockManagerTestUtil.updateState(nn2.getNamesystem().getBlockManager()); assertEquals(0, nn1.getNamesystem().getCorruptReplicaBlocks()); assertEquals(0, nn2.getNamesystem().getCorruptReplicaBlocks()); AppendTestUtil.check(fs, TEST_FILE_PATH, 20); }
/**
 * Test that the DataNode stops when the number of failed volumes exceeds
 * dfs.datanode.failed.volumes.tolerated.
 */
@Test(timeout = 10000)
public void testDataNodeShutdownAfterNumFailedVolumeExceedsTolerated()
    throws InterruptedException, IOException {
  // make both data directories fail on dn0
  final File dn0Vol1 = new File(dataDir, "data" + (2 * 0 + 1));
  final File dn0Vol2 = new File(dataDir, "data" + (2 * 0 + 2));
  DataNodeTestUtils.injectDataDirFailure(dn0Vol1, dn0Vol2);
  DataNode dn0 = cluster.getDataNodes().get(0);
  checkDiskErrorSync(dn0);

  // DN0 should stop once the number of failed disks exceeds the tolerated
  // value (1).
  assertFalse(dn0.shouldRun());
}
@Test public void testRead() throws Exception { MiniDFSCluster cluster = null; int numDataNodes = 2; Configuration conf = getConf(numDataNodes); try { cluster = new MiniDFSCluster.Builder(conf).numDataNodes(numDataNodes).build(); cluster.waitActive(); assertEquals(numDataNodes, cluster.getDataNodes().size()); doTestRead(conf, cluster, false); } finally { if (cluster != null) { cluster.shutdown(); } } }
// test writeToRbw @Test public void testWriteToRbw() throws Exception { MiniDFSCluster cluster = new MiniDFSCluster.Builder(new HdfsConfiguration()).build(); try { cluster.waitActive(); DataNode dn = cluster.getDataNodes().get(0); FsDatasetImpl dataSet = (FsDatasetImpl) DataNodeTestUtils.getFSDataset(dn); // set up replicasMap String bpid = cluster.getNamesystem().getBlockPoolId(); ExtendedBlock[] blocks = setup(bpid, dataSet); // test writeToRbw testWriteToRbw(dataSet, blocks); } finally { cluster.shutdown(); } }
/** Check that the permissions of the local DN directories are as expected. */ @Test public void testLocalDirs() throws Exception { Configuration conf = new Configuration(); final String permStr = conf.get(DFSConfigKeys.DFS_DATANODE_DATA_DIR_PERMISSION_KEY); FsPermission expected = new FsPermission(permStr); // Check permissions on directories in 'dfs.datanode.data.dir' FileSystem localFS = FileSystem.getLocal(conf); for (DataNode dn : cluster.getDataNodes()) { for (FsVolumeSpi v : dn.getFSDataset().getVolumes()) { String dir = v.getBasePath(); Path dataDir = new Path(dir); FsPermission actual = localFS.getFileStatus(dataDir).getPermission(); assertEquals( "Permission for dir: " + dataDir + ", is " + actual + ", while expected is " + expected, expected, actual); } } }
/** Test to check that a DN goes down when all its volumes have failed. */ @Test public void testShutdown() throws Exception { if (System.getProperty("os.name").startsWith("Windows")) { /** * This test depends on OS not allowing file creations on a directory that does not have write * permissions for the user. Apparently it is not the case on Windows (at least under Cygwin), * and possibly AIX. This is disabled on Windows. */ return; } // Bring up two more datanodes cluster.startDataNodes(conf, 2, true, null, null); cluster.waitActive(); final int dnIndex = 0; String bpid = cluster.getNamesystem().getBlockPoolId(); File storageDir = cluster.getInstanceStorageDir(dnIndex, 0); File dir1 = MiniDFSCluster.getRbwDir(storageDir, bpid); storageDir = cluster.getInstanceStorageDir(dnIndex, 1); File dir2 = MiniDFSCluster.getRbwDir(storageDir, bpid); try { // make the data directory of the first datanode to be readonly assertTrue("Couldn't chmod local vol", dir1.setReadOnly()); assertTrue("Couldn't chmod local vol", dir2.setReadOnly()); // create files and make sure that first datanode will be down DataNode dn = cluster.getDataNodes().get(dnIndex); for (int i = 0; dn.isDatanodeUp(); i++) { Path fileName = new Path("/test.txt" + i); DFSTestUtil.createFile(fs, fileName, 1024, (short) 2, 1L); DFSTestUtil.waitReplication(fs, fileName, (short) 2); fs.delete(fileName, true); } } finally { // restore its old permission FileUtil.setWritable(dir1, true); FileUtil.setWritable(dir2, true); } }
/** Test changing the number of volumes does not impact the disk failure tolerance. */ @Test public void testTolerateVolumeFailuresAfterAddingMoreVolumes() throws InterruptedException, ReconfigurationException, IOException { final File dn0Vol1 = new File(dataDir, "data" + (2 * 0 + 1)); final File dn0Vol2 = new File(dataDir, "data" + (2 * 0 + 2)); final File dn0VolNew = new File(dataDir, "data_new"); final DataNode dn0 = cluster.getDataNodes().get(0); final String oldDataDirs = dn0.getConf().get(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY); // Add a new volume to DN0 dn0.reconfigurePropertyImpl( DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY, oldDataDirs + "," + dn0VolNew.getAbsolutePath()); // Fail dn0Vol1 first and hot swap it. DataNodeTestUtils.injectDataDirFailure(dn0Vol1); checkDiskErrorSync(dn0); assertTrue(dn0.shouldRun()); // Fail dn0Vol2, now dn0 should stop, because we only tolerate 1 disk failure. DataNodeTestUtils.injectDataDirFailure(dn0Vol2); checkDiskErrorSync(dn0); assertFalse(dn0.shouldRun()); }
protected final void triggerBlockReport() throws IOException, InterruptedException { // Trigger block report to NN DataNodeTestUtils.triggerBlockReport(cluster.getDataNodes().get(0)); Thread.sleep(10 * 1000); }
/** * Test if {@link FSNamesystem#handleHeartbeat} can pick up replication and/or invalidate requests * and observes the max limit */ @Test public void testHeartbeat() throws Exception { final Configuration conf = new HdfsConfiguration(); final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build(); try { cluster.waitActive(); final FSNamesystem namesystem = cluster.getNamesystem(); final HeartbeatManager hm = namesystem.getBlockManager().getDatanodeManager().getHeartbeatManager(); final String poolId = namesystem.getBlockPoolId(); final DatanodeRegistration nodeReg = DataNodeTestUtils.getDNRegistrationForBP(cluster.getDataNodes().get(0), poolId); final DatanodeDescriptor dd = NameNodeAdapter.getDatanode(namesystem, nodeReg); final String storageID = DatanodeStorage.generateUuid(); dd.updateStorage(new DatanodeStorage(storageID)); final int REMAINING_BLOCKS = 1; final int MAX_REPLICATE_LIMIT = conf.getInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_MAX_STREAMS_KEY, 2); final int MAX_INVALIDATE_LIMIT = DFSConfigKeys.DFS_BLOCK_INVALIDATE_LIMIT_DEFAULT; final int MAX_INVALIDATE_BLOCKS = 2 * MAX_INVALIDATE_LIMIT + REMAINING_BLOCKS; final int MAX_REPLICATE_BLOCKS = 2 * MAX_REPLICATE_LIMIT + REMAINING_BLOCKS; final DatanodeStorageInfo[] ONE_TARGET = {dd.getStorageInfo(storageID)}; try { namesystem.writeLock(); synchronized (hm) { for (int i = 0; i < MAX_REPLICATE_BLOCKS; i++) { dd.addBlockToBeReplicated( new Block(i, 0, GenerationStamp.LAST_RESERVED_STAMP), ONE_TARGET); } DatanodeCommand[] cmds = NameNodeAdapter.sendHeartBeat(nodeReg, dd, namesystem).getCommands(); assertEquals(1, cmds.length); assertEquals(DatanodeProtocol.DNA_TRANSFER, cmds[0].getAction()); assertEquals(MAX_REPLICATE_LIMIT, ((BlockCommand) cmds[0]).getBlocks().length); ArrayList<Block> blockList = new ArrayList<Block>(MAX_INVALIDATE_BLOCKS); for (int i = 0; i < MAX_INVALIDATE_BLOCKS; i++) { blockList.add(new Block(i, 0, GenerationStamp.LAST_RESERVED_STAMP)); } dd.addBlocksToBeInvalidated(blockList); cmds = NameNodeAdapter.sendHeartBeat(nodeReg, dd, namesystem).getCommands(); assertEquals(2, cmds.length); assertEquals(DatanodeProtocol.DNA_TRANSFER, cmds[0].getAction()); assertEquals(MAX_REPLICATE_LIMIT, ((BlockCommand) cmds[0]).getBlocks().length); assertEquals(DatanodeProtocol.DNA_INVALIDATE, cmds[1].getAction()); assertEquals(MAX_INVALIDATE_LIMIT, ((BlockCommand) cmds[1]).getBlocks().length); cmds = NameNodeAdapter.sendHeartBeat(nodeReg, dd, namesystem).getCommands(); assertEquals(2, cmds.length); assertEquals(DatanodeProtocol.DNA_TRANSFER, cmds[0].getAction()); assertEquals(REMAINING_BLOCKS, ((BlockCommand) cmds[0]).getBlocks().length); assertEquals(DatanodeProtocol.DNA_INVALIDATE, cmds[1].getAction()); assertEquals(MAX_INVALIDATE_LIMIT, ((BlockCommand) cmds[1]).getBlocks().length); cmds = NameNodeAdapter.sendHeartBeat(nodeReg, dd, namesystem).getCommands(); assertEquals(1, cmds.length); assertEquals(DatanodeProtocol.DNA_INVALIDATE, cmds[0].getAction()); assertEquals(REMAINING_BLOCKS, ((BlockCommand) cmds[0]).getBlocks().length); cmds = NameNodeAdapter.sendHeartBeat(nodeReg, dd, namesystem).getCommands(); assertEquals(0, cmds.length); } } finally { namesystem.writeUnlock(); } } finally { cluster.shutdown(); } }
/** * Test if {@link FSNamesystem#handleHeartbeat} correctly selects data node targets for block * recovery. */ @Test public void testHeartbeatBlockRecovery() throws Exception { final Configuration conf = new HdfsConfiguration(); final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build(); try { cluster.waitActive(); final FSNamesystem namesystem = cluster.getNamesystem(); final HeartbeatManager hm = namesystem.getBlockManager().getDatanodeManager().getHeartbeatManager(); final String poolId = namesystem.getBlockPoolId(); final DatanodeRegistration nodeReg1 = DataNodeTestUtils.getDNRegistrationForBP(cluster.getDataNodes().get(0), poolId); final DatanodeDescriptor dd1 = NameNodeAdapter.getDatanode(namesystem, nodeReg1); dd1.updateStorage(new DatanodeStorage(DatanodeStorage.generateUuid())); final DatanodeRegistration nodeReg2 = DataNodeTestUtils.getDNRegistrationForBP(cluster.getDataNodes().get(1), poolId); final DatanodeDescriptor dd2 = NameNodeAdapter.getDatanode(namesystem, nodeReg2); dd2.updateStorage(new DatanodeStorage(DatanodeStorage.generateUuid())); final DatanodeRegistration nodeReg3 = DataNodeTestUtils.getDNRegistrationForBP(cluster.getDataNodes().get(2), poolId); final DatanodeDescriptor dd3 = NameNodeAdapter.getDatanode(namesystem, nodeReg3); dd3.updateStorage(new DatanodeStorage(DatanodeStorage.generateUuid())); try { namesystem.writeLock(); synchronized (hm) { NameNodeAdapter.sendHeartBeat(nodeReg1, dd1, namesystem); NameNodeAdapter.sendHeartBeat(nodeReg2, dd2, namesystem); NameNodeAdapter.sendHeartBeat(nodeReg3, dd3, namesystem); // Test with all alive nodes. DFSTestUtil.resetLastUpdatesWithOffset(dd1, 0); DFSTestUtil.resetLastUpdatesWithOffset(dd2, 0); DFSTestUtil.resetLastUpdatesWithOffset(dd3, 0); final DatanodeStorageInfo[] storages = { dd1.getStorageInfos()[0], dd2.getStorageInfos()[0], dd3.getStorageInfos()[0] }; BlockInfo blockInfo = new BlockInfoContiguous( new Block(0, 0, GenerationStamp.LAST_RESERVED_STAMP), (short) 3); blockInfo.convertToBlockUnderConstruction(BlockUCState.UNDER_RECOVERY, storages); dd1.addBlockToBeRecovered(blockInfo); DatanodeCommand[] cmds = NameNodeAdapter.sendHeartBeat(nodeReg1, dd1, namesystem).getCommands(); assertEquals(1, cmds.length); assertEquals(DatanodeProtocol.DNA_RECOVERBLOCK, cmds[0].getAction()); BlockRecoveryCommand recoveryCommand = (BlockRecoveryCommand) cmds[0]; assertEquals(1, recoveryCommand.getRecoveringBlocks().size()); DatanodeInfo[] recoveringNodes = recoveryCommand .getRecoveringBlocks() .toArray(new BlockRecoveryCommand.RecoveringBlock[0])[0] .getLocations(); assertEquals(3, recoveringNodes.length); assertEquals(recoveringNodes[0], dd1); assertEquals(recoveringNodes[1], dd2); assertEquals(recoveringNodes[2], dd3); // Test with one stale node. DFSTestUtil.resetLastUpdatesWithOffset(dd1, 0); // More than the default stale interval of 30 seconds. 
        DFSTestUtil.resetLastUpdatesWithOffset(dd2, -40 * 1000);
        DFSTestUtil.resetLastUpdatesWithOffset(dd3, 0);
        blockInfo = new BlockInfoContiguous(
            new Block(0, 0, GenerationStamp.LAST_RESERVED_STAMP), (short) 3);
        blockInfo.convertToBlockUnderConstruction(BlockUCState.UNDER_RECOVERY, storages);
        dd1.addBlockToBeRecovered(blockInfo);
        cmds = NameNodeAdapter.sendHeartBeat(nodeReg1, dd1, namesystem).getCommands();
        assertEquals(1, cmds.length);
        assertEquals(DatanodeProtocol.DNA_RECOVERBLOCK, cmds[0].getAction());
        recoveryCommand = (BlockRecoveryCommand) cmds[0];
        assertEquals(1, recoveryCommand.getRecoveringBlocks().size());
        recoveringNodes = recoveryCommand
            .getRecoveringBlocks()
            .toArray(new BlockRecoveryCommand.RecoveringBlock[0])[0]
            .getLocations();
        assertEquals(2, recoveringNodes.length); // dd2 is skipped.
        assertEquals(recoveringNodes[0], dd1);
        assertEquals(recoveringNodes[1], dd3);

        // Test with all stale nodes.
        DFSTestUtil.resetLastUpdatesWithOffset(dd1, -60 * 1000);
        // More than the default stale interval of 30 seconds.
        DFSTestUtil.resetLastUpdatesWithOffset(dd2, -40 * 1000);
        DFSTestUtil.resetLastUpdatesWithOffset(dd3, -80 * 1000);
        blockInfo = new BlockInfoContiguous(
            new Block(0, 0, GenerationStamp.LAST_RESERVED_STAMP), (short) 3);
        blockInfo.convertToBlockUnderConstruction(BlockUCState.UNDER_RECOVERY, storages);
        dd1.addBlockToBeRecovered(blockInfo);
        cmds = NameNodeAdapter.sendHeartBeat(nodeReg1, dd1, namesystem).getCommands();
        assertEquals(1, cmds.length);
        assertEquals(DatanodeProtocol.DNA_RECOVERBLOCK, cmds[0].getAction());
        recoveryCommand = (BlockRecoveryCommand) cmds[0];
        assertEquals(1, recoveryCommand.getRecoveringBlocks().size());
        recoveringNodes = recoveryCommand
            .getRecoveringBlocks()
            .toArray(new BlockRecoveryCommand.RecoveringBlock[0])[0]
            .getLocations();
        // All three nodes are returned here; dd1 heartbeated just above and
        // hence it is not stale when the list of recovery locations is
        // constructed.
        assertEquals(3, recoveringNodes.length);
        assertEquals(recoveringNodes[0], dd1);
        assertEquals(recoveringNodes[1], dd2);
        assertEquals(recoveringNodes[2], dd3);
      }
    } finally {
      namesystem.writeUnlock();
    }
  } finally {
    cluster.shutdown();
  }
}
/*
 * Verify that the number of blocks and files is correct after a volume failure,
 * and that we can replicate to both datanodes even after a single volume
 * failure if the configuration parameter allows this.
 */
@Test
public void testVolumeFailure() throws Exception {
  System.out.println("Data dir is " + dataDir.getPath());
  // Data dir structure is dataDir/data[1-4]/[current,tmp...]
  // data1,2 is for datanode 1, data3,4 is for datanode 2
  String filename = "/test.txt";
  Path filePath = new Path(filename);

  // we use only a small number of blocks to avoid creating subdirs in the data dir.
  int filesize = block_size * blocks_num;
  DFSTestUtil.createFile(fs, filePath, filesize, repl, 1L);
  DFSTestUtil.waitReplication(fs, filePath, repl);
  System.out.println("file " + filename + " (size " + filesize + ") is created and replicated");

  // fail the volume
  // delete/make non-writable one of the directories (failed volume)
  data_fail = new File(dataDir, "data3");
  failedDir = MiniDFSCluster.getFinalizedDir(data_fail, cluster.getNamesystem().getBlockPoolId());
  if (failedDir.exists()
      && // !FileUtil.fullyDelete(failedDir)
      !deteteBlocks(failedDir)) {
    throw new IOException("Could not delete hdfs directory '" + failedDir + "'");
  }
  data_fail.setReadOnly();
  failedDir.setReadOnly();
  System.out.println("Deleting " + failedDir.getPath() + "; exist=" + failedDir.exists());

  // access all the blocks on the "failed" DataNode; we need to make sure that
  // the "failed" volume is being accessed - that will cause failure, block
  // removal and an "emergency" block report
  triggerFailure(filename, filesize);

  // make sure a block report is sent
  DataNode dn = cluster.getDataNodes().get(1); // corresponds to dir data3
  String bpid = cluster.getNamesystem().getBlockPoolId();
  DatanodeRegistration dnR = dn.getDNRegistrationForBP(bpid);
  Map<DatanodeStorage, BlockListAsLongs> perVolumeBlockLists =
      dn.getFSDataset().getBlockReports(bpid);

  // Send the block report
  StorageBlockReport[] reports = new StorageBlockReport[perVolumeBlockLists.size()];
  int reportIndex = 0;
  for (Map.Entry<DatanodeStorage, BlockListAsLongs> kvPair : perVolumeBlockLists.entrySet()) {
    DatanodeStorage dnStorage = kvPair.getKey();
    BlockListAsLongs blockList = kvPair.getValue();
    reports[reportIndex++] = new StorageBlockReport(dnStorage, blockList);
  }
  cluster.getNameNodeRpc().blockReport(dnR, bpid, reports, null);

  // verify the number of blocks and files...
  verify(filename, filesize);

  // create another file (with one volume failed)
  System.out.println("creating file test1.txt");
  Path fileName1 = new Path("/test1.txt");
  DFSTestUtil.createFile(fs, fileName1, filesize, repl, 1L);

  // should be able to replicate to both nodes (2 DN, repl=2)
  DFSTestUtil.waitReplication(fs, fileName1, repl);
  System.out.println("file " + fileName1.getName() + " is created and replicated");
}
/**
 * Test that when there is a failure replicating a block, the temporary and meta files are
 * cleaned up and subsequent replication succeeds.
 */
@Test
public void testReplicationError() throws Exception {
  // create a file with a replication factor of 1
  final Path fileName = new Path("/test.txt");
  final int fileLen = 1;
  DFSTestUtil.createFile(fs, fileName, 1, (short) 1, 1L);
  DFSTestUtil.waitReplication(fs, fileName, (short) 1);

  // get the block belonging to the created file
  LocatedBlocks blocks = NameNodeAdapter.getBlockLocations(
      cluster.getNameNode(), fileName.toString(), 0, (long) fileLen);
  assertEquals("Should only find 1 block", 1, blocks.locatedBlockCount());
  LocatedBlock block = blocks.get(0);

  // bring up a second datanode
  cluster.startDataNodes(conf, 1, true, null, null);
  cluster.waitActive();
  final int sndNode = 1;
  DataNode datanode = cluster.getDataNodes().get(sndNode);

  // replicate the block to the second datanode
  InetSocketAddress target = datanode.getXferAddress();
  Socket s = new Socket(target.getAddress(), target.getPort());

  // write the header
  DataOutputStream out = new DataOutputStream(s.getOutputStream());
  DataChecksum checksum = DataChecksum.newDataChecksum(DataChecksum.Type.CRC32, 512);
  new Sender(out)
      .writeBlock(
          block.getBlock(),
          StorageType.DEFAULT,
          BlockTokenSecretManager.DUMMY_TOKEN,
          "",
          new DatanodeInfo[0],
          new StorageType[0],
          null,
          BlockConstructionStage.PIPELINE_SETUP_CREATE,
          1,
          0L,
          0L,
          0L,
          checksum,
          CachingStrategy.newDefaultStrategy(),
          false);
  out.flush();

  // close the connection before sending the content of the block
  out.close();

  // the temporary block & meta files should be deleted
  String bpid = cluster.getNamesystem().getBlockPoolId();
  File storageDir = cluster.getInstanceStorageDir(sndNode, 0);
  File dir1 = MiniDFSCluster.getRbwDir(storageDir, bpid);
  storageDir = cluster.getInstanceStorageDir(sndNode, 1);
  File dir2 = MiniDFSCluster.getRbwDir(storageDir, bpid);
  while (dir1.listFiles().length != 0 || dir2.listFiles().length != 0) {
    Thread.sleep(100);
  }

  // then increase the file's replication factor
  fs.setReplication(fileName, (short) 2);
  // replication should succeed
  DFSTestUtil.waitReplication(fs, fileName, (short) 2);

  // clean up the file
  fs.delete(fileName, false);
}
/**
 * Test that individual volume failures do not cause DNs to fail, that all volumes failed on a
 * single datanode do cause it to fail, and that the capacity and liveness are adjusted
 * correctly in the NN.
 */
@Test
public void testSuccessiveVolumeFailures() throws Exception {
  assumeTrue(!System.getProperty("os.name").startsWith("Windows"));

  // Bring up two more datanodes
  cluster.startDataNodes(conf, 2, true, null, null);
  cluster.waitActive();

  /*
   * Calculate the total capacity of all the datanodes. Sleep for
   * three seconds to be sure the datanodes have had a chance to
   * heartbeat their capacities.
   */
  Thread.sleep(WAIT_FOR_HEARTBEATS);
  final DatanodeManager dm = cluster.getNamesystem().getBlockManager().getDatanodeManager();
  final long origCapacity = DFSTestUtil.getLiveDatanodeCapacity(dm);
  long dnCapacity = DFSTestUtil.getDatanodeCapacity(dm, 0);

  File dn1Vol1 = new File(dataDir, "data" + (2 * 0 + 1));
  File dn2Vol1 = new File(dataDir, "data" + (2 * 1 + 1));
  File dn3Vol1 = new File(dataDir, "data" + (2 * 2 + 1));
  File dn3Vol2 = new File(dataDir, "data" + (2 * 2 + 2));

  /*
   * Make the 1st volume directories on the first two datanodes
   * non-accessible. We don't make all three 1st volume directories
   * readonly since that would cause the entire pipeline to
   * fail. The client does not retry failed nodes even though
   * perhaps they could succeed because just a single volume failed.
   */
  assertTrue("Couldn't chmod local vol", FileUtil.setExecutable(dn1Vol1, false));
  assertTrue("Couldn't chmod local vol", FileUtil.setExecutable(dn2Vol1, false));

  /*
   * Create file1 and wait for 3 replicas (i.e. all DNs can still
   * store a block). Then assert that all DNs are up, despite the
   * volume failures.
   */
  Path file1 = new Path("/test1");
  DFSTestUtil.createFile(fs, file1, 1024, (short) 3, 1L);
  DFSTestUtil.waitReplication(fs, file1, (short) 3);
  ArrayList<DataNode> dns = cluster.getDataNodes();
  assertTrue("DN1 should be up", dns.get(0).isDatanodeUp());
  assertTrue("DN2 should be up", dns.get(1).isDatanodeUp());
  assertTrue("DN3 should be up", dns.get(2).isDatanodeUp());

  /*
   * The metrics should confirm the volume failures.
   */
  assertCounter("VolumeFailures", 1L, getMetrics(dns.get(0).getMetrics().name()));
  assertCounter("VolumeFailures", 1L, getMetrics(dns.get(1).getMetrics().name()));
  assertCounter("VolumeFailures", 0L, getMetrics(dns.get(2).getMetrics().name()));

  // Ensure we wait a sufficient amount of time
  assert (WAIT_FOR_HEARTBEATS * 10) > WAIT_FOR_DEATH;

  // Eventually the NN should report two volume failures
  DFSTestUtil.waitForDatanodeStatus(
      dm, 3, 0, 2, origCapacity - (1 * dnCapacity), WAIT_FOR_HEARTBEATS);

  /*
   * Now fail a volume on the third datanode. We should still be able to get
   * three replicas since we've already identified the other failures.
   */
  assertTrue("Couldn't chmod local vol", FileUtil.setExecutable(dn3Vol1, false));
  Path file2 = new Path("/test2");
  DFSTestUtil.createFile(fs, file2, 1024, (short) 3, 1L);
  DFSTestUtil.waitReplication(fs, file2, (short) 3);
  assertTrue("DN3 should still be up", dns.get(2).isDatanodeUp());
  assertCounter("VolumeFailures", 1L, getMetrics(dns.get(2).getMetrics().name()));

  ArrayList<DatanodeDescriptor> live = new ArrayList<DatanodeDescriptor>();
  ArrayList<DatanodeDescriptor> dead = new ArrayList<DatanodeDescriptor>();
  dm.fetchDatanodes(live, dead, false);
  live.clear();
  dead.clear();
  dm.fetchDatanodes(live, dead, false);
  assertEquals("DN3 should have 1 failed volume", 1, live.get(2).getVolumeFailures());

  /*
   * Once the datanodes have a chance to heartbeat their new capacity the
   * total capacity should be down by three volumes (assuming the host
   * did not grow or shrink the data volume while the test was running).
   */
  dnCapacity = DFSTestUtil.getDatanodeCapacity(dm, 0);
  DFSTestUtil.waitForDatanodeStatus(
      dm, 3, 0, 3, origCapacity - (3 * dnCapacity), WAIT_FOR_HEARTBEATS);

  /*
   * Now fail the 2nd volume on the 3rd datanode. All its volumes
   * are now failed, so it should report two volume failures and
   * no longer be up. Only wait for two replicas since we'll never
   * get a third.
   */
  assertTrue("Couldn't chmod local vol", FileUtil.setExecutable(dn3Vol2, false));
  Path file3 = new Path("/test3");
  DFSTestUtil.createFile(fs, file3, 1024, (short) 3, 1L);
  DFSTestUtil.waitReplication(fs, file3, (short) 2);

  // The DN should consider itself dead
  DFSTestUtil.waitForDatanodeDeath(dns.get(2));

  // And report two failed volumes
  assertCounter("VolumeFailures", 2L, getMetrics(dns.get(2).getMetrics().name()));

  // The NN considers the DN dead
  DFSTestUtil.waitForDatanodeStatus(
      dm, 2, 1, 2, origCapacity - (4 * dnCapacity), WAIT_FOR_HEARTBEATS);

  /*
   * The datanode never tries to restore the failed volume, even if
   * it's subsequently repaired, but it should see this volume on
   * restart, so file creation should be able to succeed after
   * restoring the data directories and restarting the datanodes.
   */
  assertTrue("Couldn't chmod local vol", FileUtil.setExecutable(dn1Vol1, true));
  assertTrue("Couldn't chmod local vol", FileUtil.setExecutable(dn2Vol1, true));
  assertTrue("Couldn't chmod local vol", FileUtil.setExecutable(dn3Vol1, true));
  assertTrue("Couldn't chmod local vol", FileUtil.setExecutable(dn3Vol2, true));
  cluster.restartDataNodes();
  cluster.waitActive();
  Path file4 = new Path("/test4");
  DFSTestUtil.createFile(fs, file4, 1024, (short) 3, 1L);
  DFSTestUtil.waitReplication(fs, file4, (short) 3);

  /*
   * Eventually the capacity should be restored to its original value,
   * and the volume failure count should be reported as zero by both
   * the metrics and the NN.
   */
  DFSTestUtil.waitForDatanodeStatus(dm, 3, 0, 0, origCapacity, WAIT_FOR_HEARTBEATS);
}
protected void doTestRead(Configuration conf, MiniDFSCluster cluster, boolean isStriped) throws Exception { final int numDataNodes = cluster.getDataNodes().size(); final NameNode nn = cluster.getNameNode(); final NamenodeProtocols nnProto = nn.getRpcServer(); final BlockManager bm = nn.getNamesystem().getBlockManager(); final BlockTokenSecretManager sm = bm.getBlockTokenSecretManager(); // set a short token lifetime (1 second) initially SecurityTestUtil.setBlockTokenLifetime(sm, 1000L); Path fileToRead = new Path(FILE_TO_READ); FileSystem fs = cluster.getFileSystem(); byte[] expected = generateBytes(FILE_SIZE); createFile(fs, fileToRead, expected); /* * setup for testing expiration handling of cached tokens */ // read using blockSeekTo(). Acquired tokens are cached in in1 FSDataInputStream in1 = fs.open(fileToRead); assertTrue(checkFile1(in1, expected)); // read using blockSeekTo(). Acquired tokens are cached in in2 FSDataInputStream in2 = fs.open(fileToRead); assertTrue(checkFile1(in2, expected)); // read using fetchBlockByteRange(). Acquired tokens are cached in in3 FSDataInputStream in3 = fs.open(fileToRead); assertTrue(checkFile2(in3, expected)); /* * testing READ interface on DN using a BlockReader */ DFSClient client = null; try { client = new DFSClient(new InetSocketAddress("localhost", cluster.getNameNodePort()), conf); } finally { if (client != null) client.close(); } List<LocatedBlock> locatedBlocks = nnProto.getBlockLocations(FILE_TO_READ, 0, FILE_SIZE).getLocatedBlocks(); LocatedBlock lblock = locatedBlocks.get(0); // first block // verify token is not expired assertFalse(isBlockTokenExpired(lblock)); // read with valid token, should succeed tryRead(conf, lblock, true); /* * wait till myToken and all cached tokens in in1, in2 and in3 expire */ while (!isBlockTokenExpired(lblock)) { try { Thread.sleep(10); } catch (InterruptedException ignored) { } } /* * continue testing READ interface on DN using a BlockReader */ // verify token is expired assertTrue(isBlockTokenExpired(lblock)); // read should fail tryRead(conf, lblock, false); // use a valid new token bm.setBlockToken(lblock, BlockTokenIdentifier.AccessMode.READ); // read should succeed tryRead(conf, lblock, true); // use a token with wrong blockID long rightId = lblock.getBlock().getBlockId(); long wrongId = rightId + 1; lblock.getBlock().setBlockId(wrongId); bm.setBlockToken(lblock, BlockTokenIdentifier.AccessMode.READ); lblock.getBlock().setBlockId(rightId); // read should fail tryRead(conf, lblock, false); // use a token with wrong access modes bm.setBlockToken(lblock, BlockTokenIdentifier.AccessMode.WRITE); // read should fail tryRead(conf, lblock, false); // set a long token lifetime for future tokens SecurityTestUtil.setBlockTokenLifetime(sm, 600 * 1000L); /* * testing that when cached tokens are expired, DFSClient will re-fetch * tokens transparently for READ. 
*/ // confirm all tokens cached in in1 are expired by now List<LocatedBlock> lblocks = DFSTestUtil.getAllBlocks(in1); for (LocatedBlock blk : lblocks) { assertTrue(isBlockTokenExpired(blk)); } // verify blockSeekTo() is able to re-fetch token transparently in1.seek(0); assertTrue(checkFile1(in1, expected)); // confirm all tokens cached in in2 are expired by now List<LocatedBlock> lblocks2 = DFSTestUtil.getAllBlocks(in2); for (LocatedBlock blk : lblocks2) { assertTrue(isBlockTokenExpired(blk)); } // verify blockSeekTo() is able to re-fetch token transparently (testing // via another interface method) if (isStriped) { // striped block doesn't support seekToNewSource in2.seek(0); } else { assertTrue(in2.seekToNewSource(0)); } assertTrue(checkFile1(in2, expected)); // confirm all tokens cached in in3 are expired by now List<LocatedBlock> lblocks3 = DFSTestUtil.getAllBlocks(in3); for (LocatedBlock blk : lblocks3) { assertTrue(isBlockTokenExpired(blk)); } // verify fetchBlockByteRange() is able to re-fetch token transparently assertTrue(checkFile2(in3, expected)); /* * testing that after datanodes are restarted on the same ports, cached * tokens should still work and there is no need to fetch new tokens from * namenode. This test should run while namenode is down (to make sure no * new tokens can be fetched from namenode). */ // restart datanodes on the same ports that they currently use assertTrue(cluster.restartDataNodes(true)); cluster.waitActive(); assertEquals(numDataNodes, cluster.getDataNodes().size()); cluster.shutdownNameNode(0); // confirm tokens cached in in1 are still valid lblocks = DFSTestUtil.getAllBlocks(in1); for (LocatedBlock blk : lblocks) { assertFalse(isBlockTokenExpired(blk)); } // verify blockSeekTo() still works (forced to use cached tokens) in1.seek(0); assertTrue(checkFile1(in1, expected)); // confirm tokens cached in in2 are still valid lblocks2 = DFSTestUtil.getAllBlocks(in2); for (LocatedBlock blk : lblocks2) { assertFalse(isBlockTokenExpired(blk)); } // verify blockSeekTo() still works (forced to use cached tokens) if (isStriped) { in2.seek(0); } else { in2.seekToNewSource(0); } assertTrue(checkFile1(in2, expected)); // confirm tokens cached in in3 are still valid lblocks3 = DFSTestUtil.getAllBlocks(in3); for (LocatedBlock blk : lblocks3) { assertFalse(isBlockTokenExpired(blk)); } // verify fetchBlockByteRange() still works (forced to use cached tokens) assertTrue(checkFile2(in3, expected)); /* * testing that when namenode is restarted, cached tokens should still * work and there is no need to fetch new tokens from namenode. Like the * previous test, this test should also run while namenode is down. The * setup for this test depends on the previous test. */ // restart the namenode and then shut it down for test cluster.restartNameNode(0); cluster.shutdownNameNode(0); // verify blockSeekTo() still works (forced to use cached tokens) in1.seek(0); assertTrue(checkFile1(in1, expected)); // verify again blockSeekTo() still works (forced to use cached tokens) if (isStriped) { in2.seek(0); } else { in2.seekToNewSource(0); } assertTrue(checkFile1(in2, expected)); // verify fetchBlockByteRange() still works (forced to use cached tokens) assertTrue(checkFile2(in3, expected)); /* * testing that after both namenode and datanodes got restarted (namenode * first, followed by datanodes), DFSClient can't access DN without * re-fetching tokens and is able to re-fetch tokens transparently. The * setup of this test depends on the previous test. 
*/ // restore the cluster and restart the datanodes for test cluster.restartNameNode(0); assertTrue(cluster.restartDataNodes(true)); cluster.waitActive(); assertEquals(numDataNodes, cluster.getDataNodes().size()); // shutdown namenode so that DFSClient can't get new tokens from namenode cluster.shutdownNameNode(0); // verify blockSeekTo() fails (cached tokens become invalid) in1.seek(0); assertFalse(checkFile1(in1, expected)); // verify fetchBlockByteRange() fails (cached tokens become invalid) assertFalse(checkFile2(in3, expected)); // restart the namenode to allow DFSClient to re-fetch tokens cluster.restartNameNode(0); // verify blockSeekTo() works again (by transparently re-fetching // tokens from namenode) in1.seek(0); assertTrue(checkFile1(in1, expected)); if (isStriped) { in2.seek(0); } else { in2.seekToNewSource(0); } assertTrue(checkFile1(in2, expected)); // verify fetchBlockByteRange() works again (by transparently // re-fetching tokens from namenode) assertTrue(checkFile2(in3, expected)); /* * testing that when datanodes are restarted on different ports, DFSClient * is able to re-fetch tokens transparently to connect to them */ // restart datanodes on newly assigned ports assertTrue(cluster.restartDataNodes(false)); cluster.waitActive(); assertEquals(numDataNodes, cluster.getDataNodes().size()); // verify blockSeekTo() is able to re-fetch token transparently in1.seek(0); assertTrue(checkFile1(in1, expected)); // verify blockSeekTo() is able to re-fetch token transparently if (isStriped) { in2.seek(0); } else { in2.seekToNewSource(0); } assertTrue(checkFile1(in2, expected)); // verify fetchBlockByteRange() is able to re-fetch token transparently assertTrue(checkFile2(in3, expected)); }