/** check if DFS can handle corrupted blocks properly */ @Test public void testFileCorruption() throws Exception { MiniDFSCluster cluster = null; DFSTestUtil util = new DFSTestUtil.Builder().setName("TestFileCorruption").setNumFiles(20).build(); try { Configuration conf = new HdfsConfiguration(); cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build(); FileSystem fs = cluster.getFileSystem(); util.createFiles(fs, "/srcdat"); // Now deliberately remove the blocks File storageDir = cluster.getInstanceStorageDir(2, 0); String bpid = cluster.getNamesystem().getBlockPoolId(); File data_dir = MiniDFSCluster.getFinalizedDir(storageDir, bpid); assertTrue("data directory does not exist", data_dir.exists()); File[] blocks = data_dir.listFiles(); assertTrue("Blocks do not exist in data-dir", (blocks != null) && (blocks.length > 0)); for (int idx = 0; idx < blocks.length; idx++) { if (!blocks[idx].getName().startsWith("blk_")) { continue; } System.out.println("Deliberately removing file " + blocks[idx].getName()); assertTrue("Cannot remove file.", blocks[idx].delete()); } assertTrue("Corrupted replicas not handled properly.", util.checkFiles(fs, "/srcdat")); util.cleanup(fs, "/srcdat"); } finally { if (cluster != null) { cluster.shutdown(); } } }
/** * Test that an append with no locations fails with an exception showing insufficient locations. */ @Test(timeout = 60000) public void testAppendInsufficientLocations() throws Exception { Configuration conf = new Configuration(); // lower heartbeat interval for fast recognition of DN conf.setInt(DFSConfigKeys.DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY, 1000); conf.setInt(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1); conf.setInt(HdfsClientConfigKeys.DFS_CLIENT_SOCKET_TIMEOUT_KEY, 3000); cluster = new MiniDFSCluster.Builder(conf).numDataNodes(4).build(); DistributedFileSystem fileSystem = null; try { // create a file with replication 3 fileSystem = cluster.getFileSystem(); Path f = new Path("/testAppend"); FSDataOutputStream create = fileSystem.create(f, (short) 2); create.write("/testAppend".getBytes()); create.close(); // Check for replications DFSTestUtil.waitReplication(fileSystem, f, (short) 2); // Shut down all DNs that have the last block location for the file LocatedBlocks lbs = fileSystem.dfs.getNamenode().getBlockLocations("/testAppend", 0, Long.MAX_VALUE); List<DataNode> dnsOfCluster = cluster.getDataNodes(); DatanodeInfo[] dnsWithLocations = lbs.getLastLocatedBlock().getLocations(); for (DataNode dn : dnsOfCluster) { for (DatanodeInfo loc : dnsWithLocations) { if (dn.getDatanodeId().equals(loc)) { dn.shutdown(); DFSTestUtil.waitForDatanodeDeath(dn); } } } // Wait till 0 replication is recognized DFSTestUtil.waitReplication(fileSystem, f, (short) 0); // Append to the file, at this state there are 3 live DNs but none of them // have the block. try { fileSystem.append(f); fail("Append should fail because insufficient locations"); } catch (IOException e) { LOG.info("Expected exception: ", e); } FSDirectory dir = cluster.getNamesystem().getFSDirectory(); final INodeFile inode = INodeFile.valueOf(dir.getINode("/testAppend"), "/testAppend"); assertTrue("File should remain closed", !inode.isUnderConstruction()); } finally { if (null != fileSystem) { fileSystem.close(); } cluster.shutdown(); } }
@Test public void testRegistrationWithDifferentSoftwareVersionsDuringUpgrade() throws Exception { Configuration conf = new HdfsConfiguration(); conf.set(DFSConfigKeys.DFS_DATANODE_MIN_SUPPORTED_NAMENODE_VERSION_KEY, "1.0.0"); MiniDFSCluster cluster = null; try { cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0).build(); NamenodeProtocols rpcServer = cluster.getNameNodeRpc(); long nnCTime = cluster.getNamesystem().getFSImage().getStorage().getCTime(); StorageInfo mockStorageInfo = mock(StorageInfo.class); doReturn(nnCTime).when(mockStorageInfo).getCTime(); DatanodeRegistration mockDnReg = mock(DatanodeRegistration.class); doReturn(HdfsConstants.LAYOUT_VERSION).when(mockDnReg).getVersion(); doReturn("fake-storage-id").when(mockDnReg).getStorageID(); doReturn(mockStorageInfo).when(mockDnReg).getStorageInfo(); // Should succeed when software versions are the same and CTimes are the // same. doReturn(VersionInfo.getVersion()).when(mockDnReg).getSoftwareVersion(); rpcServer.registerDatanode(mockDnReg); // Should succeed when software versions are the same and CTimes are // different. doReturn(nnCTime + 1).when(mockStorageInfo).getCTime(); rpcServer.registerDatanode(mockDnReg); // Should fail when software version of DN is different from NN and CTimes // are different. doReturn(VersionInfo.getVersion() + ".1").when(mockDnReg).getSoftwareVersion(); try { rpcServer.registerDatanode(mockDnReg); fail( "Should not have been able to register DN with different software" + " versions and CTimes"); } catch (IncorrectVersionException ive) { GenericTestUtils.assertExceptionContains("does not match CTime of NN", ive); LOG.info("Got expected exception", ive); } } finally { if (cluster != null) { cluster.shutdown(); } } }
@Test public void testRegistrationWithDifferentSoftwareVersions() throws Exception { Configuration conf = new HdfsConfiguration(); conf.set(DFSConfigKeys.DFS_DATANODE_MIN_SUPPORTED_NAMENODE_VERSION_KEY, "3.0.0"); conf.set(DFSConfigKeys.DFS_NAMENODE_MIN_SUPPORTED_DATANODE_VERSION_KEY, "3.0.0"); MiniDFSCluster cluster = null; try { cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0).build(); NamenodeProtocols rpcServer = cluster.getNameNodeRpc(); long nnCTime = cluster.getNamesystem().getFSImage().getStorage().getCTime(); StorageInfo mockStorageInfo = mock(StorageInfo.class); doReturn(nnCTime).when(mockStorageInfo).getCTime(); DatanodeRegistration mockDnReg = mock(DatanodeRegistration.class); doReturn(HdfsConstants.LAYOUT_VERSION).when(mockDnReg).getVersion(); doReturn("fake-storage-id").when(mockDnReg).getStorageID(); doReturn(mockStorageInfo).when(mockDnReg).getStorageInfo(); // Should succeed when software versions are the same. doReturn("3.0.0").when(mockDnReg).getSoftwareVersion(); rpcServer.registerDatanode(mockDnReg); // Should succeed when software version of DN is above minimum required by NN. doReturn("4.0.0").when(mockDnReg).getSoftwareVersion(); rpcServer.registerDatanode(mockDnReg); // Should fail when software version of DN is below minimum required by NN. doReturn("2.0.0").when(mockDnReg).getSoftwareVersion(); try { rpcServer.registerDatanode(mockDnReg); fail("Should not have been able to register DN with too-low version."); } catch (IncorrectVersionException ive) { GenericTestUtils.assertExceptionContains("The reported DataNode version is too low", ive); LOG.info("Got expected exception", ive); } } finally { if (cluster != null) { cluster.shutdown(); } } }
/* * Wait up to 20s for the given block to be replicated across * the requested number of racks, with the requested number of * replicas, and the requested number of replicas still needed. */ public static void waitForReplication( MiniDFSCluster cluster, ExtendedBlock b, int racks, int replicas, int neededReplicas) throws IOException, TimeoutException, InterruptedException { int curRacks = 0; int curReplicas = 0; int curNeededReplicas = 0; int count = 0; final int ATTEMPTS = 20; do { Thread.sleep(1000); int[] r = BlockManagerTestUtil.getReplicaInfo(cluster.getNamesystem(), b.getLocalBlock()); curRacks = r[0]; curReplicas = r[1]; curNeededReplicas = r[2]; count++; } while ((curRacks != racks || curReplicas != replicas || curNeededReplicas != neededReplicas) && count < ATTEMPTS); if (count == ATTEMPTS) { throw new TimeoutException( "Timed out waiting for replication." + " Needed replicas = " + neededReplicas + " Cur needed replicas = " + curNeededReplicas + " Replicas = " + replicas + " Cur replicas = " + curReplicas + " Racks = " + racks + " Cur racks = " + curRacks); } }
/** * The following test first creates a file with a few blocks. It randomly truncates the replica of * the last block stored in each datanode. Finally, it triggers block synchronization to * synchronize all stored block. */ public void testBlockSynchronization() throws Exception { final int ORG_FILE_SIZE = 3000; Configuration conf = new HdfsConfiguration(); conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE); MiniDFSCluster cluster = null; try { cluster = new MiniDFSCluster.Builder(conf).numDataNodes(5).build(); cluster.waitActive(); // create a file DistributedFileSystem dfs = (DistributedFileSystem) cluster.getFileSystem(); String filestr = "/foo"; Path filepath = new Path(filestr); DFSTestUtil.createFile(dfs, filepath, ORG_FILE_SIZE, REPLICATION_NUM, 0L); assertTrue(dfs.exists(filepath)); DFSTestUtil.waitReplication(dfs, filepath, REPLICATION_NUM); // get block info for the last block LocatedBlock locatedblock = TestInterDatanodeProtocol.getLastLocatedBlock(dfs.dfs.getNamenode(), filestr); DatanodeInfo[] datanodeinfos = locatedblock.getLocations(); assertEquals(REPLICATION_NUM, datanodeinfos.length); // connect to data nodes DataNode[] datanodes = new DataNode[REPLICATION_NUM]; for (int i = 0; i < REPLICATION_NUM; i++) { datanodes[i] = cluster.getDataNode(datanodeinfos[i].getIpcPort()); assertTrue(datanodes[i] != null); } // verify Block Info ExtendedBlock lastblock = locatedblock.getBlock(); DataNode.LOG.info("newblocks=" + lastblock); for (int i = 0; i < REPLICATION_NUM; i++) { checkMetaInfo(lastblock, datanodes[i]); } DataNode.LOG.info("dfs.dfs.clientName=" + dfs.dfs.clientName); cluster.getNameNodeRpc().append(filestr, dfs.dfs.clientName); // expire lease to trigger block recovery. waitLeaseRecovery(cluster); Block[] updatedmetainfo = new Block[REPLICATION_NUM]; long oldSize = lastblock.getNumBytes(); lastblock = TestInterDatanodeProtocol.getLastLocatedBlock(dfs.dfs.getNamenode(), filestr).getBlock(); long currentGS = lastblock.getGenerationStamp(); for (int i = 0; i < REPLICATION_NUM; i++) { updatedmetainfo[i] = DataNodeTestUtils.getFSDataset(datanodes[i]) .getStoredBlock(lastblock.getBlockPoolId(), lastblock.getBlockId()); assertEquals(lastblock.getBlockId(), updatedmetainfo[i].getBlockId()); assertEquals(oldSize, updatedmetainfo[i].getNumBytes()); assertEquals(currentGS, updatedmetainfo[i].getGenerationStamp()); } // verify that lease recovery does not occur when namenode is in safemode System.out.println("Testing that lease recovery cannot happen during safemode."); filestr = "/foo.safemode"; filepath = new Path(filestr); dfs.create(filepath, (short) 1); cluster.getNameNodeRpc().setSafeMode(HdfsConstants.SafeModeAction.SAFEMODE_ENTER); assertTrue(dfs.dfs.exists(filestr)); DFSTestUtil.waitReplication(dfs, filepath, (short) 1); waitLeaseRecovery(cluster); // verify that we still cannot recover the lease LeaseManager lm = NameNodeAdapter.getLeaseManager(cluster.getNamesystem()); assertTrue("Found " + lm.countLease() + " lease, expected 1", lm.countLease() == 1); cluster.getNameNodeRpc().setSafeMode(HdfsConstants.SafeModeAction.SAFEMODE_LEAVE); } finally { if (cluster != null) { cluster.shutdown(); } } }
/** * Test the case that a replica is reported corrupt while it is not in blocksMap. Make sure that * ArrayIndexOutOfBounds does not thrown. See Hadoop-4351. * * <p>TODO HOPS This test fails as it tries to remove a non-existing replica. Calling * findAndMarkBlockAsCorrupt from a DataNode that does not store any replica for this specific * block will lead to a tuple did not exist exception. The reason for this is that * BlockManager.removeStoredBlock is called with a node that does not store a replica and hence * the delete will not be able to succeed during commit. */ @Test public void testArrayOutOfBoundsException() throws Exception { MiniDFSCluster cluster = null; try { Configuration conf = new HdfsConfiguration(); cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).build(); cluster.waitActive(); FileSystem fs = cluster.getFileSystem(); final Path FILE_PATH = new Path("/tmp.txt"); final long FILE_LEN = 1L; DFSTestUtil.createFile(fs, FILE_PATH, FILE_LEN, (short) 2, 1L); // get the block final String bpid = cluster.getNamesystem().getBlockPoolId(); File storageDir = cluster.getInstanceStorageDir(0, 0); File dataDir = MiniDFSCluster.getFinalizedDir(storageDir, bpid); assertTrue("Data directory does not exist", dataDir.exists()); ExtendedBlock blk = getBlock(bpid, dataDir); if (blk == null) { storageDir = cluster.getInstanceStorageDir(0, 1); dataDir = MiniDFSCluster.getFinalizedDir(storageDir, bpid); blk = getBlock(bpid, dataDir); } assertFalse( "Data directory does not contain any blocks or there was an " + "IO error", blk == null); // start a third datanode cluster.startDataNodes(conf, 1, true, null, null); ArrayList<DataNode> datanodes = cluster.getDataNodes(); assertEquals(datanodes.size(), 3); DataNode dataNode = datanodes.get(2); // report corrupted block by the third datanode DatanodeRegistration dnR = DataNodeTestUtils.getDNRegistrationForBP(dataNode, blk.getBlockPoolId()); // Get the storage id of one of the storages on the datanode String storageId = cluster .getNamesystem() .getBlockManager() .getDatanodeManager() .getDatanode(dataNode.getDatanodeId()) .getStorageInfos()[0] .getStorageID(); cluster .getNamesystem() .getBlockManager() .findAndMarkBlockAsCorrupt(blk, new DatanodeInfo(dnR), storageId, "some test reason"); // open the file fs.open(FILE_PATH); // clean up fs.delete(FILE_PATH, false); } finally { if (cluster != null) { cluster.shutdown(); } } }