/**
 * Verify that the NameNode is able to still use <tt>READ_ONLY_SHARED</tt> replicas even when the
 * single NORMAL replica is offline (and the effective replication count is 0).
 */
@Test
public void testNormalReplicaOffline() throws Exception {
  // Stop the datanode hosting the NORMAL replica
  cluster.stopDataNode(normalDataNode.getXferAddr());

  // Force NameNode to detect that the datanode is down
  BlockManagerTestUtil.noticeDeadDatanode(cluster.getNameNode(), normalDataNode.getXferAddr());

  // The live replica count should now be zero (since the NORMAL replica is offline)
  NumberReplicas numberReplicas = blockManager.countNodes(block);
  assertThat(numberReplicas.liveReplicas(), is(0));

  // The block should be reported as under-replicated
  BlockManagerTestUtil.updateState(blockManager);
  assertThat(blockManager.getUnderReplicatedBlocksCount(), is(1L));

  // The BlockManager should be able to heal the replication count back to 1
  // by triggering an inter-datanode replication from one of the READ_ONLY_SHARED replicas
  BlockManagerTestUtil.computeAllPendingWork(blockManager);
  DFSTestUtil.waitForReplication(cluster, extendedBlock, 1, 1, 0);

  // There should now be 2 *locations* for the block, and 1 *replica*
  assertThat(getLocatedBlock().getLocations().length, is(2));
  validateNumberReplicas(1);
}
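getLocatedBlock() and validateNumberReplicas() are helpers of the enclosing test class and are not shown above. A minimal sketch of what they might look like, assuming the class also holds a DFSClient field named client and PATH/BLOCK_SIZE constants for the single-block test file (client, PATH, and BLOCK_SIZE are assumptions, not taken from the snippet above):

// Hypothetical sketch of the helpers used by testNormalReplicaOffline();
// assumes com.google.common.collect.Iterables is on the classpath.
private LocatedBlock getLocatedBlock() throws IOException {
  // Ask the NameNode for the locations of the single-block test file
  LocatedBlocks locatedBlocks = client.getLocatedBlocks(PATH.toString(), 0, BLOCK_SIZE);
  assertThat(locatedBlocks.getLocatedBlocks().size(), is(1));
  return Iterables.getOnlyElement(locatedBlocks.getLocatedBlocks());
}

private void validateNumberReplicas(int expectedReplicas) throws IOException {
  // The NameNode should count exactly the expected number of live replicas,
  // with nothing tracked as excess or corrupt
  NumberReplicas numberReplicas = blockManager.countNodes(block);
  assertThat(numberReplicas.liveReplicas(), is(expectedReplicas));
  assertThat(numberReplicas.excessReplicas(), is(0));
  assertThat(numberReplicas.corruptReplicas(), is(0));
}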
private void corruptBlock(MiniDFSCluster cluster, FileSystem fs, final Path fileName,
    int dnIndex, Block block) throws IOException {
  // Corrupt the block on datanode dnIndex. The indexes change once the
  // nodes are restarted, but the data directory will not change.
  assertTrue(cluster.corruptReplica(block.getBlockName(), dnIndex));

  DataNodeProperties dnProps = cluster.stopDataNode(0);

  // Each datanode has multiple data dirs; check each
  for (int dn = dnIndex * 2; dn < dnIndex * 2 + 2; dn++) {
    File dataDir = new File(MiniDFSCluster.getBaseDirectory() + "data");
    File scanLogFile = new File(dataDir,
        "data" + (dn + 1) + MiniDFSCluster.FINALIZED_DIR_NAME
            + "dncp_block_verification.log.curr");
    if (scanLogFile.exists()) {
      // wait up to one minute for the deletion to succeed
      for (int i = 0; !scanLogFile.delete(); i++) {
        assertTrue("Could not delete log file in one minute", i < 60);
        try {
          Thread.sleep(1000);
        } catch (InterruptedException ignored) {
        }
      }
    }
  }

  // restart the datanode so the corrupt replica will be detected
  cluster.restartDataNode(dnProps);
}
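The actual corruption is delegated to MiniDFSCluster.corruptReplica(), which conceptually amounts to overwriting bytes in the finalized replica file on disk so its contents no longer match the checksums in the companion .meta file. A standalone sketch of that idea, using the hypothetical helper name corruptBlockFile (this is an illustration, not MiniDFSCluster's real implementation):

// Hypothetical illustration of on-disk block corruption;
// assumes java.io.RandomAccessFile.
private static void corruptBlockFile(File blockFile) throws IOException {
  try (RandomAccessFile raf = new RandomAccessFile(blockFile, "rw")) {
    // Flip one byte in the middle of the block so the replica fails
    // checksum verification the next time it is read or scanned
    long mid = raf.length() / 2;
    raf.seek(mid);
    int b = raf.read();
    raf.seek(mid);
    raf.write(b ^ 0xFF);
  }
}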
/**
 * The corrupt block has to be removed when the number of valid replicas matches the replication
 * factor for the file. In this test, that condition is achieved by increasing the number of
 * good replicas by replicating onto a new DataNode.
 *
 * <p>The test strategy:
 * <ol>
 *   <li>Bring up a cluster with 3 DataNodes</li>
 *   <li>Create a file with replication factor 3</li>
 *   <li>Corrupt one replica of a block of the file</li>
 *   <li>Verify that there are still 2 good replicas and 1 corrupt replica (the corrupt replica
 *       should not be removed, since the number of good replicas (2) is less than the
 *       replication factor (3))</li>
 *   <li>Start a new DataNode</li>
 *   <li>Verify that a new replica is created and the corrupt replica is removed</li>
 * </ol>
 */
@Test
public void testByAddingAnExtraDataNode() throws IOException {
  Configuration conf = new HdfsConfiguration();
  conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000L);
  conf.set(DFSConfigKeys.DFS_NAMENODE_REPLICATION_PENDING_TIMEOUT_SEC_KEY, Integer.toString(2));
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(4).build();
  FileSystem fs = cluster.getFileSystem();
  final FSNamesystem namesystem = cluster.getNamesystem();
  DataNodeProperties dnPropsFourth = cluster.stopDataNode(3);

  try {
    final Path fileName = new Path("/foo1");
    DFSTestUtil.createFile(fs, fileName, 2, (short) 3, 0L);
    DFSTestUtil.waitReplication(fs, fileName, (short) 3);

    Block block = DFSTestUtil.getFirstBlock(fs, fileName);
    corruptBlock(cluster, fs, fileName, 0, block);

    DFSTestUtil.waitReplication(fs, fileName, (short) 2);
    assertEquals(2, namesystem.blockManager.countNodes(block).liveReplicas());
    assertEquals(1, namesystem.blockManager.countNodes(block).corruptReplicas());

    cluster.restartDataNode(dnPropsFourth);

    DFSTestUtil.waitReplication(fs, fileName, (short) 3);
    assertEquals(3, namesystem.blockManager.countNodes(block).liveReplicas());
    assertEquals(0, namesystem.blockManager.countNodes(block).corruptReplicas());
  } finally {
    cluster.shutdown();
  }
}
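waitReplication() only waits on the live replica count; the corrupt count is updated when a block report arrives, which is why the test shortens DFS_BLOCKREPORT_INTERVAL_MSEC_KEY to 1 second. If the corrupt-count assertions ever prove timing-sensitive, a polling helper in the same spirit could be used; waitForCorruptReplicas is a hypothetical name, not part of the test class above:

// Hypothetical polling helper; assumes access to namesystem.blockManager
// as in the test above.
private static void waitForCorruptReplicas(FSNamesystem namesystem, Block block,
    int expectedCorrupt, long timeoutMs) throws InterruptedException {
  long deadline = System.currentTimeMillis() + timeoutMs;
  while (namesystem.blockManager.countNodes(block).corruptReplicas() != expectedCorrupt) {
    assertTrue("Timed out waiting for " + expectedCorrupt + " corrupt replica(s)",
        System.currentTimeMillis() < deadline);
    Thread.sleep(500); // re-check after the next block report should have arrived
  }
}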
/**
 * Testing that the APPEND operation can handle token expiration when re-establishing the
 * pipeline is needed.
 */
@Test
public void testAppend() throws Exception {
  MiniDFSCluster cluster = null;
  int numDataNodes = 2;
  Configuration conf = getConf(numDataNodes);

  try {
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(numDataNodes).build();
    cluster.waitActive();
    assertEquals(numDataNodes, cluster.getDataNodes().size());

    final NameNode nn = cluster.getNameNode();
    final BlockManager bm = nn.getNamesystem().getBlockManager();
    final BlockTokenSecretManager sm = bm.getBlockTokenSecretManager();

    // set a short token lifetime (1 second)
    SecurityTestUtil.setBlockTokenLifetime(sm, 1000L);
    Path fileToAppend = new Path(FILE_TO_APPEND);
    FileSystem fs = cluster.getFileSystem();
    byte[] expected = generateBytes(FILE_SIZE);

    // write a one-byte file
    FSDataOutputStream stm = writeFile(fs, fileToAppend, (short) numDataNodes, BLOCK_SIZE);
    stm.write(expected, 0, 1);
    stm.close();

    // open the file again for append
    stm = fs.append(fileToAppend);
    int mid = expected.length - 1;
    stm.write(expected, 1, mid - 1);
    stm.hflush();

    // wait till the token used in stm expires
    Token<BlockTokenIdentifier> token = DFSTestUtil.getBlockToken(stm);
    while (!SecurityTestUtil.isBlockTokenExpired(token)) {
      try {
        Thread.sleep(10);
      } catch (InterruptedException ignored) {
      }
    }

    // remove a datanode to force re-establishing the pipeline
    cluster.stopDataNode(0);

    // append the rest of the file
    stm.write(expected, mid, expected.length - mid);
    stm.close();

    // check if the append was successful
    FSDataInputStream in5 = fs.open(fileToAppend);
    assertTrue(checkFile1(in5, expected));
  } finally {
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
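generateBytes() and checkFile1() are helpers of the enclosing test class and are not shown above. Plausible sketches, assuming java.util.Random and java.util.Arrays (both helper bodies are hypothetical):

// Hypothetical sketches of the helpers used by testAppend().
private static byte[] generateBytes(int fileSize) {
  // deterministic random content so failures are reproducible
  byte[] bytes = new byte[fileSize];
  new Random(0).nextBytes(bytes);
  return bytes;
}

private static boolean checkFile1(FSDataInputStream in, byte[] expected) throws IOException {
  // read the whole file back and compare it byte-for-byte with what was written
  byte[] actual = new byte[expected.length];
  in.readFully(0, actual);
  in.close();
  return Arrays.equals(actual, expected);
}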
/**
 * Test whether we can delay the deletion of unknown blocks in the DataNode's first several block
 * reports.
 */
@Test
public void testPendingDeleteUnknownBlocks() throws Exception {
  final int fileNum = 5; // 5 files
  final Path[] files = new Path[fileNum];
  final DataNodeProperties[] dnprops = new DataNodeProperties[REPLICATION];

  // create a group of files, each file contains 1 block
  for (int i = 0; i < fileNum; i++) {
    files[i] = new Path("/file" + i);
    DFSTestUtil.createFile(dfs, files[i], BLOCKSIZE, REPLICATION, i);
  }

  // wait until all DataNodes have replicas
  waitForReplication();
  for (int i = REPLICATION - 1; i >= 0; i--) {
    dnprops[i] = cluster.stopDataNode(i);
  }
  Thread.sleep(2000);

  // delete 2 files; we still have 3 files remaining so that we can cover
  // every DN storage
  for (int i = 0; i < 2; i++) {
    dfs.delete(files[i], true);
  }

  // restart NameNode
  cluster.restartNameNode(false);
  InvalidateBlocks invalidateBlocks = (InvalidateBlocks) Whitebox.getInternalState(
      cluster.getNamesystem().getBlockManager(), "invalidateBlocks");
  InvalidateBlocks mockIb = Mockito.spy(invalidateBlocks);
  Mockito.doReturn(1L).when(mockIb).getInvalidationDelay();
  Whitebox.setInternalState(
      cluster.getNamesystem().getBlockManager(), "invalidateBlocks", mockIb);
  Assert.assertEquals(0L, cluster.getNamesystem().getPendingDeletionBlocks());

  // restart DataNodes
  for (int i = 0; i < REPLICATION; i++) {
    cluster.restartDataNode(dnprops[i], true);
  }
  cluster.waitActive();

  for (int i = 0; i < REPLICATION; i++) {
    DataNodeTestUtils.triggerBlockReport(cluster.getDataNodes().get(i));
  }
  Thread.sleep(2000);

  // make sure we have received block reports by checking the total block #
  Assert.assertEquals(3, cluster.getNamesystem().getBlocksTotal());
  Assert.assertEquals(4, cluster.getNamesystem().getPendingDeletionBlocks());

  cluster.restartNameNode(true);
  Thread.sleep(6000);
  Assert.assertEquals(3, cluster.getNamesystem().getBlocksTotal());
  Assert.assertEquals(0, cluster.getNamesystem().getPendingDeletionBlocks());
}
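waitForReplication() is a class-level helper not shown above. A minimal sketch, assuming it simply blocks until each of the five "/file<i>" paths created by the test reaches the expected replica count (the body is a guess at the helper's behavior):

// Hypothetical sketch of the waitForReplication() helper used above;
// assumes the dfs field and REPLICATION constant from the test class.
private void waitForReplication() throws Exception {
  for (int i = 0; i < 5; i++) {
    // DFSTestUtil.waitReplication polls until the file reaches the
    // requested replication factor or times out
    DFSTestUtil.waitReplication(dfs, new Path("/file" + i), REPLICATION);
  }
}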