/** * Verify that the NameNode is able to still use <tt>READ_ONLY_SHARED</tt> replicas even when the * single NORMAL replica is offline (and the effective replication count is 0). */ @Test public void testNormalReplicaOffline() throws Exception { // Stop the datanode hosting the NORMAL replica cluster.stopDataNode(normalDataNode.getXferAddr()); // Force NameNode to detect that the datanode is down BlockManagerTestUtil.noticeDeadDatanode(cluster.getNameNode(), normalDataNode.getXferAddr()); // The live replica count should now be zero (since the NORMAL replica is offline) NumberReplicas numberReplicas = blockManager.countNodes(block); assertThat(numberReplicas.liveReplicas(), is(0)); // The block should be reported as under-replicated BlockManagerTestUtil.updateState(blockManager); assertThat(blockManager.getUnderReplicatedBlocksCount(), is(1L)); // The BlockManager should be able to heal the replication count back to 1 // by triggering an inter-datanode replication from one of the READ_ONLY_SHARED replicas BlockManagerTestUtil.computeAllPendingWork(blockManager); DFSTestUtil.waitForReplication(cluster, extendedBlock, 1, 1, 0); // There should now be 2 *locations* for the block, and 1 *replica* assertThat(getLocatedBlock().getLocations().length, is(2)); validateNumberReplicas(1); }
static DatanodeInfo chooseDatanode( final NameNode namenode, final String path, final HttpOpParam.Op op, final long openOffset, final long blocksize, Configuration conf) throws IOException { final BlockManager bm = namenode.getNamesystem().getBlockManager(); if (op == PutOpParam.Op.CREATE) { // choose a datanode near to client final DatanodeDescriptor clientNode = bm.getDatanodeManager().getDatanodeByHost(getRemoteAddress()); if (clientNode != null) { final DatanodeDescriptor[] datanodes = bm.getBlockPlacementPolicy().chooseTarget(path, 1, clientNode, null, blocksize); if (datanodes.length > 0) { return datanodes[0]; } } } else if (op == GetOpParam.Op.OPEN || op == GetOpParam.Op.GETFILECHECKSUM || op == PostOpParam.Op.APPEND) { // choose a datanode containing a replica final NamenodeProtocols np = namenode.getRpcServer(); final HdfsFileStatus status = np.getFileInfo(path); if (status == null) { throw new FileNotFoundException("File " + path + " not found."); } final long len = status.getLen(); if (op == GetOpParam.Op.OPEN) { if (openOffset < 0L || (openOffset >= len && len > 0)) { throw new IOException( "Offset=" + openOffset + " out of the range [0, " + len + "); " + op + ", path=" + path); } } if (len > 0) { final long offset = op == GetOpParam.Op.OPEN ? openOffset : len - 1; final LocatedBlocks locations = np.getBlockLocations(path, offset, 1); final int count = locations.locatedBlockCount(); if (count > 0) { return JspHelper.bestNode(locations.get(0).getLocations(), false, conf); } } } return (DatanodeDescriptor) bm.getDatanodeManager().getNetworkTopology().chooseRandom(NodeBase.ROOT); }
/**
 * Asserts the replica accounting for the test block: exactly {@code expectedReplicas} live
 * replicas, zero excess / corrupt / decommissioned / stale-node replicas and, after refreshing
 * the block manager state, zero under-replicated and zero excess blocks.
 *
 * @param expectedReplicas the number of live replicas the block manager must report
 * @throws IOException declared for callers; propagated from the block manager helpers
 */
private void validateNumberReplicas(int expectedReplicas) throws IOException {
  final NumberReplicas counts = blockManager.countNodes(block);

  // Per-block replica counters.
  assertThat(counts.liveReplicas(), is(expectedReplicas));
  assertThat(counts.excessReplicas(), is(0));
  assertThat(counts.corruptReplicas(), is(0));
  assertThat(counts.decommissionedReplicas(), is(0));
  assertThat(counts.replicasOnStaleNodes(), is(0));

  // Cluster-wide counters, read only after the state has been refreshed.
  BlockManagerTestUtil.updateState(blockManager);
  final long underReplicatedBlocks = blockManager.getUnderReplicatedBlocksCount();
  assertThat(underReplicatedBlocks, is(0L));
  final long excessBlocks = blockManager.getExcessBlocksCount();
  assertThat(excessBlocks, is(0L));
}
/** * Setup a {@link MiniDFSCluster}. Create a block with both {@link State#NORMAL} and {@link * State#READ_ONLY_SHARED} replicas. */ @Before public void setup() throws IOException, InterruptedException { conf = new HdfsConfiguration(); SimulatedFSDataset.setFactory(conf); Configuration[] overlays = new Configuration[NUM_DATANODES]; for (int i = 0; i < overlays.length; i++) { overlays[i] = new Configuration(); if (i == RO_NODE_INDEX) { overlays[i].setEnum( SimulatedFSDataset.CONFIG_PROPERTY_STATE, i == RO_NODE_INDEX ? READ_ONLY_SHARED : NORMAL); } } cluster = new MiniDFSCluster.Builder(conf) .numDataNodes(NUM_DATANODES) .dataNodeConfOverlays(overlays) .build(); fs = cluster.getFileSystem(); blockManager = cluster.getNameNode().getNamesystem().getBlockManager(); datanodeManager = blockManager.getDatanodeManager(); client = new DFSClient( new InetSocketAddress("localhost", cluster.getNameNodePort()), cluster.getConfiguration(0)); for (int i = 0; i < NUM_DATANODES; i++) { DataNode dataNode = cluster.getDataNodes().get(i); validateStorageState( BlockManagerTestUtil.getStorageReportsForDatanode( datanodeManager.getDatanode(dataNode.getDatanodeId())), i == RO_NODE_INDEX ? 
READ_ONLY_SHARED : NORMAL); } // Create a 1 block file DFSTestUtil.createFile(fs, PATH, BLOCK_SIZE, BLOCK_SIZE, BLOCK_SIZE, (short) 1, seed); LocatedBlock locatedBlock = getLocatedBlock(); extendedBlock = locatedBlock.getBlock(); block = extendedBlock.getLocalBlock(); assertThat(locatedBlock.getLocations().length, is(1)); normalDataNode = locatedBlock.getLocations()[0]; readOnlyDataNode = datanodeManager.getDatanode(cluster.getDataNodes().get(RO_NODE_INDEX).getDatanodeId()); assertThat(normalDataNode, is(not(readOnlyDataNode))); validateNumberReplicas(1); // Inject the block into the datanode with READ_ONLY_SHARED storage cluster.injectBlocks(0, RO_NODE_INDEX, Collections.singleton(block)); // There should now be 2 *locations* for the block // Must wait until the NameNode has processed the block report for the injected blocks waitForLocations(2); }
/** * Verify that corrupt <tt>READ_ONLY_SHARED</tt> replicas aren't counted towards the corrupt * replicas total. */ @Test public void testReadOnlyReplicaCorrupt() throws Exception { // "Corrupt" a READ_ONLY_SHARED replica by reporting it as a bad replica client.reportBadBlocks( new LocatedBlock[] { new LocatedBlock(extendedBlock, new DatanodeInfo[] {readOnlyDataNode}) }); // There should now be only 1 *location* for the block as the READ_ONLY_SHARED is corrupt waitForLocations(1); // However, the corrupt READ_ONLY_SHARED replica should *not* affect the overall corrupt // replicas count NumberReplicas numberReplicas = blockManager.countNodes(block); assertThat(numberReplicas.corruptReplicas(), is(0)); }
/** * Test case that reduces replication of a file with a lot of blocks and then fails over right * after those blocks enter the DN invalidation queues on the active. Ensures that fencing is * correct and no replicas are lost. */ @Test public void testNNClearsCommandsOnFailoverWithReplChanges() throws Exception { // Make lots of blocks to increase chances of triggering a bug. DFSTestUtil.createFile(fs, TEST_FILE_PATH, 30 * SMALL_BLOCK, (short) 1, 1L); banner("rolling NN1's edit log, forcing catch-up"); HATestUtil.waitForStandbyToCatchUp(nn1, nn2); // Get some new replicas reported so that NN2 now considers // them over-replicated and schedules some more deletions nn1.getRpcServer().setReplication(TEST_FILE, (short) 2); while (BlockManagerTestUtil.getComputedDatanodeWork(nn1.getNamesystem().getBlockManager()) > 0) { LOG.info("Getting more replication work computed"); } BlockManager bm1 = nn1.getNamesystem().getBlockManager(); while (bm1.getPendingReplicationBlocksCount() > 0) { BlockManagerTestUtil.updateState(bm1); cluster.triggerHeartbeats(); Thread.sleep(1000); } banner("triggering BRs"); cluster.triggerBlockReports(); nn1.getRpcServer().setReplication(TEST_FILE, (short) 1); banner("computing invalidation on nn1"); BlockManagerTestUtil.computeInvalidationWork(nn1.getNamesystem().getBlockManager()); doMetasave(nn1); banner("computing invalidation on nn2"); BlockManagerTestUtil.computeInvalidationWork(nn2.getNamesystem().getBlockManager()); doMetasave(nn2); // Dump some info for debugging purposes. banner("Metadata immediately before failover"); doMetasave(nn2); // Transition nn2 to active even though nn1 still thinks it's active banner("Failing to NN2 but let NN1 continue to think it's active"); NameNodeAdapter.abortEditLogs(nn1); NameNodeAdapter.enterSafeMode(nn1, false); BlockManagerTestUtil.computeInvalidationWork(nn2.getNamesystem().getBlockManager()); cluster.transitionToActive(1); // Check that the standby picked up the replication change. 
assertEquals(1, nn2.getRpcServer().getFileInfo(TEST_FILE).getReplication()); // Dump some info for debugging purposes. banner("Metadata immediately after failover"); doMetasave(nn2); banner("Triggering heartbeats and block reports so that fencing is completed"); cluster.triggerHeartbeats(); cluster.triggerBlockReports(); banner("Metadata after nodes have all block-reported"); doMetasave(nn2); // The block should no longer be postponed. assertEquals(0, nn2.getNamesystem().getPostponedMisreplicatedBlocks()); // Wait for NN2 to enact its deletions (replication monitor has to run, etc) BlockManagerTestUtil.computeInvalidationWork(nn2.getNamesystem().getBlockManager()); HATestUtil.waitForNNToIssueDeletions(nn2); cluster.triggerHeartbeats(); HATestUtil.waitForDNDeletions(cluster); cluster.triggerDeletionReports(); assertEquals(0, nn2.getNamesystem().getUnderReplicatedBlocks()); assertEquals(0, nn2.getNamesystem().getPendingReplicationBlocks()); banner("Making sure the file is still readable"); FileSystem fs2 = cluster.getFileSystem(1); DFSTestUtil.readFile(fs2, TEST_FILE_PATH); }