/**
 * Regression test for HDFS-2742. The issue in this bug was:
 * - DN does a block report while file is open. This BR contains
 *   the block in RBW state.
 * - Standby queues the RBW state in PendingDatanodeMessages
 * - Standby processes edit logs during failover. Before fixing this bug,
 *   it was mistakenly applying the RBW reported state after the block had
 *   been completed, causing the block to get marked corrupt. Instead, we
 *   should now be applying the RBW message on OP_ADD, and then the
 *   FINALIZED message on OP_CLOSE.
 */
@Test
public void testBlockReportsWhileFileBeingWritten() throws Exception {
  FSDataOutputStream out = fs.create(TEST_FILE_PATH);
  try {
    AppendTestUtil.write(out, 0, 10);
    out.hflush();

    // Block report will include the RBW replica, but will be
    // queued on the StandbyNode.
    cluster.triggerBlockReports();
  } finally {
    IOUtils.closeStream(out);
  }

  cluster.transitionToStandby(0);
  cluster.transitionToActive(1);

  // Verify that no replicas are marked corrupt, and that the
  // file is readable from the failed-over standby.
  BlockManagerTestUtil.updateState(nn1.getNamesystem().getBlockManager());
  BlockManagerTestUtil.updateState(nn2.getNamesystem().getBlockManager());
  assertEquals(0, nn1.getNamesystem().getCorruptReplicaBlocks());
  assertEquals(0, nn2.getNamesystem().getCorruptReplicaBlocks());

  DFSTestUtil.readFile(fs, TEST_FILE_PATH);
}
private void checkForCorruptOpenFiles(FileStatus file, List<FileStatus> corruptFiles)
    throws IOException {
  String filePath = file.getPath().toUri().getPath();

  if (file.isDir()) {
    for (FileStatus fileStatus : nn.namesystem.dir.getListing(filePath)) {
      checkForCorruptOpenFiles(fileStatus, corruptFiles);
    }
  } else {
    LeaseManager.Lease lease =
        nn.getNamesystem().leaseManager.getLeaseByPath(filePath);
    // Condition:
    //   1. lease has expired hard limit
    //   2. the file is open for write
    //   3. the last block has 0 locations
    if (lease != null && lease.expiredHardLimit()) {
      LocatedBlocks blocks =
          nn.getNamesystem().getBlockLocations(filePath, 0, file.getLen());
      List<LocatedBlock> locatedBlockList = blocks.getLocatedBlocks();
      LocatedBlock lastBlock = locatedBlockList.get(locatedBlockList.size() - 1);

      if (blocks.isUnderConstruction() && lastBlock.getLocations().length == 0) {
        corruptFiles.add(file);
      }
    }
  }
}
private void doMetasave(NameNode nn2) {
  nn2.getNamesystem().writeLock();
  try {
    PrintWriter pw = new PrintWriter(System.err);
    nn2.getNamesystem().getBlockManager().metaSave(pw);
    pw.flush();
  } finally {
    nn2.getNamesystem().writeUnlock();
  }
}
@Test
public void testDnFencing() throws Exception {
  // Create a file with replication level 3.
  DFSTestUtil.createFile(fs, TEST_FILE_PATH, 30 * SMALL_BLOCK, (short) 3, 1L);
  ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, TEST_FILE_PATH);

  // Drop its replication count to 1, so it becomes over-replicated.
  // Then compute the invalidation of the extra blocks and trigger
  // heartbeats so the invalidations are flushed to the DNs.
  nn1.getRpcServer().setReplication(TEST_FILE, (short) 1);
  BlockManagerTestUtil.computeInvalidationWork(nn1.getNamesystem().getBlockManager());
  cluster.triggerHeartbeats();

  // Transition nn2 to active even though nn1 still thinks it's active.
  banner("Failing to NN2 but let NN1 continue to think it's active");
  NameNodeAdapter.abortEditLogs(nn1);
  NameNodeAdapter.enterSafeMode(nn1, false);
  cluster.transitionToActive(1);

  // Check that the standby picked up the replication change.
  assertEquals(1, nn2.getRpcServer().getFileInfo(TEST_FILE).getReplication());

  // Dump some info for debugging purposes.
  banner("NN2 Metadata immediately after failover");
  doMetasave(nn2);

  // Even though NN2 considers the blocks over-replicated, it should
  // postpone the block invalidation because the DNs are still "stale".
  assertEquals(30, nn2.getNamesystem().getPostponedMisreplicatedBlocks());

  banner("Triggering heartbeats and block reports so that fencing is completed");
  cluster.triggerHeartbeats();
  cluster.triggerBlockReports();

  banner("Metadata after nodes have all block-reported");
  doMetasave(nn2);

  // The blocks should no longer be postponed.
  assertEquals(0, nn2.getNamesystem().getPostponedMisreplicatedBlocks());

  // Wait for NN2 to enact its deletions (replication monitor has to run, etc)
  BlockManagerTestUtil.computeInvalidationWork(nn2.getNamesystem().getBlockManager());
  cluster.triggerHeartbeats();
  HATestUtil.waitForDNDeletions(cluster);
  cluster.triggerDeletionReports();
  assertEquals(0, nn2.getNamesystem().getUnderReplicatedBlocks());
  assertEquals(0, nn2.getNamesystem().getPendingReplicationBlocks());

  banner("Making sure the file is still readable");
  FileSystem fs2 = cluster.getFileSystem(1);
  DFSTestUtil.readFile(fs2, TEST_FILE_PATH);

  banner("Waiting for the actual block files to get deleted from DNs.");
  waitForTrueReplication(cluster, block, 1);
}
/**
 * Another regression test for HDFS-2742. This tests the following sequence:
 * - DN does a block report while file is open. This BR contains
 *   the block in RBW state.
 * - The block report is delayed in reaching the standby.
 * - The file is closed.
 * - The standby processes the OP_ADD and OP_CLOSE operations before
 *   the RBW block report arrives.
 * - The standby should not mark the block as corrupt.
 */
@Test
public void testRBWReportArrivesAfterEdits() throws Exception {
  final CountDownLatch brFinished = new CountDownLatch(1);
  DelayAnswer delayer = new GenericTestUtils.DelayAnswer(LOG) {
    @Override
    protected Object passThrough(InvocationOnMock invocation) throws Throwable {
      try {
        return super.passThrough(invocation);
      } finally {
        // inform the test that our block report went through.
        brFinished.countDown();
      }
    }
  };

  FSDataOutputStream out = fs.create(TEST_FILE_PATH);
  try {
    AppendTestUtil.write(out, 0, 10);
    out.hflush();

    DataNode dn = cluster.getDataNodes().get(0);
    DatanodeProtocolClientSideTranslatorPB spy = DataNodeTestUtils.spyOnBposToNN(dn, nn2);

    Mockito.doAnswer(delayer)
        .when(spy)
        .blockReport(
            Mockito.<DatanodeRegistration>anyObject(),
            Mockito.anyString(),
            Mockito.<StorageBlockReport[]>anyObject());
    dn.scheduleAllBlockReport(0);
    delayer.waitForCall();
  } finally {
    IOUtils.closeStream(out);
  }

  cluster.transitionToStandby(0);
  cluster.transitionToActive(1);

  delayer.proceed();
  brFinished.await();

  // Verify that no replicas are marked corrupt, and that the
  // file is readable from the failed-over standby.
  BlockManagerTestUtil.updateState(nn1.getNamesystem().getBlockManager());
  BlockManagerTestUtil.updateState(nn2.getNamesystem().getBlockManager());
  assertEquals(0, nn1.getNamesystem().getCorruptReplicaBlocks());
  assertEquals(0, nn2.getNamesystem().getCorruptReplicaBlocks());

  DFSTestUtil.readFile(fs, TEST_FILE_PATH);
}
/**
 * Test that, when a block is re-opened for append, the related datanode
 * messages are correctly queued by the SBN because they have future
 * states and genstamps.
 */
@Test
public void testQueueingWithAppend() throws Exception {
  int numQueued = 0;
  int numDN = cluster.getDataNodes().size();

  FSDataOutputStream out = fs.create(TEST_FILE_PATH);
  try {
    AppendTestUtil.write(out, 0, 10);
    out.hflush();

    // Opening the file will report RBW replicas, but they will be
    // queued on the StandbyNode.
    numQueued += numDN; // RBW messages
  } finally {
    IOUtils.closeStream(out);
    numQueued += numDN; // blockReceived messages
  }

  cluster.triggerBlockReports();
  numQueued += numDN;

  try {
    out = fs.append(TEST_FILE_PATH);
    AppendTestUtil.write(out, 10, 10);
    // RBW replicas once it's opened for append
    numQueued += numDN;
  } finally {
    IOUtils.closeStream(out);
    numQueued += numDN; // blockReceived
  }

  cluster.triggerBlockReports();
  numQueued += numDN;

  assertEquals(numQueued,
      cluster.getNameNode(1).getNamesystem().getPendingDataNodeMessageCount());

  cluster.transitionToStandby(0);
  cluster.transitionToActive(1);

  // Verify that no replicas are marked corrupt, and that the
  // file is readable from the failed-over standby.
  BlockManagerTestUtil.updateState(nn1.getNamesystem().getBlockManager());
  BlockManagerTestUtil.updateState(nn2.getNamesystem().getBlockManager());
  assertEquals(0, nn1.getNamesystem().getCorruptReplicaBlocks());
  assertEquals(0, nn2.getNamesystem().getCorruptReplicaBlocks());

  AppendTestUtil.check(fs, TEST_FILE_PATH, 20);
}
/**
 * Verify the token with the NameNode, if available, and create a
 * UserGroupInformation from it.
 *
 * <p>Code in this function is copied from JspHelper.getTokenUGI
 *
 * @param identifier Delegation token identifier
 * @param password Delegation token password
 * @param kind the kind of token
 * @param service the service for this token
 * @param servletContext Jetty servlet context which contains the NN address
 * @throws SecurityException Thrown when authentication fails
 */
private static void verifyToken(
    byte[] identifier, byte[] password, Text kind, Text service, ServletContext servletContext) {
  try {
    Token<DelegationTokenIdentifier> token =
        new Token<DelegationTokenIdentifier>(identifier, password, kind, service);
    ByteArrayInputStream buf = new ByteArrayInputStream(token.getIdentifier());
    DataInputStream in = new DataInputStream(buf);
    DelegationTokenIdentifier id = new DelegationTokenIdentifier();
    id.readFields(in);

    final NameNode nn = NameNodeHttpServer.getNameNodeFromContext(servletContext);
    if (nn != null) {
      nn.getNamesystem().verifyToken(id, token.getPassword());
    }

    UserGroupInformation userGroupInformation = id.getUser();
    userGroupInformation.addToken(token);
    LOG.debug(
        "user " + userGroupInformation.getUserName()
            + " (" + userGroupInformation.getShortUserName()
            + ") authenticated");

    // re-login if necessary
    userGroupInformation.checkTGTAndReloginFromKeytab();
  } catch (IOException e) {
    throw new SecurityException("Failed to verify delegation token " + e, e);
  }
}
static {
  try {
    FileSystem.setDefaultUri(CONF, "hdfs://localhost:0");
    CONF.set("dfs.http.address", "0.0.0.0:0");
    NameNode.format(CONF);
    namenode = new NameNode(CONF);
  } catch (IOException e) {
    e.printStackTrace();
    throw (RuntimeException) new RuntimeException().initCause(e);
  }

  FSNamesystem fsNamesystem = namenode.getNamesystem();
  replicator = fsNamesystem.blockManager.replicator;
  cluster = fsNamesystem.clusterMap;

  // construct network topology
  for (int i = 0; i < NUM_OF_DATANODES; i++) {
    cluster.add(dataNodes[i]);
  }
  for (int i = 0; i < NUM_OF_DATANODES; i++) {
    dataNodes[i].updateHeartbeat(
        2 * FSConstants.MIN_BLOCKS_FOR_WRITE * BLOCK_SIZE, 0L,
        2 * FSConstants.MIN_BLOCKS_FOR_WRITE * BLOCK_SIZE, 0);
  }
}
/**
 * @return true if safemode is not running, or if safemode has already
 *     initialized the replication queues
 */
public static boolean safeModeInitializedReplQueues(NameNode nn) {
  SafeModeInfo smi = nn.getNamesystem().getSafeModeInfoForTests();
  if (smi == null) {
    return true;
  }
  return smi.initializedReplQueues;
}
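// Illustrative-only sketch (not part of the original source): one way a test
// might use the helper above, polling until the standby's replication queues
// are initialized instead of sleeping for a fixed time. The method name, the
// 30-second timeout, and the assumption that it lives alongside the helper
// are all hypothetical.
private static void waitForInitializedReplQueues(NameNode nn) throws Exception {
  long deadline = System.currentTimeMillis() + 30000;
  while (!safeModeInitializedReplQueues(nn)) {
    if (System.currentTimeMillis() > deadline) {
      throw new AssertionError("Replication queues were not initialized in time");
    }
    Thread.sleep(100);
  }
}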
/**
 * @return the number of blocks marked safe by safemode, or -1 if safemode
 *     is not running.
 */
public static int getSafeModeSafeBlocks(NameNode nn) throws IOException {
  SafeModeInfo smi = nn.getNamesystem().getSafeModeInfoForTests();
  if (smi == null) {
    return -1;
  }
  return smi.blockSafe();
}
private static DatanodeInfo chooseDatanode(
    final NameNode namenode, final String path, final HttpOpParam.Op op, final long openOffset)
    throws IOException {
  if (op == GetOpParam.Op.OPEN
      || op == GetOpParam.Op.GETFILECHECKSUM
      || op == PostOpParam.Op.APPEND) {
    final HdfsFileStatus status = namenode.getFileInfo(path);
    final long len = status.getLen();
    if (op == GetOpParam.Op.OPEN && (openOffset < 0L || openOffset >= len)) {
      throw new IOException(
          "Offset=" + openOffset + " out of the range [0, " + len + "); " + op
              + ", path=" + path);
    }

    if (len > 0) {
      final long offset = op == GetOpParam.Op.OPEN ? openOffset : len - 1;
      final LocatedBlocks locations = namenode.getBlockLocations(path, offset, 1);
      final int count = locations.locatedBlockCount();
      if (count > 0) {
        return JspHelper.bestNode(locations.get(0));
      }
    }
  }

  return namenode.getNamesystem().getRandomDatanode();
}
private void listCorruptOpenFiles() throws IOException {
  int matchedCorruptFilesCount = 0;
  // directory representation of path
  String pathdir = path.endsWith(Path.SEPARATOR) ? path : path + Path.SEPARATOR;
  FileStatus pathFileStatus = nn.getNamesystem().getFileInfo(pathdir);
  List<FileStatus> corruptFileStatusList = new ArrayList<FileStatus>();
  checkForCorruptOpenFiles(pathFileStatus, corruptFileStatusList);

  for (FileStatus fileStatus : corruptFileStatusList) {
    String currentPath = fileStatus.getPath().toString();
    if (currentPath.startsWith(pathdir) || currentPath.equals(path)) {
      matchedCorruptFilesCount++;

      // print the header before listing first item
      if (matchedCorruptFilesCount == 1) {
        out.println("Here are a few files that may be corrupted:");
        out.println("===========================================");
      }

      out.println(currentPath);
    }
  }

  out.println();
  out.println(buildSummaryResultForListCorruptFiles(matchedCorruptFilesCount, path));
}
static DatanodeInfo chooseDatanode(
    final NameNode namenode,
    final String path,
    final HttpOpParam.Op op,
    final long openOffset,
    final long blocksize,
    Configuration conf)
    throws IOException {
  final BlockManager bm = namenode.getNamesystem().getBlockManager();

  if (op == PutOpParam.Op.CREATE) {
    // choose a datanode near to client
    final DatanodeDescriptor clientNode =
        bm.getDatanodeManager().getDatanodeByHost(getRemoteAddress());
    if (clientNode != null) {
      final DatanodeDescriptor[] datanodes =
          bm.getBlockPlacementPolicy().chooseTarget(path, 1, clientNode, null, blocksize);
      if (datanodes.length > 0) {
        return datanodes[0];
      }
    }
  } else if (op == GetOpParam.Op.OPEN
      || op == GetOpParam.Op.GETFILECHECKSUM
      || op == PostOpParam.Op.APPEND) {
    // choose a datanode containing a replica
    final NamenodeProtocols np = namenode.getRpcServer();
    final HdfsFileStatus status = np.getFileInfo(path);
    if (status == null) {
      throw new FileNotFoundException("File " + path + " not found.");
    }
    final long len = status.getLen();
    if (op == GetOpParam.Op.OPEN) {
      if (openOffset < 0L || (openOffset >= len && len > 0)) {
        throw new IOException(
            "Offset=" + openOffset + " out of the range [0, " + len + "); " + op
                + ", path=" + path);
      }
    }

    if (len > 0) {
      final long offset = op == GetOpParam.Op.OPEN ? openOffset : len - 1;
      final LocatedBlocks locations = np.getBlockLocations(path, offset, 1);
      final int count = locations.locatedBlockCount();
      if (count > 0) {
        return JspHelper.bestNode(locations.get(0).getLocations(), false, conf);
      }
    }
  }

  return (DatanodeDescriptor)
      bm.getDatanodeManager().getNetworkTopology().chooseRandom(NodeBase.ROOT);
}
/** @return a randomly chosen datanode. */
static DatanodeDescriptor getRandomDatanode(final NameNode namenode) {
  return (DatanodeDescriptor) namenode
      .getNamesystem()
      .getBlockManager()
      .getDatanodeManager()
      .getNetworkTopology()
      .chooseRandom(NodeBase.ROOT);
}
/**
 * Test that the APPEND operation can handle token expiration when
 * re-establishing the pipeline is needed.
 */
@Test
public void testAppend() throws Exception {
  MiniDFSCluster cluster = null;
  int numDataNodes = 2;
  Configuration conf = getConf(numDataNodes);

  try {
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(numDataNodes).build();
    cluster.waitActive();
    assertEquals(numDataNodes, cluster.getDataNodes().size());

    final NameNode nn = cluster.getNameNode();
    final BlockManager bm = nn.getNamesystem().getBlockManager();
    final BlockTokenSecretManager sm = bm.getBlockTokenSecretManager();

    // set a short token lifetime (1 second)
    SecurityTestUtil.setBlockTokenLifetime(sm, 1000L);

    Path fileToAppend = new Path(FILE_TO_APPEND);
    FileSystem fs = cluster.getFileSystem();
    byte[] expected = generateBytes(FILE_SIZE);

    // write a one-byte file
    FSDataOutputStream stm = writeFile(fs, fileToAppend, (short) numDataNodes, BLOCK_SIZE);
    stm.write(expected, 0, 1);
    stm.close();

    // open the file again for append
    stm = fs.append(fileToAppend);
    int mid = expected.length - 1;
    stm.write(expected, 1, mid - 1);
    stm.hflush();

    // wait till the token used in stm expires
    Token<BlockTokenIdentifier> token = DFSTestUtil.getBlockToken(stm);
    while (!SecurityTestUtil.isBlockTokenExpired(token)) {
      try {
        Thread.sleep(10);
      } catch (InterruptedException ignored) {
      }
    }

    // remove a datanode to force re-establishing the pipeline
    cluster.stopDataNode(0);

    // append the rest of the file
    stm.write(expected, mid, expected.length - mid);
    stm.close();

    // check if the append was successful
    FSDataInputStream in5 = fs.open(fileToAppend);
    assertTrue(checkFile1(in5, expected));
  } finally {
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
public NameNodeRpcServer(Configuration conf, NameNode nn) throws IOException {
  this.nn = nn;
  this.namesystem = nn.getNamesystem();
  this.metrics = NameNode.getNameNodeMetrics();

  int handlerCount =
      conf.getInt(DFS_NAMENODE_HANDLER_COUNT_KEY, DFS_NAMENODE_HANDLER_COUNT_DEFAULT);
  InetSocketAddress socAddr = nn.getRpcServerAddress(conf);

  InetSocketAddress dnSocketAddr = nn.getServiceRpcServerAddress(conf);
  if (dnSocketAddr != null) {
    int serviceHandlerCount =
        conf.getInt(
            DFS_NAMENODE_SERVICE_HANDLER_COUNT_KEY, DFS_NAMENODE_SERVICE_HANDLER_COUNT_DEFAULT);
    this.serviceRpcServer =
        RPC.getServer(
            NamenodeProtocols.class,
            this,
            dnSocketAddr.getHostName(),
            dnSocketAddr.getPort(),
            serviceHandlerCount,
            false,
            conf,
            namesystem.getDelegationTokenSecretManager());
    this.serviceRPCAddress = this.serviceRpcServer.getListenerAddress();
    nn.setRpcServiceServerAddress(conf, serviceRPCAddress);
  } else {
    serviceRpcServer = null;
    serviceRPCAddress = null;
  }

  this.server =
      RPC.getServer(
          NamenodeProtocols.class,
          this,
          socAddr.getHostName(),
          socAddr.getPort(),
          handlerCount,
          false,
          conf,
          namesystem.getDelegationTokenSecretManager());

  // set service-level authorization security policy
  if (serviceAuthEnabled =
      conf.getBoolean(CommonConfigurationKeys.HADOOP_SECURITY_AUTHORIZATION, false)) {
    this.server.refreshServiceAcl(conf, new HDFSPolicyProvider());
    if (this.serviceRpcServer != null) {
      this.serviceRpcServer.refreshServiceAcl(conf, new HDFSPolicyProvider());
    }
  }

  // The rpc-server port can be ephemeral... ensure we have the correct info
  this.rpcAddress = this.server.getListenerAddress();
  nn.setRpcServerAddress(conf, rpcAddress);
}
private void listCorruptFileBlocks() throws IOException {
  Collection<FSNamesystem.CorruptFileBlockInfo> corruptFiles =
      namenode.getNamesystem().listCorruptFileBlocks(path, currentCookie);
  int numCorruptFiles = corruptFiles.size();
  String filler;
  if (numCorruptFiles > 0) {
    filler = Integer.toString(numCorruptFiles);
  } else if (currentCookie[0].equals("0")) {
    filler = "no";
  } else {
    filler = "no more";
  }
  out.println("Cookie:\t" + currentCookie[0]);
  for (FSNamesystem.CorruptFileBlockInfo c : corruptFiles) {
    out.println(c.toString());
  }
  out.println("\n\nThe filesystem under path '" + path + "' has " + filler + " CORRUPT files");
  out.println();
}
/**
 * Ensure that the given NameNode marks the specified DataNode as entirely dead/expired.
 *
 * @param nn the NameNode to manipulate
 * @param dnName the name of the DataNode
 */
public static void noticeDeadDatanode(NameNode nn, String dnName) {
  FSNamesystem namesystem = nn.getNamesystem();
  namesystem.writeLock();
  try {
    DatanodeManager dnm = namesystem.getBlockManager().getDatanodeManager();
    HeartbeatManager hbm = dnm.getHeartbeatManager();
    DatanodeDescriptor[] dnds = hbm.getDatanodes();
    DatanodeDescriptor theDND = null;
    for (DatanodeDescriptor dnd : dnds) {
      if (dnd.getXferAddr().equals(dnName)) {
        theDND = dnd;
      }
    }
    Assert.assertNotNull("Could not find DN with name: " + dnName, theDND);

    synchronized (hbm) {
      DFSTestUtil.setDatanodeDead(theDND);
      hbm.heartbeatCheck();
    }
  } finally {
    namesystem.writeUnlock();
  }
}
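// Illustrative-only sketch (not part of the original source): a hypothetical
// JUnit fragment showing how the helper above might be used. It assumes a
// MiniDFSCluster field named "cluster", that the helper is visible from the
// test class, and that DataNode#getDatanodeId()#getXferAddr() is available to
// obtain the transfer address the helper matches against.
@Test
public void testExampleNoticeDeadDatanode() throws Exception {
  DataNode dn = cluster.getDataNodes().get(0);
  String dnName = dn.getDatanodeId().getXferAddr();
  // Force the NameNode to mark the DN dead without waiting for the
  // heartbeat-expiry interval.
  noticeDeadDatanode(cluster.getNameNode(), dnName);
}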
@BeforeClass
public static void setupCluster() throws IOException {
  Configuration conf = new HdfsConfiguration();
  final String[] racks = {"/rack1", "/rack1", "/rack1", "/rack2", "/rack2", "/rack2"};
  storages = DFSTestUtil.createDatanodeStorageInfos(racks);
  dataNodes = DFSTestUtil.toDatanodeDescriptor(storages);
  FileSystem.setDefaultUri(conf, "hdfs://localhost:0");
  conf.set(DFSConfigKeys.DFS_NAMENODE_HTTP_ADDRESS_KEY, "0.0.0.0:0");
  File baseDir = PathUtils.getTestDir(TestReplicationPolicy.class);
  conf.set(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY, new File(baseDir, "name").getPath());
  conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_AVOID_STALE_DATANODE_FOR_READ_KEY, true);
  conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_AVOID_STALE_DATANODE_FOR_WRITE_KEY, true);
  conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_REPLICATION_CONSIDERLOAD_KEY, true);
  DFSTestUtil.formatNameNode(conf);
  namenode = new NameNode(conf);
  int blockSize = 1024;

  dnrList = new ArrayList<DatanodeRegistration>();
  dnManager = namenode.getNamesystem().getBlockManager().getDatanodeManager();

  // Register DNs
  for (int i = 0; i < 6; i++) {
    DatanodeRegistration dnr =
        new DatanodeRegistration(
            dataNodes[i],
            new StorageInfo(NodeType.DATA_NODE),
            new ExportedBlockKeys(),
            VersionInfo.getVersion());
    dnrList.add(dnr);
    dnManager.registerDatanode(dnr);
    dataNodes[i].getStorageInfos()[0].setUtilizationForTesting(
        2 * HdfsServerConstants.MIN_BLOCKS_FOR_WRITE * blockSize, 0L,
        2 * HdfsServerConstants.MIN_BLOCKS_FOR_WRITE * blockSize, 0L);
    dataNodes[i].updateHeartbeat(
        BlockManagerTestUtil.getStorageReportsForDatanode(dataNodes[i]), 0L, 0L, 0, 0, null);
  }
}
public static HdfsFileStatus getFileInfo(NameNode namenode, String src, boolean resolveLink)
    throws AccessControlException, UnresolvedLinkException, StandbyException, IOException {
  return namenode.getNamesystem().getFileInfo(src, resolveLink);
}
/** Get block locations within the specified range. */
public static LocatedBlocks getBlockLocations(
    NameNode namenode, String src, long offset, long length) throws IOException {
  return namenode.getNamesystem().getBlockLocations(src, offset, length, false, true, true);
}
/**
 * Test case that reduces replication of a file with a lot of blocks and then
 * fails over right after those blocks enter the DN invalidation queues on the
 * active. Ensures that fencing is correct and no replicas are lost.
 */
@Test
public void testNNClearsCommandsOnFailoverWithReplChanges() throws Exception {
  // Make lots of blocks to increase chances of triggering a bug.
  DFSTestUtil.createFile(fs, TEST_FILE_PATH, 30 * SMALL_BLOCK, (short) 1, 1L);

  banner("rolling NN1's edit log, forcing catch-up");
  HATestUtil.waitForStandbyToCatchUp(nn1, nn2);

  // Get some new replicas reported so that NN2 now considers
  // them over-replicated and schedules some more deletions
  nn1.getRpcServer().setReplication(TEST_FILE, (short) 2);
  while (BlockManagerTestUtil.getComputedDatanodeWork(nn1.getNamesystem().getBlockManager()) > 0) {
    LOG.info("Getting more replication work computed");
  }

  BlockManager bm1 = nn1.getNamesystem().getBlockManager();
  while (bm1.getPendingReplicationBlocksCount() > 0) {
    BlockManagerTestUtil.updateState(bm1);
    cluster.triggerHeartbeats();
    Thread.sleep(1000);
  }

  banner("triggering BRs");
  cluster.triggerBlockReports();

  nn1.getRpcServer().setReplication(TEST_FILE, (short) 1);

  banner("computing invalidation on nn1");
  BlockManagerTestUtil.computeInvalidationWork(nn1.getNamesystem().getBlockManager());
  doMetasave(nn1);

  banner("computing invalidation on nn2");
  BlockManagerTestUtil.computeInvalidationWork(nn2.getNamesystem().getBlockManager());
  doMetasave(nn2);

  // Dump some info for debugging purposes.
  banner("Metadata immediately before failover");
  doMetasave(nn2);

  // Transition nn2 to active even though nn1 still thinks it's active
  banner("Failing to NN2 but let NN1 continue to think it's active");
  NameNodeAdapter.abortEditLogs(nn1);
  NameNodeAdapter.enterSafeMode(nn1, false);

  BlockManagerTestUtil.computeInvalidationWork(nn2.getNamesystem().getBlockManager());
  cluster.transitionToActive(1);

  // Check that the standby picked up the replication change.
  assertEquals(1, nn2.getRpcServer().getFileInfo(TEST_FILE).getReplication());

  // Dump some info for debugging purposes.
  banner("Metadata immediately after failover");
  doMetasave(nn2);

  banner("Triggering heartbeats and block reports so that fencing is completed");
  cluster.triggerHeartbeats();
  cluster.triggerBlockReports();

  banner("Metadata after nodes have all block-reported");
  doMetasave(nn2);

  // The block should no longer be postponed.
  assertEquals(0, nn2.getNamesystem().getPostponedMisreplicatedBlocks());

  // Wait for NN2 to enact its deletions (replication monitor has to run, etc)
  BlockManagerTestUtil.computeInvalidationWork(nn2.getNamesystem().getBlockManager());

  HATestUtil.waitForNNToIssueDeletions(nn2);
  cluster.triggerHeartbeats();
  HATestUtil.waitForDNDeletions(cluster);
  cluster.triggerDeletionReports();
  assertEquals(0, nn2.getNamesystem().getUnderReplicatedBlocks());
  assertEquals(0, nn2.getNamesystem().getPendingReplicationBlocks());

  banner("Making sure the file is still readable");
  FileSystem fs2 = cluster.getFileSystem(1);
  DFSTestUtil.readFile(fs2, TEST_FILE_PATH);
}
public static boolean mkdirs(
    NameNode namenode, String src, PermissionStatus permissions, boolean createParent)
    throws UnresolvedLinkException, IOException {
  return namenode.getNamesystem().mkdirs(src, permissions, createParent);
}
public static void enterSafeMode(NameNode namenode, boolean resourcesLow) throws IOException {
  namenode.getNamesystem().enterSafeMode(resourcesLow);
}
/**
 * Test case which restarts the standby node in such a way that, when it exits
 * safemode, it will want to invalidate a bunch of over-replicated block
 * replicas. Ensures that if we failover at this point it won't lose data.
 */
@Test
public void testNNClearsCommandsOnFailoverAfterStartup() throws Exception {
  // Make lots of blocks to increase chances of triggering a bug.
  DFSTestUtil.createFile(fs, TEST_FILE_PATH, 30 * SMALL_BLOCK, (short) 3, 1L);

  banner("Shutting down NN2");
  cluster.shutdownNameNode(1);

  banner("Setting replication to 1, rolling edit log.");
  nn1.getRpcServer().setReplication(TEST_FILE, (short) 1);
  nn1.getRpcServer().rollEditLog();

  // Start NN2 again. When it starts up, it will see all of the
  // blocks as over-replicated, since it has the metadata for
  // replication=1, but the DNs haven't yet processed the deletions.
  banner("Starting NN2 again.");
  cluster.restartNameNode(1);
  nn2 = cluster.getNameNode(1);

  banner("triggering BRs");
  cluster.triggerBlockReports();

  // We expect that both NN1 and NN2 will have some number of
  // deletions queued up for the DNs.
  banner("computing invalidation on nn1");
  BlockManagerTestUtil.computeInvalidationWork(nn1.getNamesystem().getBlockManager());

  banner("computing invalidation on nn2");
  BlockManagerTestUtil.computeInvalidationWork(nn2.getNamesystem().getBlockManager());

  // Dump some info for debugging purposes.
  banner("Metadata immediately before failover");
  doMetasave(nn2);

  // Transition nn2 to active even though nn1 still thinks it's active
  banner("Failing to NN2 but let NN1 continue to think it's active");
  NameNodeAdapter.abortEditLogs(nn1);
  NameNodeAdapter.enterSafeMode(nn1, false);

  cluster.transitionToActive(1);

  // Check that the standby picked up the replication change.
  assertEquals(1, nn2.getRpcServer().getFileInfo(TEST_FILE).getReplication());

  // Dump some info for debugging purposes.
  banner("Metadata immediately after failover");
  doMetasave(nn2);

  banner("Triggering heartbeats and block reports so that fencing is completed");
  cluster.triggerHeartbeats();
  cluster.triggerBlockReports();

  banner("Metadata after nodes have all block-reported");
  doMetasave(nn2);

  // The block should no longer be postponed.
  assertEquals(0, nn2.getNamesystem().getPostponedMisreplicatedBlocks());

  // Wait for NN2 to enact its deletions (replication monitor has to run, etc)
  BlockManagerTestUtil.computeInvalidationWork(nn2.getNamesystem().getBlockManager());

  HATestUtil.waitForNNToIssueDeletions(nn2);
  cluster.triggerHeartbeats();
  HATestUtil.waitForDNDeletions(cluster);
  cluster.triggerDeletionReports();
  assertEquals(0, nn2.getNamesystem().getUnderReplicatedBlocks());
  assertEquals(0, nn2.getNamesystem().getPendingReplicationBlocks());

  banner("Making sure the file is still readable");
  FileSystem fs2 = cluster.getFileSystem(1);
  DFSTestUtil.readFile(fs2, TEST_FILE_PATH);
}
public static void leaveSafeMode(NameNode namenode) throws IOException {
  namenode.getNamesystem().leaveSafeMode();
}
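// Illustrative-only sketch (not part of the original source): a hypothetical
// test fragment pairing the enterSafeMode/leaveSafeMode helpers above,
// assuming a NameNode field named "nn" and that the helpers live in
// NameNodeAdapter (the class name other snippets in this section use when
// calling them). It forces safe mode on, checks that the namesystem reports
// it, and then releases it.
@Test
public void testExampleSafeModeToggle() throws Exception {
  NameNodeAdapter.enterSafeMode(nn, false);
  assertTrue(nn.getNamesystem().isInSafeMode());
  NameNodeAdapter.leaveSafeMode(nn);
  assertFalse(nn.getNamesystem().isInSafeMode());
}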
protected void doTestRead(Configuration conf, MiniDFSCluster cluster, boolean isStriped)
    throws Exception {
  final int numDataNodes = cluster.getDataNodes().size();
  final NameNode nn = cluster.getNameNode();
  final NamenodeProtocols nnProto = nn.getRpcServer();
  final BlockManager bm = nn.getNamesystem().getBlockManager();
  final BlockTokenSecretManager sm = bm.getBlockTokenSecretManager();

  // set a short token lifetime (1 second) initially
  SecurityTestUtil.setBlockTokenLifetime(sm, 1000L);

  Path fileToRead = new Path(FILE_TO_READ);
  FileSystem fs = cluster.getFileSystem();
  byte[] expected = generateBytes(FILE_SIZE);
  createFile(fs, fileToRead, expected);

  /*
   * setup for testing expiration handling of cached tokens
   */

  // read using blockSeekTo(). Acquired tokens are cached in in1
  FSDataInputStream in1 = fs.open(fileToRead);
  assertTrue(checkFile1(in1, expected));
  // read using blockSeekTo(). Acquired tokens are cached in in2
  FSDataInputStream in2 = fs.open(fileToRead);
  assertTrue(checkFile1(in2, expected));
  // read using fetchBlockByteRange(). Acquired tokens are cached in in3
  FSDataInputStream in3 = fs.open(fileToRead);
  assertTrue(checkFile2(in3, expected));

  /*
   * testing READ interface on DN using a BlockReader
   */
  DFSClient client = null;
  try {
    client = new DFSClient(new InetSocketAddress("localhost", cluster.getNameNodePort()), conf);
  } finally {
    if (client != null) client.close();
  }
  List<LocatedBlock> locatedBlocks =
      nnProto.getBlockLocations(FILE_TO_READ, 0, FILE_SIZE).getLocatedBlocks();
  LocatedBlock lblock = locatedBlocks.get(0); // first block
  // verify token is not expired
  assertFalse(isBlockTokenExpired(lblock));
  // read with valid token, should succeed
  tryRead(conf, lblock, true);

  /*
   * wait till myToken and all cached tokens in in1, in2 and in3 expire
   */
  while (!isBlockTokenExpired(lblock)) {
    try {
      Thread.sleep(10);
    } catch (InterruptedException ignored) {
    }
  }

  /*
   * continue testing READ interface on DN using a BlockReader
   */

  // verify token is expired
  assertTrue(isBlockTokenExpired(lblock));
  // read should fail
  tryRead(conf, lblock, false);
  // use a valid new token
  bm.setBlockToken(lblock, BlockTokenIdentifier.AccessMode.READ);
  // read should succeed
  tryRead(conf, lblock, true);
  // use a token with wrong blockID
  long rightId = lblock.getBlock().getBlockId();
  long wrongId = rightId + 1;
  lblock.getBlock().setBlockId(wrongId);
  bm.setBlockToken(lblock, BlockTokenIdentifier.AccessMode.READ);
  lblock.getBlock().setBlockId(rightId);
  // read should fail
  tryRead(conf, lblock, false);
  // use a token with wrong access modes
  bm.setBlockToken(lblock, BlockTokenIdentifier.AccessMode.WRITE);
  // read should fail
  tryRead(conf, lblock, false);

  // set a long token lifetime for future tokens
  SecurityTestUtil.setBlockTokenLifetime(sm, 600 * 1000L);

  /*
   * testing that when cached tokens are expired, DFSClient will re-fetch
   * tokens transparently for READ.
   */

  // confirm all tokens cached in in1 are expired by now
  List<LocatedBlock> lblocks = DFSTestUtil.getAllBlocks(in1);
  for (LocatedBlock blk : lblocks) {
    assertTrue(isBlockTokenExpired(blk));
  }
  // verify blockSeekTo() is able to re-fetch token transparently
  in1.seek(0);
  assertTrue(checkFile1(in1, expected));

  // confirm all tokens cached in in2 are expired by now
  List<LocatedBlock> lblocks2 = DFSTestUtil.getAllBlocks(in2);
  for (LocatedBlock blk : lblocks2) {
    assertTrue(isBlockTokenExpired(blk));
  }
  // verify blockSeekTo() is able to re-fetch token transparently (testing
  // via another interface method)
  if (isStriped) {
    // striped block doesn't support seekToNewSource
    in2.seek(0);
  } else {
    assertTrue(in2.seekToNewSource(0));
  }
  assertTrue(checkFile1(in2, expected));

  // confirm all tokens cached in in3 are expired by now
  List<LocatedBlock> lblocks3 = DFSTestUtil.getAllBlocks(in3);
  for (LocatedBlock blk : lblocks3) {
    assertTrue(isBlockTokenExpired(blk));
  }
  // verify fetchBlockByteRange() is able to re-fetch token transparently
  assertTrue(checkFile2(in3, expected));

  /*
   * testing that after datanodes are restarted on the same ports, cached
   * tokens should still work and there is no need to fetch new tokens from
   * namenode. This test should run while namenode is down (to make sure no
   * new tokens can be fetched from namenode).
   */

  // restart datanodes on the same ports that they currently use
  assertTrue(cluster.restartDataNodes(true));
  cluster.waitActive();
  assertEquals(numDataNodes, cluster.getDataNodes().size());
  cluster.shutdownNameNode(0);

  // confirm tokens cached in in1 are still valid
  lblocks = DFSTestUtil.getAllBlocks(in1);
  for (LocatedBlock blk : lblocks) {
    assertFalse(isBlockTokenExpired(blk));
  }
  // verify blockSeekTo() still works (forced to use cached tokens)
  in1.seek(0);
  assertTrue(checkFile1(in1, expected));

  // confirm tokens cached in in2 are still valid
  lblocks2 = DFSTestUtil.getAllBlocks(in2);
  for (LocatedBlock blk : lblocks2) {
    assertFalse(isBlockTokenExpired(blk));
  }
  // verify blockSeekTo() still works (forced to use cached tokens)
  if (isStriped) {
    in2.seek(0);
  } else {
    in2.seekToNewSource(0);
  }
  assertTrue(checkFile1(in2, expected));

  // confirm tokens cached in in3 are still valid
  lblocks3 = DFSTestUtil.getAllBlocks(in3);
  for (LocatedBlock blk : lblocks3) {
    assertFalse(isBlockTokenExpired(blk));
  }
  // verify fetchBlockByteRange() still works (forced to use cached tokens)
  assertTrue(checkFile2(in3, expected));

  /*
   * testing that when namenode is restarted, cached tokens should still
   * work and there is no need to fetch new tokens from namenode. Like the
   * previous test, this test should also run while namenode is down. The
   * setup for this test depends on the previous test.
   */

  // restart the namenode and then shut it down for test
  cluster.restartNameNode(0);
  cluster.shutdownNameNode(0);

  // verify blockSeekTo() still works (forced to use cached tokens)
  in1.seek(0);
  assertTrue(checkFile1(in1, expected));
  // verify again blockSeekTo() still works (forced to use cached tokens)
  if (isStriped) {
    in2.seek(0);
  } else {
    in2.seekToNewSource(0);
  }
  assertTrue(checkFile1(in2, expected));
  // verify fetchBlockByteRange() still works (forced to use cached tokens)
  assertTrue(checkFile2(in3, expected));

  /*
   * testing that after both namenode and datanodes got restarted (namenode
   * first, followed by datanodes), DFSClient can't access DN without
   * re-fetching tokens and is able to re-fetch tokens transparently. The
   * setup of this test depends on the previous test.
   */

  // restore the cluster and restart the datanodes for test
  cluster.restartNameNode(0);
  assertTrue(cluster.restartDataNodes(true));
  cluster.waitActive();
  assertEquals(numDataNodes, cluster.getDataNodes().size());

  // shutdown namenode so that DFSClient can't get new tokens from namenode
  cluster.shutdownNameNode(0);

  // verify blockSeekTo() fails (cached tokens become invalid)
  in1.seek(0);
  assertFalse(checkFile1(in1, expected));
  // verify fetchBlockByteRange() fails (cached tokens become invalid)
  assertFalse(checkFile2(in3, expected));

  // restart the namenode to allow DFSClient to re-fetch tokens
  cluster.restartNameNode(0);
  // verify blockSeekTo() works again (by transparently re-fetching
  // tokens from namenode)
  in1.seek(0);
  assertTrue(checkFile1(in1, expected));
  if (isStriped) {
    in2.seek(0);
  } else {
    in2.seekToNewSource(0);
  }
  assertTrue(checkFile1(in2, expected));
  // verify fetchBlockByteRange() works again (by transparently
  // re-fetching tokens from namenode)
  assertTrue(checkFile2(in3, expected));

  /*
   * testing that when datanodes are restarted on different ports, DFSClient
   * is able to re-fetch tokens transparently to connect to them
   */

  // restart datanodes on newly assigned ports
  assertTrue(cluster.restartDataNodes(false));
  cluster.waitActive();
  assertEquals(numDataNodes, cluster.getDataNodes().size());
  // verify blockSeekTo() is able to re-fetch token transparently
  in1.seek(0);
  assertTrue(checkFile1(in1, expected));
  // verify blockSeekTo() is able to re-fetch token transparently
  if (isStriped) {
    in2.seek(0);
  } else {
    in2.seekToNewSource(0);
  }
  assertTrue(checkFile1(in2, expected));
  // verify fetchBlockByteRange() is able to re-fetch token transparently
  assertTrue(checkFile2(in3, expected));
}
/** Check files on DFS, starting from the indicated path. */
public void fsck() {
  final long startTime = Time.now();
  try {
    String msg =
        "FSCK started by " + UserGroupInformation.getCurrentUser() + " from " + remoteAddress
            + " for path " + path + " at " + new Date();
    LOG.info(msg);
    out.println(msg);
    namenode.getNamesystem().logFsckEvent(path, remoteAddress);

    if (snapshottableDirs != null) {
      SnapshottableDirectoryStatus[] snapshotDirs =
          namenode.getRpcServer().getSnapshottableDirListing();
      if (snapshotDirs != null) {
        for (SnapshottableDirectoryStatus dir : snapshotDirs) {
          snapshottableDirs.add(dir.getFullPath().toString());
        }
      }
    }

    final HdfsFileStatus file = namenode.getRpcServer().getFileInfo(path);
    if (file != null) {
      if (showCorruptFileBlocks) {
        listCorruptFileBlocks();
        return;
      }

      Result res = new Result(conf);

      check(path, file, res);

      out.println(res);
      out.println(" Number of data-nodes:\t\t" + totalDatanodes);
      out.println(" Number of racks:\t\t" + networktopology.getNumOfRacks());
      out.println(
          "FSCK ended at " + new Date() + " in " + (Time.now() - startTime + " milliseconds"));

      // If there were internal errors during the fsck operation, we want to
      // return FAILURE_STATUS, even if those errors were not immediately
      // fatal. Otherwise many unit tests will pass even when there are bugs.
      if (internalError) {
        throw new IOException("fsck encountered internal errors!");
      }

      // DFSck client scans for the string HEALTHY/CORRUPT to check the status
      // of file system and return appropriate code. Changing the output
      // string might break testcases. Also note this must be the last line
      // of the report.
      if (res.isHealthy()) {
        out.print("\n\nThe filesystem under path '" + path + "' " + HEALTHY_STATUS);
      } else {
        out.print("\n\nThe filesystem under path '" + path + "' " + CORRUPT_STATUS);
      }
    } else {
      out.print("\n\nPath '" + path + "' " + NONEXISTENT_STATUS);
    }
  } catch (Exception e) {
    String errMsg = "Fsck on path '" + path + "' " + FAILURE_STATUS;
    LOG.warn(errMsg, e);
    out.println(
        "FSCK ended at " + new Date() + " in " + (Time.now() - startTime + " milliseconds"));
    out.println(e.getMessage());
    out.print("\n\n" + errMsg);
  } finally {
    out.close();
  }
}
@VisibleForTesting
void check(String parent, HdfsFileStatus file, Result res) throws IOException {
  String path = file.getFullName(parent);
  boolean isOpen = false;

  if (file.isDir()) {
    if (snapshottableDirs != null && snapshottableDirs.contains(path)) {
      String snapshotPath =
          (path.endsWith(Path.SEPARATOR) ? path : path + Path.SEPARATOR)
              + HdfsConstants.DOT_SNAPSHOT_DIR;
      HdfsFileStatus snapshotFileInfo = namenode.getRpcServer().getFileInfo(snapshotPath);
      check(snapshotPath, snapshotFileInfo, res);
    }
    byte[] lastReturnedName = HdfsFileStatus.EMPTY_NAME;
    DirectoryListing thisListing;
    if (showFiles) {
      out.println(path + " <dir>");
    }
    res.totalDirs++;
    do {
      assert lastReturnedName != null;
      thisListing = namenode.getRpcServer().getListing(path, lastReturnedName, false);
      if (thisListing == null) {
        return;
      }
      HdfsFileStatus[] files = thisListing.getPartialListing();
      for (int i = 0; i < files.length; i++) {
        check(path, files[i], res);
      }
      lastReturnedName = thisListing.getLastName();
    } while (thisListing.hasMore());
    return;
  }
  if (file.isSymlink()) {
    if (showFiles) {
      out.println(path + " <symlink>");
    }
    res.totalSymlinks++;
    return;
  }
  long fileLen = file.getLen();
  // Get block locations without updating the file access time
  // and without block access tokens
  LocatedBlocks blocks;
  try {
    blocks = namenode.getNamesystem().getBlockLocations(path, 0, fileLen, false, false, false);
  } catch (FileNotFoundException fnfe) {
    blocks = null;
  }
  if (blocks == null) { // the file is deleted
    return;
  }
  isOpen = blocks.isUnderConstruction();
  if (isOpen && !showOpenFiles) {
    // We collect these stats about open files to report with default options
    res.totalOpenFilesSize += fileLen;
    res.totalOpenFilesBlocks += blocks.locatedBlockCount();
    res.totalOpenFiles++;
    return;
  }
  res.totalFiles++;
  res.totalSize += fileLen;
  res.totalBlocks += blocks.locatedBlockCount();
  if (showOpenFiles && isOpen) {
    out.print(
        path + " " + fileLen + " bytes, " + blocks.locatedBlockCount()
            + " block(s), OPENFORWRITE: ");
  } else if (showFiles) {
    out.print(path + " " + fileLen + " bytes, " + blocks.locatedBlockCount() + " block(s): ");
  } else {
    out.print('.');
  }
  if (res.totalFiles % 100 == 0) {
    out.println();
    out.flush();
  }
  int missing = 0;
  int corrupt = 0;
  long missize = 0;
  int underReplicatedPerFile = 0;
  int misReplicatedPerFile = 0;
  StringBuilder report = new StringBuilder();
  int i = 0;
  for (LocatedBlock lBlk : blocks.getLocatedBlocks()) {
    ExtendedBlock block = lBlk.getBlock();
    boolean isCorrupt = lBlk.isCorrupt();
    String blkName = block.toString();
    DatanodeInfo[] locs = lBlk.getLocations();
    NumberReplicas numberReplicas =
        namenode.getNamesystem().getBlockManager().countNodes(block.getLocalBlock());
    int liveReplicas = numberReplicas.liveReplicas();
    res.totalReplicas += liveReplicas;
    short targetFileReplication = file.getReplication();
    res.numExpectedReplicas += targetFileReplication;
    if (liveReplicas > targetFileReplication) {
      res.excessiveReplicas += (liveReplicas - targetFileReplication);
      res.numOverReplicatedBlocks += 1;
    }
    // Check if block is Corrupt
    if (isCorrupt) {
      corrupt++;
      res.corruptBlocks++;
      out.print(
          "\n" + path + ": CORRUPT blockpool " + block.getBlockPoolId()
              + " block " + block.getBlockName() + "\n");
    }
    if (liveReplicas >= minReplication) {
      res.numMinReplicatedBlocks++;
    }
    if (liveReplicas < targetFileReplication && liveReplicas > 0) {
      res.missingReplicas += (targetFileReplication - liveReplicas);
      res.numUnderReplicatedBlocks += 1;
      underReplicatedPerFile++;
      if (!showFiles) {
        out.print("\n" + path + ": ");
      }
      out.println(
          " Under replicated " + block + ". Target Replicas is " + targetFileReplication
              + " but found " + liveReplicas + " replica(s).");
    }
    // verify block placement policy
    BlockPlacementStatus blockPlacementStatus =
        bpPolicy.verifyBlockPlacement(path, lBlk, targetFileReplication);
    if (!blockPlacementStatus.isPlacementPolicySatisfied()) {
      res.numMisReplicatedBlocks++;
      misReplicatedPerFile++;
      if (!showFiles) {
        if (underReplicatedPerFile == 0) {
          out.println();
        }
        out.print(path + ": ");
      }
      out.println(
          " Replica placement policy is violated for " + block + ". "
              + blockPlacementStatus.getErrorDescription());
    }
    report.append(i + ". " + blkName + " len=" + block.getNumBytes());
    if (liveReplicas == 0) {
      report.append(" MISSING!");
      res.addMissing(block.toString(), block.getNumBytes());
      missing++;
      missize += block.getNumBytes();
    } else {
      report.append(" repl=" + liveReplicas);
      if (showLocations || showRacks) {
        StringBuilder sb = new StringBuilder("[");
        for (int j = 0; j < locs.length; j++) {
          if (j > 0) {
            sb.append(", ");
          }
          if (showRacks) {
            sb.append(NodeBase.getPath(locs[j]));
          } else {
            sb.append(locs[j]);
          }
        }
        sb.append(']');
        report.append(" " + sb.toString());
      }
    }
    report.append('\n');
    i++;
  }
  if ((missing > 0) || (corrupt > 0)) {
    if (!showFiles && (missing > 0)) {
      out.print(
          "\n" + path + ": MISSING " + missing + " blocks of total size " + missize + " B.");
    }
    res.corruptFiles++;
    if (isOpen) {
      LOG.info("Fsck: ignoring open file " + path);
    } else {
      if (doMove) {
        copyBlocksToLostFound(parent, file, blocks);
      }
      if (doDelete) {
        deleteCorruptedFile(path);
      }
    }
  }
  if (showFiles) {
    if (missing > 0) {
      out.print(" MISSING " + missing + " blocks of total size " + missize + " B\n");
    } else if (underReplicatedPerFile == 0 && misReplicatedPerFile == 0) {
      out.print(" OK\n");
    }
    if (showBlocks) {
      out.print(report.toString() + "\n");
    }
  }
}
/**
 * Get {@link UserGroupInformation} and possibly the delegation token out of the request.
 *
 * @param context the Servlet context
 * @param request the http request
 * @param conf configuration
 * @param secureAuthMethod the AuthenticationMethod used in secure mode.
 * @param tryUgiParameter Should it try the ugi parameter?
 * @return a new user from the request
 * @throws AccessControlException if the request has no token
 */
public static UserGroupInformation getUGI(
    ServletContext context,
    HttpServletRequest request,
    Configuration conf,
    final AuthenticationMethod secureAuthMethod,
    final boolean tryUgiParameter)
    throws IOException {
  final UserGroupInformation ugi;
  final String usernameFromQuery = getUsernameFromQuery(request, tryUgiParameter);
  final String doAsUserFromQuery = request.getParameter(DoAsParam.NAME);

  if (UserGroupInformation.isSecurityEnabled()) {
    final String remoteUser = request.getRemoteUser();
    String tokenString = request.getParameter(DELEGATION_PARAMETER_NAME);
    if (tokenString != null) {
      Token<DelegationTokenIdentifier> token = new Token<DelegationTokenIdentifier>();
      token.decodeFromUrlString(tokenString);
      SecurityUtil.setTokenService(token, NameNode.getAddress(conf));
      token.setKind(DelegationTokenIdentifier.HDFS_DELEGATION_KIND);

      ByteArrayInputStream buf = new ByteArrayInputStream(token.getIdentifier());
      DataInputStream in = new DataInputStream(buf);
      DelegationTokenIdentifier id = new DelegationTokenIdentifier();
      id.readFields(in);
      if (context != null) {
        NameNode nn = (NameNode) context.getAttribute("name.node");
        if (nn != null) {
          // Verify the token.
          nn.getNamesystem()
              .getDelegationTokenSecretManager()
              .verifyToken(id, token.getPassword());
        }
      }
      ugi = id.getUser();
      if (ugi.getRealUser() == null) {
        // non-proxy case
        checkUsername(ugi.getShortUserName(), usernameFromQuery);
        checkUsername(null, doAsUserFromQuery);
      } else {
        // proxy case
        checkUsername(ugi.getRealUser().getShortUserName(), usernameFromQuery);
        checkUsername(ugi.getShortUserName(), doAsUserFromQuery);
        ProxyUsers.authorize(ugi, request.getRemoteAddr(), conf);
      }
      ugi.addToken(token);
      ugi.setAuthenticationMethod(AuthenticationMethod.TOKEN);
    } else {
      if (remoteUser == null) {
        throw new IOException("Security enabled but user not authenticated by filter");
      }
      final UserGroupInformation realUgi = UserGroupInformation.createRemoteUser(remoteUser);
      checkUsername(realUgi.getShortUserName(), usernameFromQuery);
      // This is not necessarily true, could have been auth'ed by user-facing
      // filter
      realUgi.setAuthenticationMethod(secureAuthMethod);
      ugi = initUGI(realUgi, doAsUserFromQuery, request, true, conf);
    }
  } else {
    // Security's not on, pull from url
    final UserGroupInformation realUgi =
        usernameFromQuery == null
            ? getDefaultWebUser(conf) // not specified in request
            : UserGroupInformation.createRemoteUser(usernameFromQuery);
    realUgi.setAuthenticationMethod(AuthenticationMethod.SIMPLE);
    ugi = initUGI(realUgi, doAsUserFromQuery, request, false, conf);
  }

  if (LOG.isDebugEnabled()) {
    LOG.debug("getUGI is returning: " + ugi.getShortUserName());
  }
  return ugi;
}