/** Initialize SecondaryNameNode. */ private void initialize(Configuration conf) throws IOException { // initiate Java VM metrics JvmMetrics.init("SecondaryNameNode", conf.get("session.id")); // Create connection to the namenode. shouldRun = true; nameNodeAddr = NameNode.getAddress(conf); this.conf = conf; this.namenode = (NamenodeProtocol) RPC.waitForProxy( NamenodeProtocol.class, NamenodeProtocol.versionID, nameNodeAddr, conf); // initialize checkpoint directories fsName = getInfoServer(); checkpointDirs = FSImage.getCheckpointDirs(conf, "/tmp/hadoop/dfs/namesecondary"); checkpointEditsDirs = FSImage.getCheckpointEditsDirs(conf, "/tmp/hadoop/dfs/namesecondary"); checkpointImage = new CheckpointStorage(conf); checkpointImage.recoverCreate(checkpointDirs, checkpointEditsDirs); // Initialize other scheduling parameters from the configuration checkpointPeriod = conf.getLong("fs.checkpoint.period", 3600); checkpointSize = conf.getLong("fs.checkpoint.size", 4194304); // initialize the webserver for uploading files. String infoAddr = NetUtils.getServerAddress( conf, "dfs.secondary.info.bindAddress", "dfs.secondary.info.port", "dfs.secondary.http.address"); InetSocketAddress infoSocAddr = NetUtils.createSocketAddr(infoAddr); infoBindAddress = infoSocAddr.getHostName(); int tmpInfoPort = infoSocAddr.getPort(); infoServer = new HttpServer("secondary", infoBindAddress, tmpInfoPort, tmpInfoPort == 0, conf); infoServer.setAttribute("name.system.image", checkpointImage); this.infoServer.setAttribute("name.conf", conf); infoServer.addInternalServlet("getimage", "/getimage", GetImageServlet.class); infoServer.start(); // The web-server port can be ephemeral... 
ensure we have the correct info infoPort = infoServer.getPort(); conf.set("dfs.secondary.http.address", infoBindAddress + ":" + infoPort); LOG.info("Secondary Web-server up at: " + infoBindAddress + ":" + infoPort); LOG.warn( "Checkpoint Period :" + checkpointPeriod + " secs " + "(" + checkpointPeriod / 60 + " min)"); LOG.warn( "Log Size Trigger :" + checkpointSize + " bytes " + "(" + checkpointSize / 1024 + " KB)"); }
/**
 * Test that isInStartupSafeMode returns true only during startup safemode and not also during
 * low-resource safemode.
 */
@Test
public void testStartupSafemode() throws IOException {
  Configuration conf = new Configuration();

  // The namesystem only needs an image that can hand out an edit log; both are mocks.
  FSImage mockImage = Mockito.mock(FSImage.class);
  FSEditLog mockEditLog = Mockito.mock(FSEditLog.class);
  Mockito.when(mockImage.getEditLog()).thenReturn(mockEditLog);
  FSNamesystem fsn = new FSNamesystem(conf, mockImage);

  // Once safemode has been left, neither flavour of safemode may be reported.
  fsn.leaveSafeMode();
  assertFalse(
      "After leaving safemode FSNamesystem.isInStartupSafeMode still " + "returned true",
      fsn.isInStartupSafeMode());
  assertFalse(
      "After leaving safemode FSNamesystem.isInSafeMode still returned" + " true",
      fsn.isInSafeMode());

  // Re-enter safemode for low resources: in safemode, but not *startup* safemode.
  fsn.enterSafeMode(true);
  assertFalse(
      "After entering safemode due to low resources FSNamesystem."
          + "isInStartupSafeMode still returned true",
      fsn.isInStartupSafeMode());
  assertTrue(
      "After entering safemode due to low resources FSNamesystem."
          + "isInSafeMode still returned false",
      fsn.isInSafeMode());
}
/** test savenamespace in the middle of a checkpoint */ @Test public void testCheckpointWithSavenamespace() throws Exception { Configuration conf = getConf(); MiniDFSCluster cluster = new MiniDFSCluster(conf, 1, true, null); cluster.waitActive(); FSNamesystem fsn = FSNamesystem.getFSNamesystem(); // Replace the FSImage with a spy final FSImage originalImage = fsn.dir.fsImage; try { doAnEdit(fsn, 1); CheckpointSignature sig = fsn.rollEditLog(); LOG.warn("Checkpoint signature: " + sig); // Do another edit doAnEdit(fsn, 2); // Save namespace fsn.saveNamespace(true, false); // try to do a rollFSImage, this should fail because the // saveNamespace have already occured after the call to // rollFSEdit try { fsn.rollFSImage(sig); assertTrue( "The rollFSImage immediately folloing the saveName " + " command should fail. ", false); } catch (IOException e) { LOG.info( "Expected exception while invoking rollFSImage " + " after a successful call to saveNamespace." + e); } // Now shut down and restart the NN originalImage.close(); fsn.close(); cluster.shutdown(); fsn = null; // Start a new namesystem, which should be able to recover // the namespace from the previous incarnation. cluster = new MiniDFSCluster(conf, 1, false, null); cluster.waitActive(); fsn = FSNamesystem.getFSNamesystem(); // Make sure the image loaded including our edits. checkEditExists(cluster, 1); checkEditExists(cluster, 2); } finally { if (fsn != null) { fsn.close(); cluster.shutdown(); } } }
private void testSaveWhileEditsRolled(boolean dosafemode, boolean force, boolean uncompressed) throws Exception { Configuration conf = getConf(); MiniDFSCluster cluster = new MiniDFSCluster(conf, 1, true, null); cluster.waitActive(); FSNamesystem fsn = FSNamesystem.getFSNamesystem(); // Replace the FSImage with a spy FSImage originalImage = fsn.dir.fsImage; FSImage spyImage = spy(originalImage); spyImage.setStorageDirectories( FSNamesystem.getNamespaceDirs(conf), FSNamesystem.getNamespaceEditsDirs(conf)); fsn.dir.fsImage = spyImage; try { doAnEdit(fsn, 1); CheckpointSignature sig = fsn.rollEditLog(); LOG.warn("Checkpoint signature: " + sig); // Do another edit doAnEdit(fsn, 2); // Save namespace if (dosafemode) { fsn.setSafeMode(SafeModeAction.SAFEMODE_ENTER); } fsn.saveNamespace(force, uncompressed); // Now shut down and restart the NN originalImage.close(); originalImage = null; fsn.close(); fsn = null; cluster.shutdown(); // Start a new namesystem, which should be able to recover // the namespace from the previous incarnation. cluster = new MiniDFSCluster(conf, 1, false, null); cluster.waitActive(); fsn = FSNamesystem.getFSNamesystem(); // Make sure the image loaded including our edits. checkEditExists(cluster, 1); checkEditExists(cluster, 2); } finally { if (originalImage != null) { originalImage.close(); } if (fsn != null) { fsn.close(); cluster.shutdown(); } } }
/**
 * Checks that the fairness of the namesystem lock follows the {@code dfs.namenode.fslock.fair}
 * setting, for both {@code true} and {@code false}.
 */
@Test
public void testFsLockFairness() throws IOException, InterruptedException {
  final String fairnessKey = "dfs.namenode.fslock.fair";
  Configuration conf = new Configuration();

  // A mocked image that hands out a mocked edit log is all the constructor is given here.
  FSEditLog mockEditLog = Mockito.mock(FSEditLog.class);
  FSImage mockImage = Mockito.mock(FSImage.class);
  Mockito.when(mockImage.getEditLog()).thenReturn(mockEditLog);

  conf.setBoolean(fairnessKey, true);
  FSNamesystem fairNamesystem = new FSNamesystem(conf, mockImage);
  assertTrue(fairNamesystem.getFsLockForTests().isFair());

  conf.setBoolean(fairnessKey, false);
  FSNamesystem unfairNamesystem = new FSNamesystem(conf, mockImage);
  assertFalse(unfairNamesystem.getFsLockForTests().isFair());
}
/** * Test to simulate interleaved checkpointing by 2 2NNs after a storage directory has been taken * offline. The first will cause the directory to come back online, but it won't have any valid * contents. The second 2NN will then try to perform a checkpoint. The NN should not serve up the * image or edits from the restored (empty) dir. */ @Test public void testCheckpointWithRestoredDirectory() throws IOException { SecondaryNameNode secondary = null; try { cluster = new MiniDFSCluster(0, config, 1, true, false, true, null, null, null, null); cluster.waitActive(); secondary = new SecondaryNameNode(config); FSImage fsImage = cluster.getNameNode().getFSImage(); FileSystem fs = cluster.getFileSystem(); Path path1 = new Path("/", "test"); writeFile(fs, path1, 2); // Take name3 offline fsImage.getEditLog().removeEditsAndStorageDir(2); // Simulate a 2NN beginning a checkpoint, but not finishing. This will // cause name3 to be restored. cluster.getNameNode().rollEditLog(); // Now another 2NN comes along to do a full checkpoint. secondary.doCheckpoint(); // The created file should still exist in the in-memory FS state after the // checkpoint. assertTrue("File missing after checkpoint", fs.exists(path1)); secondary.shutdown(); // Restart the NN so it reloads the edits from on-disk. cluster.restartNameNode(); // The created file should still exist after the restart. assertTrue("path should still exist after restart", fs.exists(path1)); } finally { if (cluster != null) { cluster.shutdown(); } if (secondary != null) { secondary.shutdown(); } } }
@Test public void testReplQueuesActiveAfterStartupSafemode() throws IOException, InterruptedException { Configuration conf = new Configuration(); FSEditLog fsEditLog = Mockito.mock(FSEditLog.class); FSImage fsImage = Mockito.mock(FSImage.class); Mockito.when(fsImage.getEditLog()).thenReturn(fsEditLog); FSNamesystem fsNamesystem = new FSNamesystem(conf, fsImage); FSNamesystem fsn = Mockito.spy(fsNamesystem); // Make shouldPopulaeReplQueues return true HAContext haContext = Mockito.mock(HAContext.class); HAState haState = Mockito.mock(HAState.class); Mockito.when(haContext.getState()).thenReturn(haState); Mockito.when(haState.shouldPopulateReplQueues()).thenReturn(true); Whitebox.setInternalState(fsn, "haContext", haContext); // Make NameNode.getNameNodeMetrics() not return null NameNode.initMetrics(conf, NamenodeRole.NAMENODE); fsn.enterSafeMode(false); assertTrue("FSNamesystem didn't enter safemode", fsn.isInSafeMode()); assertTrue( "Replication queues were being populated during very first " + "safemode", !fsn.isPopulatingReplQueues()); fsn.leaveSafeMode(); assertTrue("FSNamesystem didn't leave safemode", !fsn.isInSafeMode()); assertTrue( "Replication queues weren't being populated even after leaving " + "safemode", fsn.isPopulatingReplQueues()); fsn.enterSafeMode(false); assertTrue("FSNamesystem didn't enter safemode", fsn.isInSafeMode()); assertTrue( "Replication queues weren't being populated after entering " + "safemode 2nd time", fsn.isPopulatingReplQueues()); }
/**
 * Merge image and edits, and verify consistency with the signature.
 *
 * <p>Opens the edit log, takes the first IMAGE and the first EDITS storage directory, loads the
 * image and then the edits from them, validates this storage against {@code sig}, and saves the
 * resulting image.
 *
 * @param sig checkpoint signature the loaded storage info is validated against
 * @throws IOException if either an image or an edits directory cannot be found, or if one of the
 *     load, validate or save calls fails
 */
private void doMerge(CheckpointSignature sig) throws IOException {
  getEditLog().open();

  // Only the first directory of each type is used, if one exists.
  Iterator<StorageDirectory> imageDirs = dirIterator(NameNodeDirType.IMAGE);
  StorageDirectory imageDir = imageDirs.hasNext() ? imageDirs.next() : null;

  Iterator<StorageDirectory> editsDirs = dirIterator(NameNodeDirType.EDITS);
  StorageDirectory editsDir = editsDirs.hasNext() ? editsDirs.next() : null;

  if (imageDir == null || editsDir == null) {
    throw new IOException("Could not locate checkpoint directories");
  }

  loadFSImage(FSImage.getImageFile(imageDir, NameNodeFile.IMAGE));
  loadFSEdits(editsDir);
  sig.validateStorageInfo(this);
  saveFSImage();
}
/**
 * Merge image and edits, and verify consistency with the signature.
 *
 * <p>Opens the edit log, optionally loads the image from the first IMAGE storage directory, loads
 * the edits from the first EDITS storage directory, validates this storage against {@code sig},
 * and saves the namespace.
 *
 * @param sig checkpoint signature the loaded storage info is validated against
 * @param loadImage if true, locate and load the image before the edits; if false, the image
 *     directory is neither looked up nor loaded
 * @throws IOException if a required image or edits directory cannot be found, or if one of the
 *     load, validate or save calls fails
 */
private void doMerge(CheckpointSignature sig, boolean loadImage) throws IOException {
  getEditLog().open();

  // The image directory is only needed, and only checked, when the image is to be loaded.
  StorageDirectory imageDir = null;
  if (loadImage) {
    Iterator<StorageDirectory> imageDirs = dirIterator(NameNodeDirType.IMAGE);
    imageDir = imageDirs.hasNext() ? imageDirs.next() : null;
    if (imageDir == null) {
      throw new IOException("Could not locate checkpoint fsimage");
    }
  }

  Iterator<StorageDirectory> editsDirs = dirIterator(NameNodeDirType.EDITS);
  StorageDirectory editsDir = editsDirs.hasNext() ? editsDirs.next() : null;
  if (editsDir == null) {
    throw new IOException("Could not locate checkpoint edits");
  }

  // Nothing is loaded until both lookups above have succeeded.
  if (loadImage) {
    loadFSImage(FSImage.getImageFile(imageDir, NameNodeFile.IMAGE));
  }
  loadFSEdits(editsDir);
  sig.validateStorageInfo(this);
  saveNamespace(false);
}
/** * Verify that a saveNamespace command brings faulty directories in fs.name.dir and fs.edit.dir * back online. */ @Test public void testReinsertnamedirsInSavenamespace() throws Exception { // create a configuration with the key to restore error // directories in fs.name.dir Configuration conf = getConf(); conf.setBoolean("dfs.namenode.name.dir.restore", true); MiniDFSCluster cluster = new MiniDFSCluster(conf, 1, true, null); cluster.waitActive(); FSNamesystem fsn = FSNamesystem.getFSNamesystem(); // Replace the FSImage with a spy FSImage originalImage = fsn.dir.fsImage; FSImage spyImage = spy(originalImage); spyImage.setStorageDirectories( FSNamesystem.getNamespaceDirs(conf), FSNamesystem.getNamespaceEditsDirs(conf)); fsn.dir.fsImage = spyImage; // inject fault // The spy throws a IOException when writing to the second directory doAnswer(new FaultySaveImage(false)) .when(spyImage) .saveFSImage((File) anyObject(), anyBoolean()); try { doAnEdit(fsn, 1); fsn.setSafeMode(SafeModeAction.SAFEMODE_ENTER); // Save namespace - this injects a fault and marks one // directory as faulty. LOG.info("Doing the first savenamespace."); fsn.saveNamespace(false, false); LOG.warn("First savenamespace sucessful."); assertTrue( "Savenamespace should have marked one directory as bad." + " But found " + spyImage.getRemovedStorageDirs().size() + " bad directories.", spyImage.getRemovedStorageDirs().size() == 1); // The next call to savenamespace should try inserting the // erroneous directory back to fs.name.dir. This command should // be successful. LOG.info("Doing the second savenamespace."); fsn.saveNamespace(false, false); LOG.warn("Second savenamespace sucessful."); assertTrue( "Savenamespace should have been successful in removing " + " bad directories from Image." 
+ " But found " + originalImage.getRemovedStorageDirs().size() + " bad directories.", originalImage.getRemovedStorageDirs().size() == 0); // Now shut down and restart the namesystem LOG.info("Shutting down fsimage."); originalImage.close(); fsn.close(); fsn = null; cluster.shutdown(); // Start a new namesystem, which should be able to recover // the namespace from the previous incarnation. LOG.info("Loading new FSmage from disk."); cluster = new MiniDFSCluster(conf, 1, false, null); cluster.waitActive(); fsn = FSNamesystem.getFSNamesystem(); // Make sure the image loaded including our edit. LOG.info("Checking reloaded image."); checkEditExists(cluster, 1); LOG.info("Reloaded image is good."); } finally { fsn.close(); cluster.shutdown(); } }
/** Remove edits and storage directories. */ public void invalidateStorage(FSImage fi) throws IOException { fi.getEditLog().removeEditsAndStorageDir(2); // name3 fi.getEditLog().removeEditsAndStorageDir(1); // name2 }