예제 #1
0
  /**
   * Initializes this SecondaryNameNode from the given configuration.
   *
   * <p>In order, this method: starts JVM metrics, obtains an RPC proxy to the namenode, sets up the
   * checkpoint storage directories, reads the checkpoint scheduling parameters, and starts the
   * embedded HTTP server that serves the {@code /getimage} servlet.
   *
   * <p>Side effect on the argument: {@code dfs.secondary.http.address} in {@code conf} is
   * overwritten with the bind address and the port reported by the HTTP server after start-up.
   *
   * @param conf configuration to read settings from; also stored in {@code this.conf}
   * @throws IOException declared for the RPC, checkpoint-storage and HTTP-server set-up calls
   */
  private void initialize(Configuration conf) throws IOException {
    // initiate Java VM metrics
    JvmMetrics.init("SecondaryNameNode", conf.get("session.id"));

    // Create connection to the namenode.
    shouldRun = true;
    nameNodeAddr = NameNode.getAddress(conf);

    this.conf = conf;
    this.namenode =
        (NamenodeProtocol)
            RPC.waitForProxy(
                NamenodeProtocol.class, NamenodeProtocol.versionID, nameNodeAddr, conf);

    // initialize checkpoint directories
    // NOTE(review): getInfoServer() is defined elsewhere; it is called only after the namenode
    // proxy above has been created - presumably it needs that proxy, confirm before reordering.
    fsName = getInfoServer();
    checkpointDirs = FSImage.getCheckpointDirs(conf, "/tmp/hadoop/dfs/namesecondary");
    checkpointEditsDirs = FSImage.getCheckpointEditsDirs(conf, "/tmp/hadoop/dfs/namesecondary");
    checkpointImage = new CheckpointStorage(conf);
    checkpointImage.recoverCreate(checkpointDirs, checkpointEditsDirs);

    // Initialize other scheduling parameters from the configuration
    // (defaults: 3600 seconds and 4194304 bytes, matching the units logged below).
    checkpointPeriod = conf.getLong("fs.checkpoint.period", 3600);
    checkpointSize = conf.getLong("fs.checkpoint.size", 4194304);

    // initialize the webserver for uploading files.
    String infoAddr =
        NetUtils.getServerAddress(
            conf,
            "dfs.secondary.info.bindAddress",
            "dfs.secondary.info.port",
            "dfs.secondary.http.address");
    InetSocketAddress infoSocAddr = NetUtils.createSocketAddr(infoAddr);
    infoBindAddress = infoSocAddr.getHostName();
    int tmpInfoPort = infoSocAddr.getPort();
    // The fourth argument is true only when the configured port is 0.
    // NOTE(review): presumably this lets the server choose a free port - confirm in HttpServer.
    infoServer = new HttpServer("secondary", infoBindAddress, tmpInfoPort, tmpInfoPort == 0, conf);
    infoServer.setAttribute("name.system.image", checkpointImage);
    this.infoServer.setAttribute("name.conf", conf);
    infoServer.addInternalServlet("getimage", "/getimage", GetImageServlet.class);
    infoServer.start();

    // The web-server port can be ephemeral... ensure we have the correct info
    infoPort = infoServer.getPort();
    conf.set("dfs.secondary.http.address", infoBindAddress + ":" + infoPort);
    LOG.info("Secondary Web-server up at: " + infoBindAddress + ":" + infoPort);

    // Routine start-up summary: logged at INFO (not WARN) because nothing is wrong here.
    LOG.info(
        "Checkpoint Period   :"
            + checkpointPeriod
            + " secs "
            + "("
            + checkpointPeriod / 60
            + " min)");
    LOG.info(
        "Log Size Trigger    :"
            + checkpointSize
            + " bytes "
            + "("
            + checkpointSize / 1024
            + " KB)");
  }
  /**
   * Test that isInStartupSafeMode returns true only during startup safemode and not also during
   * low-resource safemode.
   *
   * <p>The namesystem is built on mocked {@code FSImage}/{@code FSEditLog} instances, so no
   * cluster is started.
   *
   * @throws IOException declared by the {@code FSNamesystem} calls used here
   */
  @Test
  public void testStartupSafemode() throws IOException {
    Configuration conf = new Configuration();
    FSImage fsImage = Mockito.mock(FSImage.class);
    FSEditLog fsEditLog = Mockito.mock(FSEditLog.class);
    Mockito.when(fsImage.getEditLog()).thenReturn(fsEditLog);
    FSNamesystem fsn = new FSNamesystem(conf, fsImage);

    // Phase 1: once safemode has been left, neither flag may be set.
    fsn.leaveSafeMode();
    assertTrue(
        "After leaving safemode FSNamesystem.isInStartupSafeMode still " + "returned true",
        !fsn.isInStartupSafeMode());
    assertTrue(
        "After leaving safemode FSNamesystem.isInSafeMode still returned" + " true",
        !fsn.isInSafeMode());

    // Phase 2: re-enter safemode with the flag set to true.
    // NOTE(review): per the assertion messages below, true means "due to low resources" -
    // confirm against FSNamesystem.enterSafeMode.
    fsn.enterSafeMode(true);
    assertTrue(
        "After entering safemode due to low resources FSNamesystem."
            + "isInStartupSafeMode still returned true",
        !fsn.isInStartupSafeMode());
    assertTrue(
        "After entering safemode due to low resources FSNamesystem."
            + "isInSafeMode still returned false",
        fsn.isInSafeMode());
  }
  /**
   * Tests a saveNamespace issued in the middle of a checkpoint.
   *
   * <p>Sequence: make an edit, roll the edit log (obtaining a checkpoint signature), make a second
   * edit, save the namespace, then verify that {@code rollFSImage} with the now-stale signature
   * throws an {@link IOException}. Finally the cluster is restarted without formatting and both
   * edits are checked to be present.
   *
   * @throws Exception if cluster set-up, any namesystem operation or a verification step fails
   */
  @Test
  public void testCheckpointWithSavenamespace() throws Exception {
    Configuration conf = getConf();
    MiniDFSCluster cluster = new MiniDFSCluster(conf, 1, true, null);
    cluster.waitActive();
    FSNamesystem fsn = FSNamesystem.getFSNamesystem();

    // Keep a handle on the live FSImage so it can be closed before the restart.
    final FSImage originalImage = fsn.dir.fsImage;

    try {
      doAnEdit(fsn, 1);
      CheckpointSignature sig = fsn.rollEditLog();
      LOG.warn("Checkpoint signature: " + sig);

      // Do another edit
      doAnEdit(fsn, 2);

      // Save namespace
      fsn.saveNamespace(true, false);

      // Try to do a rollFSImage; this should fail because the
      // saveNamespace has already occurred after the call to
      // rollEditLog.
      try {
        fsn.rollFSImage(sig);
        assertTrue(
            "The rollFSImage immediately folloing the saveName " + " command should fail. ", false);
      } catch (IOException e) {
        LOG.info(
            "Expected exception while invoking rollFSImage "
                + " after a successful call to saveNamespace."
                + e);
      }

      // Now shut down and restart the NN. Each reference is cleared as soon as the resource
      // is released so that the cleanup below only touches what is still live.
      originalImage.close();
      fsn.close();
      fsn = null;
      cluster.shutdown();
      cluster = null;

      // Start a new namesystem, which should be able to recover
      // the namespace from the previous incarnation.
      cluster = new MiniDFSCluster(conf, 1, false, null);
      cluster.waitActive();
      fsn = FSNamesystem.getFSNamesystem();

      // Make sure the image loaded including our edits.
      checkEditExists(cluster, 1);
      checkEditExists(cluster, 2);
    } finally {
      // The cluster is shut down independently of fsn: if the restart fails after the new
      // cluster object exists but before fsn is re-assigned, the cluster must still be stopped.
      try {
        if (fsn != null) {
          fsn.close();
        }
      } finally {
        if (cluster != null) {
          cluster.shutdown();
        }
      }
    }
  }
  /**
   * Shared driver: saves the namespace while the edit log is in the rolled state, restarts the
   * cluster without formatting, and checks that edits made before and after the roll are both
   * present.
   *
   * @param dosafemode when true, the namesystem is put into safe mode before saving
   * @param force passed through as the first argument of {@code fsn.saveNamespace}
   * @param uncompressed passed through as the second argument of {@code fsn.saveNamespace}
   * @throws Exception if cluster set-up, any namesystem operation or a verification step fails
   */
  private void testSaveWhileEditsRolled(boolean dosafemode, boolean force, boolean uncompressed)
      throws Exception {
    Configuration conf = getConf();
    MiniDFSCluster cluster = new MiniDFSCluster(conf, 1, true, null);
    cluster.waitActive();
    FSNamesystem fsn = FSNamesystem.getFSNamesystem();

    // Replace the FSImage with a spy
    FSImage originalImage = fsn.dir.fsImage;
    FSImage spyImage = spy(originalImage);
    spyImage.setStorageDirectories(
        FSNamesystem.getNamespaceDirs(conf), FSNamesystem.getNamespaceEditsDirs(conf));
    fsn.dir.fsImage = spyImage;

    try {
      doAnEdit(fsn, 1);
      CheckpointSignature sig = fsn.rollEditLog();
      LOG.warn("Checkpoint signature: " + sig);
      // Do another edit
      doAnEdit(fsn, 2);

      // Save namespace
      if (dosafemode) {
        fsn.setSafeMode(SafeModeAction.SAFEMODE_ENTER);
      }
      fsn.saveNamespace(force, uncompressed);

      // Now shut down and restart the NN. Each reference is cleared as soon as the resource
      // is released so that the cleanup below only touches what is still live.
      originalImage.close();
      originalImage = null;
      fsn.close();
      fsn = null;
      cluster.shutdown();
      cluster = null;

      // Start a new namesystem, which should be able to recover
      // the namespace from the previous incarnation.
      cluster = new MiniDFSCluster(conf, 1, false, null);
      cluster.waitActive();
      fsn = FSNamesystem.getFSNamesystem();

      // Make sure the image loaded including our edits.
      checkEditExists(cluster, 1);
      checkEditExists(cluster, 2);
    } finally {
      // Release whatever is still live. The nesting guarantees that a failure while closing one
      // resource does not prevent the remaining ones (notably the cluster) from being released.
      try {
        if (originalImage != null) {
          originalImage.close();
        }
      } finally {
        try {
          if (fsn != null) {
            fsn.close();
          }
        } finally {
          if (cluster != null) {
            cluster.shutdown();
          }
        }
      }
    }
  }
  /**
   * Checks that the {@code dfs.namenode.fslock.fair} setting is reflected by the fairness of the
   * lock returned from {@code FSNamesystem.getFsLockForTests()}, for both {@code true} and
   * {@code false}.
   *
   * <p>The namesystem is built on mocked {@code FSImage}/{@code FSEditLog} instances.
   */
  @Test
  public void testFsLockFairness() throws IOException, InterruptedException {
    Configuration conf = new Configuration();

    // Mocked image whose edit log is also a mock.
    FSImage mockImage = Mockito.mock(FSImage.class);
    FSEditLog mockEditLog = Mockito.mock(FSEditLog.class);
    Mockito.when(mockImage.getEditLog()).thenReturn(mockEditLog);

    // Fair lock requested.
    conf.setBoolean("dfs.namenode.fslock.fair", true);
    FSNamesystem fairNamesystem = new FSNamesystem(conf, mockImage);
    assertTrue(fairNamesystem.getFsLockForTests().isFair());

    // Non-fair lock requested; a fresh namesystem is built from the updated configuration.
    conf.setBoolean("dfs.namenode.fslock.fair", false);
    FSNamesystem unfairNamesystem = new FSNamesystem(conf, mockImage);
    assertFalse(unfairNamesystem.getFsLockForTests().isFair());
  }
예제 #6
0
  /**
   * Test to simulate interleaved checkpointing by 2 2NNs after a storage directory has been taken
   * offline. The first will cause the directory to come back online, but it won't have any valid
   * contents. The second 2NN will then try to perform a checkpoint. The NN should not serve up the
   * image or edits from the restored (empty) dir.
   *
   * <p>Uses the {@code cluster} and {@code config} members declared outside this method.
   *
   * @throws IOException if cluster set-up, the checkpoint or the namenode restart fails
   */
  @Test
  public void testCheckpointWithRestoredDirectory() throws IOException {
    SecondaryNameNode secondary = null;
    try {
      cluster = new MiniDFSCluster(0, config, 1, true, false, true, null, null, null, null);
      cluster.waitActive();

      secondary = new SecondaryNameNode(config);
      FSImage fsImage = cluster.getNameNode().getFSImage();

      FileSystem fs = cluster.getFileSystem();
      Path path1 = new Path("/", "test");
      writeFile(fs, path1, 2);

      // Take name3 offline
      fsImage.getEditLog().removeEditsAndStorageDir(2);

      // Simulate a 2NN beginning a checkpoint, but not finishing. This will
      // cause name3 to be restored.
      cluster.getNameNode().rollEditLog();

      // Now another 2NN comes along to do a full checkpoint.
      secondary.doCheckpoint();

      // The created file should still exist in the in-memory FS state after the
      // checkpoint.
      assertTrue("File missing after checkpoint", fs.exists(path1));

      secondary.shutdown();
      // Already stopped: clear the reference so the cleanup below does not stop it again.
      secondary = null;

      // Restart the NN so it reloads the edits from on-disk.
      cluster.restartNameNode();

      // The created file should still exist after the restart.
      assertTrue("path should still exist after restart", fs.exists(path1));
    } finally {
      // Nested so that a failure while stopping the cluster cannot leave the secondary running.
      try {
        if (cluster != null) {
          cluster.shutdown();
        }
      } finally {
        if (secondary != null) {
          secondary.shutdown();
        }
      }
    }
  }
  /**
   * Checks the replication-queue state across safemode transitions: the queues are not populated
   * during the first safemode, are populated after leaving it, and are still populated after
   * safemode is entered a second time.
   *
   * <p>The namesystem is a Mockito spy built on mocked {@code FSImage}/{@code FSEditLog}
   * instances, with a mocked HA context injected through {@code Whitebox}.
   */
  @Test
  public void testReplQueuesActiveAfterStartupSafemode() throws IOException, InterruptedException {
    Configuration conf = new Configuration();

    FSEditLog mockEditLog = Mockito.mock(FSEditLog.class);
    FSImage mockImage = Mockito.mock(FSImage.class);
    Mockito.when(mockImage.getEditLog()).thenReturn(mockEditLog);

    FSNamesystem fsn = Mockito.spy(new FSNamesystem(conf, mockImage));

    // Make shouldPopulateReplQueues return true
    HAContext mockContext = Mockito.mock(HAContext.class);
    HAState mockState = Mockito.mock(HAState.class);
    Mockito.when(mockContext.getState()).thenReturn(mockState);
    Mockito.when(mockState.shouldPopulateReplQueues()).thenReturn(true);
    Whitebox.setInternalState(fsn, "haContext", mockContext);

    // Make NameNode.getNameNodeMetrics() not return null
    NameNode.initMetrics(conf, NamenodeRole.NAMENODE);

    // First safemode: queues must not be populated yet.
    fsn.enterSafeMode(false);
    assertTrue("FSNamesystem didn't enter safemode", fsn.isInSafeMode());
    assertTrue(
        "Replication queues were being populated during very first safemode",
        !fsn.isPopulatingReplQueues());

    // Leaving safemode: queues must now be populated.
    fsn.leaveSafeMode();
    assertTrue("FSNamesystem didn't leave safemode", !fsn.isInSafeMode());
    assertTrue(
        "Replication queues weren't being populated even after leaving safemode",
        fsn.isPopulatingReplQueues());

    // Second safemode: queues must stay populated.
    fsn.enterSafeMode(false);
    assertTrue("FSNamesystem didn't enter safemode", fsn.isInSafeMode());
    assertTrue(
        "Replication queues weren't being populated after entering safemode 2nd time",
        fsn.isPopulatingReplQueues());
  }
예제 #8
0
 /**
  * Merges the checkpoint image with the checkpoint edits and validates the result against the
  * given signature before saving a new image.
  *
  * <p>Only the first IMAGE directory and the first EDITS directory returned by
  * {@code dirIterator} are used.
  *
  * @param sig checkpoint signature the loaded storage info is validated against
  * @throws IOException if either directory is missing, or if a load/validate/save call fails
  */
 private void doMerge(CheckpointSignature sig) throws IOException {
   getEditLog().open();

   // First image directory, or null when there is none.
   Iterator<StorageDirectory> imageDirs = dirIterator(NameNodeDirType.IMAGE);
   StorageDirectory imageDir = imageDirs.hasNext() ? imageDirs.next() : null;

   // First edits directory, or null when there is none.
   Iterator<StorageDirectory> editsDirs = dirIterator(NameNodeDirType.EDITS);
   StorageDirectory editsDir = editsDirs.hasNext() ? editsDirs.next() : null;

   if (imageDir == null || editsDir == null) {
     throw new IOException("Could not locate checkpoint directories");
   }

   loadFSImage(FSImage.getImageFile(imageDir, NameNodeFile.IMAGE));
   loadFSEdits(editsDir);
   sig.validateStorageInfo(this);
   saveFSImage();
 }
예제 #9
0
 /**
  * Merges the checkpoint edits (and optionally the checkpoint image) and validates the result
  * against the given signature before saving the namespace.
  *
  * <p>Only the first IMAGE directory (when {@code loadImage} is true) and the first EDITS
  * directory returned by {@code dirIterator} are used.
  *
  * @param sig checkpoint signature the loaded storage info is validated against
  * @param loadImage when true, the image file is located and loaded before the edits; when false,
  *     only the edits are loaded
  * @throws IOException if a required directory is missing, or if a load/validate/save call fails
  */
 private void doMerge(CheckpointSignature sig, boolean loadImage) throws IOException {
   getEditLog().open();

   // Locate the image directory first (only when it is needed) so that a missing image is
   // reported before the edits directories are looked at.
   StorageDirectory imageDir = null;
   if (loadImage) {
     Iterator<StorageDirectory> imageDirs = dirIterator(NameNodeDirType.IMAGE);
     if (imageDirs.hasNext()) {
       imageDir = imageDirs.next();
     }
     if (imageDir == null) {
       throw new IOException("Could not locate checkpoint fsimage");
     }
   }

   Iterator<StorageDirectory> editsDirs = dirIterator(NameNodeDirType.EDITS);
   StorageDirectory editsDir = editsDirs.hasNext() ? editsDirs.next() : null;
   if (editsDir == null) {
     throw new IOException("Could not locate checkpoint edits");
   }

   if (loadImage) {
     loadFSImage(FSImage.getImageFile(imageDir, NameNodeFile.IMAGE));
   }
   loadFSEdits(editsDir);
   sig.validateStorageInfo(this);
   saveNamespace(false);
 }
  /**
   * Verify that a saveNamespace command brings faulty directories in fs.name.dir and fs.edit.dir
   * back online.
   *
   * <p>Sequence: enable directory restore, install a spy FSImage with an injected save fault, run
   * saveNamespace once (expect exactly one removed storage directory), run it again (expect none),
   * then restart the cluster without formatting and check that the edit is present.
   *
   * @throws Exception if cluster set-up, any namesystem operation or a verification step fails
   */
  @Test
  public void testReinsertnamedirsInSavenamespace() throws Exception {
    // create a configuration with the key to restore error
    // directories in fs.name.dir
    Configuration conf = getConf();
    conf.setBoolean("dfs.namenode.name.dir.restore", true);

    MiniDFSCluster cluster = new MiniDFSCluster(conf, 1, true, null);
    cluster.waitActive();
    FSNamesystem fsn = FSNamesystem.getFSNamesystem();

    // Replace the FSImage with a spy
    FSImage originalImage = fsn.dir.fsImage;
    FSImage spyImage = spy(originalImage);
    spyImage.setStorageDirectories(
        FSNamesystem.getNamespaceDirs(conf), FSNamesystem.getNamespaceEditsDirs(conf));
    fsn.dir.fsImage = spyImage;

    // inject fault
    // The spy throws a IOException when writing to the second directory
    // (behaviour supplied by FaultySaveImage, which is defined outside this method).
    doAnswer(new FaultySaveImage(false))
        .when(spyImage)
        .saveFSImage((File) anyObject(), anyBoolean());

    try {
      doAnEdit(fsn, 1);
      fsn.setSafeMode(SafeModeAction.SAFEMODE_ENTER);

      // Save namespace - this  injects a fault and marks one
      // directory as faulty.
      LOG.info("Doing the first savenamespace.");
      fsn.saveNamespace(false, false);
      LOG.warn("First savenamespace sucessful.");
      assertTrue(
          "Savenamespace should have marked one directory as bad."
              + " But found "
              + spyImage.getRemovedStorageDirs().size()
              + " bad directories.",
          spyImage.getRemovedStorageDirs().size() == 1);

      // The next call to savenamespace should try inserting the
      // erroneous directory back to fs.name.dir. This command should
      // be successful.
      LOG.info("Doing the second savenamespace.");
      fsn.saveNamespace(false, false);
      LOG.warn("Second savenamespace sucessful.");
      assertTrue(
          "Savenamespace should have been successful in removing "
              + " bad directories from Image."
              + " But found "
              + originalImage.getRemovedStorageDirs().size()
              + " bad directories.",
          originalImage.getRemovedStorageDirs().size() == 0);

      // Now shut down and restart the namesystem. Each reference is cleared as soon as the
      // resource is released so that the cleanup below only touches what is still live.
      LOG.info("Shutting down fsimage.");
      originalImage.close();
      fsn.close();
      fsn = null;
      cluster.shutdown();
      cluster = null;

      // Start a new namesystem, which should be able to recover
      // the namespace from the previous incarnation.
      LOG.info("Loading new FSmage from disk.");
      cluster = new MiniDFSCluster(conf, 1, false, null);
      cluster.waitActive();
      fsn = FSNamesystem.getFSNamesystem();

      // Make sure the image loaded including our edit.
      LOG.info("Checking reloaded image.");
      checkEditExists(cluster, 1);
      LOG.info("Reloaded image is good.");
    } finally {
      // fsn is null between the shutdown and a successful restart; guard against that so a
      // restart failure is reported as itself instead of being masked by a NullPointerException.
      try {
        if (fsn != null) {
          fsn.close();
        }
      } finally {
        if (cluster != null) {
          cluster.shutdown();
        }
      }
    }
  }
예제 #11
0
 /**
  * Removes the edits and storage directories at indices 2 and 1 (in that order) from the edit log
  * of the given image.
  *
  * @param fi image whose edit log the directories are removed from
  * @throws IOException declared for the removal calls
  */
 public void invalidateStorage(FSImage fi) throws IOException {
   // Walk the indices downwards: 2 is name3, 1 is name2.
   for (int dirIndex = 2; dirIndex >= 1; dirIndex--) {
     fi.getEditLog().removeEditsAndStorageDir(dirIndex);
   }
 }