Beispiel #1
0
  /** Create a new checkpoint */
  void doCheckpoint() throws IOException {

    LOG.info("Checkpoint starting");

    // Do the required initialization of the merge work area.
    startCheckpoint();

    // Tell the namenode to start logging transactions in a new edit file
    // Returns a token that would be used to upload the merged image.
    CheckpointSignature sig = (CheckpointSignature) namenode.rollEditLog();

    // error simulation code for junit test
    if (ErrorSimulator.getErrorSimulation(0)) {
      throw new IOException("Simulating error0 " + "after creating edits.new");
    }

    boolean loadImage = downloadCheckpointFiles(sig); // Fetch fsimage and edits
    doMerge(sig, loadImage); // Do the merge

    //
    // Upload the new image into the NameNode. Then tell the Namenode
    // to make this new uploaded image as the most current image.
    //
    putFSImage(sig);

    // error simulation code for junit test
    if (ErrorSimulator.getErrorSimulation(1)) {
      throw new IOException("Simulating error1 " + "after uploading new image to NameNode");
    }

    namenode.rollFsImage(new CheckpointSignature(checkpointImage));
    checkpointImage.endCheckpoint();

    LOG.info("Checkpoint done. New Image Size: " + checkpointImage.getFsImageName().length());
  }
Beispiel #2
0
 /** Merge downloaded image and edits and write the new image into current storage directory. */
 private void doMerge(CheckpointSignature sig, boolean loadImage) throws IOException {
   if (loadImage) { // create an empty namespace if new image
     namesystem = new FSNamesystem(checkpointImage, conf);
   }
   assert namesystem.dir.fsImage == checkpointImage;
   checkpointImage.doMerge(sig, loadImage);
 }
Beispiel #3
0
  /** Initialize SecondaryNameNode. */
  private void initialize(Configuration conf) throws IOException {
    // initiate Java VM metrics
    JvmMetrics.init("SecondaryNameNode", conf.get("session.id"));

    // Create connection to the namenode.
    shouldRun = true;
    nameNodeAddr = NameNode.getAddress(conf);

    this.conf = conf;
    this.namenode =
        (NamenodeProtocol)
            RPC.waitForProxy(
                NamenodeProtocol.class, NamenodeProtocol.versionID, nameNodeAddr, conf);

    // initialize checkpoint directories
    fsName = getInfoServer();
    checkpointDirs = FSImage.getCheckpointDirs(conf, "/tmp/hadoop/dfs/namesecondary");
    checkpointEditsDirs = FSImage.getCheckpointEditsDirs(conf, "/tmp/hadoop/dfs/namesecondary");
    checkpointImage = new CheckpointStorage(conf);
    checkpointImage.recoverCreate(checkpointDirs, checkpointEditsDirs);

    // Initialize other scheduling parameters from the configuration
    checkpointPeriod = conf.getLong("fs.checkpoint.period", 3600);
    checkpointSize = conf.getLong("fs.checkpoint.size", 4194304);

    // initialize the webserver for uploading files.
    String infoAddr =
        NetUtils.getServerAddress(
            conf,
            "dfs.secondary.info.bindAddress",
            "dfs.secondary.info.port",
            "dfs.secondary.http.address");
    InetSocketAddress infoSocAddr = NetUtils.createSocketAddr(infoAddr);
    infoBindAddress = infoSocAddr.getHostName();
    int tmpInfoPort = infoSocAddr.getPort();
    infoServer = new HttpServer("secondary", infoBindAddress, tmpInfoPort, tmpInfoPort == 0, conf);
    infoServer.setAttribute("name.system.image", checkpointImage);
    this.infoServer.setAttribute("name.conf", conf);
    infoServer.addInternalServlet("getimage", "/getimage", GetImageServlet.class);
    infoServer.start();

    // The web-server port can be ephemeral... ensure we have the correct info
    infoPort = infoServer.getPort();
    conf.set("dfs.secondary.http.address", infoBindAddress + ":" + infoPort);
    LOG.info("Secondary Web-server up at: " + infoBindAddress + ":" + infoPort);
    LOG.warn(
        "Checkpoint Period   :"
            + checkpointPeriod
            + " secs "
            + "("
            + checkpointPeriod / 60
            + " min)");
    LOG.warn(
        "Log Size Trigger    :"
            + checkpointSize
            + " bytes "
            + "("
            + checkpointSize / 1024
            + " KB)");
  }
Beispiel #4
0
  /**
   * Download <code>fsimage</code> and <code>edits</code> files from the name-node.
   *
   * @return true if a new image has been downloaded and needs to be loaded
   * @throws IOException
   */
  private boolean downloadCheckpointFiles(CheckpointSignature sig) throws IOException {

    checkpointImage.cTime = sig.cTime;
    checkpointImage.checkpointTime = sig.checkpointTime;

    boolean downloadImage = true;
    String fileid;
    File[] srcNames;
    if (sig.imageDigest.equals(checkpointImage.imageDigest)) {
      downloadImage = false;
      LOG.info("Image has not changed. Will not download image.");
    } else {
      // get fsimage
      srcNames = checkpointImage.getImageFiles();
      assert srcNames.length > 0 : "No checkpoint targets.";
      fileid = "getimage=1";
      TransferFsImage.getFileClient(fsName, fileid, srcNames, false);
      checkpointImage.imageDigest = sig.imageDigest;
      LOG.info(
          "Downloaded file " + srcNames[0].getName() + " size " + srcNames[0].length() + " bytes.");
    }
    // get edits file
    fileid = "getedit=1";
    srcNames = checkpointImage.getEditsFiles();
    assert srcNames.length > 0 : "No checkpoint targets.";
    TransferFsImage.getFileClient(fsName, fileid, srcNames, false);
    LOG.info(
        "Downloaded file " + srcNames[0].getName() + " size " + srcNames[0].length() + " bytes.");

    checkpointImage.checkpointUploadDone(null);

    return downloadImage;
  }
Beispiel #5
0
 /** Shut down this instance of the datanode. Returns only after shutdown is complete. */
 public void shutdown() {
   shouldRun = false;
   try {
     if (infoServer != null) infoServer.stop();
   } catch (Exception e) {
     LOG.warn("Exception shutting down SecondaryNameNode", e);
   }
   try {
     if (checkpointImage != null) checkpointImage.close();
   } catch (IOException e) {
     LOG.warn(StringUtils.stringifyException(e));
   }
 }
Beispiel #6
0
  //
  // The main work loop
  //
  public void run() {

    //
    // Poll the Namenode (once every 5 minutes) to find the size of the
    // pending edit log.
    //
    long period = 5 * 60; // 5 minutes
    long lastCheckpointTime = 0;
    if (checkpointPeriod < period) {
      period = checkpointPeriod;
    }

    while (shouldRun) {
      try {
        Thread.sleep(1000 * period);
      } catch (InterruptedException ie) {
        // do nothing
      }
      if (!shouldRun) {
        break;
      }
      try {
        long now = System.currentTimeMillis();

        long size = namenode.getEditLogSize();
        if (size >= checkpointSize || now >= lastCheckpointTime + 1000 * checkpointPeriod) {
          doCheckpoint();
          lastCheckpointTime = now;
        }
      } catch (IOException e) {
        LOG.error("Exception in doCheckpoint: ");
        LOG.error(StringUtils.stringifyException(e));
        e.printStackTrace();
        checkpointImage.imageDigest = null;
      } catch (Throwable e) {
        LOG.error("Throwable Exception in doCheckpoint: ");
        LOG.error(StringUtils.stringifyException(e));
        e.printStackTrace();
        Runtime.getRuntime().exit(-1);
      }
    }
  }
Beispiel #7
0
 private void startCheckpoint() throws IOException {
   checkpointImage.unlockAll();
   checkpointImage.getEditLog().close();
   checkpointImage.recoverCreate(checkpointDirs, checkpointEditsDirs);
   checkpointImage.startCheckpoint();
 }