Example #1
0
  /**
   * Runs one complete checkpoint cycle: prepares the merge work area, has the namenode roll its
   * edit log, downloads and merges the checkpoint files, uploads the merged image and finally has
   * the namenode roll its fsimage.
   *
   * @throws IOException if any step fails, or when one of the test-only simulated errors is
   *     switched on
   */
  void doCheckpoint() throws IOException {
    LOG.info("Checkpoint starting");

    // Set up the merge work area before anything is fetched.
    startCheckpoint();

    // Rolling the edit log makes the namenode log to a new edit file; the returned signature is
    // the token passed to the download, merge and upload steps below.
    final CheckpointSignature signature = (CheckpointSignature) namenode.rollEditLog();

    // Fault-injection hook for the junit tests.
    if (ErrorSimulator.getErrorSimulation(0)) {
      throw new IOException("Simulating error0 after creating edits.new");
    }

    // Fetch fsimage and edits, then merge them.
    final boolean imageNeedsLoading = downloadCheckpointFiles(signature);
    doMerge(signature, imageNeedsLoading);

    // Upload the merged image to the namenode ...
    putFSImage(signature);

    // Fault-injection hook for the junit tests.
    if (ErrorSimulator.getErrorSimulation(1)) {
      throw new IOException("Simulating error1 after uploading new image to NameNode");
    }

    // ... and tell it to make the uploaded image the current one.
    namenode.rollFsImage(new CheckpointSignature(checkpointImage));
    checkpointImage.endCheckpoint();

    final long newImageSize = checkpointImage.getFsImageName().length();
    LOG.info("Checkpoint done. New Image Size: " + newImageSize);
  }
Example #2
0
  /**
   * Fetches the <code>edits</code> file, and the <code>fsimage</code> file when its digest differs
   * from the locally recorded one, from the name-node.
   *
   * <p>The checkpoint image's <code>cTime</code> and <code>checkpointTime</code> are always
   * overwritten with the values carried by the signature.
   *
   * @param sig signature supplying the times and image digest to compare against
   * @return true if a new image has been downloaded and needs to be loaded
   * @throws IOException if a transfer fails
   */
  private boolean downloadCheckpointFiles(CheckpointSignature sig) throws IOException {
    checkpointImage.cTime = sig.cTime;
    checkpointImage.checkpointTime = sig.checkpointTime;

    final boolean imageChanged = !sig.imageDigest.equals(checkpointImage.imageDigest);
    if (imageChanged) {
      // The digests differ, so pull a fresh fsimage and remember its digest.
      final File[] imageTargets = checkpointImage.getImageFiles();
      assert imageTargets.length > 0 : "No checkpoint targets.";
      TransferFsImage.getFileClient(fsName, "getimage=1", imageTargets, false);
      checkpointImage.imageDigest = sig.imageDigest;
      LOG.info(
          "Downloaded file "
              + imageTargets[0].getName()
              + " size "
              + imageTargets[0].length()
              + " bytes.");
    } else {
      LOG.info("Image has not changed. Will not download image.");
    }

    // The edits file is fetched on every call.
    final File[] editsTargets = checkpointImage.getEditsFiles();
    assert editsTargets.length > 0 : "No checkpoint targets.";
    TransferFsImage.getFileClient(fsName, "getedit=1", editsTargets, false);
    LOG.info(
        "Downloaded file "
            + editsTargets[0].getName()
            + " size "
            + editsTargets[0].length()
            + " bytes.");

    checkpointImage.checkpointUploadDone(null);

    return imageChanged;
  }
Example #3
0
  /**
   * Initializes this SecondaryNameNode from the given configuration: starts JVM metrics, opens the
   * RPC proxy to the namenode, recovers or creates the checkpoint storage, reads the checkpoint
   * scheduling parameters and starts the HTTP info server.
   *
   * @param conf configuration to read settings from; <code>dfs.secondary.http.address</code> is
   *     overwritten with the address the info server was actually started on
   * @throws IOException if any of the initialization steps fails
   */
  private void initialize(Configuration conf) throws IOException {
    // Start Java VM metrics.
    JvmMetrics.init("SecondaryNameNode", conf.get("session.id"));

    // Open the connection to the namenode.
    shouldRun = true;
    nameNodeAddr = NameNode.getAddress(conf);

    this.conf = conf;
    namenode =
        (NamenodeProtocol)
            RPC.waitForProxy(
                NamenodeProtocol.class, NamenodeProtocol.versionID, nameNodeAddr, conf);

    // Checkpoint directories; image and edits directories share the same default location.
    fsName = getInfoServer();
    final String defaultCheckpointDir = "/tmp/hadoop/dfs/namesecondary";
    checkpointDirs = FSImage.getCheckpointDirs(conf, defaultCheckpointDir);
    checkpointEditsDirs = FSImage.getCheckpointEditsDirs(conf, defaultCheckpointDir);
    checkpointImage = new CheckpointStorage(conf);
    checkpointImage.recoverCreate(checkpointDirs, checkpointEditsDirs);

    // Scheduling parameters: period in seconds and edit-log size trigger in bytes (the units
    // match the log lines at the end of this method).
    checkpointPeriod = conf.getLong("fs.checkpoint.period", 3600);
    checkpointSize = conf.getLong("fs.checkpoint.size", 4194304);

    // Web server used for uploading files.
    final InetSocketAddress httpAddress =
        NetUtils.createSocketAddr(
            NetUtils.getServerAddress(
                conf,
                "dfs.secondary.info.bindAddress",
                "dfs.secondary.info.port",
                "dfs.secondary.http.address"));
    infoBindAddress = httpAddress.getHostName();
    final int requestedPort = httpAddress.getPort();
    final boolean requestedPortIsZero = requestedPort == 0;
    infoServer =
        new HttpServer("secondary", infoBindAddress, requestedPort, requestedPortIsZero, conf);
    infoServer.setAttribute("name.system.image", checkpointImage);
    infoServer.setAttribute("name.conf", conf);
    infoServer.addInternalServlet("getimage", "/getimage", GetImageServlet.class);
    infoServer.start();

    // The web-server port can be ephemeral, so publish the port the server reports.
    infoPort = infoServer.getPort();
    final String boundAddress = infoBindAddress + ":" + infoPort;
    conf.set("dfs.secondary.http.address", boundAddress);
    LOG.info("Secondary Web-server up at: " + boundAddress);
    LOG.warn(
        "Checkpoint Period   :" + checkpointPeriod + " secs (" + checkpointPeriod / 60 + " min)");
    LOG.warn(
        "Log Size Trigger    :" + checkpointSize + " bytes (" + checkpointSize / 1024 + " KB)");
  }
Example #4
0
 /**
  * Shuts down this SecondaryNameNode instance: clears the run flag, stops the info server and
  * closes the checkpoint image. A failure in either step is logged as a warning and does not
  * prevent the other step or propagate to the caller.
  */
 public void shutdown() {
   shouldRun = false;

   if (infoServer != null) {
     try {
       infoServer.stop();
     } catch (Exception e) {
       LOG.warn("Exception shutting down SecondaryNameNode", e);
     }
   }

   if (checkpointImage != null) {
     try {
       checkpointImage.close();
     } catch (IOException e) {
       LOG.warn(StringUtils.stringifyException(e));
     }
   }
 }
Example #5
0
  /**
   * The main work loop. Sleeps for the poll period, then asks the namenode for the size of the
   * pending edit log and runs a checkpoint when either the size trigger or the checkpoint period
   * has been reached. Loops until <code>shouldRun</code> is cleared.
   *
   * <p>An <code>IOException</code> from a poll or checkpoint is logged and the loop continues; any
   * other <code>Throwable</code> is logged and terminates the JVM with exit status -1.
   */
  public void run() {

    // Poll the namenode once every 5 minutes, or once per checkpoint period if that is shorter.
    long period = 5 * 60; // seconds
    long lastCheckpointTime = 0;
    if (checkpointPeriod < period) {
      period = checkpointPeriod;
    }

    while (shouldRun) {
      try {
        Thread.sleep(1000 * period);
      } catch (InterruptedException ie) {
        // Deliberately ignored: an interrupt only cuts the sleep short. The shouldRun check
        // right below decides whether the loop stops.
      }
      if (!shouldRun) {
        break;
      }
      try {
        long now = System.currentTimeMillis();

        long size = namenode.getEditLogSize();
        if (size >= checkpointSize || now >= lastCheckpointTime + 1000 * checkpointPeriod) {
          doCheckpoint();
          lastCheckpointTime = now;
        }
      } catch (IOException e) {
        // Log once, with the stack trace attached, instead of spreading the failure over
        // several log records and stderr.
        LOG.error("Exception in doCheckpoint: ", e);
        // Forget the recorded digest so the next downloadCheckpointFiles() digest comparison
        // fails and the image is downloaded again.
        checkpointImage.imageDigest = null;
      } catch (Throwable e) {
        LOG.error("Throwable Exception in doCheckpoint: ", e);
        Runtime.getRuntime().exit(-1);
      }
    }
  }
Example #6
0
 /**
  * Copies the new fsimage into the NameNode by issuing a <code>putimage</code> request that
  * carries this node's info port, its local host address and the checkpoint signature as token.
  *
  * @param sig checkpoint signature sent as the <code>token</code> parameter
  * @throws IOException if the local host address cannot be resolved or the request fails
  */
 private void putFSImage(CheckpointSignature sig) throws IOException {
   final StringBuilder request = new StringBuilder("putimage=1");
   request.append("&port=").append(infoPort);
   request.append("&machine=").append(InetAddress.getLocalHost().getHostAddress());
   request.append("&token=").append(sig.toString());
   final String fileid = request.toString();

   LOG.info("Posted URL " + fsName + fileid);
   TransferFsImage.getFileClient(fsName, fileid, (File[]) null, false);
 }
Example #7
0
  /**
   * Parses and executes a single command-line command.
   *
   * <p>Supported commands are <code>-geteditsize</code> (no further arguments), which prints the
   * namenode's edit log size, and <code>-checkpoint [force]</code>, which runs a checkpoint when
   * the edit log has reached the configured checkpoint size or when <code>force</code> is given.
   * Remote and local I/O errors are logged and reported through the return value.
   *
   * @param argv The parameters passed to this program.
   * @return 0 on success, -1 on a usage error, an unknown command or an I/O failure.
   * @throws Exception if a failure other than an <code>IOException</code> occurs while running
   *     the command
   */
  private int processArgs(String[] argv) throws Exception {

    if (argv.length < 1) {
      printUsage("");
      return -1;
    }

    final String cmd = argv[0];
    // Name shown in error messages: the command without its leading dash. The dash is stripped
    // only when present, so an empty argument no longer throws StringIndexOutOfBoundsException
    // and a command typed without a dash is reported in full.
    final String cmdName = cmd.startsWith("-") ? cmd.substring(1) : cmd;
    final boolean isGetEditSize = "-geteditsize".equals(cmd);
    final boolean isCheckpoint = "-checkpoint".equals(cmd);
    final boolean forced = argv.length == 2 && "force".equals(argv[1]);

    //
    // verify that we have the right number of command line parameters
    //
    if (isGetEditSize && argv.length != 1) {
      printUsage(cmd);
      return -1;
    }
    // -checkpoint takes either no argument or exactly the single argument "force".
    if (isCheckpoint && argv.length != 1 && !forced) {
      printUsage(cmd);
      return -1;
    }

    int exitCode = 0;
    try {
      if (isCheckpoint) {
        long size = namenode.getEditLogSize();
        if ((size >= checkpointSize) || forced) {
          doCheckpoint();
        } else {
          System.err.println(
              "EditLog size "
                  + size
                  + " bytes is "
                  + "smaller than configured checkpoint "
                  + "size "
                  + checkpointSize
                  + " bytes.");
          System.err.println("Skipping checkpoint.");
        }
      } else if (isGetEditSize) {
        long size = namenode.getEditLogSize();
        System.out.println("EditLog size is " + size + " bytes");
      } else {
        exitCode = -1;
        LOG.error(cmdName + ": Unknown command");
        printUsage("");
      }
    } catch (RemoteException e) {
      //
      // This is an error returned by the hadoop server. Print
      // out the first line of the error message, ignore the stack trace.
      //
      exitCode = -1;
      String message = e.getLocalizedMessage();
      if (message != null) {
        int lineEnd = message.indexOf('\n');
        if (lineEnd >= 0) {
          message = message.substring(0, lineEnd);
        }
      }
      LOG.error(cmdName + ": " + message);
    } catch (IOException e) {
      //
      // IO exception encountered locally.
      //
      exitCode = -1;
      LOG.error(cmdName + ": " + e.getLocalizedMessage());
    }
    // NOTE(review): the original carried an open question here — does the RPC connection need
    // to be closed before returning?
    return exitCode;
  }