/** Create a new checkpoint */ void doCheckpoint() throws IOException { LOG.info("Checkpoint starting"); // Do the required initialization of the merge work area. startCheckpoint(); // Tell the namenode to start logging transactions in a new edit file // Returns a token that would be used to upload the merged image. CheckpointSignature sig = (CheckpointSignature) namenode.rollEditLog(); // error simulation code for junit test if (ErrorSimulator.getErrorSimulation(0)) { throw new IOException("Simulating error0 " + "after creating edits.new"); } boolean loadImage = downloadCheckpointFiles(sig); // Fetch fsimage and edits doMerge(sig, loadImage); // Do the merge // // Upload the new image into the NameNode. Then tell the Namenode // to make this new uploaded image as the most current image. // putFSImage(sig); // error simulation code for junit test if (ErrorSimulator.getErrorSimulation(1)) { throw new IOException("Simulating error1 " + "after uploading new image to NameNode"); } namenode.rollFsImage(new CheckpointSignature(checkpointImage)); checkpointImage.endCheckpoint(); LOG.info("Checkpoint done. New Image Size: " + checkpointImage.getFsImageName().length()); }
/** Merge downloaded image and edits and write the new image into current storage directory. */ private void doMerge(CheckpointSignature sig, boolean loadImage) throws IOException { if (loadImage) { // create an empty namespace if new image namesystem = new FSNamesystem(checkpointImage, conf); } assert namesystem.dir.fsImage == checkpointImage; checkpointImage.doMerge(sig, loadImage); }
/** Initialize SecondaryNameNode. */ private void initialize(Configuration conf) throws IOException { // initiate Java VM metrics JvmMetrics.init("SecondaryNameNode", conf.get("session.id")); // Create connection to the namenode. shouldRun = true; nameNodeAddr = NameNode.getAddress(conf); this.conf = conf; this.namenode = (NamenodeProtocol) RPC.waitForProxy( NamenodeProtocol.class, NamenodeProtocol.versionID, nameNodeAddr, conf); // initialize checkpoint directories fsName = getInfoServer(); checkpointDirs = FSImage.getCheckpointDirs(conf, "/tmp/hadoop/dfs/namesecondary"); checkpointEditsDirs = FSImage.getCheckpointEditsDirs(conf, "/tmp/hadoop/dfs/namesecondary"); checkpointImage = new CheckpointStorage(conf); checkpointImage.recoverCreate(checkpointDirs, checkpointEditsDirs); // Initialize other scheduling parameters from the configuration checkpointPeriod = conf.getLong("fs.checkpoint.period", 3600); checkpointSize = conf.getLong("fs.checkpoint.size", 4194304); // initialize the webserver for uploading files. String infoAddr = NetUtils.getServerAddress( conf, "dfs.secondary.info.bindAddress", "dfs.secondary.info.port", "dfs.secondary.http.address"); InetSocketAddress infoSocAddr = NetUtils.createSocketAddr(infoAddr); infoBindAddress = infoSocAddr.getHostName(); int tmpInfoPort = infoSocAddr.getPort(); infoServer = new HttpServer("secondary", infoBindAddress, tmpInfoPort, tmpInfoPort == 0, conf); infoServer.setAttribute("name.system.image", checkpointImage); this.infoServer.setAttribute("name.conf", conf); infoServer.addInternalServlet("getimage", "/getimage", GetImageServlet.class); infoServer.start(); // The web-server port can be ephemeral... ensure we have the correct info infoPort = infoServer.getPort(); conf.set("dfs.secondary.http.address", infoBindAddress + ":" + infoPort); LOG.info("Secondary Web-server up at: " + infoBindAddress + ":" + infoPort); LOG.warn( "Checkpoint Period :" + checkpointPeriod + " secs " + "(" + checkpointPeriod / 60 + " min)"); LOG.warn( "Log Size Trigger :" + checkpointSize + " bytes " + "(" + checkpointSize / 1024 + " KB)"); }
/** * Download <code>fsimage</code> and <code>edits</code> files from the name-node. * * @return true if a new image has been downloaded and needs to be loaded * @throws IOException */ private boolean downloadCheckpointFiles(CheckpointSignature sig) throws IOException { checkpointImage.cTime = sig.cTime; checkpointImage.checkpointTime = sig.checkpointTime; boolean downloadImage = true; String fileid; File[] srcNames; if (sig.imageDigest.equals(checkpointImage.imageDigest)) { downloadImage = false; LOG.info("Image has not changed. Will not download image."); } else { // get fsimage srcNames = checkpointImage.getImageFiles(); assert srcNames.length > 0 : "No checkpoint targets."; fileid = "getimage=1"; TransferFsImage.getFileClient(fsName, fileid, srcNames, false); checkpointImage.imageDigest = sig.imageDigest; LOG.info( "Downloaded file " + srcNames[0].getName() + " size " + srcNames[0].length() + " bytes."); } // get edits file fileid = "getedit=1"; srcNames = checkpointImage.getEditsFiles(); assert srcNames.length > 0 : "No checkpoint targets."; TransferFsImage.getFileClient(fsName, fileid, srcNames, false); LOG.info( "Downloaded file " + srcNames[0].getName() + " size " + srcNames[0].length() + " bytes."); checkpointImage.checkpointUploadDone(null); return downloadImage; }
/** Shut down this instance of the datanode. Returns only after shutdown is complete. */ public void shutdown() { shouldRun = false; try { if (infoServer != null) infoServer.stop(); } catch (Exception e) { LOG.warn("Exception shutting down SecondaryNameNode", e); } try { if (checkpointImage != null) checkpointImage.close(); } catch (IOException e) { LOG.warn(StringUtils.stringifyException(e)); } }
// // The main work loop // public void run() { // // Poll the Namenode (once every 5 minutes) to find the size of the // pending edit log. // long period = 5 * 60; // 5 minutes long lastCheckpointTime = 0; if (checkpointPeriod < period) { period = checkpointPeriod; } while (shouldRun) { try { Thread.sleep(1000 * period); } catch (InterruptedException ie) { // do nothing } if (!shouldRun) { break; } try { long now = System.currentTimeMillis(); long size = namenode.getEditLogSize(); if (size >= checkpointSize || now >= lastCheckpointTime + 1000 * checkpointPeriod) { doCheckpoint(); lastCheckpointTime = now; } } catch (IOException e) { LOG.error("Exception in doCheckpoint: "); LOG.error(StringUtils.stringifyException(e)); e.printStackTrace(); checkpointImage.imageDigest = null; } catch (Throwable e) { LOG.error("Throwable Exception in doCheckpoint: "); LOG.error(StringUtils.stringifyException(e)); e.printStackTrace(); Runtime.getRuntime().exit(-1); } } }
private void startCheckpoint() throws IOException { checkpointImage.unlockAll(); checkpointImage.getEditLog().close(); checkpointImage.recoverCreate(checkpointDirs, checkpointEditsDirs); checkpointImage.startCheckpoint(); }