/** * recoverTransitionRead for a specific block pool * * @param datanode DataNode * @param bpID Block pool Id * @param nsInfo Namespace info of namenode corresponding to the block pool * @param dataDirs Storage directories * @param startOpt startup option * @throws IOException on error */ void recoverTransitionRead( DataNode datanode, String bpID, NamespaceInfo nsInfo, Collection<File> dataDirs, StartupOption startOpt) throws IOException { // First ensure datanode level format/snapshot/rollback is completed recoverTransitionRead(datanode, nsInfo, dataDirs, startOpt); // Create list of storage directories for the block pool Collection<File> bpDataDirs = new ArrayList<File>(); for (Iterator<File> it = dataDirs.iterator(); it.hasNext(); ) { File dnRoot = it.next(); File bpRoot = BlockPoolSliceStorage.getBpRoot(bpID, new File(dnRoot, STORAGE_DIR_CURRENT)); bpDataDirs.add(bpRoot); } // mkdir for the list of BlockPoolStorage makeBlockPoolDataDir(bpDataDirs, null); BlockPoolSliceStorage bpStorage = new BlockPoolSliceStorage( nsInfo.getNamespaceID(), bpID, nsInfo.getCTime(), nsInfo.getClusterID()); bpStorage.recoverTransitionRead(datanode, nsInfo, bpDataDirs, startOpt); addBlockPoolStorage(bpID, bpStorage); }
void format(StorageDirectory sd, NamespaceInfo nsInfo) throws IOException { sd.clearDirectory(); // create directory this.layoutVersion = HdfsConstants.LAYOUT_VERSION; this.clusterID = nsInfo.getClusterID(); this.namespaceID = nsInfo.getNamespaceID(); this.cTime = 0; // store storageID as it currently is writeProperties(sd); }
/** * Format a block pool slice storage. * * @param bpSdir the block pool storage * @param nsInfo the name space info * @throws IOException Signals that an I/O exception has occurred. */ private void format(StorageDirectory bpSdir, NamespaceInfo nsInfo) throws IOException { LOG.info("Formatting block pool " + blockpoolID + " directory " + bpSdir.getCurrentDir()); bpSdir.clearDirectory(); // create directory this.layoutVersion = HdfsConstants.DATANODE_LAYOUT_VERSION; this.cTime = nsInfo.getCTime(); this.namespaceID = nsInfo.getNamespaceID(); this.blockpoolID = nsInfo.getBlockPoolID(); writeProperties(bpSdir); }
/** * Upgrade to any release after 0.22 (0.22 included) release e.g. 0.22 => 0.23 Upgrade procedure * is as follows: * * <ol> * <li>If <SD>/current/<bpid>/previous exists then delete it * <li>Rename <SD>/current/<bpid>/current to <SD>/current/bpid/current/previous.tmp * <li>Create new <SD>current/<bpid>/current directory * <ol> * <li>Hard links for block files are created from previous.tmp to current * <li>Save new version file in current directory * </ol> * <li>Rename previous.tmp to previous * </ol> * * @param bpSd storage directory <SD>/current/<bpid> * @param nsInfo Namespace Info from the namenode * @throws IOException on error */ void doUpgrade(DataNode datanode, StorageDirectory bpSd, NamespaceInfo nsInfo) throws IOException { // Upgrading is applicable only to release with federation or after if (!DataNodeLayoutVersion.supports(LayoutVersion.Feature.FEDERATION, layoutVersion)) { return; } LOG.info( "Upgrading block pool storage directory " + bpSd.getRoot() + ".\n old LV = " + this.getLayoutVersion() + "; old CTime = " + this.getCTime() + ".\n new LV = " + HdfsConstants.DATANODE_LAYOUT_VERSION + "; new CTime = " + nsInfo.getCTime()); // get <SD>/previous directory String dnRoot = getDataNodeStorageRoot(bpSd.getRoot().getCanonicalPath()); StorageDirectory dnSdStorage = new StorageDirectory(new File(dnRoot)); File dnPrevDir = dnSdStorage.getPreviousDir(); // If <SD>/previous directory exists delete it if (dnPrevDir.exists()) { deleteDir(dnPrevDir); } File bpCurDir = bpSd.getCurrentDir(); File bpPrevDir = bpSd.getPreviousDir(); assert bpCurDir.exists() : "BP level current directory must exist."; cleanupDetachDir(new File(bpCurDir, DataStorage.STORAGE_DIR_DETACHED)); // 1. Delete <SD>/current/<bpid>/previous dir before upgrading if (bpPrevDir.exists()) { deleteDir(bpPrevDir); } File bpTmpDir = bpSd.getPreviousTmp(); assert !bpTmpDir.exists() : "previous.tmp directory must not exist."; // 2. Rename <SD>/current/<bpid>/current to // <SD>/current/<bpid>/previous.tmp rename(bpCurDir, bpTmpDir); // 3. Create new <SD>/current with block files hardlinks and VERSION linkAllBlocks(datanode, bpTmpDir, bpCurDir); this.layoutVersion = HdfsConstants.DATANODE_LAYOUT_VERSION; assert this.namespaceID == nsInfo.getNamespaceID() : "Data-node and name-node layout versions must be the same."; this.cTime = nsInfo.getCTime(); writeProperties(bpSd); // 4.rename <SD>/current/<bpid>/previous.tmp to // <SD>/current/<bpid>/previous rename(bpTmpDir, bpPrevDir); LOG.info("Upgrade of block pool " + blockpoolID + " at " + bpSd.getRoot() + " is complete"); }
/** * Analize which and whether a transition of the fs state is required and perform it if necessary. * * <p>Rollback if previousLV >= LAYOUT_VERSION && prevCTime <= namenode.cTime Upgrade if this.LV > * LAYOUT_VERSION || this.cTime < namenode.cTime Regular startup if this.LV = LAYOUT_VERSION && * this.cTime = namenode.cTime * * @param datanode Datanode to which this storage belongs to * @param sd storage directory * @param nsInfo namespace info * @param startOpt startup option * @throws IOException */ private void doTransition( DataNode datanode, StorageDirectory sd, NamespaceInfo nsInfo, StartupOption startOpt) throws IOException { if (startOpt == StartupOption.ROLLBACK) { doRollback(sd, nsInfo); // rollback if applicable } readProperties(sd); checkVersionUpgradable(this.layoutVersion); assert this.layoutVersion >= HdfsConstants.LAYOUT_VERSION : "Future version is not allowed"; boolean federationSupported = LayoutVersion.supports(Feature.FEDERATION, layoutVersion); // For pre-federation version - validate the namespaceID if (!federationSupported && getNamespaceID() != nsInfo.getNamespaceID()) { throw new IOException( "Incompatible namespaceIDs in " + sd.getRoot().getCanonicalPath() + ": namenode namespaceID = " + nsInfo.getNamespaceID() + "; datanode namespaceID = " + getNamespaceID()); } // For version that supports federation, validate clusterID if (federationSupported && !getClusterID().equals(nsInfo.getClusterID())) { throw new IOException( "Incompatible clusterIDs in " + sd.getRoot().getCanonicalPath() + ": namenode clusterID = " + nsInfo.getClusterID() + "; datanode clusterID = " + getClusterID()); } // regular start up if (this.layoutVersion == HdfsConstants.LAYOUT_VERSION && this.cTime == nsInfo.getCTime()) return; // regular startup // verify necessity of a distributed upgrade UpgradeManagerDatanode um = datanode.getUpgradeManagerDatanode(nsInfo.getBlockPoolID()); verifyDistributedUpgradeProgress(um, nsInfo); // do upgrade if (this.layoutVersion > HdfsConstants.LAYOUT_VERSION || this.cTime < nsInfo.getCTime()) { doUpgrade(sd, nsInfo); // upgrade return; } // layoutVersion == LAYOUT_VERSION && this.cTime > nsInfo.cTime // must shutdown throw new IOException( "Datanode state: LV = " + this.getLayoutVersion() + " CTime = " + this.getCTime() + " is newer than the namespace state: LV = " + nsInfo.getLayoutVersion() + " CTime = " + nsInfo.getCTime()); }
/* * Roll back to old snapshot at the block pool level * If previous directory exists: * <ol> * <li>Rename <SD>/current/<bpid>/current to removed.tmp</li> * <li>Rename * <SD>/current/<bpid>/previous to current</li> * <li>Remove removed.tmp</li> * </ol> * * Do nothing if previous directory does not exist. * @param bpSd Block pool storage directory at <SD>/current/<bpid> */ void doRollback(StorageDirectory bpSd, NamespaceInfo nsInfo) throws IOException { File prevDir = bpSd.getPreviousDir(); // regular startup if previous dir does not exist if (!prevDir.exists()) return; // read attributes out of the VERSION file of previous directory BlockPoolSliceStorage prevInfo = new BlockPoolSliceStorage(); prevInfo.readPreviousVersionProperties(bpSd); // We allow rollback to a state, which is either consistent with // the namespace state or can be further upgraded to it. // In another word, we can only roll back when ( storedLV >= software LV) // && ( DN.previousCTime <= NN.ctime) if (!(prevInfo.getLayoutVersion() >= HdfsConstants.DATANODE_LAYOUT_VERSION && prevInfo.getCTime() <= nsInfo.getCTime())) { // cannot rollback throw new InconsistentFSStateException( bpSd.getRoot(), "Cannot rollback to a newer state.\nDatanode previous state: LV = " + prevInfo.getLayoutVersion() + " CTime = " + prevInfo.getCTime() + " is newer than the namespace state: LV = " + HdfsConstants.DATANODE_LAYOUT_VERSION + " CTime = " + nsInfo.getCTime()); } LOG.info( "Rolling back storage directory " + bpSd.getRoot() + ".\n target LV = " + nsInfo.getLayoutVersion() + "; target CTime = " + nsInfo.getCTime()); File tmpDir = bpSd.getRemovedTmp(); assert !tmpDir.exists() : "removed.tmp directory must not exist."; // 1. rename current to tmp File curDir = bpSd.getCurrentDir(); assert curDir.exists() : "Current directory must exist."; rename(curDir, tmpDir); // 2. rename previous to current rename(prevDir, curDir); // 3. delete removed.tmp dir deleteDir(tmpDir); LOG.info("Rollback of " + bpSd.getRoot() + " is complete"); }
/** * Analyze storage directories. Recover from previous transitions if required. * * <p>The block pool storages are either all analyzed or none of them is loaded. Therefore, a * failure on loading any block pool storage results a faulty data volume. * * @param datanode Datanode to which this storage belongs to * @param nsInfo namespace information * @param dataDirs storage directories of block pool * @param startOpt startup option * @throws IOException on error */ void recoverTransitionRead( DataNode datanode, NamespaceInfo nsInfo, Collection<File> dataDirs, StartupOption startOpt) throws IOException { LOG.info("Analyzing storage directories for bpid " + nsInfo.getBlockPoolID()); for (StorageDirectory sd : loadBpStorageDirectories(datanode, nsInfo, dataDirs, startOpt)) { addStorageDir(sd); } }
/** * Load one storage directory. Recover from previous transitions if required. * * @param datanode datanode instance * @param nsInfo namespace information * @param dataDir the root path of the storage directory * @param startOpt startup option * @return the StorageDirectory successfully loaded. * @throws IOException */ private StorageDirectory loadStorageDirectory( DataNode datanode, NamespaceInfo nsInfo, File dataDir, StartupOption startOpt) throws IOException { StorageDirectory sd = new StorageDirectory(dataDir, null, true); try { StorageState curState = sd.analyzeStorage(startOpt, this); // sd is locked but not opened switch (curState) { case NORMAL: break; case NON_EXISTENT: LOG.info("Block pool storage directory " + dataDir + " does not exist"); throw new IOException("Storage directory " + dataDir + " does not exist"); case NOT_FORMATTED: // format LOG.info( "Block pool storage directory " + dataDir + " is not formatted for " + nsInfo.getBlockPoolID()); LOG.info("Formatting ..."); format(sd, nsInfo); break; default: // recovery part is common sd.doRecover(curState); } // 2. Do transitions // Each storage directory is treated individually. // During startup some of them can upgrade or roll back // while others could be up-to-date for the regular startup. doTransition(datanode, sd, nsInfo, startOpt); if (getCTime() != nsInfo.getCTime()) { throw new IOException("Data-node and name-node CTimes must be the same."); } // 3. Update successfully loaded storage. setServiceLayoutVersion(getServiceLayoutVersion()); writeProperties(sd); return sd; } catch (IOException ioe) { sd.unlock(); throw ioe; } }
/** * Rolling back to a snapshot in previous directory by moving it to current directory. Rollback * procedure: <br> * If previous directory exists: * * <ol> * <li>Rename current to removed.tmp * <li>Rename previous to current * <li>Remove removed.tmp * </ol> * * Do nothing, if previous directory does not exist. */ void doRollback(StorageDirectory sd, NamespaceInfo nsInfo) throws IOException { File prevDir = sd.getPreviousDir(); // regular startup if previous dir does not exist if (!prevDir.exists()) return; DataStorage prevInfo = new DataStorage(); prevInfo.readPreviousVersionProperties(sd); // We allow rollback to a state, which is either consistent with // the namespace state or can be further upgraded to it. if (!(prevInfo.getLayoutVersion() >= HdfsConstants.LAYOUT_VERSION && prevInfo.getCTime() <= nsInfo.getCTime())) // cannot rollback throw new InconsistentFSStateException( sd.getRoot(), "Cannot rollback to a newer state.\nDatanode previous state: LV = " + prevInfo.getLayoutVersion() + " CTime = " + prevInfo.getCTime() + " is newer than the namespace state: LV = " + nsInfo.getLayoutVersion() + " CTime = " + nsInfo.getCTime()); LOG.info( "Rolling back storage directory " + sd.getRoot() + ".\n target LV = " + nsInfo.getLayoutVersion() + "; target CTime = " + nsInfo.getCTime()); File tmpDir = sd.getRemovedTmp(); assert !tmpDir.exists() : "removed.tmp directory must not exist."; // rename current to tmp File curDir = sd.getCurrentDir(); assert curDir.exists() : "Current directory must exist."; rename(curDir, tmpDir); // rename previous to current rename(prevDir, curDir); // delete tmp dir deleteDir(tmpDir); LOG.info("Rollback of " + sd.getRoot() + " is complete"); }
/** * Analyze and load storage directories. Recover from previous transitions if required. * * <p>The block pool storages are either all analyzed or none of them is loaded. Therefore, a * failure on loading any block pool storage results a faulty data volume. * * @param datanode Datanode to which this storage belongs to * @param nsInfo namespace information * @param dataDirs storage directories of block pool * @param startOpt startup option * @return an array of loaded block pool directories. * @throws IOException on error */ List<StorageDirectory> loadBpStorageDirectories( DataNode datanode, NamespaceInfo nsInfo, Collection<File> dataDirs, StartupOption startOpt) throws IOException { List<StorageDirectory> succeedDirs = Lists.newArrayList(); try { for (File dataDir : dataDirs) { if (containsStorageDir(dataDir)) { throw new IOException( "BlockPoolSliceStorage.recoverTransitionRead: " + "attempt to load an used block storage: " + dataDir); } StorageDirectory sd = loadStorageDirectory(datanode, nsInfo, dataDir, startOpt); succeedDirs.add(sd); } } catch (IOException e) { LOG.warn( "Failed to analyze storage directories for block pool " + nsInfo.getBlockPoolID(), e); throw e; } return succeedDirs; }
/** * Upgrade -- Move current storage into a backup directory, and hardlink all its blocks into the * new current directory. * * <p>Upgrade from pre-0.22 to 0.22 or later release e.g. 0.19/0.20/ => 0.22/0.23 * * <ul> * <li>If <SD>/previous exists then delete it * <li>Rename <SD>/current to <SD>/previous.tmp * <li>Create new <SD>/current/<bpid>/current directory * <li> * <ul> * <li>Hard links for block files are created from <SD>/previous.tmp to * <SD>/current/<bpid>/current * <li>Saves new version file in <SD>/current/<bpid>/current directory * </ul> * <li>Rename <SD>/previous.tmp to <SD>/previous * </ul> * * There should be only ONE namenode in the cluster for first time upgrade to 0.22 * * @param sd storage directory * @throws IOException on error */ void doUpgrade(StorageDirectory sd, NamespaceInfo nsInfo) throws IOException { if (LayoutVersion.supports(Feature.FEDERATION, layoutVersion)) { clusterID = nsInfo.getClusterID(); layoutVersion = nsInfo.getLayoutVersion(); writeProperties(sd); return; } LOG.info( "Upgrading storage directory " + sd.getRoot() + ".\n old LV = " + this.getLayoutVersion() + "; old CTime = " + this.getCTime() + ".\n new LV = " + nsInfo.getLayoutVersion() + "; new CTime = " + nsInfo.getCTime()); File curDir = sd.getCurrentDir(); File prevDir = sd.getPreviousDir(); File bbwDir = new File(sd.getRoot(), Storage.STORAGE_1_BBW); assert curDir.exists() : "Data node current directory must exist."; // Cleanup directory "detach" cleanupDetachDir(new File(curDir, STORAGE_DIR_DETACHED)); // 1. delete <SD>/previous dir before upgrading if (prevDir.exists()) deleteDir(prevDir); // get previous.tmp directory, <SD>/previous.tmp File tmpDir = sd.getPreviousTmp(); assert !tmpDir.exists() : "Data node previous.tmp directory must not exist."; // 2. Rename <SD>/current to <SD>/previous.tmp rename(curDir, tmpDir); // 3. Format BP and hard link blocks from previous directory File curBpDir = BlockPoolSliceStorage.getBpRoot(nsInfo.getBlockPoolID(), curDir); BlockPoolSliceStorage bpStorage = new BlockPoolSliceStorage( nsInfo.getNamespaceID(), nsInfo.getBlockPoolID(), nsInfo.getCTime(), nsInfo.getClusterID()); bpStorage.format(curDir, nsInfo); linkAllBlocks(tmpDir, bbwDir, new File(curBpDir, STORAGE_DIR_CURRENT)); // 4. Write version file under <SD>/current layoutVersion = HdfsConstants.LAYOUT_VERSION; clusterID = nsInfo.getClusterID(); writeProperties(sd); // 5. Rename <SD>/previous.tmp to <SD>/previous rename(tmpDir, prevDir); LOG.info("Upgrade of " + sd.getRoot() + " is complete"); addBlockPoolStorage(nsInfo.getBlockPoolID(), bpStorage); }
/** * Analyze storage directories. Recover from previous transitions if required. Perform fs state * transition if necessary depending on the namespace info. Read storage info. <br> * This method should be synchronized between multiple DN threads. Only the first DN thread does * DN level storage dir recoverTransitionRead. * * @param nsInfo namespace information * @param dataDirs array of data storage directories * @param startOpt startup option * @throws IOException */ synchronized void recoverTransitionRead( DataNode datanode, NamespaceInfo nsInfo, Collection<File> dataDirs, StartupOption startOpt) throws IOException { if (initilized) { // DN storage has been initialized, no need to do anything return; } assert HdfsConstants.LAYOUT_VERSION == nsInfo.getLayoutVersion() : "Data-node version " + HdfsConstants.LAYOUT_VERSION + " and name-node layout version " + nsInfo.getLayoutVersion() + " must be the same."; // 1. For each data directory calculate its state and // check whether all is consistent before transitioning. // Format and recover. this.storageDirs = new ArrayList<StorageDirectory>(dataDirs.size()); ArrayList<StorageState> dataDirStates = new ArrayList<StorageState>(dataDirs.size()); for (Iterator<File> it = dataDirs.iterator(); it.hasNext(); ) { File dataDir = it.next(); StorageDirectory sd = new StorageDirectory(dataDir); StorageState curState; try { curState = sd.analyzeStorage(startOpt, this); // sd is locked but not opened switch (curState) { case NORMAL: break; case NON_EXISTENT: // ignore this storage LOG.info("Storage directory " + dataDir + " does not exist"); it.remove(); continue; case NOT_FORMATTED: // format LOG.info("Storage directory " + dataDir + " is not formatted"); LOG.info("Formatting ..."); format(sd, nsInfo); break; default: // recovery part is common sd.doRecover(curState); } } catch (IOException ioe) { sd.unlock(); LOG.warn("Ignoring storage directory " + dataDir + " due to an exception", ioe); // continue with other good dirs continue; } // add to the storage list addStorageDir(sd); dataDirStates.add(curState); } if (dataDirs.size() == 0 || dataDirStates.size() == 0) // none of the data dirs exist throw new IOException("All specified directories are not accessible or do not exist."); // 2. Do transitions // Each storage directory is treated individually. // During startup some of them can upgrade or rollback // while others could be uptodate for the regular startup. for (int idx = 0; idx < getNumStorageDirs(); idx++) { doTransition(datanode, getStorageDir(idx), nsInfo, startOpt); assert this.getLayoutVersion() == nsInfo.getLayoutVersion() : "Data-node and name-node layout versions must be the same."; } // make sure we have storage id set - if not - generate new one createStorageID(datanode.getXferPort()); // 3. Update all storages. Some of them might have just been formatted. this.writeAll(); // 4. mark DN storage is initilized this.initilized = true; }
/** * Analyze whether a transition of the BP state is required and perform it if necessary. <br> * Rollback if previousLV >= LAYOUT_VERSION && prevCTime <= namenode.cTime. Upgrade if this.LV > * LAYOUT_VERSION || this.cTime < namenode.cTime Regular startup if this.LV = LAYOUT_VERSION && * this.cTime = namenode.cTime * * @param sd storage directory <SD>/current/<bpid> * @param nsInfo namespace info * @param startOpt startup option * @throws IOException */ private void doTransition( DataNode datanode, StorageDirectory sd, NamespaceInfo nsInfo, StartupOption startOpt) throws IOException { if (startOpt == StartupOption.ROLLBACK && sd.getPreviousDir().exists()) { Preconditions.checkState( !getTrashRootDir(sd).exists(), sd.getPreviousDir() + " and " + getTrashRootDir(sd) + " should not " + " both be present."); doRollback(sd, nsInfo); // rollback if applicable } else if (startOpt == StartupOption.ROLLBACK && !sd.getPreviousDir().exists()) { // Restore all the files in the trash. The restored files are retained // during rolling upgrade rollback. They are deleted during rolling // upgrade downgrade. int restored = restoreBlockFilesFromTrash(getTrashRootDir(sd)); LOG.info("Restored " + restored + " block files from trash."); } readProperties(sd); checkVersionUpgradable(this.layoutVersion); assert this.layoutVersion >= HdfsConstants.DATANODE_LAYOUT_VERSION : "Future version is not allowed"; if (getNamespaceID() != nsInfo.getNamespaceID()) { throw new IOException( "Incompatible namespaceIDs in " + sd.getRoot().getCanonicalPath() + ": namenode namespaceID = " + nsInfo.getNamespaceID() + "; datanode namespaceID = " + getNamespaceID()); } if (!blockpoolID.equals(nsInfo.getBlockPoolID())) { throw new IOException( "Incompatible blockpoolIDs in " + sd.getRoot().getCanonicalPath() + ": namenode blockpoolID = " + nsInfo.getBlockPoolID() + "; datanode blockpoolID = " + blockpoolID); } if (this.layoutVersion == HdfsConstants.DATANODE_LAYOUT_VERSION && this.cTime == nsInfo.getCTime()) { return; // regular startup } if (this.layoutVersion > HdfsConstants.DATANODE_LAYOUT_VERSION) { int restored = restoreBlockFilesFromTrash(getTrashRootDir(sd)); LOG.info( "Restored " + restored + " block files from trash " + "before the layout upgrade. These blocks will be moved to " + "the previous directory during the upgrade"); } if (this.layoutVersion > HdfsConstants.DATANODE_LAYOUT_VERSION || this.cTime < nsInfo.getCTime()) { doUpgrade(datanode, sd, nsInfo); // upgrade return; } // layoutVersion == LAYOUT_VERSION && this.cTime > nsInfo.cTime // must shutdown throw new IOException( "Datanode state: LV = " + this.getLayoutVersion() + " CTime = " + this.getCTime() + " is newer than the namespace state: LV = " + nsInfo.getLayoutVersion() + " CTime = " + nsInfo.getCTime()); }
/** * Format a block pool slice storage. * * @param dnCurDir DataStorage current directory * @param nsInfo the name space info * @throws IOException Signals that an I/O exception has occurred. */ void format(File dnCurDir, NamespaceInfo nsInfo) throws IOException { File curBpDir = getBpRoot(nsInfo.getBlockPoolID(), dnCurDir); StorageDirectory bpSdir = new StorageDirectory(curBpDir); format(bpSdir, nsInfo); }