/* * Read ClusterID, StorageID, StorageType, CTime from * DataStorage VERSION file and verify them. */ @Override protected void setFieldsFromProperties(Properties props, StorageDirectory sd) throws IOException { setLayoutVersion(props, sd); setcTime(props, sd); setStorageType(props, sd); setClusterId(props, layoutVersion, sd); // Read NamespaceID in version before federation if (!LayoutVersion.supports(Feature.FEDERATION, layoutVersion)) { setNamespaceID(props, sd); } // valid storage id, storage id may be empty String ssid = props.getProperty("storageID"); if (ssid == null) { throw new InconsistentFSStateException( sd.getRoot(), "file " + STORAGE_FILE_VERSION + " is invalid."); } String sid = getStorageID(); if (!(sid.equals("") || ssid.equals("") || sid.equals(ssid))) { throw new InconsistentFSStateException(sd.getRoot(), "has incompatible storage Id."); } if (sid.equals("")) { // update id only if it was empty setStorageID(ssid); } }
/**
 * Opens the log's input stream, reads the header (log version and, where the version supports
 * them, the layout flags) and creates the op reader.
 *
 * <p>Must be called while the stream is in the {@code UNINIT} state. On success the state becomes
 * {@code OPEN}. If no reader was created, every stream opened so far is cleaned up and the state
 * becomes {@code CLOSED}.
 *
 * @throws LogHeaderCorruptException if the stream ends while reading the version or layout flags
 * @throws IOException on any other I/O failure
 */
private void init() throws LogHeaderCorruptException, IOException {
  Preconditions.checkState(state == State.UNINIT);
  BufferedInputStream buffered = null;
  try {
    fStream = log.getInputStream();
    buffered = new BufferedInputStream(fStream);
    tracker = new FSEditLogLoader.PositionTrackingInputStream(buffered);
    dataIn = new DataInputStream(tracker);

    // Header part 1: the log version.
    try {
      logVersion = readLogVersion(dataIn);
    } catch (EOFException headerEof) {
      throw new LogHeaderCorruptException("No header found in log");
    }

    // Header part 2: layout flags, present only in versions that support them.
    final boolean hasLayoutFlags =
        LayoutVersion.supports(Feature.ADD_LAYOUT_FLAGS, logVersion);
    if (hasLayoutFlags) {
      try {
        LayoutFlags.read(dataIn);
      } catch (EOFException flagsEof) {
        throw new LogHeaderCorruptException("EOF while reading layout " + "flags from log");
      }
    }

    reader = new FSEditLogOp.Reader(dataIn, tracker, logVersion);
    reader.setMaxOpSize(maxOpSize);
    state = State.OPEN;
  } finally {
    // No reader means initialization did not get far enough; release whatever was opened.
    if (reader == null) {
      IOUtils.cleanup(LOG, dataIn, tracker, buffered, fStream);
      state = State.CLOSED;
    }
  }
}
/** * Hardlink all finalized and RBW blocks in fromDir to toDir * * @param fromDir The directory where the 'from' snapshot is stored * @param fromBbwDir In HDFS 1.x, the directory where blocks that are under construction are * stored. * @param toDir The current data directory * @throws IOException If error occurs during hardlink */ private void linkAllBlocks(File fromDir, File fromBbwDir, File toDir) throws IOException { HardLink hardLink = new HardLink(); // do the link int diskLayoutVersion = this.getLayoutVersion(); if (LayoutVersion.supports(Feature.APPEND_RBW_DIR, diskLayoutVersion)) { // hardlink finalized blocks in tmpDir/finalized linkBlocks( new File(fromDir, STORAGE_DIR_FINALIZED), new File(toDir, STORAGE_DIR_FINALIZED), diskLayoutVersion, hardLink); // hardlink rbw blocks in tmpDir/rbw linkBlocks( new File(fromDir, STORAGE_DIR_RBW), new File(toDir, STORAGE_DIR_RBW), diskLayoutVersion, hardLink); } else { // pre-RBW version // hardlink finalized blocks in tmpDir linkBlocks(fromDir, new File(toDir, STORAGE_DIR_FINALIZED), diskLayoutVersion, hardLink); if (fromBbwDir.exists()) { /* * We need to put the 'blocksBeingWritten' from HDFS 1.x into the rbw * directory. It's a little messy, because the blocksBeingWriten was * NOT underneath the 'current' directory in those releases. See * HDFS-3731 for details. */ linkBlocks(fromBbwDir, new File(toDir, STORAGE_DIR_RBW), diskLayoutVersion, hardLink); } } LOG.info(hardLink.linkStats.report()); }
/** * Upgrade -- Move current storage into a backup directory, and hardlink all its blocks into the * new current directory. * * <p>Upgrade from pre-0.22 to 0.22 or later release e.g. 0.19/0.20/ => 0.22/0.23 * * <ul> * <li>If <SD>/previous exists then delete it * <li>Rename <SD>/current to <SD>/previous.tmp * <li>Create new <SD>/current/<bpid>/current directory * <li> * <ul> * <li>Hard links for block files are created from <SD>/previous.tmp to * <SD>/current/<bpid>/current * <li>Saves new version file in <SD>/current/<bpid>/current directory * </ul> * <li>Rename <SD>/previous.tmp to <SD>/previous * </ul> * * There should be only ONE namenode in the cluster for first time upgrade to 0.22 * * @param sd storage directory * @throws IOException on error */ void doUpgrade(StorageDirectory sd, NamespaceInfo nsInfo) throws IOException { if (LayoutVersion.supports(Feature.FEDERATION, layoutVersion)) { clusterID = nsInfo.getClusterID(); layoutVersion = nsInfo.getLayoutVersion(); writeProperties(sd); return; } LOG.info( "Upgrading storage directory " + sd.getRoot() + ".\n old LV = " + this.getLayoutVersion() + "; old CTime = " + this.getCTime() + ".\n new LV = " + nsInfo.getLayoutVersion() + "; new CTime = " + nsInfo.getCTime()); File curDir = sd.getCurrentDir(); File prevDir = sd.getPreviousDir(); File bbwDir = new File(sd.getRoot(), Storage.STORAGE_1_BBW); assert curDir.exists() : "Data node current directory must exist."; // Cleanup directory "detach" cleanupDetachDir(new File(curDir, STORAGE_DIR_DETACHED)); // 1. delete <SD>/previous dir before upgrading if (prevDir.exists()) deleteDir(prevDir); // get previous.tmp directory, <SD>/previous.tmp File tmpDir = sd.getPreviousTmp(); assert !tmpDir.exists() : "Data node previous.tmp directory must not exist."; // 2. Rename <SD>/current to <SD>/previous.tmp rename(curDir, tmpDir); // 3. 
Format BP and hard link blocks from previous directory File curBpDir = BlockPoolSliceStorage.getBpRoot(nsInfo.getBlockPoolID(), curDir); BlockPoolSliceStorage bpStorage = new BlockPoolSliceStorage( nsInfo.getNamespaceID(), nsInfo.getBlockPoolID(), nsInfo.getCTime(), nsInfo.getClusterID()); bpStorage.format(curDir, nsInfo); linkAllBlocks(tmpDir, bbwDir, new File(curBpDir, STORAGE_DIR_CURRENT)); // 4. Write version file under <SD>/current layoutVersion = HdfsConstants.LAYOUT_VERSION; clusterID = nsInfo.getClusterID(); writeProperties(sd); // 5. Rename <SD>/previous.tmp to <SD>/previous rename(tmpDir, prevDir); LOG.info("Upgrade of " + sd.getRoot() + " is complete"); addBlockPoolStorage(nsInfo.getBlockPoolID(), bpStorage); }
/** * Analize which and whether a transition of the fs state is required and perform it if necessary. * * <p>Rollback if previousLV >= LAYOUT_VERSION && prevCTime <= namenode.cTime Upgrade if this.LV > * LAYOUT_VERSION || this.cTime < namenode.cTime Regular startup if this.LV = LAYOUT_VERSION && * this.cTime = namenode.cTime * * @param datanode Datanode to which this storage belongs to * @param sd storage directory * @param nsInfo namespace info * @param startOpt startup option * @throws IOException */ private void doTransition( DataNode datanode, StorageDirectory sd, NamespaceInfo nsInfo, StartupOption startOpt) throws IOException { if (startOpt == StartupOption.ROLLBACK) { doRollback(sd, nsInfo); // rollback if applicable } readProperties(sd); checkVersionUpgradable(this.layoutVersion); assert this.layoutVersion >= HdfsConstants.LAYOUT_VERSION : "Future version is not allowed"; boolean federationSupported = LayoutVersion.supports(Feature.FEDERATION, layoutVersion); // For pre-federation version - validate the namespaceID if (!federationSupported && getNamespaceID() != nsInfo.getNamespaceID()) { throw new IOException( "Incompatible namespaceIDs in " + sd.getRoot().getCanonicalPath() + ": namenode namespaceID = " + nsInfo.getNamespaceID() + "; datanode namespaceID = " + getNamespaceID()); } // For version that supports federation, validate clusterID if (federationSupported && !getClusterID().equals(nsInfo.getClusterID())) { throw new IOException( "Incompatible clusterIDs in " + sd.getRoot().getCanonicalPath() + ": namenode clusterID = " + nsInfo.getClusterID() + "; datanode clusterID = " + getClusterID()); } // regular start up if (this.layoutVersion == HdfsConstants.LAYOUT_VERSION && this.cTime == nsInfo.getCTime()) return; // regular startup // verify necessity of a distributed upgrade UpgradeManagerDatanode um = datanode.getUpgradeManagerDatanode(nsInfo.getBlockPoolID()); verifyDistributedUpgradeProgress(um, nsInfo); // do upgrade if (this.layoutVersion > 
HdfsConstants.LAYOUT_VERSION || this.cTime < nsInfo.getCTime()) { doUpgrade(sd, nsInfo); // upgrade return; } // layoutVersion == LAYOUT_VERSION && this.cTime > nsInfo.cTime // must shutdown throw new IOException( "Datanode state: LV = " + this.getLayoutVersion() + " CTime = " + this.getCTime() + " is newer than the namespace state: LV = " + nsInfo.getLayoutVersion() + " CTime = " + nsInfo.getCTime()); }
/* * Set ClusterID, StorageID, StorageType, CTime into * DataStorage VERSION file */ @Override protected void setPropertiesFromFields(Properties props, StorageDirectory sd) throws IOException { props.setProperty("storageType", storageType.toString()); props.setProperty("clusterID", clusterID); props.setProperty("cTime", String.valueOf(cTime)); props.setProperty("layoutVersion", String.valueOf(layoutVersion)); props.setProperty("storageID", getStorageID()); // Set NamespaceID in version before federation if (!LayoutVersion.supports(Feature.FEDERATION, layoutVersion)) { props.setProperty("namespaceID", String.valueOf(namespaceID)); } }
/**
 * Cleans up the detach directory.
 *
 * <p>Nothing is done when the layout version supports the RBW directory, or when {@code
 * detachDir} does not exist or is not a directory. Otherwise a non-empty directory is reported
 * as an error and an empty one is removed.
 *
 * @param detachDir detach directory
 * @throws IOException if the directory is not empty or it cannot be removed
 */
private void cleanupDetachDir(File detachDir) throws IOException {
  if (LayoutVersion.supports(Feature.APPEND_RBW_DIR, layoutVersion)) {
    return;
  }
  if (!detachDir.exists() || !detachDir.isDirectory()) {
    return;
  }

  final boolean hasEntries = FileUtil.list(detachDir).length != 0;
  if (hasEntries) {
    throw new IOException(
        "Detached directory "
            + detachDir
            + " is not empty. Please manually move each file under this "
            + "directory to the finalized directory if the finalized "
            + "directory tree does not have the file.");
  }

  if (!detachDir.delete()) {
    throw new IOException("Cannot remove directory " + detachDir);
  }
}
/** modified by tony */ @SuppressWarnings("deprecation") int loadEditRecords(int logVersion, DataInputStream in, boolean closeOnExit) throws IOException { FSNamesystem.LOG.info("logversion: " + logVersion); FSDirectory fsDir = fsNamesys.dir; int numEdits = 0; String clientName = null; String clientMachine = null; String path = null; int numOpAdd = 0, numOpClose = 0, numOpDelete = 0, numOpRenameOld = 0, numOpSetRepl = 0, numOpMkDir = 0, numOpSetPerm = 0, numOpSetOwner = 0, numOpSetGenStamp = 0, numOpTimes = 0, numOpRename = 0, numOpConcatDelete = 0, numOpSymlink = 0, numOpGetDelegationToken = 0, numOpRenewDelegationToken = 0, numOpCancelDelegationToken = 0, numOpUpdateMasterKey = 0, numOpOther = 0; try { while (true) { long timestamp = 0; long mtime = 0; long atime = 0; long blockSize = 0; byte opcode = -1; try { in.mark(1); opcode = in.readByte(); if (opcode == Ops.OP_INVALID) { in.reset(); // reset back to end of file if somebody reads it again break; // no more transactions } } catch (EOFException e) { break; // no more transactions } numEdits++; switch (opcode) { case Ops.OP_ADD: case Ops.OP_CLOSE: { // versions > 0 support per file replication // get name and replication int length = in.readInt(); // modified by tony if (-7 == logVersion && length != 3 || -17 < logVersion && logVersion < -7 && length != 4 || logVersion <= -17 && length != 7) { throw new IOException( "Incorrect data format." + " logVersion is " + logVersion + " but writables.length is " + length + ". 
"); } path = FSImageSerialization.readString(in); short replication = fsNamesys.adjustReplication(readShort(in)); mtime = readLong(in); if (LayoutVersion.supports(Feature.FILE_ACCESS_TIME, logVersion)) { atime = readLong(in); } if (logVersion < -7) { blockSize = readLong(in); } long fileSize = readLong(in); byte type = (byte) readLong(in); // get blocks boolean isFileUnderConstruction = (opcode == Ops.OP_ADD); BlockInfo blocks[] = readBlocks(in, logVersion, isFileUnderConstruction, replication); // Older versions of HDFS does not store the block size in inode. // If the file has more than one block, use the size of the // first block as the blocksize. Otherwise use the default // block size. if (-8 <= logVersion && blockSize == 0) { if (blocks.length > 1) { blockSize = blocks[0].getNumBytes(); } else { long first = ((blocks.length == 1) ? blocks[0].getNumBytes() : 0); blockSize = Math.max(fsNamesys.getDefaultBlockSize(), first); } } PermissionStatus permissions = fsNamesys.getUpgradePermission(); if (logVersion <= -11) { permissions = PermissionStatus.read(in); } CodingMatrix codingMatrix = CodingMatrix.getMatrixofCertainType(type); codingMatrix.readFields(in); /** added by tony* */ LongWritable offset = new LongWritable(); offset.readFields(in); long headeroffset = offset.get(); // clientname, clientMachine and block locations of last block. if (opcode == Ops.OP_ADD && logVersion <= -12) { clientName = FSImageSerialization.readString(in); clientMachine = FSImageSerialization.readString(in); if (-13 <= logVersion) { readDatanodeDescriptorArray(in); } } else { clientName = ""; clientMachine = ""; } // The open lease transaction re-creates a file if necessary. // Delete the file if it already exists. 
if (FSNamesystem.LOG.isDebugEnabled()) { FSNamesystem.LOG.debug( opcode + ": " + path + " numblocks : " + blocks.length + " clientHolder " + clientName + " clientMachine " + clientMachine); } fsDir.unprotectedDelete(path, mtime); /** modified by tony add to the file tree */ INodeFile node = (INodeFile) fsDir.unprotectedAddFile( path, permissions, codingMatrix, headeroffset, fileSize, blocks, replication, mtime, atime, blockSize); if (isFileUnderConstruction) { numOpAdd++; // // Replace current node with a INodeUnderConstruction. // Recreate in-memory lease record. // // INodeFileUnderConstruction cons = new INodeFileUnderConstruction( // node.getLocalNameBytes(), // node.getReplication(), // node.getModificationTime(), // node.getPreferredBlockSize(), // node.getBlocks(), // node.getPermissionStatus(), // clientName, // clientMachine, // null); // TODO: INodeFileUnderConstruction cons = null; fsDir.replaceNode(path, node, cons); fsNamesys.leaseManager.addLease(cons.getClientName(), path); } break; } case Ops.OP_SET_REPLICATION: { numOpSetRepl++; path = FSImageSerialization.readString(in); short replication = fsNamesys.adjustReplication(readShort(in)); fsDir.unprotectedSetReplication(path, replication, null); break; } case Ops.OP_CONCAT_DELETE: { numOpConcatDelete++; int length = in.readInt(); if (length < 3) { // trg, srcs.., timestam throw new IOException("Incorrect data format. " + "Mkdir operation."); } String trg = FSImageSerialization.readString(in); int srcSize = length - 1 - 1; // trg and timestamp String[] srcs = new String[srcSize]; for (int i = 0; i < srcSize; i++) { srcs[i] = FSImageSerialization.readString(in); } timestamp = readLong(in); fsDir.unprotectedConcat(trg, srcs); break; } case Ops.OP_RENAME_OLD: { numOpRenameOld++; int length = in.readInt(); if (length != 3) { throw new IOException("Incorrect data format. 
" + "Mkdir operation."); } String s = FSImageSerialization.readString(in); String d = FSImageSerialization.readString(in); timestamp = readLong(in); HdfsFileStatus dinfo = fsDir.getFileInfo(d, false); fsDir.unprotectedRenameTo(s, d, timestamp); fsNamesys.changeLease(s, d, dinfo); break; } case Ops.OP_DELETE: { numOpDelete++; int length = in.readInt(); if (length != 2) { throw new IOException("Incorrect data format. " + "delete operation."); } path = FSImageSerialization.readString(in); timestamp = readLong(in); fsDir.unprotectedDelete(path, timestamp); break; } case Ops.OP_MKDIR: { numOpMkDir++; PermissionStatus permissions = fsNamesys.getUpgradePermission(); int length = in.readInt(); if (-17 < logVersion && length != 2 || logVersion <= -17 && length != 3) { throw new IOException("Incorrect data format. " + "Mkdir operation."); } path = FSImageSerialization.readString(in); timestamp = readLong(in); // The disk format stores atimes for directories as well. // However, currently this is not being updated/used because of // performance reasons. if (LayoutVersion.supports(Feature.FILE_ACCESS_TIME, logVersion)) { atime = readLong(in); } if (logVersion <= -11) { permissions = PermissionStatus.read(in); } fsDir.unprotectedMkdir(path, permissions, timestamp); break; } case Ops.OP_SET_GENSTAMP: { numOpSetGenStamp++; long lw = in.readLong(); fsNamesys.setGenerationStamp(lw); break; } case Ops.OP_DATANODE_ADD: { numOpOther++; // Datanodes are not persistent any more. FSImageSerialization.DatanodeImage.skipOne(in); break; } case Ops.OP_DATANODE_REMOVE: { numOpOther++; DatanodeID nodeID = new DatanodeID(); nodeID.readFields(in); // Datanodes are not persistent any more. 
break; } case Ops.OP_SET_PERMISSIONS: { numOpSetPerm++; fsDir.unprotectedSetPermission( FSImageSerialization.readString(in), FsPermission.read(in)); break; } case Ops.OP_SET_OWNER: { numOpSetOwner++; fsDir.unprotectedSetOwner( FSImageSerialization.readString(in), FSImageSerialization.readString_EmptyAsNull(in), FSImageSerialization.readString_EmptyAsNull(in)); break; } case Ops.OP_SET_NS_QUOTA: { fsDir.unprotectedSetQuota( FSImageSerialization.readString(in), readLongWritable(in), FSConstants.QUOTA_DONT_SET); break; } case Ops.OP_CLEAR_NS_QUOTA: { fsDir.unprotectedSetQuota( FSImageSerialization.readString(in), FSConstants.QUOTA_RESET, FSConstants.QUOTA_DONT_SET); break; } case Ops.OP_SET_QUOTA: fsDir.unprotectedSetQuota( FSImageSerialization.readString(in), readLongWritable(in), readLongWritable(in)); break; case Ops.OP_TIMES: { numOpTimes++; int length = in.readInt(); if (length != 3) { throw new IOException("Incorrect data format. " + "times operation."); } path = FSImageSerialization.readString(in); mtime = readLong(in); atime = readLong(in); fsDir.unprotectedSetTimes(path, mtime, atime, true); break; } case Ops.OP_SYMLINK: { numOpSymlink++; int length = in.readInt(); if (length != 4) { throw new IOException("Incorrect data format. " + "symlink operation."); } path = FSImageSerialization.readString(in); String value = FSImageSerialization.readString(in); mtime = readLong(in); atime = readLong(in); PermissionStatus perm = PermissionStatus.read(in); fsDir.unprotectedSymlink(path, value, mtime, atime, perm); break; } case Ops.OP_RENAME: { numOpRename++; int length = in.readInt(); if (length != 3) { throw new IOException("Incorrect data format. 
" + "Mkdir operation."); } String s = FSImageSerialization.readString(in); String d = FSImageSerialization.readString(in); timestamp = readLong(in); Rename[] options = readRenameOptions(in); HdfsFileStatus dinfo = fsDir.getFileInfo(d, false); fsDir.unprotectedRenameTo(s, d, timestamp, options); fsNamesys.changeLease(s, d, dinfo); break; } case Ops.OP_GET_DELEGATION_TOKEN: { numOpGetDelegationToken++; DelegationTokenIdentifier delegationTokenId = new DelegationTokenIdentifier(); delegationTokenId.readFields(in); long expiryTime = readLong(in); fsNamesys .getDelegationTokenSecretManager() .addPersistedDelegationToken(delegationTokenId, expiryTime); break; } case Ops.OP_RENEW_DELEGATION_TOKEN: { numOpRenewDelegationToken++; DelegationTokenIdentifier delegationTokenId = new DelegationTokenIdentifier(); delegationTokenId.readFields(in); long expiryTime = readLong(in); fsNamesys .getDelegationTokenSecretManager() .updatePersistedTokenRenewal(delegationTokenId, expiryTime); break; } case Ops.OP_CANCEL_DELEGATION_TOKEN: { numOpCancelDelegationToken++; DelegationTokenIdentifier delegationTokenId = new DelegationTokenIdentifier(); delegationTokenId.readFields(in); fsNamesys .getDelegationTokenSecretManager() .updatePersistedTokenCancellation(delegationTokenId); break; } case Ops.OP_UPDATE_MASTER_KEY: { numOpUpdateMasterKey++; DelegationKey delegationKey = new DelegationKey(); delegationKey.readFields(in); fsNamesys.getDelegationTokenSecretManager().updatePersistedMasterKey(delegationKey); break; } default: { throw new IOException("Never seen opcode " + opcode); } } } } catch (IOException ex) { check203UpgradeFailure(logVersion, ex); } finally { if (closeOnExit) in.close(); } if (FSImage.LOG.isDebugEnabled()) { FSImage.LOG.debug( "numOpAdd = " + numOpAdd + " numOpClose = " + numOpClose + " numOpDelete = " + numOpDelete + " numOpRenameOld = " + numOpRenameOld + " numOpSetRepl = " + numOpSetRepl + " numOpMkDir = " + numOpMkDir + " numOpSetPerm = " + numOpSetPerm + " 
numOpSetOwner = " + numOpSetOwner + " numOpSetGenStamp = " + numOpSetGenStamp + " numOpTimes = " + numOpTimes + " numOpConcatDelete = " + numOpConcatDelete + " numOpRename = " + numOpRename + " numOpGetDelegationToken = " + numOpGetDelegationToken + " numOpRenewDelegationToken = " + numOpRenewDelegationToken + " numOpCancelDelegationToken = " + numOpCancelDelegationToken + " numOpUpdateMasterKey = " + numOpUpdateMasterKey + " numOpOther = " + numOpOther); } return numEdits; }
/**
 * Reports whether the current layout version supports the federation feature.
 *
 * @return {@code true} if {@code layoutVersion} supports {@code Feature.FEDERATION}
 */
public boolean versionSupportsFederation() {
  final boolean federated = LayoutVersion.supports(Feature.FEDERATION, layoutVersion);
  return federated;
}