@Private @Unstable /** * A simple class for storing RM state in any storage that implements a basic FileSystem interface. * Does not use directories so that simple key-value stores can be used. The retry policy for the * real filesystem client must be configured separately to enable retry of filesystem operations * when needed. * * <p>Changes from 1.1 to 1.2, AMRMTokenSecretManager state has been saved separately. The * currentMasterkey and nextMasterkey have been stored. Also, AMRMToken has been removed from * ApplicationAttemptState. * * <p>Changes from 1.2 to 1.3, Addition of ReservationSystem state. */ public class FileSystemRMStateStore extends RMStateStore { public static final Log LOG = LogFactory.getLog(FileSystemRMStateStore.class); protected static final String ROOT_DIR_NAME = "FSRMStateRoot"; protected static final Version CURRENT_VERSION_INFO = Version.newInstance(1, 3); protected static final String AMRMTOKEN_SECRET_MANAGER_NODE = "AMRMTokenSecretManagerNode"; private static final String UNREADABLE_BY_SUPERUSER_XATTRIB = "security.hdfs.unreadable.by.superuser"; protected FileSystem fs; @VisibleForTesting protected Configuration fsConf; private Path rootDirPath; @Private @VisibleForTesting Path rmDTSecretManagerRoot; private Path rmAppRoot; private Path dtSequenceNumberPath = null; private int fsNumRetries; private long fsRetryInterval; private boolean intermediateEncryptionEnabled = YarnConfiguration.DEFAULT_YARN_INTERMEDIATE_DATA_ENCRYPTION; @VisibleForTesting Path fsWorkingPath; Path amrmTokenSecretManagerRoot; private Path reservationRoot; @Override public synchronized void initInternal(Configuration conf) throws Exception { fsWorkingPath = new Path(conf.get(YarnConfiguration.FS_RM_STATE_STORE_URI)); rootDirPath = new Path(fsWorkingPath, ROOT_DIR_NAME); rmDTSecretManagerRoot = new Path(rootDirPath, RM_DT_SECRET_MANAGER_ROOT); rmAppRoot = new Path(rootDirPath, RM_APP_ROOT); amrmTokenSecretManagerRoot = new Path(rootDirPath, AMRMTOKEN_SECRET_MANAGER_ROOT); reservationRoot = new Path(rootDirPath, RESERVATION_SYSTEM_ROOT); fsNumRetries = conf.getInt( YarnConfiguration.FS_RM_STATE_STORE_NUM_RETRIES, YarnConfiguration.DEFAULT_FS_RM_STATE_STORE_NUM_RETRIES); fsRetryInterval = conf.getLong( YarnConfiguration.FS_RM_STATE_STORE_RETRY_INTERVAL_MS, YarnConfiguration.DEFAULT_FS_RM_STATE_STORE_RETRY_INTERVAL_MS); intermediateEncryptionEnabled = conf.getBoolean( YarnConfiguration.YARN_INTERMEDIATE_DATA_ENCRYPTION, YarnConfiguration.DEFAULT_YARN_INTERMEDIATE_DATA_ENCRYPTION); } @Override protected synchronized void startInternal() throws Exception { // create filesystem only now, as part of service-start. By this time, RM is // authenticated with kerberos so we are good to create a file-system // handle. fsConf = new Configuration(getConfig()); fsConf.setBoolean("dfs.client.retry.policy.enabled", true); String retryPolicy = fsConf.get( YarnConfiguration.FS_RM_STATE_STORE_RETRY_POLICY_SPEC, YarnConfiguration.DEFAULT_FS_RM_STATE_STORE_RETRY_POLICY_SPEC); fsConf.set("dfs.client.retry.policy.spec", retryPolicy); String scheme = fsWorkingPath.toUri().getScheme(); if (scheme == null) { scheme = FileSystem.getDefaultUri(fsConf).getScheme(); } if (scheme != null) { String disableCacheName = String.format("fs.%s.impl.disable.cache", scheme); fsConf.setBoolean(disableCacheName, true); } fs = fsWorkingPath.getFileSystem(fsConf); mkdirsWithRetries(rmDTSecretManagerRoot); mkdirsWithRetries(rmAppRoot); mkdirsWithRetries(amrmTokenSecretManagerRoot); mkdirsWithRetries(reservationRoot); } @Override protected synchronized void closeInternal() throws Exception { closeWithRetries(); } @Override protected Version getCurrentVersion() { return CURRENT_VERSION_INFO; } @Override protected synchronized Version loadVersion() throws Exception { Path versionNodePath = getNodePath(rootDirPath, VERSION_NODE); FileStatus status = getFileStatusWithRetries(versionNodePath); if (status != null) { byte[] data = readFileWithRetries(versionNodePath, status.getLen()); Version version = new VersionPBImpl(VersionProto.parseFrom(data)); return version; } return null; } @Override protected synchronized void storeVersion() throws Exception { Path versionNodePath = getNodePath(rootDirPath, VERSION_NODE); byte[] data = ((VersionPBImpl) CURRENT_VERSION_INFO).getProto().toByteArray(); if (existsWithRetries(versionNodePath)) { updateFile(versionNodePath, data, false); } else { writeFileWithRetries(versionNodePath, data, false); } } @Override public synchronized long getAndIncrementEpoch() throws Exception { Path epochNodePath = getNodePath(rootDirPath, EPOCH_NODE); long currentEpoch = 0; FileStatus status = getFileStatusWithRetries(epochNodePath); if (status != null) { // load current epoch byte[] data = readFileWithRetries(epochNodePath, status.getLen()); Epoch epoch = new EpochPBImpl(EpochProto.parseFrom(data)); currentEpoch = epoch.getEpoch(); // increment epoch and store it byte[] storeData = Epoch.newInstance(currentEpoch + 1).getProto().toByteArray(); updateFile(epochNodePath, storeData, false); } else { // initialize epoch file with 1 for the next time. byte[] storeData = Epoch.newInstance(currentEpoch + 1).getProto().toByteArray(); writeFileWithRetries(epochNodePath, storeData, false); } return currentEpoch; } @Override public synchronized RMState loadState() throws Exception { RMState rmState = new RMState(); // recover DelegationTokenSecretManager loadRMDTSecretManagerState(rmState); // recover RM applications loadRMAppState(rmState); // recover AMRMTokenSecretManager loadAMRMTokenSecretManagerState(rmState); // recover reservation state loadReservationSystemState(rmState); return rmState; } private void loadReservationSystemState(RMState rmState) throws Exception { try { final ReservationStateFileProcessor fileProcessor = new ReservationStateFileProcessor(rmState); final Path rootDirectory = this.reservationRoot; processDirectoriesOfFiles(fileProcessor, rootDirectory); } catch (Exception e) { LOG.error("Failed to load state.", e); throw e; } } private void loadAMRMTokenSecretManagerState(RMState rmState) throws Exception { checkAndResumeUpdateOperation(amrmTokenSecretManagerRoot); Path amrmTokenSecretManagerStateDataDir = new Path(amrmTokenSecretManagerRoot, AMRMTOKEN_SECRET_MANAGER_NODE); FileStatus status = getFileStatusWithRetries(amrmTokenSecretManagerStateDataDir); if (status == null) { return; } assert status.isFile(); byte[] data = readFileWithRetries(amrmTokenSecretManagerStateDataDir, status.getLen()); AMRMTokenSecretManagerStatePBImpl stateData = new AMRMTokenSecretManagerStatePBImpl(AMRMTokenSecretManagerStateProto.parseFrom(data)); rmState.amrmTokenSecretManagerState = AMRMTokenSecretManagerState.newInstance( stateData.getCurrentMasterKey(), stateData.getNextMasterKey()); } private void loadRMAppState(RMState rmState) throws Exception { try { List<ApplicationAttemptStateData> attempts = new ArrayList<>(); final RMAppStateFileProcessor rmAppStateFileProcessor = new RMAppStateFileProcessor(rmState, attempts); final Path rootDirectory = this.rmAppRoot; processDirectoriesOfFiles(rmAppStateFileProcessor, rootDirectory); // go through all attempts and add them to their apps, Ideally, each // attempt node must have a corresponding app node, because remove // directory operation remove both at the same time for (ApplicationAttemptStateData attemptState : attempts) { ApplicationId appId = attemptState.getAttemptId().getApplicationId(); ApplicationStateData appState = rmState.appState.get(appId); assert appState != null; appState.attempts.put(attemptState.getAttemptId(), attemptState); } LOG.info("Done loading applications from FS state store"); } catch (Exception e) { LOG.error("Failed to load state.", e); throw e; } } private void processDirectoriesOfFiles( RMStateFileProcessor rmAppStateFileProcessor, Path rootDirectory) throws Exception { for (FileStatus dir : listStatusWithRetries(rootDirectory)) { checkAndResumeUpdateOperation(dir.getPath()); String dirName = dir.getPath().getName(); for (FileStatus fileNodeStatus : listStatusWithRetries(dir.getPath())) { assert fileNodeStatus.isFile(); String fileName = fileNodeStatus.getPath().getName(); if (checkAndRemovePartialRecordWithRetries(fileNodeStatus.getPath())) { continue; } byte[] fileData = readFileWithRetries(fileNodeStatus.getPath(), fileNodeStatus.getLen()); // Set attribute if not already set setUnreadableBySuperuserXattrib(fileNodeStatus.getPath()); rmAppStateFileProcessor.processChildNode(dirName, fileName, fileData); } } } private boolean checkAndRemovePartialRecord(Path record) throws IOException { // If the file ends with .tmp then it shows that it failed // during saving state into state store. The file will be deleted as a // part of this call if (record.getName().endsWith(".tmp")) { LOG.error("incomplete rm state store entry found :" + record); fs.delete(record, false); return true; } return false; } private void checkAndResumeUpdateOperation(Path path) throws Exception { // Before loading the state information, check whether .new file exists. // If it does, the prior updateFile is failed on half way. We need to // complete replacing the old file first. FileStatus[] newChildNodes = listStatusWithRetries( path, new PathFilter() { @Override public boolean accept(Path path) { return path.getName().endsWith(".new"); } }); for (FileStatus newChildNodeStatus : newChildNodes) { assert newChildNodeStatus.isFile(); String newChildNodeName = newChildNodeStatus.getPath().getName(); String childNodeName = newChildNodeName.substring(0, newChildNodeName.length() - ".new".length()); Path childNodePath = new Path(newChildNodeStatus.getPath().getParent(), childNodeName); replaceFile(newChildNodeStatus.getPath(), childNodePath); } } private void loadRMDTSecretManagerState(RMState rmState) throws Exception { checkAndResumeUpdateOperation(rmDTSecretManagerRoot); FileStatus[] childNodes = listStatusWithRetries(rmDTSecretManagerRoot); for (FileStatus childNodeStatus : childNodes) { assert childNodeStatus.isFile(); String childNodeName = childNodeStatus.getPath().getName(); if (checkAndRemovePartialRecordWithRetries(childNodeStatus.getPath())) { continue; } if (childNodeName.startsWith(DELEGATION_TOKEN_SEQUENCE_NUMBER_PREFIX)) { rmState.rmSecretManagerState.dtSequenceNumber = Integer.parseInt(childNodeName.split("_")[1]); continue; } Path childNodePath = getNodePath(rmDTSecretManagerRoot, childNodeName); byte[] childData = readFileWithRetries(childNodePath, childNodeStatus.getLen()); ByteArrayInputStream is = new ByteArrayInputStream(childData); try (DataInputStream fsIn = new DataInputStream(is)) { if (childNodeName.startsWith(DELEGATION_KEY_PREFIX)) { DelegationKey key = new DelegationKey(); key.readFields(fsIn); rmState.rmSecretManagerState.masterKeyState.add(key); if (LOG.isDebugEnabled()) { LOG.debug( "Loaded delegation key: keyId=" + key.getKeyId() + ", expirationDate=" + key.getExpiryDate()); } } else if (childNodeName.startsWith(DELEGATION_TOKEN_PREFIX)) { RMDelegationTokenIdentifierData identifierData = new RMDelegationTokenIdentifierData(); identifierData.readFields(fsIn); RMDelegationTokenIdentifier identifier = identifierData.getTokenIdentifier(); long renewDate = identifierData.getRenewDate(); rmState.rmSecretManagerState.delegationTokenState.put(identifier, renewDate); if (LOG.isDebugEnabled()) { LOG.debug( "Loaded RMDelegationTokenIdentifier: " + identifier + " renewDate=" + renewDate); } } else { LOG.warn("Unknown file for recovering RMDelegationTokenSecretManager"); } } } } @Override public synchronized void storeApplicationStateInternal( ApplicationId appId, ApplicationStateData appStateDataPB) throws Exception { Path appDirPath = getAppDir(rmAppRoot, appId); mkdirsWithRetries(appDirPath); Path nodeCreatePath = getNodePath(appDirPath, appId.toString()); LOG.info("Storing info for app: " + appId + " at: " + nodeCreatePath); byte[] appStateData = appStateDataPB.getProto().toByteArray(); try { // currently throw all exceptions. May need to respond differently for HA // based on whether we have lost the right to write to FS writeFileWithRetries(nodeCreatePath, appStateData, true); } catch (Exception e) { LOG.info("Error storing info for app: " + appId, e); throw e; } } @Override public synchronized void updateApplicationStateInternal( ApplicationId appId, ApplicationStateData appStateDataPB) throws Exception { Path appDirPath = getAppDir(rmAppRoot, appId); Path nodeCreatePath = getNodePath(appDirPath, appId.toString()); LOG.info("Updating info for app: " + appId + " at: " + nodeCreatePath); byte[] appStateData = appStateDataPB.getProto().toByteArray(); try { // currently throw all exceptions. May need to respond differently for HA // based on whether we have lost the right to write to FS updateFile(nodeCreatePath, appStateData, true); } catch (Exception e) { LOG.info("Error updating info for app: " + appId, e); throw e; } } @Override public synchronized void storeApplicationAttemptStateInternal( ApplicationAttemptId appAttemptId, ApplicationAttemptStateData attemptStateDataPB) throws Exception { Path appDirPath = getAppDir(rmAppRoot, appAttemptId.getApplicationId()); Path nodeCreatePath = getNodePath(appDirPath, appAttemptId.toString()); LOG.info("Storing info for attempt: " + appAttemptId + " at: " + nodeCreatePath); byte[] attemptStateData = attemptStateDataPB.getProto().toByteArray(); try { // currently throw all exceptions. May need to respond differently for HA // based on whether we have lost the right to write to FS writeFileWithRetries(nodeCreatePath, attemptStateData, true); } catch (Exception e) { LOG.info("Error storing info for attempt: " + appAttemptId, e); throw e; } } @Override public synchronized void updateApplicationAttemptStateInternal( ApplicationAttemptId appAttemptId, ApplicationAttemptStateData attemptStateDataPB) throws Exception { Path appDirPath = getAppDir(rmAppRoot, appAttemptId.getApplicationId()); Path nodeCreatePath = getNodePath(appDirPath, appAttemptId.toString()); LOG.info("Updating info for attempt: " + appAttemptId + " at: " + nodeCreatePath); byte[] attemptStateData = attemptStateDataPB.getProto().toByteArray(); try { // currently throw all exceptions. May need to respond differently for HA // based on whether we have lost the right to write to FS updateFile(nodeCreatePath, attemptStateData, true); } catch (Exception e) { LOG.info("Error updating info for attempt: " + appAttemptId, e); throw e; } } @Override public synchronized void removeApplicationStateInternal(ApplicationStateData appState) throws Exception { ApplicationId appId = appState.getApplicationSubmissionContext().getApplicationId(); Path nodeRemovePath = getAppDir(rmAppRoot, appId); LOG.info("Removing info for app: " + appId + " at: " + nodeRemovePath); deleteFileWithRetries(nodeRemovePath); } @Override public synchronized void storeRMDelegationTokenState( RMDelegationTokenIdentifier identifier, Long renewDate) throws Exception { storeOrUpdateRMDelegationTokenState(identifier, renewDate, false); } @Override public synchronized void removeRMDelegationTokenState(RMDelegationTokenIdentifier identifier) throws Exception { Path nodeCreatePath = getNodePath( rmDTSecretManagerRoot, DELEGATION_TOKEN_PREFIX + identifier.getSequenceNumber()); LOG.info("Removing RMDelegationToken_" + identifier.getSequenceNumber()); deleteFileWithRetries(nodeCreatePath); } @Override protected synchronized void updateRMDelegationTokenState( RMDelegationTokenIdentifier rmDTIdentifier, Long renewDate) throws Exception { storeOrUpdateRMDelegationTokenState(rmDTIdentifier, renewDate, true); } private void storeOrUpdateRMDelegationTokenState( RMDelegationTokenIdentifier identifier, Long renewDate, boolean isUpdate) throws Exception { Path nodeCreatePath = getNodePath( rmDTSecretManagerRoot, DELEGATION_TOKEN_PREFIX + identifier.getSequenceNumber()); RMDelegationTokenIdentifierData identifierData = new RMDelegationTokenIdentifierData(identifier, renewDate); if (isUpdate) { LOG.info("Updating RMDelegationToken_" + identifier.getSequenceNumber()); updateFile(nodeCreatePath, identifierData.toByteArray(), true); } else { LOG.info("Storing RMDelegationToken_" + identifier.getSequenceNumber()); writeFileWithRetries(nodeCreatePath, identifierData.toByteArray(), true); // store sequence number Path latestSequenceNumberPath = getNodePath( rmDTSecretManagerRoot, DELEGATION_TOKEN_SEQUENCE_NUMBER_PREFIX + identifier.getSequenceNumber()); LOG.info( "Storing " + DELEGATION_TOKEN_SEQUENCE_NUMBER_PREFIX + identifier.getSequenceNumber()); if (dtSequenceNumberPath == null) { if (!createFileWithRetries(latestSequenceNumberPath)) { throw new Exception("Failed to create " + latestSequenceNumberPath); } } else { if (!renameFileWithRetries(dtSequenceNumberPath, latestSequenceNumberPath)) { throw new Exception("Failed to rename " + dtSequenceNumberPath); } } dtSequenceNumberPath = latestSequenceNumberPath; } } @Override public synchronized void storeRMDTMasterKeyState(DelegationKey masterKey) throws Exception { Path nodeCreatePath = getNodePath(rmDTSecretManagerRoot, DELEGATION_KEY_PREFIX + masterKey.getKeyId()); ByteArrayOutputStream os = new ByteArrayOutputStream(); try (DataOutputStream fsOut = new DataOutputStream(os)) { LOG.info("Storing RMDelegationKey_" + masterKey.getKeyId()); masterKey.write(fsOut); writeFileWithRetries(nodeCreatePath, os.toByteArray(), true); } } @Override public synchronized void removeRMDTMasterKeyState(DelegationKey masterKey) throws Exception { Path nodeCreatePath = getNodePath(rmDTSecretManagerRoot, DELEGATION_KEY_PREFIX + masterKey.getKeyId()); LOG.info("Removing RMDelegationKey_" + masterKey.getKeyId()); deleteFileWithRetries(nodeCreatePath); } @Override public synchronized void deleteStore() throws Exception { if (existsWithRetries(rootDirPath)) { deleteFileWithRetries(rootDirPath); } } @Override public synchronized void removeApplication(ApplicationId removeAppId) throws Exception { Path nodeRemovePath = getAppDir(rmAppRoot, removeAppId); if (existsWithRetries(nodeRemovePath)) { deleteFileWithRetries(nodeRemovePath); } } private Path getAppDir(Path root, ApplicationId appId) { return getNodePath(root, appId.toString()); } @VisibleForTesting protected Path getAppDir(ApplicationId appId) { return getAppDir(rmAppRoot, appId); } @VisibleForTesting protected Path getAppAttemptDir(ApplicationAttemptId appAttId) { return getNodePath(getAppDir(appAttId.getApplicationId()), appAttId.toString()); } // FileSystem related code private boolean checkAndRemovePartialRecordWithRetries(final Path record) throws Exception { return new FSAction<Boolean>() { @Override public Boolean run() throws Exception { return checkAndRemovePartialRecord(record); } }.runWithRetries(); } private void mkdirsWithRetries(final Path appDirPath) throws Exception { new FSAction<Void>() { @Override public Void run() throws Exception { fs.mkdirs(appDirPath); return null; } }.runWithRetries(); } private void writeFileWithRetries( final Path outputPath, final byte[] data, final boolean makeUnreadableByAdmin) throws Exception { new FSAction<Void>() { @Override public Void run() throws Exception { writeFile(outputPath, data, makeUnreadableByAdmin); return null; } }.runWithRetries(); } private void deleteFileWithRetries(final Path deletePath) throws Exception { new FSAction<Void>() { @Override public Void run() throws Exception { deleteFile(deletePath); return null; } }.runWithRetries(); } private boolean renameFileWithRetries(final Path src, final Path dst) throws Exception { return new FSAction<Boolean>() { @Override public Boolean run() throws Exception { return renameFile(src, dst); } }.runWithRetries(); } private boolean createFileWithRetries(final Path newFile) throws Exception { return new FSAction<Boolean>() { @Override public Boolean run() throws Exception { return createFile(newFile); } }.runWithRetries(); } private FileStatus getFileStatusWithRetries(final Path path) throws Exception { return new FSAction<FileStatus>() { @Override public FileStatus run() throws Exception { return getFileStatus(path); } }.runWithRetries(); } private boolean existsWithRetries(final Path path) throws Exception { return new FSAction<Boolean>() { @Override public Boolean run() throws Exception { return fs.exists(path); } }.runWithRetries(); } private byte[] readFileWithRetries(final Path inputPath, final long len) throws Exception { return new FSAction<byte[]>() { @Override public byte[] run() throws Exception { return readFile(inputPath, len); } }.runWithRetries(); } private FileStatus[] listStatusWithRetries(final Path path) throws Exception { return new FSAction<FileStatus[]>() { @Override public FileStatus[] run() throws Exception { return fs.listStatus(path); } }.runWithRetries(); } private FileStatus[] listStatusWithRetries(final Path path, final PathFilter filter) throws Exception { return new FSAction<FileStatus[]>() { @Override public FileStatus[] run() throws Exception { return fs.listStatus(path, filter); } }.runWithRetries(); } private void closeWithRetries() throws Exception { new FSAction<Void>() { @Override public Void run() throws Exception { IOUtils.closeStream(fs); return null; } }.runWithRetries(); } private abstract class FSAction<T> { abstract T run() throws Exception; T runWithRetries() throws Exception { int retry = 0; while (true) { try { return run(); } catch (IOException e) { LOG.info("Exception while executing a FS operation.", e); if (++retry > fsNumRetries) { LOG.info("Maxed out FS retries. Giving up!"); throw e; } LOG.info("Retrying operation on FS. Retry no. " + retry); Thread.sleep(fsRetryInterval); } } } } private void deleteFile(Path deletePath) throws Exception { if (!fs.delete(deletePath, true)) { throw new Exception("Failed to delete " + deletePath); } } private byte[] readFile(Path inputPath, long len) throws Exception { FSDataInputStream fsIn = null; try { fsIn = fs.open(inputPath); // state data will not be that "long" byte[] data = new byte[(int) len]; fsIn.readFully(data); return data; } finally { IOUtils.cleanup(LOG, fsIn); } } private FileStatus getFileStatus(Path path) throws Exception { try { return fs.getFileStatus(path); } catch (FileNotFoundException e) { return null; } } /* * In order to make this write atomic as a part of write we will first write * data to .tmp file and then rename it. Here we are assuming that rename is * atomic for underlying file system. */ protected void writeFile(Path outputPath, byte[] data, boolean makeUnradableByAdmin) throws Exception { Path tempPath = new Path(outputPath.getParent(), outputPath.getName() + ".tmp"); FSDataOutputStream fsOut = null; // This file will be overwritten when app/attempt finishes for saving the // final status. try { fsOut = fs.create(tempPath, true); if (makeUnradableByAdmin) { setUnreadableBySuperuserXattrib(tempPath); } fsOut.write(data); fsOut.close(); fsOut = null; fs.rename(tempPath, outputPath); } finally { IOUtils.cleanup(LOG, fsOut); } } /* * In order to make this update atomic as a part of write we will first write * data to .new file and then rename it. Here we are assuming that rename is * atomic for underlying file system. */ protected void updateFile(Path outputPath, byte[] data, boolean makeUnradableByAdmin) throws Exception { Path newPath = new Path(outputPath.getParent(), outputPath.getName() + ".new"); // use writeFileWithRetries to make sure .new file is created atomically writeFileWithRetries(newPath, data, makeUnradableByAdmin); replaceFile(newPath, outputPath); } protected void replaceFile(Path srcPath, Path dstPath) throws Exception { if (existsWithRetries(dstPath)) { deleteFileWithRetries(dstPath); } else { LOG.info("File doesn't exist. Skip deleting the file " + dstPath); } renameFileWithRetries(srcPath, dstPath); } @Private @VisibleForTesting boolean renameFile(Path src, Path dst) throws Exception { return fs.rename(src, dst); } private boolean createFile(Path newFile) throws Exception { return fs.createNewFile(newFile); } @Private @VisibleForTesting Path getNodePath(Path root, String nodeName) { return new Path(root, nodeName); } @Override public synchronized void storeOrUpdateAMRMTokenSecretManagerState( AMRMTokenSecretManagerState amrmTokenSecretManagerState, boolean isUpdate) throws Exception { Path nodeCreatePath = getNodePath(amrmTokenSecretManagerRoot, AMRMTOKEN_SECRET_MANAGER_NODE); AMRMTokenSecretManagerState data = AMRMTokenSecretManagerState.newInstance(amrmTokenSecretManagerState); byte[] stateData = data.getProto().toByteArray(); if (isUpdate) { updateFile(nodeCreatePath, stateData, true); } else { writeFileWithRetries(nodeCreatePath, stateData, true); } } @Override protected void storeReservationState( ReservationAllocationStateProto reservationAllocation, String planName, String reservationIdName) throws Exception { Path planCreatePath = getNodePath(reservationRoot, planName); mkdirsWithRetries(planCreatePath); Path reservationPath = getNodePath(planCreatePath, reservationIdName); LOG.info( "Storing state for reservation " + reservationIdName + " from " + "plan " + planName + " at path " + reservationPath); byte[] reservationData = reservationAllocation.toByteArray(); writeFileWithRetries(reservationPath, reservationData, true); } @Override protected void removeReservationState(String planName, String reservationIdName) throws Exception { Path planCreatePath = getNodePath(reservationRoot, planName); Path reservationPath = getNodePath(planCreatePath, reservationIdName); LOG.info( "Removing state for reservation " + reservationIdName + " from " + "plan " + planName + " at path " + reservationPath); deleteFileWithRetries(reservationPath); } @VisibleForTesting public int getNumRetries() { return fsNumRetries; } @VisibleForTesting public long getRetryInterval() { return fsRetryInterval; } private void setUnreadableBySuperuserXattrib(Path p) throws IOException { if (fs.getScheme().toLowerCase().contains("hdfs") && intermediateEncryptionEnabled && !fs.getXAttrs(p).containsKey(UNREADABLE_BY_SUPERUSER_XATTRIB)) { fs.setXAttr(p, UNREADABLE_BY_SUPERUSER_XATTRIB, null, EnumSet.of(XAttrSetFlag.CREATE)); } } private static class ReservationStateFileProcessor implements RMStateFileProcessor { private RMState rmState; public ReservationStateFileProcessor(RMState state) { this.rmState = state; } @Override public void processChildNode(String planName, String childNodeName, byte[] childData) throws IOException { ReservationAllocationStateProto allocationState = ReservationAllocationStateProto.parseFrom(childData); if (!rmState.getReservationState().containsKey(planName)) { rmState .getReservationState() .put(planName, new HashMap<ReservationId, ReservationAllocationStateProto>()); } ReservationId reservationId = ReservationId.parseReservationId(childNodeName); rmState.getReservationState().get(planName).put(reservationId, allocationState); } } private static class RMAppStateFileProcessor implements RMStateFileProcessor { private RMState rmState; private List<ApplicationAttemptStateData> attempts; public RMAppStateFileProcessor(RMState rmState, List<ApplicationAttemptStateData> attempts) { this.rmState = rmState; this.attempts = attempts; } @Override public void processChildNode(String appDirName, String childNodeName, byte[] childData) throws com.google.protobuf.InvalidProtocolBufferException { if (childNodeName.startsWith(ApplicationId.appIdStrPrefix)) { // application if (LOG.isDebugEnabled()) { LOG.debug("Loading application from node: " + childNodeName); } ApplicationStateDataPBImpl appState = new ApplicationStateDataPBImpl(ApplicationStateDataProto.parseFrom(childData)); ApplicationId appId = appState.getApplicationSubmissionContext().getApplicationId(); rmState.appState.put(appId, appState); } else if (childNodeName.startsWith(ApplicationAttemptId.appAttemptIdStrPrefix)) { // attempt if (LOG.isDebugEnabled()) { LOG.debug("Loading application attempt from node: " + childNodeName); } ApplicationAttemptStateDataPBImpl attemptState = new ApplicationAttemptStateDataPBImpl( ApplicationAttemptStateDataProto.parseFrom(childData)); attempts.add(attemptState); } else { LOG.info("Unknown child node with name: " + childNodeName); } } } // Interface for common state processing of directory of file layout private interface RMStateFileProcessor { void processChildNode(String appDirName, String childNodeName, byte[] childData) throws IOException; } }
public class LeveldbRMStateStore extends RMStateStore { public static final Log LOG = LogFactory.getLog(LeveldbRMStateStore.class); private static final String SEPARATOR = "/"; private static final String DB_NAME = "yarn-rm-state"; private static final String RM_DT_MASTER_KEY_KEY_PREFIX = RM_DT_SECRET_MANAGER_ROOT + SEPARATOR + DELEGATION_KEY_PREFIX; private static final String RM_DT_TOKEN_KEY_PREFIX = RM_DT_SECRET_MANAGER_ROOT + SEPARATOR + DELEGATION_TOKEN_PREFIX; private static final String RM_DT_SEQUENCE_NUMBER_KEY = RM_DT_SECRET_MANAGER_ROOT + SEPARATOR + "RMDTSequentialNumber"; private static final String RM_APP_KEY_PREFIX = RM_APP_ROOT + SEPARATOR + ApplicationId.appIdStrPrefix; private static final Version CURRENT_VERSION_INFO = Version.newInstance(1, 0); private DB db; private String getApplicationNodeKey(ApplicationId appId) { return RM_APP_ROOT + SEPARATOR + appId; } private String getApplicationAttemptNodeKey(ApplicationAttemptId attemptId) { return getApplicationAttemptNodeKey( getApplicationNodeKey(attemptId.getApplicationId()), attemptId); } private String getApplicationAttemptNodeKey(String appNodeKey, ApplicationAttemptId attemptId) { return appNodeKey + SEPARATOR + attemptId; } private String getRMDTMasterKeyNodeKey(DelegationKey masterKey) { return RM_DT_MASTER_KEY_KEY_PREFIX + masterKey.getKeyId(); } private String getRMDTTokenNodeKey(RMDelegationTokenIdentifier tokenId) { return RM_DT_TOKEN_KEY_PREFIX + tokenId.getSequenceNumber(); } @Override protected void initInternal(Configuration conf) throws Exception {} private Path getStorageDir() throws IOException { Configuration conf = getConfig(); String storePath = conf.get(YarnConfiguration.RM_LEVELDB_STORE_PATH); if (storePath == null) { throw new IOException( "No store location directory configured in " + YarnConfiguration.RM_LEVELDB_STORE_PATH); } return new Path(storePath, DB_NAME); } private Path createStorageDir() throws IOException { Path root = getStorageDir(); FileSystem fs = FileSystem.getLocal(getConfig()); fs.mkdirs(root, new FsPermission((short) 0700)); return root; } @Override protected void startInternal() throws Exception { Path storeRoot = createStorageDir(); Options options = new Options(); options.createIfMissing(false); options.logger(new LeveldbLogger()); LOG.info("Using state database at " + storeRoot + " for recovery"); File dbfile = new File(storeRoot.toString()); try { db = JniDBFactory.factory.open(dbfile, options); } catch (NativeDB.DBException e) { if (e.isNotFound() || e.getMessage().contains(" does not exist ")) { LOG.info("Creating state database at " + dbfile); options.createIfMissing(true); try { db = JniDBFactory.factory.open(dbfile, options); // store version storeVersion(); } catch (DBException dbErr) { throw new IOException(dbErr.getMessage(), dbErr); } } else { throw e; } } } @Override protected void closeInternal() throws Exception { if (db != null) { db.close(); db = null; } } @VisibleForTesting boolean isClosed() { return db == null; } @Override protected Version loadVersion() throws Exception { Version version = null; try { byte[] data = db.get(bytes(VERSION_NODE)); if (data != null) { version = new VersionPBImpl(VersionProto.parseFrom(data)); } } catch (DBException e) { throw new IOException(e); } return version; } @Override protected void storeVersion() throws Exception { dbStoreVersion(CURRENT_VERSION_INFO); } void dbStoreVersion(Version state) throws IOException { String key = VERSION_NODE; byte[] data = ((VersionPBImpl) state).getProto().toByteArray(); try { db.put(bytes(key), data); } catch (DBException e) { throw new IOException(e); } } @Override protected Version getCurrentVersion() { return CURRENT_VERSION_INFO; } @Override public synchronized long getAndIncrementEpoch() throws Exception { long currentEpoch = 0; byte[] dbKeyBytes = bytes(EPOCH_NODE); try { byte[] data = db.get(dbKeyBytes); if (data != null) { currentEpoch = EpochProto.parseFrom(data).getEpoch(); } EpochProto proto = Epoch.newInstance(currentEpoch + 1).getProto(); db.put(dbKeyBytes, proto.toByteArray()); } catch (DBException e) { throw new IOException(e); } return currentEpoch; } @Override public RMState loadState() throws Exception { RMState rmState = new RMState(); loadRMDTSecretManagerState(rmState); loadRMApps(rmState); loadAMRMTokenSecretManagerState(rmState); return rmState; } private void loadRMDTSecretManagerState(RMState state) throws IOException { int numKeys = loadRMDTSecretManagerKeys(state); LOG.info("Recovered " + numKeys + " RM delegation token master keys"); int numTokens = loadRMDTSecretManagerTokens(state); LOG.info("Recovered " + numTokens + " RM delegation tokens"); loadRMDTSecretManagerTokenSequenceNumber(state); } private int loadRMDTSecretManagerKeys(RMState state) throws IOException { int numKeys = 0; LeveldbIterator iter = null; try { iter = new LeveldbIterator(db); iter.seek(bytes(RM_DT_MASTER_KEY_KEY_PREFIX)); while (iter.hasNext()) { Entry<byte[], byte[]> entry = iter.next(); String key = asString(entry.getKey()); if (!key.startsWith(RM_DT_MASTER_KEY_KEY_PREFIX)) { break; } DelegationKey masterKey = loadDelegationKey(entry.getValue()); state.rmSecretManagerState.masterKeyState.add(masterKey); ++numKeys; if (LOG.isDebugEnabled()) { LOG.debug( "Loaded RM delegation key from " + key + ": keyId=" + masterKey.getKeyId() + ", expirationDate=" + masterKey.getExpiryDate()); } } } catch (DBException e) { throw new IOException(e); } finally { if (iter != null) { iter.close(); } } return numKeys; } private DelegationKey loadDelegationKey(byte[] data) throws IOException { DelegationKey key = new DelegationKey(); DataInputStream in = new DataInputStream(new ByteArrayInputStream(data)); try { key.readFields(in); } finally { IOUtils.cleanup(LOG, in); } return key; } private int loadRMDTSecretManagerTokens(RMState state) throws IOException { int numTokens = 0; LeveldbIterator iter = null; try { iter = new LeveldbIterator(db); iter.seek(bytes(RM_DT_TOKEN_KEY_PREFIX)); while (iter.hasNext()) { Entry<byte[], byte[]> entry = iter.next(); String key = asString(entry.getKey()); if (!key.startsWith(RM_DT_TOKEN_KEY_PREFIX)) { break; } RMDelegationTokenIdentifierData tokenData = loadDelegationToken(entry.getValue()); RMDelegationTokenIdentifier tokenId = tokenData.getTokenIdentifier(); long renewDate = tokenData.getRenewDate(); state.rmSecretManagerState.delegationTokenState.put(tokenId, renewDate); ++numTokens; if (LOG.isDebugEnabled()) { LOG.debug( "Loaded RM delegation token from " + key + ": tokenId=" + tokenId + ", renewDate=" + renewDate); } } } catch (DBException e) { throw new IOException(e); } finally { if (iter != null) { iter.close(); } } return numTokens; } private RMDelegationTokenIdentifierData loadDelegationToken(byte[] data) throws IOException { RMDelegationTokenIdentifierData tokenData = new RMDelegationTokenIdentifierData(); DataInputStream in = new DataInputStream(new ByteArrayInputStream(data)); try { tokenData.readFields(in); } finally { IOUtils.cleanup(LOG, in); } return tokenData; } private void loadRMDTSecretManagerTokenSequenceNumber(RMState state) throws IOException { byte[] data = null; try { data = db.get(bytes(RM_DT_SEQUENCE_NUMBER_KEY)); } catch (DBException e) { throw new IOException(e); } if (data != null) { DataInputStream in = new DataInputStream(new ByteArrayInputStream(data)); try { state.rmSecretManagerState.dtSequenceNumber = in.readInt(); } finally { IOUtils.cleanup(LOG, in); } } } private void loadRMApps(RMState state) throws IOException { int numApps = 0; int numAppAttempts = 0; LeveldbIterator iter = null; try { iter = new LeveldbIterator(db); iter.seek(bytes(RM_APP_KEY_PREFIX)); while (iter.hasNext()) { Entry<byte[], byte[]> entry = iter.next(); String key = asString(entry.getKey()); if (!key.startsWith(RM_APP_KEY_PREFIX)) { break; } String appIdStr = key.substring(RM_APP_ROOT.length() + 1); if (appIdStr.contains(SEPARATOR)) { LOG.warn("Skipping extraneous data " + key); continue; } numAppAttempts += loadRMApp(state, iter, appIdStr, entry.getValue()); ++numApps; } } catch (DBException e) { throw new IOException(e); } finally { if (iter != null) { iter.close(); } } LOG.info( "Recovered " + numApps + " applications and " + numAppAttempts + " application attempts"); } private int loadRMApp(RMState rmState, LeveldbIterator iter, String appIdStr, byte[] appData) throws IOException { ApplicationStateData appState = createApplicationState(appIdStr, appData); ApplicationId appId = appState.getApplicationSubmissionContext().getApplicationId(); rmState.appState.put(appId, appState); String attemptNodePrefix = getApplicationNodeKey(appId) + SEPARATOR; while (iter.hasNext()) { Entry<byte[], byte[]> entry = iter.peekNext(); String key = asString(entry.getKey()); if (!key.startsWith(attemptNodePrefix)) { break; } String attemptId = key.substring(attemptNodePrefix.length()); if (attemptId.startsWith(ApplicationAttemptId.appAttemptIdStrPrefix)) { ApplicationAttemptStateData attemptState = createAttemptState(attemptId, entry.getValue()); appState.attempts.put(attemptState.getAttemptId(), attemptState); } else { LOG.warn("Ignoring unknown application key: " + key); } iter.next(); } int numAttempts = appState.attempts.size(); if (LOG.isDebugEnabled()) { LOG.debug("Loaded application " + appId + " with " + numAttempts + " attempts"); } return numAttempts; } private ApplicationStateData createApplicationState(String appIdStr, byte[] data) throws IOException { ApplicationId appId = ConverterUtils.toApplicationId(appIdStr); ApplicationStateDataPBImpl appState = new ApplicationStateDataPBImpl(ApplicationStateDataProto.parseFrom(data)); if (!appId.equals(appState.getApplicationSubmissionContext().getApplicationId())) { throw new YarnRuntimeException( "The database entry for " + appId + " contains data for " + appState.getApplicationSubmissionContext().getApplicationId()); } return appState; } @VisibleForTesting ApplicationStateData loadRMAppState(ApplicationId appId) throws IOException { String appKey = getApplicationNodeKey(appId); byte[] data = null; try { data = db.get(bytes(appKey)); } catch (DBException e) { throw new IOException(e); } if (data == null) { return null; } return createApplicationState(appId.toString(), data); } private ApplicationAttemptStateData createAttemptState(String itemName, byte[] data) throws IOException { ApplicationAttemptId attemptId = ConverterUtils.toApplicationAttemptId(itemName); ApplicationAttemptStateDataPBImpl attemptState = new ApplicationAttemptStateDataPBImpl(ApplicationAttemptStateDataProto.parseFrom(data)); if (!attemptId.equals(attemptState.getAttemptId())) { throw new YarnRuntimeException( "The database entry for " + attemptId + " contains data for " + attemptState.getAttemptId()); } return attemptState; } private void loadAMRMTokenSecretManagerState(RMState rmState) throws IOException { try { byte[] data = db.get(bytes(AMRMTOKEN_SECRET_MANAGER_ROOT)); if (data != null) { AMRMTokenSecretManagerStatePBImpl stateData = new AMRMTokenSecretManagerStatePBImpl(AMRMTokenSecretManagerStateProto.parseFrom(data)); rmState.amrmTokenSecretManagerState = AMRMTokenSecretManagerState.newInstance( stateData.getCurrentMasterKey(), stateData.getNextMasterKey()); } } catch (DBException e) { throw new IOException(e); } } @Override protected void storeApplicationStateInternal( ApplicationId appId, ApplicationStateData appStateData) throws IOException { String key = getApplicationNodeKey(appId); if (LOG.isDebugEnabled()) { LOG.debug("Storing state for app " + appId + " at " + key); } try { db.put(bytes(key), appStateData.getProto().toByteArray()); } catch (DBException e) { throw new IOException(e); } } @Override protected void updateApplicationStateInternal( ApplicationId appId, ApplicationStateData appStateData) throws IOException { storeApplicationStateInternal(appId, appStateData); } @Override protected void storeApplicationAttemptStateInternal( ApplicationAttemptId attemptId, ApplicationAttemptStateData attemptStateData) throws IOException { String key = getApplicationAttemptNodeKey(attemptId); if (LOG.isDebugEnabled()) { LOG.debug("Storing state for attempt " + attemptId + " at " + key); } try { db.put(bytes(key), attemptStateData.getProto().toByteArray()); } catch (DBException e) { throw new IOException(e); } } @Override protected void updateApplicationAttemptStateInternal( ApplicationAttemptId attemptId, ApplicationAttemptStateData attemptStateData) throws IOException { storeApplicationAttemptStateInternal(attemptId, attemptStateData); } @Override protected void removeApplicationStateInternal(ApplicationStateData appState) throws IOException { ApplicationId appId = appState.getApplicationSubmissionContext().getApplicationId(); String appKey = getApplicationNodeKey(appId); try { WriteBatch batch = db.createWriteBatch(); try { batch.delete(bytes(appKey)); for (ApplicationAttemptId attemptId : appState.attempts.keySet()) { String attemptKey = getApplicationAttemptNodeKey(appKey, attemptId); batch.delete(bytes(attemptKey)); } if (LOG.isDebugEnabled()) { LOG.debug( "Removing state for app " + appId + " and " + appState.attempts.size() + " attempts" + " at " + appKey); } db.write(batch); } finally { batch.close(); } } catch (DBException e) { throw new IOException(e); } } private void storeOrUpdateRMDT( RMDelegationTokenIdentifier tokenId, Long renewDate, boolean isUpdate) throws IOException { String tokenKey = getRMDTTokenNodeKey(tokenId); RMDelegationTokenIdentifierData tokenData = new RMDelegationTokenIdentifierData(tokenId, renewDate); if (LOG.isDebugEnabled()) { LOG.debug("Storing token to " + tokenKey); } try { WriteBatch batch = db.createWriteBatch(); try { batch.put(bytes(tokenKey), tokenData.toByteArray()); if (!isUpdate) { ByteArrayOutputStream bs = new ByteArrayOutputStream(); try (DataOutputStream ds = new DataOutputStream(bs)) { ds.writeInt(tokenId.getSequenceNumber()); } if (LOG.isDebugEnabled()) { LOG.debug( "Storing " + tokenId.getSequenceNumber() + " to " + RM_DT_SEQUENCE_NUMBER_KEY); } batch.put(bytes(RM_DT_SEQUENCE_NUMBER_KEY), bs.toByteArray()); } db.write(batch); } finally { batch.close(); } } catch (DBException e) { throw new IOException(e); } } @Override protected void storeRMDelegationTokenState(RMDelegationTokenIdentifier tokenId, Long renewDate) throws IOException { storeOrUpdateRMDT(tokenId, renewDate, false); } @Override protected void updateRMDelegationTokenState(RMDelegationTokenIdentifier tokenId, Long renewDate) throws IOException { storeOrUpdateRMDT(tokenId, renewDate, true); } @Override protected void removeRMDelegationTokenState(RMDelegationTokenIdentifier tokenId) throws IOException { String tokenKey = getRMDTTokenNodeKey(tokenId); if (LOG.isDebugEnabled()) { LOG.debug("Removing token at " + tokenKey); } try { db.delete(bytes(tokenKey)); } catch (DBException e) { throw new IOException(e); } } @Override protected void storeRMDTMasterKeyState(DelegationKey masterKey) throws IOException { String dbKey = getRMDTMasterKeyNodeKey(masterKey); if (LOG.isDebugEnabled()) { LOG.debug("Storing token master key to " + dbKey); } ByteArrayOutputStream os = new ByteArrayOutputStream(); DataOutputStream out = new DataOutputStream(os); try { masterKey.write(out); } finally { out.close(); } try { db.put(bytes(dbKey), os.toByteArray()); } catch (DBException e) { throw new IOException(e); } } @Override protected void removeRMDTMasterKeyState(DelegationKey masterKey) throws IOException { String dbKey = getRMDTMasterKeyNodeKey(masterKey); if (LOG.isDebugEnabled()) { LOG.debug("Removing token master key at " + dbKey); } try { db.delete(bytes(dbKey)); } catch (DBException e) { throw new IOException(e); } } @Override public void storeOrUpdateAMRMTokenSecretManagerState( AMRMTokenSecretManagerState state, boolean isUpdate) { AMRMTokenSecretManagerState data = AMRMTokenSecretManagerState.newInstance(state); byte[] stateData = data.getProto().toByteArray(); db.put(bytes(AMRMTOKEN_SECRET_MANAGER_ROOT), stateData); } @Override public void deleteStore() throws IOException { Path root = getStorageDir(); LOG.info("Deleting state database at " + root); db.close(); db = null; FileSystem fs = FileSystem.getLocal(getConfig()); fs.delete(root, true); } @VisibleForTesting int getNumEntriesInDatabase() throws IOException { int numEntries = 0; LeveldbIterator iter = null; try { iter = new LeveldbIterator(db); iter.seekToFirst(); while (iter.hasNext()) { Entry<byte[], byte[]> entry = iter.next(); LOG.info("entry: " + asString(entry.getKey())); ++numEntries; } } catch (DBException e) { throw new IOException(e); } finally { if (iter != null) { iter.close(); } } return numEntries; } private static class LeveldbLogger implements Logger { private static final Log LOG = LogFactory.getLog(LeveldbLogger.class); @Override public void log(String message) { LOG.info(message); } } }