/**
 * Heartbeat loop of the worker.
 *
 * <p>Until {@code mStop} is set, each iteration:
 *
 * <ol>
 *   <li>waits so that heartbeats are spaced by {@code TO_MASTER_HEARTBEAT_INTERVAL_MS}, or logs
 *       an error if the previous iteration already took longer than that;
 *   <li>sends a heartbeat through {@code mWorkerStorage} and records when it succeeded;
 *   <li>executes the command returned by the master, if any;
 *   <li>lets the worker storage run its periodic status check.
 * </ol>
 *
 * <p>When a heartbeat fails with an {@link IOException} the master client is reset and the loop
 * retries after one second.
 *
 * @throws RuntimeException if no heartbeat has succeeded for {@code HEARTBEAT_TIMEOUT_MS} or
 *     longer (the last {@link IOException} is attached as the cause), or if the master sends a
 *     command type this loop does not recognize
 */
@Override
public void run() {
  long lastHeartbeatMs = System.currentTimeMillis();
  Command cmd = null;
  while (!mStop) {
    long diff = System.currentTimeMillis() - lastHeartbeatMs;
    if (diff < WorkerConf.get().TO_MASTER_HEARTBEAT_INTERVAL_MS) {
      LOG.debug("Heartbeat process takes {} ms.", diff);
      CommonUtils.sleepMs(LOG, WorkerConf.get().TO_MASTER_HEARTBEAT_INTERVAL_MS - diff);
    } else {
      LOG.error("Heartbeat process takes " + diff + " ms.");
    }

    try {
      cmd = mWorkerStorage.heartbeat();
      lastHeartbeatMs = System.currentTimeMillis();
    } catch (IOException e) {
      LOG.error(e.getMessage(), e);
      mWorkerStorage.resetMasterClient();
      CommonUtils.sleepMs(LOG, Constants.SECOND_MS);
      cmd = null;
      // Measure once so that the value reported is the value that was checked.
      long sinceLastHeartbeatMs = System.currentTimeMillis() - lastHeartbeatMs;
      if (sinceLastHeartbeatMs >= WorkerConf.get().HEARTBEAT_TIMEOUT_MS) {
        // Keep the underlying failure as the cause so the reason for the timeout is not lost.
        throw new RuntimeException("Heartbeat timeout " + sinceLastHeartbeatMs + "ms", e);
      }
    }

    if (cmd != null) {
      switch (cmd.mCommandType) {
        case Unknown:
          LOG.error("Unknown command: " + cmd);
          break;
        case Nothing:
          LOG.debug("Nothing command: {}", cmd);
          break;
        case Register:
          LOG.info("Register command: " + cmd);
          mWorkerStorage.register();
          break;
        case Free:
          mWorkerStorage.freeBlocks(cmd.mData);
          LOG.info("Free command: " + cmd);
          break;
        case Delete:
          // Only logged here; no delete action is performed by this loop.
          LOG.info("Delete command: " + cmd);
          break;
        default:
          throw new RuntimeException("Un-recognized command from master " + cmd.toString());
      }
    }

    mWorkerStorage.checkStatus();
  }
}
/**
 * Creates the data server implementation selected by {@code WorkerConf.get().NETWORK_TYPE}.
 *
 * @param dataAddress the address handed to the data server
 * @param blockLocker the block locker handed to the data server
 * @return a new {@code NIODataServer} or {@code NettyDataServer}, depending on the network type
 * @throws AssertionError if the configured network type is neither NIO nor NETTY
 */
private DataServer createDataServer(
    final InetSocketAddress dataAddress, final BlocksLocker blockLocker) {
  final DataServer server;
  switch (WorkerConf.get().NETWORK_TYPE) {
    case NIO:
      server = new NIODataServer(dataAddress, blockLocker);
      break;
    case NETTY:
      server = new NettyDataServer(dataAddress, blockLocker);
      break;
    default:
      throw new AssertionError("Unknown network type: " + WorkerConf.get().NETWORK_TYPE);
  }
  return server;
}
/**
 * Entry point of the worker process.
 *
 * <p>Accepts at most one argument, the master location. With more than one argument the usage
 * line is logged and the JVM exits with status -1. Otherwise a worker is created from the values
 * in {@code WorkerConf} and started.
 *
 * @param args optional single element {@code <MasterHost:Port>}
 * @throws UnknownHostException declared for resolving the local host name
 * @throws RuntimeException wrapping any exception thrown while starting the worker
 */
public static void main(String[] args) throws UnknownHostException {
  if (args.length > 1) {
    final String usage =
        "Usage: java -cp target/tachyon-"
            + Version.VERSION
            + "-jar-with-dependencies.jar "
            + "tachyon.Worker [<MasterHost:Port>]";
    LOG.info(usage);
    System.exit(-1);
  }

  final WorkerConf conf = WorkerConf.get();

  final String localHost = NetworkUtils.getLocalHostName();
  LOG.info("Resolved local TachyonWorker host to " + localHost);

  final String masterLocation = getMasterLocation(args);
  final String workerLocation = localHost + ":" + conf.PORT;
  final TachyonWorker worker =
      TachyonWorker.createWorker(
          masterLocation,
          workerLocation,
          conf.DATA_PORT,
          conf.SELECTOR_THREADS,
          conf.QUEUE_SIZE_PER_SELECTOR,
          conf.SERVER_THREADS,
          conf.DATA_FOLDER,
          conf.MEMORY_SIZE);

  try {
    worker.start();
  } catch (Exception e) {
    LOG.error("Uncaught exception terminating worker", e);
    throw new RuntimeException(e);
  }
}
/** * Main logic behind the worker process. * * <p>This object is lazily initialized. Before an object of this call should be used, {@link * #initialize()} must be called. * * @param masterAddress The TachyonMaster's address * @param dataFolder This TachyonWorker's local folder's path * @param memoryCapacityBytes The maximum memory space this TachyonWorker can use, in bytes */ public WorkerStorage( InetSocketAddress masterAddress, String dataFolder, long memoryCapacityBytes) { COMMON_CONF = CommonConf.get(); mMasterAddress = masterAddress; mMasterClient = new MasterClient(mMasterAddress); mLocalDataFolder = new File(dataFolder); mWorkerSpaceCounter = new WorkerSpaceCounter(memoryCapacityBytes); mLocalUserFolder = new File(mLocalDataFolder, WorkerConf.get().USER_TEMP_RELATIVE_FOLDER); }
/**
 * Logs in to the under file system using the keytab and principal from {@code WorkerConf}.
 *
 * <p>Does nothing when either the keytab or the principal is not configured, or when the under
 * file system is not an {@code UnderFileSystemHdfs}.
 *
 * @throws IOException declared for the under file system login call
 */
private void login() throws IOException {
  final WorkerConf conf = WorkerConf.get();
  final boolean credentialsConfigured = conf.KEYTAB != null && conf.PRINCIPAL != null;
  if (!credentialsConfigured) {
    return;
  }

  final UnderFileSystem ufs = UnderFileSystem.get(CommonConf.get().UNDERFS_ADDRESS);
  if (!(ufs instanceof UnderFileSystemHdfs)) {
    return;
  }

  final UnderFileSystemHdfs hdfs = (UnderFileSystemHdfs) ufs;
  hdfs.login(
      conf.KEYTAB_KEY,
      conf.KEYTAB,
      conf.PRINCIPAL_KEY,
      conf.PRINCIPAL,
      NetworkUtils.getFqdnHost(mWorkerAddress));
}
/**
 * Completes the set-up of this worker storage.
 *
 * <p>Registers with the master, derives the under file system folders from the worker id,
 * starts the configured number of checkpoint threads and then initializes the local storage.
 *
 * @param address the address of this worker, used when registering with the master
 * @throws RuntimeException (via {@code Throwables.propagate}) if initializing the local storage
 *     fails
 */
public void initialize(final NetAddress address) {
  mWorkerAddress = address;
  register();

  // These paths depend on mWorkerId, which register() has just assigned.
  mUnderfsWorkerFolder = CommonUtils.concat(COMMON_CONF.UNDERFS_WORKERS_FOLDER, mWorkerId);
  mUnderfsWorkerDataFolder = mUnderfsWorkerFolder + "/data";
  mUnderFs = UnderFileSystem.get(COMMON_CONF.UNDERFS_ADDRESS);
  mUsers = new Users(mLocalUserFolder.toString(), mUnderfsWorkerFolder);

  int threadIndex = 0;
  while (threadIndex < WorkerConf.get().WORKER_CHECKPOINT_THREADS) {
    final Thread checkpointer = new Thread(new CheckpointThread(threadIndex));
    mCheckpointThreads.add(checkpointer);
    checkpointer.start();
    threadIndex++;
  }

  try {
    initializeWorkerStorage();
  } catch (IOException e) {
    throw Throwables.propagate(e);
  } catch (FileDoesNotExistException e) {
    throw Throwables.propagate(e);
  } catch (SuspectedFileSizeException e) {
    throw Throwables.propagate(e);
  } catch (BlockInfoException e) {
    throw Throwables.propagate(e);
  } catch (TException e) {
    throw Throwables.propagate(e);
  }

  final String workerInfo =
      "Current Worker Info: ID "
          + mWorkerId
          + ", ADDRESS: "
          + mWorkerAddress
          + ", MemoryCapacityBytes: "
          + mWorkerSpaceCounter.getCapacityBytes();
  LOG.info(workerInfo);
}
/**
 * Determines the master location ({@code host:port}) the worker should use.
 *
 * <p>Without arguments the location from the configuration is returned. Otherwise the first
 * argument is used; the configured master port is appended when the argument contains no
 * {@code ":"}, and a warning is logged when the result differs from the configured location.
 *
 * @param args the command line arguments of the worker
 * @return the master location
 */
private static String getMasterLocation(String[] args) {
  final WorkerConf conf = WorkerConf.get();
  final String configured = conf.MASTER_HOSTNAME + ":" + conf.MASTER_PORT;

  if (args.length < 1) {
    return configured;
  }

  String requested = args[0];
  if (!requested.contains(":")) {
    requested = requested + ":" + conf.MASTER_PORT;
  }

  if (!requested.equals(configured)) {
    LOG.warn(
        "Master Address in configuration file("
            + configured
            + ") is different "
            + "from the command line one("
            + requested
            + ").");
  }
  return requested;
}
/** The structure to store a worker's information in worker node. */ public class WorkerStorage { /** The CheckpointThread, used to checkpoint the files belong to the worker. */ public class CheckpointThread implements Runnable { private final Logger LOG = Logger.getLogger(Constants.LOGGER_TYPE); private final int ID; private UnderFileSystem mCheckpointUnderFs = null; public CheckpointThread(int id) { ID = id; } // This method assumes the mDependencyLock has been acquired. private int getFileIdBasedOnPriorityDependency() throws TException { if (mPriorityDependencies.isEmpty()) { return -1; } for (int depId : mPriorityDependencies) { int fileId = getFileIdFromOneDependency(depId); if (fileId != -1) { return fileId; } } return -1; } // This method assumes the mDependencyLock has been acquired. private int getFileIdFromOneDependency(int depId) throws TException { Set<Integer> fileIds = mDepIdToFiles.get(depId); if (fileIds != null && !fileIds.isEmpty()) { int fileId = fileIds.iterator().next(); fileIds.remove(fileId); mUncheckpointFiles.remove(fileId); if (fileIds.isEmpty()) { mDepIdToFiles.remove(depId); } return fileId; } return -1; } // This method assumes the mDependencyLock has been acquired. 
private int getRandomUncheckpointedFile() throws TException { if (mUncheckpointFiles.isEmpty()) { return -1; } for (int depId : mDepIdToFiles.keySet()) { int fileId = getFileIdFromOneDependency(depId); if (fileId != -1) { return fileId; } } return -1; } private List<Integer> getSortedPriorityDependencyList() throws TException { List<Integer> ret = mMasterClient.worker_getPriorityDependencyList(); for (int i = 0; i < ret.size(); i++) { for (int j = i + 1; j < ret.size(); j++) { if (ret.get(i) < ret.get(j)) { int k = ret.get(i); ret.set(i, ret.get(j)); ret.set(j, k); } } } return ret; } @Override public void run() { while (true) { try { int fileId = -1; synchronized (mDependencyLock) { fileId = getFileIdBasedOnPriorityDependency(); if (fileId == -1) { if (mPriorityDependencies.size() == 0) { mPriorityDependencies = getSortedPriorityDependencyList(); if (!mPriorityDependencies.isEmpty()) { LOG.info( "Get new mPriorityDependencies " + CommonUtils.listToString(mPriorityDependencies)); } } else { List<Integer> tList = getSortedPriorityDependencyList(); boolean equal = true; if (mPriorityDependencies.size() != tList.size()) { equal = false; } if (equal) { for (int k = 0; k < tList.size(); k++) { if (tList.get(k) != mPriorityDependencies.get(k)) { equal = false; break; } } } if (!equal) { mPriorityDependencies = tList; } } fileId = getFileIdBasedOnPriorityDependency(); } if (fileId == -1) { fileId = getRandomUncheckpointedFile(); } } if (fileId == -1) { LOG.debug("Thread " + ID + " has nothing to checkpoint. Sleep for 1 sec."); CommonUtils.sleepMs(LOG, Constants.SECOND_MS); continue; } // TODO checkpoint process. 
In future, move from midPath to dstPath should be done by // master String midPath = CommonUtils.concat(mUnderfsWorkerDataFolder, fileId); String dstPath = CommonUtils.concat(CommonConf.get().UNDERFS_DATA_FOLDER, fileId); LOG.info( "Thread " + ID + " is checkpointing file " + fileId + " from " + mLocalDataFolder.toString() + " to " + midPath + " to " + dstPath); if (mCheckpointUnderFs == null) { mCheckpointUnderFs = UnderFileSystem.get(midPath); } long startCopyTimeMs = System.currentTimeMillis(); ClientFileInfo fileInfo = mMasterClient.getClientFileInfoById(fileId); if (!fileInfo.isComplete) { LOG.error("File " + fileInfo + " is not complete!"); continue; } for (int k = 0; k < fileInfo.blockIds.size(); k++) { lockBlock(fileInfo.blockIds.get(k), Users.sCHECKPOINT_USER_ID); } OutputStream os = mCheckpointUnderFs.create(midPath, (int) fileInfo.getBlockSizeByte()); long fileSizeByte = 0; for (int k = 0; k < fileInfo.blockIds.size(); k++) { File tempFile = new File(CommonUtils.concat(mLocalDataFolder.toString(), fileInfo.blockIds.get(k))); fileSizeByte += tempFile.length(); InputStream is = new FileInputStream(tempFile); byte[] buf = new byte[16 * Constants.KB]; int got = is.read(buf); while (got != -1) { os.write(buf, 0, got); got = is.read(buf); } is.close(); } os.close(); if (!mCheckpointUnderFs.rename(midPath, dstPath)) { LOG.error("Failed to rename from " + midPath + " to " + dstPath); } mMasterClient.addCheckpoint(mWorkerId, fileId, fileSizeByte, dstPath); for (int k = 0; k < fileInfo.blockIds.size(); k++) { unlockBlock(fileInfo.blockIds.get(k), Users.sCHECKPOINT_USER_ID); } long shouldTakeMs = (long) (1000.0 * fileSizeByte / Constants.MB / WorkerConf.get().WORKER_PER_THREAD_CHECKPOINT_CAP_MB_SEC); long currentTimeMs = System.currentTimeMillis(); if (startCopyTimeMs + shouldTakeMs > currentTimeMs) { long shouldSleepMs = startCopyTimeMs + shouldTakeMs - currentTimeMs; LOG.info( "Checkpointed last file " + fileId + " took " + (currentTimeMs - startCopyTimeMs) + " 
ms. Need to sleep " + shouldSleepMs + " ms."); CommonUtils.sleepMs(LOG, shouldSleepMs); } } catch (FileDoesNotExistException e) { LOG.warn(e); } catch (SuspectedFileSizeException e) { LOG.error(e); } catch (BlockInfoException e) { LOG.error(e); } catch (IOException e) { LOG.error(e); } catch (TException e) { LOG.warn(e); } } } } private final Logger LOG = Logger.getLogger(Constants.LOGGER_TYPE); private final CommonConf COMMON_CONF; private volatile MasterClient mMasterClient; private InetSocketAddress mMasterAddress; private NetAddress mWorkerAddress; private WorkerSpaceCounter mWorkerSpaceCounter; private long mWorkerId; private Set<Long> mMemoryData = new HashSet<Long>(); private Map<Long, Long> mBlockSizes = new HashMap<Long, Long>(); private Map<Long, Long> mLatestBlockAccessTimeMs = new HashMap<Long, Long>(); private Map<Long, Set<Long>> mUsersPerLockedBlock = new HashMap<Long, Set<Long>>(); private Map<Long, Set<Long>> mLockedBlocksPerUser = new HashMap<Long, Set<Long>>(); private BlockingQueue<Long> mRemovedBlockList = new ArrayBlockingQueue<Long>(Constants.WORKER_BLOCKS_QUEUE_SIZE); private BlockingQueue<Long> mAddedBlockList = new ArrayBlockingQueue<Long>(Constants.WORKER_BLOCKS_QUEUE_SIZE); private File mLocalDataFolder; private File mLocalUserFolder; private String mUnderfsWorkerFolder; private String mUnderfsWorkerDataFolder; private String mUnderfsOrphansFolder; private UnderFileSystem mUnderFs; private Users mUsers; // Dependency related lock private Object mDependencyLock = new Object(); private Set<Integer> mUncheckpointFiles = new HashSet<Integer>(); // From dependencyId to files in that set. private Map<Integer, Set<Integer>> mDepIdToFiles = new HashMap<Integer, Set<Integer>>(); private List<Integer> mPriorityDependencies = new ArrayList<Integer>(); private ArrayList<Thread> mCheckpointThreads = new ArrayList<Thread>(WorkerConf.get().WORKER_CHECKPOINT_THREADS); /** * Main logic behind the worker process. * * <p>This object is lazily initialized. 
Before an object of this call should be used, {@link * #initialize()} must be called. * * @param masterAddress The TachyonMaster's address * @param dataFolder This TachyonWorker's local folder's path * @param memoryCapacityBytes The maximum memory space this TachyonWorker can use, in bytes */ public WorkerStorage( InetSocketAddress masterAddress, String dataFolder, long memoryCapacityBytes) { COMMON_CONF = CommonConf.get(); mMasterAddress = masterAddress; mMasterClient = new MasterClient(mMasterAddress); mLocalDataFolder = new File(dataFolder); mWorkerSpaceCounter = new WorkerSpaceCounter(memoryCapacityBytes); mLocalUserFolder = new File(mLocalDataFolder, WorkerConf.get().USER_TEMP_RELATIVE_FOLDER); } public void initialize(final NetAddress address) { mWorkerAddress = address; register(); mUnderfsWorkerFolder = CommonUtils.concat(COMMON_CONF.UNDERFS_WORKERS_FOLDER, mWorkerId); mUnderfsWorkerDataFolder = mUnderfsWorkerFolder + "/data"; mUnderFs = UnderFileSystem.get(COMMON_CONF.UNDERFS_ADDRESS); mUsers = new Users(mLocalUserFolder.toString(), mUnderfsWorkerFolder); for (int k = 0; k < WorkerConf.get().WORKER_CHECKPOINT_THREADS; k++) { Thread thread = new Thread(new CheckpointThread(k)); mCheckpointThreads.add(thread); thread.start(); } try { initializeWorkerStorage(); } catch (IOException e) { throw Throwables.propagate(e); } catch (FileDoesNotExistException e) { throw Throwables.propagate(e); } catch (SuspectedFileSizeException e) { throw Throwables.propagate(e); } catch (BlockInfoException e) { throw Throwables.propagate(e); } catch (TException e) { throw Throwables.propagate(e); } LOG.info( "Current Worker Info: ID " + mWorkerId + ", ADDRESS: " + mWorkerAddress + ", MemoryCapacityBytes: " + mWorkerSpaceCounter.getCapacityBytes()); } /** * Update the latest block access time on the worker. 
* * @param blockId The id of the block */ void accessBlock(long blockId) { synchronized (mLatestBlockAccessTimeMs) { mLatestBlockAccessTimeMs.put(blockId, System.currentTimeMillis()); } } private void addBlockId(long blockId, long fileSizeBytes) { synchronized (mLatestBlockAccessTimeMs) { mLatestBlockAccessTimeMs.put(blockId, System.currentTimeMillis()); mBlockSizes.put(blockId, fileSizeBytes); mMemoryData.add(blockId); } } /** * Add the checkpoint information of a file. The information is from the user <code>userId</code>. * * <p>This method is normally triggered from {@link tachyon.client.FileOutStream#close()} if and * only if {@link tachyon.client.WriteType#isThrough()} is true. The current implementation of * checkpointing is that through {@link tachyon.client.WriteType} operations write to {@link * tachyon.UnderFileSystem} on the client's write path, but under a user temp directory (temp * directory is defined in the worker as {@link #getUserUnderfsTempFolder(long)}). * * @param userId The user id of the client who send the notification * @param fileId The id of the checkpointed file * @throws FileDoesNotExistException * @throws SuspectedFileSizeException * @throws FailedToCheckpointException * @throws BlockInfoException * @throws TException */ public void addCheckpoint(long userId, int fileId) throws FileDoesNotExistException, SuspectedFileSizeException, FailedToCheckpointException, BlockInfoException, TException { // TODO This part need to be changed. 
String srcPath = CommonUtils.concat(getUserUnderfsTempFolder(userId), fileId); String dstPath = CommonUtils.concat(COMMON_CONF.UNDERFS_DATA_FOLDER, fileId); try { if (!mUnderFs.rename(srcPath, dstPath)) { throw new FailedToCheckpointException("Failed to rename " + srcPath + " to " + dstPath); } } catch (IOException e) { throw new FailedToCheckpointException("Failed to rename " + srcPath + " to " + dstPath); } long fileSize; try { fileSize = mUnderFs.getFileSize(dstPath); } catch (IOException e) { throw new FailedToCheckpointException("Failed to getFileSize " + dstPath); } mMasterClient.addCheckpoint(mWorkerId, fileId, fileSize, dstPath); } private void addFoundBlock(long blockId, long length) throws FileDoesNotExistException, SuspectedFileSizeException, BlockInfoException, TException { addBlockId(blockId, length); mMasterClient.worker_cacheBlock(mWorkerId, mWorkerSpaceCounter.getUsedBytes(), blockId, length); } /** * Notify the worker to checkpoint the file asynchronously. * * @param fileId The id of the file * @return true if succeed, false otherwise * @throws IOException * @throws TException */ public boolean asyncCheckpoint(int fileId) throws IOException, TException { ClientFileInfo fileInfo = mMasterClient.getClientFileInfoById(fileId); if (fileInfo.getDependencyId() != -1) { synchronized (mDependencyLock) { mUncheckpointFiles.add(fileId); if (!mDepIdToFiles.containsKey(fileInfo.getDependencyId())) { mDepIdToFiles.put(fileInfo.getDependencyId(), new HashSet<Integer>()); } mDepIdToFiles.get(fileInfo.getDependencyId()).add(fileId); } return true; } return false; } /** * Notify the worker the block is cached. * * <p>This call is called remotely from {@link tachyon.client.TachyonFS#cacheBlock(long)} which is * only ever called from {@link tachyon.client.BlockOutStream#close()} (though its a public api so * anyone could call it). There are a few interesting preconditions for this to work. 
* * <p>1) Client process writes to files locally under a tachyon defined temp directory. 2) Worker * process is on the same node as the client 3) Client is talking to the local worker directly * * <p>If all conditions are true, then and only then can this method ever be called; all * operations work on local files. * * @param userId The user id of the client who send the notification * @param blockId The id of the block * @throws FileDoesNotExistException * @throws SuspectedFileSizeException * @throws BlockInfoException * @throws TException */ public void cacheBlock(long userId, long blockId) throws FileDoesNotExistException, SuspectedFileSizeException, BlockInfoException, TException { File srcFile = new File(CommonUtils.concat(getUserTempFolder(userId), blockId)); File dstFile = new File(CommonUtils.concat(mLocalDataFolder, blockId)); long fileSizeBytes = srcFile.length(); if (!srcFile.exists()) { throw new FileDoesNotExistException("File " + srcFile + " does not exist."); } if (!srcFile.renameTo(dstFile)) { throw new FileDoesNotExistException( "Failed to rename file from " + srcFile.getPath() + " to " + dstFile.getPath()); } addBlockId(blockId, fileSizeBytes); mUsers.addOwnBytes(userId, -fileSizeBytes); mMasterClient.worker_cacheBlock( mWorkerId, mWorkerSpaceCounter.getUsedBytes(), blockId, fileSizeBytes); LOG.info(userId + " " + dstFile); } /** * Check worker's status. This should be executed periodically. * * <p>It finds the timeout users and cleans them up. */ public void checkStatus() { List<Long> removedUsers = mUsers.checkStatus(); for (long userId : removedUsers) { mWorkerSpaceCounter.returnUsedBytes(mUsers.removeUser(userId)); synchronized (mUsersPerLockedBlock) { Set<Long> blockds = mLockedBlocksPerUser.get(userId); mLockedBlocksPerUser.remove(userId); if (blockds != null) { for (long blockId : blockds) { try { unlockBlock(blockId, userId); } catch (TException e) { throw Throwables.propagate(e); } } } } } } /** * Remove a block from the memory. 
* * @param blockId The block to be removed. * @return Removed file size in bytes. */ private long freeBlock(long blockId) { long freedFileBytes = 0; synchronized (mLatestBlockAccessTimeMs) { if (mBlockSizes.containsKey(blockId)) { mWorkerSpaceCounter.returnUsedBytes(mBlockSizes.get(blockId)); File srcFile = new File(CommonUtils.concat(mLocalDataFolder, blockId)); srcFile.delete(); mLatestBlockAccessTimeMs.remove(blockId); freedFileBytes = mBlockSizes.remove(blockId); mRemovedBlockList.add(blockId); mMemoryData.remove(blockId); LOG.info("Removed Data " + blockId); } else { LOG.warn("File " + blockId + " does not exist in memory."); } } return freedFileBytes; } /** * Remove blocks from the memory. * * <p>This is triggered when the worker heartbeats to the master, which sends a {@link * tachyon.thrift.Command} with type {@link tachyon.thrift.CommandType#Free} * * @param blocks The list of blocks to be removed. */ public void freeBlocks(List<Long> blocks) { for (long blockId : blocks) { freeBlock(blockId); } } /** * @return The root local data folder of the worker * @throws TException */ public String getDataFolder() throws TException { return mLocalDataFolder.toString(); } /** @return The orphans' folder in the under file system */ public String getUnderfsOrphansFolder() { return mUnderfsOrphansFolder; } /** * Get the local user temporary folder of the specified user. * * <p>This method is a wrapper around {@link tachyon.Users#getUserTempFolder(long)}, and as such * should be referentially transparent with {@link tachyon.Users#getUserTempFolder(long)}. In the * context of {@code this}, this call will output the result of path concat of {@link * #mLocalUserFolder} with the provided {@literal userId}. * * <p>This method differs from {@link #getUserUnderfsTempFolder(long)} in the context of where * write operations end up. This temp folder generated lives inside the tachyon file system, and * as such, will be stored in memory. 
* * @see tachyon.Users#getUserTempFolder(long) * @param userId The id of the user * @return The local user temporary folder of the specified user * @throws TException */ public String getUserTempFolder(long userId) throws TException { String ret = mUsers.getUserTempFolder(userId); LOG.info("Return UserTempFolder for " + userId + " : " + ret); return ret; } /** * Get the user temporary folder in the under file system of the specified user. * * <p>This method is a wrapper around {@link tachyon.Users#getUserUnderfsTempFolder(long)}, and as * such should be referentially transparent with {@link Users#getUserUnderfsTempFolder(long)}. In * the context of {@code this}, this call will output the result of path concat of {@link * #mUnderfsWorkerFolder} with the provided {@literal userId}. * * <p>This method differs from {@link #getUserTempFolder(long)} in the context of where write * operations end up. This temp folder generated lives inside the {@link tachyon.UnderFileSystem}, * and as such, will be stored remotely, most likely on disk. * * @param userId The id of the user * @return The user temporary folder in the under file system * @throws TException */ public String getUserUnderfsTempFolder(long userId) throws TException { String ret = mUsers.getUserUnderfsTempFolder(userId); LOG.info("Return UserHdfsTempFolder for " + userId + " : " + ret); return ret; } /** * Heartbeat with the TachyonMaster. Send the removed block list to the Master. 
* * @return The Command received from the Master * @throws BlockInfoException * @throws TException */ public Command heartbeat() throws BlockInfoException, TException { ArrayList<Long> sendRemovedPartitionList = new ArrayList<Long>(); while (mRemovedBlockList.size() > 0) { sendRemovedPartitionList.add(mRemovedBlockList.poll()); } return mMasterClient.worker_heartbeat( mWorkerId, mWorkerSpaceCounter.getUsedBytes(), sendRemovedPartitionList); } private void initializeWorkerStorage() throws IOException, FileDoesNotExistException, SuspectedFileSizeException, BlockInfoException, TException { LOG.info("Initializing the worker storage."); if (!mLocalDataFolder.exists()) { LOG.info("Local folder " + mLocalDataFolder + " does not exist. Creating a new one."); mLocalDataFolder.mkdirs(); mLocalUserFolder.mkdirs(); CommonUtils.changeLocalFilePermission(mLocalDataFolder.getPath(), "775"); CommonUtils.changeLocalFilePermission(mLocalUserFolder.getPath(), "775"); return; } if (!mLocalDataFolder.isDirectory()) { String tmp = "Data folder " + mLocalDataFolder + " is not a folder!"; LOG.error(tmp); throw new IllegalArgumentException(tmp); } if (mLocalUserFolder.exists()) { try { FileUtils.deleteDirectory(mLocalUserFolder); } catch (IOException e) { LOG.error(e.getMessage(), e); } } mLocalUserFolder.mkdir(); CommonUtils.changeLocalFilePermission(mLocalUserFolder.getPath(), "775"); mUnderfsOrphansFolder = mUnderfsWorkerFolder + "/orphans"; if (!mUnderFs.exists(mUnderfsOrphansFolder)) { mUnderFs.mkdirs(mUnderfsOrphansFolder, true); } int cnt = 0; for (File tFile : mLocalDataFolder.listFiles()) { if (tFile.isFile()) { cnt++; LOG.info("File " + cnt + ": " + tFile.getPath() + " with size " + tFile.length() + " Bs."); long blockId = CommonUtils.getBlockIdFromFileName(tFile.getName()); boolean success = mWorkerSpaceCounter.requestSpaceBytes(tFile.length()); try { addFoundBlock(blockId, tFile.length()); } catch (FileDoesNotExistException e) { LOG.error("BlockId: " + blockId + " becomes 
orphan for: \"" + e.message + "\""); LOG.info( "Swapout File " + cnt + ": blockId: " + blockId + " to " + mUnderfsOrphansFolder); swapoutOrphanBlocks(blockId, tFile); freeBlock(blockId); continue; } mAddedBlockList.add(blockId); if (!success) { throw new RuntimeException("Pre-existing files exceed the local memory capacity."); } } } } /** * Lock the block * * <p>Used internally to make sure blocks are unmodified, but also used in {@link * tachyon.client.TachyonFS} for cacheing blocks locally for users. When a user tries to read a * block ({@link tachyon.client.TachyonFile#readByteBuffer()}), the client will attempt to cache * the block on the local users's node, while the user is reading from the local block, the given * block is locked and unlocked once read. * * @param blockId The id of the block * @param userId The id of the user who locks the block * @throws TException */ public void lockBlock(long blockId, long userId) throws TException { synchronized (mUsersPerLockedBlock) { if (!mUsersPerLockedBlock.containsKey(blockId)) { mUsersPerLockedBlock.put(blockId, new HashSet<Long>()); } mUsersPerLockedBlock.get(blockId).add(userId); if (!mLockedBlocksPerUser.containsKey(userId)) { mLockedBlocksPerUser.put(userId, new HashSet<Long>()); } mLockedBlocksPerUser.get(userId).add(blockId); } } /** * Use local LRU to evict data, and get <code> requestBytes </code> available space. * * @param requestBytes The data requested. * @return <code> true </code> if the space is granted, <code> false </code> if not. 
*/ private boolean memoryEvictionLRU(long requestBytes) { Set<Integer> pinList; try { pinList = mMasterClient.worker_getPinIdList(); } catch (TException e) { LOG.error(e.getMessage()); pinList = new HashSet<Integer>(); } synchronized (mLatestBlockAccessTimeMs) { synchronized (mUsersPerLockedBlock) { while (mWorkerSpaceCounter.getAvailableBytes() < requestBytes) { long blockId = -1; long latestTimeMs = Long.MAX_VALUE; for (Entry<Long, Long> entry : mLatestBlockAccessTimeMs.entrySet()) { if (entry.getValue() < latestTimeMs && !pinList.contains(BlockInfo.computeInodeId(entry.getKey()))) { if (!mUsersPerLockedBlock.containsKey(entry.getKey())) { blockId = entry.getKey(); latestTimeMs = entry.getValue(); } } } if (blockId != -1) { freeBlock(blockId); } else { return false; } } } } return true; } /** Register this TachyonWorker to the TachyonMaster */ public void register() { long id = 0; while (id == 0) { try { mMasterClient.connect(); id = mMasterClient.worker_register( mWorkerAddress, mWorkerSpaceCounter.getCapacityBytes(), mWorkerSpaceCounter.getUsedBytes(), new ArrayList<Long>(mMemoryData)); } catch (BlockInfoException e) { LOG.error(e.getMessage(), e); id = 0; CommonUtils.sleepMs(LOG, Constants.SECOND_MS); } catch (TException e) { LOG.error(e.getMessage(), e); id = 0; CommonUtils.sleepMs(LOG, Constants.SECOND_MS); } } mWorkerId = id; } /** * Request space from the worker * * @param userId The id of the user who send the request * @param requestBytes The requested space size, in bytes * @return true if succeed, false otherwise * @throws TException */ public boolean requestSpace(long userId, long requestBytes) throws TException { LOG.info( "requestSpace(" + userId + ", " + requestBytes + "): Current available: " + mWorkerSpaceCounter.getAvailableBytes() + " requested: " + requestBytes); if (mWorkerSpaceCounter.getCapacityBytes() < requestBytes) { LOG.info( "user_requestSpace(): requested memory size is larger than the total memory on" + " the machine."); return 
false; } while (!mWorkerSpaceCounter.requestSpaceBytes(requestBytes)) { if (!memoryEvictionLRU(requestBytes)) { return false; } } mUsers.addOwnBytes(userId, requestBytes); return true; } /** * Set a new MasterClient and connect to it. * * @throws TException */ public void resetMasterClient() throws TException { MasterClient tMasterClient = new MasterClient(mMasterAddress); tMasterClient.connect(); mMasterClient = tMasterClient; } /** * Return the space which has been requested * * @param userId The id of the user who wants to return the space * @param returnedBytes The returned space size, in bytes * @throws TException */ public void returnSpace(long userId, long returnedBytes) throws TException { long preAvailableBytes = mWorkerSpaceCounter.getAvailableBytes(); if (returnedBytes > mUsers.ownBytes(userId)) { LOG.error("User " + userId + " does not own " + returnedBytes + " bytes."); } else { mWorkerSpaceCounter.returnUsedBytes(returnedBytes); mUsers.addOwnBytes(userId, -returnedBytes); } LOG.info( "returnSpace(" + userId + ", " + returnedBytes + ") : " + preAvailableBytes + " returned: " + returnedBytes + ". New Available: " + mWorkerSpaceCounter.getAvailableBytes()); } /** Disconnect to the Master. */ public void stop() { mMasterClient.shutdown(); } /** * Swap out those blocks missing INode information onto underFS which can be retrieved by user * later. Its cleanup only happens while formating the TFS. */ private void swapoutOrphanBlocks(long blockId, File file) throws IOException { RandomAccessFile localFile = new RandomAccessFile(file, "r"); ByteBuffer buf = localFile.getChannel().map(MapMode.READ_ONLY, 0, file.length()); String ufsOrphanBlock = CommonUtils.concat(mUnderfsOrphansFolder, blockId); OutputStream os = mUnderFs.create(ufsOrphanBlock); int BULKSIZE = Constants.KB * 64; byte[] bulk = new byte[BULKSIZE]; for (int k = 0; k < (buf.limit() + BULKSIZE - 1) / BULKSIZE; k++) { int len = BULKSIZE < buf.remaining() ? 
BULKSIZE : buf.remaining(); buf.get(bulk, 0, len); os.write(bulk, 0, len); } os.close(); localFile.close(); } /** * Unlock the block * * <p>Used internally to make sure blocks are unmodified, but also used in {@link * tachyon.client.TachyonFS} for cacheing blocks locally for users. When a user tries to read a * block ({@link tachyon.client.TachyonFile#readByteBuffer()}), the client will attempt to cache * the block on the local users's node, while the user is reading from the local block, the given * block is locked and unlocked once read. * * @param blockId The id of the block * @param userId The id of the user who unlocks the block * @throws TException */ public void unlockBlock(long blockId, long userId) throws TException { synchronized (mUsersPerLockedBlock) { if (mUsersPerLockedBlock.containsKey(blockId)) { mUsersPerLockedBlock.get(blockId).remove(userId); if (mUsersPerLockedBlock.get(blockId).size() == 0) { mUsersPerLockedBlock.remove(blockId); } } if (mLockedBlocksPerUser.containsKey(userId)) { mLockedBlocksPerUser.get(userId).remove(blockId); } } } /** * Handle the user's heartbeat. * * @param userId The id of the user * @throws TException */ public void userHeartbeat(long userId) throws TException { mUsers.userHeartbeat(userId); } }
@Override public void run() { while (true) { try { int fileId = -1; synchronized (mDependencyLock) { fileId = getFileIdBasedOnPriorityDependency(); if (fileId == -1) { if (mPriorityDependencies.size() == 0) { mPriorityDependencies = getSortedPriorityDependencyList(); if (!mPriorityDependencies.isEmpty()) { LOG.info( "Get new mPriorityDependencies " + CommonUtils.listToString(mPriorityDependencies)); } } else { List<Integer> tList = getSortedPriorityDependencyList(); boolean equal = true; if (mPriorityDependencies.size() != tList.size()) { equal = false; } if (equal) { for (int k = 0; k < tList.size(); k++) { if (tList.get(k) != mPriorityDependencies.get(k)) { equal = false; break; } } } if (!equal) { mPriorityDependencies = tList; } } fileId = getFileIdBasedOnPriorityDependency(); } if (fileId == -1) { fileId = getRandomUncheckpointedFile(); } } if (fileId == -1) { LOG.debug("Thread " + ID + " has nothing to checkpoint. Sleep for 1 sec."); CommonUtils.sleepMs(LOG, Constants.SECOND_MS); continue; } // TODO checkpoint process. 
In future, move from midPath to dstPath should be done by // master String midPath = CommonUtils.concat(mUnderfsWorkerDataFolder, fileId); String dstPath = CommonUtils.concat(CommonConf.get().UNDERFS_DATA_FOLDER, fileId); LOG.info( "Thread " + ID + " is checkpointing file " + fileId + " from " + mLocalDataFolder.toString() + " to " + midPath + " to " + dstPath); if (mCheckpointUnderFs == null) { mCheckpointUnderFs = UnderFileSystem.get(midPath); } long startCopyTimeMs = System.currentTimeMillis(); ClientFileInfo fileInfo = mMasterClient.getClientFileInfoById(fileId); if (!fileInfo.isComplete) { LOG.error("File " + fileInfo + " is not complete!"); continue; } for (int k = 0; k < fileInfo.blockIds.size(); k++) { lockBlock(fileInfo.blockIds.get(k), Users.sCHECKPOINT_USER_ID); } OutputStream os = mCheckpointUnderFs.create(midPath, (int) fileInfo.getBlockSizeByte()); long fileSizeByte = 0; for (int k = 0; k < fileInfo.blockIds.size(); k++) { File tempFile = new File(CommonUtils.concat(mLocalDataFolder.toString(), fileInfo.blockIds.get(k))); fileSizeByte += tempFile.length(); InputStream is = new FileInputStream(tempFile); byte[] buf = new byte[16 * Constants.KB]; int got = is.read(buf); while (got != -1) { os.write(buf, 0, got); got = is.read(buf); } is.close(); } os.close(); if (!mCheckpointUnderFs.rename(midPath, dstPath)) { LOG.error("Failed to rename from " + midPath + " to " + dstPath); } mMasterClient.addCheckpoint(mWorkerId, fileId, fileSizeByte, dstPath); for (int k = 0; k < fileInfo.blockIds.size(); k++) { unlockBlock(fileInfo.blockIds.get(k), Users.sCHECKPOINT_USER_ID); } long shouldTakeMs = (long) (1000.0 * fileSizeByte / Constants.MB / WorkerConf.get().WORKER_PER_THREAD_CHECKPOINT_CAP_MB_SEC); long currentTimeMs = System.currentTimeMillis(); if (startCopyTimeMs + shouldTakeMs > currentTimeMs) { long shouldSleepMs = startCopyTimeMs + shouldTakeMs - currentTimeMs; LOG.info( "Checkpointed last file " + fileId + " took " + (currentTimeMs - startCopyTimeMs) + " 
ms. Need to sleep " + shouldSleepMs + " ms."); CommonUtils.sleepMs(LOG, shouldSleepMs); } } catch (FileDoesNotExistException e) { LOG.warn(e); } catch (SuspectedFileSizeException e) { LOG.error(e); } catch (BlockInfoException e) { LOG.error(e); } catch (IOException e) { LOG.error(e); } catch (TException e) { LOG.warn(e); } } }