/**
 * Opens the connection to the worker and, once the transport is open, starts the heartbeat
 * task.
 *
 * <p>A worker on the local host is asked for first. If the master reports none, or the local
 * host name cannot be resolved, any worker returned by the master is used instead.
 *
 * <p>The heartbeat task is submitted only after the transport has been opened successfully, so
 * a failed attempt does not leave a heartbeat task behind.
 *
 * @return true if the client is connected when this method returns, false if no worker is
 *         available or the transport could not be opened
 * @throws IOException if one of the underlying calls fails with an I/O error
 */
private synchronized boolean connect() throws IOException {
  if (mConnected) {
    return true;
  }

  // Start every attempt from "remote": a locality flag left over from an earlier connection
  // must not survive a fallback to a non-local worker below.
  mIsLocal = false;

  NetAddress workerNetAddress = null;
  try {
    String localHostName = NetworkAddressUtils.getLocalHostName(mTachyonConf);
    LOG.info("Trying to get local worker host : " + localHostName);
    workerNetAddress = mMasterClient.user_getWorker(false, localHostName);
    mIsLocal =
        workerNetAddress
            .getMHost()
            .equals(InetAddress.getByName(localHostName).getHostAddress());
  } catch (NoWorkerException e) {
    LOG.info(e.getMessage());
    workerNetAddress = null;
  } catch (UnknownHostException e) {
    LOG.info(e.getMessage());
    workerNetAddress = null;
  }

  if (workerNetAddress == null) {
    // No usable local worker: ask the master for any worker.
    try {
      workerNetAddress = mMasterClient.user_getWorker(true, "");
    } catch (NoWorkerException e) {
      LOG.info("No worker running in the system: " + e.getMessage());
      mClient = null;
      return false;
    }
  }

  String host = NetworkAddressUtils.getFqdnHost(workerNetAddress);
  int port = workerNetAddress.mPort;
  mWorkerAddress = new InetSocketAddress(host, port);
  mWorkerDataServerAddress = new InetSocketAddress(host, workerNetAddress.mSecondaryPort);
  LOG.info("Connecting " + (mIsLocal ? "local" : "remote") + " worker @ " + mWorkerAddress);

  mProtocol = new TBinaryProtocol(new TFramedTransport(new TSocket(host, port)));
  mClient = new WorkerService.Client(mProtocol);

  // Built at the same point as before so that a failure to obtain the user id still surfaces
  // before the transport is opened; the task itself is only submitted further down.
  mHeartbeatExecutor = new WorkerClientHeartbeatExecutor(this, mMasterClient.getUserId());

  try {
    mProtocol.getTransport().open();
  } catch (TTransportException e) {
    LOG.error(e.getMessage(), e);
    return false;
  }

  // The transport is open: only now is it worth running a heartbeat task.
  String threadName = "worker-heartbeat-" + mWorkerAddress;
  int interval = mTachyonConf.getInt(Constants.USER_HEARTBEAT_INTERVAL_MS, Constants.SECOND_MS);
  mHeartbeat =
      mExecutorService.submit(new HeartbeatThread(threadName, mHeartbeatExecutor, interval));

  mConnected = true;
  return true;
}
/**
 * Frees blocks, least recently accessed first, until at least <code>requestBytes</code> bytes
 * are available.
 *
 * <p>Blocks whose inode id is in the pin list fetched from the master, and blocks present in
 * <code>mUsersPerLockedBlock</code>, are never chosen. If the pin list cannot be fetched, an
 * empty pin list is used.
 *
 * @param requestBytes The number of bytes that must be available.
 * @return <code> true </code> if the space is available, <code> false </code> if no further
 *         block can be freed.
 */
private boolean memoryEvictionLRU(long requestBytes) {
  Set<Integer> pinnedInodes;
  try {
    pinnedInodes = mMasterClient.worker_getPinIdList();
  } catch (TException e) {
    LOG.error(e.getMessage());
    pinnedInodes = new HashSet<Integer>();
  }

  synchronized (mLatestBlockAccessTimeMs) {
    synchronized (mUsersPerLockedBlock) {
      while (mWorkerSpaceCounter.getAvailableBytes() < requestBytes) {
        long victimId = -1;
        long oldestAccessMs = Long.MAX_VALUE;

        // Scan for the unpinned, unlocked block with the smallest access time.
        for (Entry<Long, Long> candidate : mLatestBlockAccessTimeMs.entrySet()) {
          if (candidate.getValue() >= oldestAccessMs) {
            continue;
          }
          if (pinnedInodes.contains(BlockInfo.computeInodeId(candidate.getKey()))) {
            continue;
          }
          if (mUsersPerLockedBlock.containsKey(candidate.getKey())) {
            continue;
          }
          victimId = candidate.getKey();
          oldestAccessMs = candidate.getValue();
        }

        if (victimId == -1) {
          // Nothing left that may be evicted.
          return false;
        }
        freeBlock(victimId);
      }
    }
  }
  return true;
}
/**
 * Sends a heartbeat to the TachyonMaster, carrying the used bytes of this worker and every
 * block id currently queued in the removed block list (the queue is drained in the process).
 *
 * @return The Command received from the Master
 * @throws BlockInfoException
 * @throws TException
 */
public Command heartbeat() throws BlockInfoException, TException {
  ArrayList<Long> removedBlocks = new ArrayList<Long>();
  // Drain the queue into a private list that is handed to the master.
  while (!mRemovedBlockList.isEmpty()) {
    removedBlocks.add(mRemovedBlockList.poll());
  }
  return mMasterClient.worker_heartbeat(
      mWorkerId, mWorkerSpaceCounter.getUsedBytes(), removedBlocks);
}
/**
 * Asks the worker to unlock a block on behalf of the current user.
 *
 * <p>A Thrift failure marks this client as disconnected before it is rethrown.
 *
 * @param blockId The id of the block
 * @return the value returned by the worker: true if success, false otherwise
 * @throws IOException if the connection cannot be established or the call fails
 */
public synchronized boolean unlockBlock(long blockId) throws IOException {
  mustConnect();

  try {
    final long userId = mMasterClient.getUserId();
    final boolean unlocked = mClient.unlockBlock(blockId, userId);
    return unlocked;
  } catch (TException e) {
    mConnected = false;
    throw new IOException(e);
  }
}
/**
 * Asks the worker for the current user's temporary folder in the under file system.
 *
 * <p>A Thrift failure marks this client as disconnected before it is rethrown.
 *
 * @return The user temporary folder in the under file system
 * @throws IOException if the connection cannot be established or the call fails
 */
public synchronized String getUserUfsTempFolder() throws IOException {
  mustConnect();

  try {
    final long userId = mMasterClient.getUserId();
    final String tempFolder = mClient.getUserUfsTempFolder(userId);
    return tempFolder;
  } catch (TException e) {
    mConnected = false;
    throw new IOException(e);
  }
}
/**
 * Tells the worker that the current user has cancelled a block.
 *
 * <p>A Thrift failure marks this client as disconnected before it is rethrown.
 *
 * @param blockId The Id of the block to be cancelled
 * @throws IOException if the connection cannot be established or the call fails
 */
public synchronized void cancelBlock(long blockId) throws IOException {
  mustConnect();

  try {
    final long userId = mMasterClient.getUserId();
    mClient.cancelBlock(userId, blockId);
  } catch (TException e) {
    mConnected = false;
    throw new IOException(e);
  }
}
/**
 * Asks the worker to lock a block for the current user, so that the worker will not evict the
 * block from memory until it is unlocked.
 *
 * <p>A Thrift failure other than {@code FileDoesNotExistException} marks this client as
 * disconnected before it is rethrown.
 *
 * @param blockId The id of the block
 * @return the path of the locked block file, or null if the worker reports that the file does
 *         not exist
 * @throws IOException if the connection cannot be established or the call fails
 */
public synchronized String lockBlock(long blockId) throws IOException {
  mustConnect();

  try {
    final long userId = mMasterClient.getUserId();
    final String lockedPath = mClient.lockBlock(blockId, userId);
    return lockedPath;
  } catch (FileDoesNotExistException e) {
    // A missing file is reported to the caller as "no path", not as an error.
    return null;
  } catch (TException e) {
    mConnected = false;
    throw new IOException(e);
  }
}
/**
 * Registers this TachyonWorker with the TachyonMaster, retrying until the master hands back a
 * non-zero worker id. After each failed attempt the error is logged and the method sleeps
 * before trying again.
 */
public void register() {
  long assignedId = 0;
  do {
    try {
      mMasterClient.connect();
      assignedId =
          mMasterClient.worker_register(
              mWorkerAddress,
              mWorkerSpaceCounter.getCapacityBytes(),
              mWorkerSpaceCounter.getUsedBytes(),
              new ArrayList<Long>(mMemoryData));
    } catch (BlockInfoException e) {
      LOG.error(e.getMessage(), e);
      assignedId = 0;
      CommonUtils.sleepMs(LOG, Constants.SECOND_MS);
    } catch (TException e) {
      LOG.error(e.getMessage(), e);
      assignedId = 0;
      CommonUtils.sleepMs(LOG, Constants.SECOND_MS);
    }
  } while (assignedId == 0);

  mWorkerId = assignedId;
}
/**
 * Tells the worker that the current user has finished caching a block.
 *
 * <p>{@code FileDoesNotExistException} and {@code BlockInfoException} are rethrown as
 * {@code IOException} and leave the connection state untouched; any other Thrift failure also
 * marks this client as disconnected.
 *
 * @param blockId The id of the block
 * @throws IOException if the connection cannot be established or the call fails
 */
public synchronized void cacheBlock(long blockId) throws IOException {
  mustConnect();

  try {
    final long userId = mMasterClient.getUserId();
    mClient.cacheBlock(userId, blockId);
  } catch (FileDoesNotExistException e) {
    throw new IOException(e);
  } catch (BlockInfoException e) {
    throw new IOException(e);
  } catch (TException e) {
    mConnected = false;
    throw new IOException(e);
  }
}
/**
 * Asks the worker for additional space for a block on behalf of the current user.
 *
 * <p>{@code FileDoesNotExistException} is rethrown as {@code IOException} and leaves the
 * connection state untouched; any other Thrift failure also marks this client as disconnected.
 *
 * @param blockId The id of the block
 * @param requestBytes The requested space size, in bytes
 * @return the value returned by the worker, or false if the worker reports that it is out of
 *         space
 * @throws IOException if the connection cannot be established or the call fails
 */
public synchronized boolean requestSpace(long blockId, long requestBytes) throws IOException {
  mustConnect();

  try {
    final long userId = mMasterClient.getUserId();
    final boolean granted = mClient.requestSpace(userId, blockId, requestBytes);
    return granted;
  } catch (OutOfSpaceException e) {
    // Running out of space is an ordinary negative answer, not an error.
    return false;
  } catch (FileDoesNotExistException e) {
    throw new IOException(e);
  } catch (TException e) {
    mConnected = false;
    throw new IOException(e);
  }
}
/**
 * Asks the worker for the temporary path of a block on behalf of the current user.
 *
 * <p>{@code OutOfSpaceException} and {@code FileAlreadyExistException} are rethrown as
 * {@code IOException} and leave the connection state untouched; any other Thrift failure also
 * marks this client as disconnected.
 *
 * @param blockId The id of the block
 * @param initialBytes The initial size bytes allocated for the block
 * @return the temporary path of the block
 * @throws IOException if the connection cannot be established or the call fails
 */
public synchronized String requestBlockLocation(long blockId, long initialBytes)
    throws IOException {
  mustConnect();

  try {
    final long userId = mMasterClient.getUserId();
    final String tempPath = mClient.requestBlockLocation(userId, blockId, initialBytes);
    return tempPath;
  } catch (OutOfSpaceException e) {
    throw new IOException(e);
  } catch (FileAlreadyExistException e) {
    throw new IOException(e);
  } catch (TException e) {
    mConnected = false;
    throw new IOException(e);
  }
}
/**
 * Records a file for asynchronous checkpointing.
 *
 * <p>The file info is fetched from the master. Only a file whose dependency id is not -1 is
 * recorded: under the dependency lock its id is added to the uncheckpointed files and to the
 * set of files kept for its dependency id.
 *
 * @param fileId The id of the file
 * @return true if the file was recorded, false if it has no dependency (dependency id -1)
 * @throws IOException
 * @throws TException
 */
public boolean asyncCheckpoint(int fileId) throws IOException, TException {
  ClientFileInfo info = mMasterClient.getClientFileInfoById(fileId);
  final int dependencyId = info.getDependencyId();
  if (dependencyId == -1) {
    return false;
  }

  synchronized (mDependencyLock) {
    mUncheckpointFiles.add(fileId);
    // Create the per-dependency file set on first use.
    if (!mDepIdToFiles.containsKey(dependencyId)) {
      mDepIdToFiles.put(dependencyId, new HashSet<Integer>());
    }
    mDepIdToFiles.get(dependencyId).add(fileId);
  }
  return true;
}
/**
 * Notify the worker the block is cached.
 *
 * <p>This call is called remotely from {@link tachyon.client.TachyonFS#cacheBlock(long)} which is
 * only ever called from {@link tachyon.client.BlockOutStream#close()} (though it is a public api
 * so anyone could call it). There are a few interesting preconditions for this to work.
 *
 * <p>1) Client process writes to files locally under a tachyon defined temp directory. 2) Worker
 * process is on the same node as the client 3) Client is talking to the local worker directly
 *
 * <p>If all conditions are true, then and only then can this method ever be called; all
 * operations work on local files.
 *
 * <p>The block file is moved from the user's temp folder into the local data folder, the block
 * is recorded locally, the user's owned bytes are reduced by the file size, and the master is
 * informed.
 *
 * @param userId The user id of the client who send the notification
 * @param blockId The id of the block
 * @throws FileDoesNotExistException if the source file does not exist or cannot be renamed
 * @throws SuspectedFileSizeException
 * @throws BlockInfoException
 * @throws TException
 */
public void cacheBlock(long userId, long blockId)
    throws FileDoesNotExistException, SuspectedFileSizeException, BlockInfoException,
        TException {
  final String tempPath = CommonUtils.concat(getUserTempFolder(userId), blockId);
  final String cachedPath = CommonUtils.concat(mLocalDataFolder, blockId);
  File srcFile = new File(tempPath);
  File dstFile = new File(cachedPath);

  // The size has to be read before the rename, while the source path is still valid.
  long fileSizeBytes = srcFile.length();

  if (!srcFile.exists()) {
    throw new FileDoesNotExistException("File " + srcFile + " does not exist.");
  }

  boolean moved = srcFile.renameTo(dstFile);
  if (!moved) {
    throw new FileDoesNotExistException(
        "Failed to rename file from " + srcFile.getPath() + " to " + dstFile.getPath());
  }

  addBlockId(blockId, fileSizeBytes);
  mUsers.addOwnBytes(userId, -fileSizeBytes);
  mMasterClient.worker_cacheBlock(
      mWorkerId, mWorkerSpaceCounter.getUsedBytes(), blockId, fileSizeBytes);
  LOG.info(userId + " " + dstFile);
}
/**
 * Tells the worker that the current user has added the checkpoint file of a file.
 *
 * <p>{@code FileDoesNotExistException}, {@code SuspectedFileSizeException},
 * {@code FailedToCheckpointException} and {@code BlockInfoException} are rethrown as
 * {@code IOException} and leave the connection state untouched; any other Thrift failure also
 * marks this client as disconnected.
 *
 * @param fileId The id of the checkpointed file
 * @throws IOException if the connection cannot be established or the call fails
 */
public synchronized void addCheckpoint(int fileId) throws IOException {
  mustConnect();

  try {
    final long userId = mMasterClient.getUserId();
    mClient.addCheckpoint(userId, fileId);
  } catch (FileDoesNotExistException e) {
    throw new IOException(e);
  } catch (SuspectedFileSizeException e) {
    throw new IOException(e);
  } catch (FailedToCheckpointException e) {
    throw new IOException(e);
  } catch (BlockInfoException e) {
    throw new IOException(e);
  } catch (TException e) {
    mConnected = false;
    throw new IOException(e);
  }
}
/** * Add the checkpoint information of a file. The information is from the user <code>userId</code>. * * <p>This method is normally triggered from {@link tachyon.client.FileOutStream#close()} if and * only if {@link tachyon.client.WriteType#isThrough()} is true. The current implementation of * checkpointing is that through {@link tachyon.client.WriteType} operations write to {@link * tachyon.UnderFileSystem} on the client's write path, but under a user temp directory (temp * directory is defined in the worker as {@link #getUserUnderfsTempFolder(long)}). * * @param userId The user id of the client who send the notification * @param fileId The id of the checkpointed file * @throws FileDoesNotExistException * @throws SuspectedFileSizeException * @throws FailedToCheckpointException * @throws BlockInfoException * @throws TException */ public void addCheckpoint(long userId, int fileId) throws FileDoesNotExistException, SuspectedFileSizeException, FailedToCheckpointException, BlockInfoException, TException { // TODO This part need to be changed. String srcPath = CommonUtils.concat(getUserUnderfsTempFolder(userId), fileId); String dstPath = CommonUtils.concat(COMMON_CONF.UNDERFS_DATA_FOLDER, fileId); try { if (!mUnderFs.rename(srcPath, dstPath)) { throw new FailedToCheckpointException("Failed to rename " + srcPath + " to " + dstPath); } } catch (IOException e) { throw new FailedToCheckpointException("Failed to rename " + srcPath + " to " + dstPath); } long fileSize; try { fileSize = mUnderFs.getFileSize(dstPath); } catch (IOException e) { throw new FailedToCheckpointException("Failed to getFileSize " + dstPath); } mMasterClient.addCheckpoint(mWorkerId, fileId, fileSize, dstPath); }
/**
 * Creates a new MasterClient for the master address, connects it, and only then installs it as
 * the master client in use. If connecting fails, the previous master client stays in place.
 *
 * @throws TException
 */
public void resetMasterClient() throws TException {
  final MasterClient replacement = new MasterClient(mMasterAddress);
  replacement.connect();
  // Swap in the new client only after it connected successfully.
  mMasterClient = replacement;
}
/**
 * Disconnects from the Master by shutting down the master client.
 */
public void stop() {
  this.mMasterClient.shutdown();
}
/**
 * Records a block through {@code addBlockId} and then reports it to the master as cached,
 * together with the used bytes of this worker.
 *
 * @param blockId The id of the block
 * @param length The length passed on for the block
 * @throws FileDoesNotExistException
 * @throws SuspectedFileSizeException
 * @throws BlockInfoException
 * @throws TException
 */
private void addFoundBlock(long blockId, long length)
    throws FileDoesNotExistException, SuspectedFileSizeException, BlockInfoException,
        TException {
  addBlockId(blockId, length);
  // Read after addBlockId, as in the inline form, so the value reflects the recorded block.
  final long usedBytes = mWorkerSpaceCounter.getUsedBytes();
  mMasterClient.worker_cacheBlock(mWorkerId, usedBytes, blockId, length);
}