/**
 * Heartbeat loop of the worker thread; runs until {@code mStop} is set.
 *
 * Each iteration paces itself to the configured heartbeat interval, sends a heartbeat through
 * {@code mWorkerStorage}, executes the command returned by the master (if any) and finally calls
 * {@code mWorkerStorage.checkStatus()}.
 *
 * @throws RuntimeException if heartbeats keep failing for at least
 *         {@code HEARTBEAT_TIMEOUT_MS} since the last successful one (the failing
 *         {@link IOException} is attached as the cause), or if the master sends a command type
 *         this worker does not recognize
 */
@Override
public void run() {
  long lastHeartbeatMs = System.currentTimeMillis();
  while (!mStop) {
    // Sleep for whatever is left of the heartbeat interval; complain if we already overran it.
    long intervalMs = WorkerConf.get().TO_MASTER_HEARTBEAT_INTERVAL_MS;
    long diff = System.currentTimeMillis() - lastHeartbeatMs;
    if (diff < intervalMs) {
      LOG.debug("Heartbeat process takes {} ms.", diff);
      CommonUtils.sleepMs(LOG, intervalMs - diff);
    } else {
      LOG.error("Heartbeat process takes " + diff + " ms.");
    }

    // Scoped per iteration so a failed heartbeat can never re-run the previous command.
    Command cmd = null;
    try {
      cmd = mWorkerStorage.heartbeat();
      lastHeartbeatMs = System.currentTimeMillis();
    } catch (IOException e) {
      LOG.error(e.getMessage(), e);
      mWorkerStorage.resetMasterClient();
      CommonUtils.sleepMs(LOG, Constants.SECOND_MS);
      // Sample the clock once so the timeout check and the reported value always agree.
      long sinceLastHeartbeatMs = System.currentTimeMillis() - lastHeartbeatMs;
      if (sinceLastHeartbeatMs >= WorkerConf.get().HEARTBEAT_TIMEOUT_MS) {
        // Keep the IOException as the cause so the root failure is not lost.
        throw new RuntimeException("Heartbeat timeout " + sinceLastHeartbeatMs + "ms", e);
      }
    }

    if (cmd != null) {
      switch (cmd.mCommandType) {
        case Unknown:
          LOG.error("Unknown command: " + cmd);
          break;
        case Nothing:
          LOG.debug("Nothing command: {}", cmd);
          break;
        case Register:
          LOG.info("Register command: " + cmd);
          mWorkerStorage.register();
          break;
        case Free:
          mWorkerStorage.freeBlocks(cmd.mData);
          LOG.info("Free command: " + cmd);
          break;
        case Delete:
          // Delete commands are only logged here; no action is taken.
          LOG.info("Delete command: " + cmd);
          break;
        default:
          throw new RuntimeException("Un-recognized command from master " + cmd.toString());
      }
    }

    mWorkerStorage.checkStatus();
  }
}
/**
 * Shared body for the orphan-block swap-out tests.
 *
 * Writes a MUST_CACHE file of {@code filesize} bytes at "/xyz", stops the worker, deletes the
 * file, then constructs a fresh {@code WorkerStorage} over the same data folder. Afterwards it
 * asserts that the block file is gone from the worker data folder and that a file named after
 * the block id exists, with the original length, in the storage's under-fs orphans folder.
 * NOTE(review): presumably constructing the WorkerStorage is what moves the orphan block;
 * confirm against WorkerStorage.
 *
 * @param filesize size in bytes of the file to create
 * @throws Exception if any step of the scenario fails
 */
private void swapoutOrphanBlocksFileTestUtil(int filesize) throws Exception {
  // Create a single cached file and remember the id of its first block.
  final int fileId = TestUtils.createByteFile(mTfs, "/xyz", WriteType.MUST_CACHE, filesize);
  final long blockId = mTfs.getBlockId(fileId, 0);

  // Delete the file while the worker is down, so its block file is left behind on disk.
  mLocalTachyonCluster.stopWorker();
  mTfs.delete(fileId, true);

  WorkerStorage storage = new WorkerStorage(
      mMasterAddress, mWorkerAddress, mWorkerDataFolder, WORKER_CAPACITY_BYTES);
  File orphanFile = new File(storage.getUnderfsOrphansFolder() + "/" + blockId);
  File localBlockFile = new File(mWorkerDataFolder + "/" + blockId);

  Assert.assertFalse(
      "Orphan block file isn't deleted from workerDataFolder", localBlockFile.exists());
  Assert.assertTrue("UFS hasn't the orphan block file ", orphanFile.exists());
  boolean sizeUnchanged = orphanFile.length() == filesize;
  Assert.assertTrue("Orpahblock file size is changed", sizeUnchanged);
}
/** * @param masterAddress The TachyonMaster's address. * @param workerAddress This TachyonWorker's address. * @param dataPort This TachyonWorker's data server's port * @param selectorThreads The number of selector threads of the worker's thrift server * @param acceptQueueSizePerThreads The accept queue size per thread of the worker's thrift server * @param workerThreads The number of threads of the worker's thrift server * @param dataFolder This TachyonWorker's local folder's path * @param memoryCapacityBytes The maximum memory space this TachyonWorker can use, in bytes */ private TachyonWorker( InetSocketAddress masterAddress, InetSocketAddress workerAddress, int dataPort, int selectorThreads, int acceptQueueSizePerThreads, int workerThreads, String dataFolder, long memoryCapacityBytes) { CommonConf.assertValidPort(masterAddress); CommonConf.assertValidPort(workerAddress); CommonConf.assertValidPort(dataPort); mMasterAddress = masterAddress; mWorkerStorage = new WorkerStorage(mMasterAddress, dataFolder, memoryCapacityBytes, mExecutorService); mWorkerServiceHandler = new WorkerServiceHandler(mWorkerStorage); // Extract the port from the generated socket. // When running tests, its great to use port '0' so the system will figure out what port to use // (any random free port). // In a production or any real deployment setup, port '0' should not be used as it will make // deployment more complicated. 
InetSocketAddress dataAddress = new InetSocketAddress(workerAddress.getHostName(), dataPort); BlocksLocker blockLocker = new BlocksLocker(mWorkerStorage, Users.DATASERVER_USER_ID); mDataServer = createDataServer(dataAddress, blockLocker); mDataPort = mDataServer.getPort(); mHeartbeatThread = new Thread(this); try { LOG.info("Tachyon Worker version " + Version.VERSION + " tries to start @ " + workerAddress); WorkerService.Processor<WorkerServiceHandler> processor = new WorkerService.Processor<WorkerServiceHandler>(mWorkerServiceHandler); mServerTNonblockingServerSocket = new TNonblockingServerSocket(workerAddress); mPort = NetworkUtils.getPort(mServerTNonblockingServerSocket); mServer = new TThreadedSelectorServer( new TThreadedSelectorServer.Args(mServerTNonblockingServerSocket) .processor(processor) .selectorThreads(selectorThreads) .acceptQueueSizePerThread(acceptQueueSizePerThreads) .workerThreads(workerThreads)); } catch (TTransportException e) { LOG.error(e.getMessage(), e); throw Throwables.propagate(e); } mWorkerAddress = new NetAddress(workerAddress.getAddress().getCanonicalHostName(), mPort, mDataPort); mWorkerStorage.initialize(mWorkerAddress); }
/** * Stop this TachyonWorker. Stop all the threads belong to this TachyonWorker. * * @throws IOException * @throws InterruptedException */ public void stop() throws IOException, InterruptedException { mStop = true; mWorkerStorage.stop(); mDataServer.close(); mServer.stop(); mServerTNonblockingServerSocket.close(); mExecutorService.shutdown(); while (!mDataServer.isClosed() || mServer.isServing() || mHeartbeatThread.isAlive()) { // TODO The reason to stop and close again is due to some issues in Thrift. mServer.stop(); mServerTNonblockingServerSocket.close(); CommonUtils.sleepMs(null, 100); } mHeartbeatThread.join(); }