/**
 * Creates a new thread that tails a journal and applies its entries to a master.
 *
 * @param master the master to apply the journal entries to
 * @param journal the journal to tail
 */
public JournalTailerThread(Master master, Journal journal) {
  mMaster = Preconditions.checkNotNull(master);
  mJournal = Preconditions.checkNotNull(journal);
  // Both timing parameters come from the shared master configuration.
  final TachyonConf masterConf = MasterContext.getConf();
  mShutdownQuietWaitTimeMs =
      masterConf.getInt(Constants.MASTER_JOURNAL_TAILER_SHUTDOWN_QUIET_WAIT_TIME_MS);
  mJournalTailerSleepTimeMs = masterConf.getInt(Constants.MASTER_JOURNAL_TAILER_SLEEP_TIME_MS);
}
@Override public void close() throws IOException { int quietPeriodSecs = mTachyonConf.getInt(Constants.WORKER_NETWORK_NETTY_SHUTDOWN_QUIET_PERIOD); int timeoutSecs = mTachyonConf.getInt(Constants.WORKER_NETWORK_NETTY_SHUTDOWN_TIMEOUT); // TODO(binfan): investigate when timeoutSecs is zero (e.g., set in integration tests), does // this still complete successfully. mChannelFuture.channel().close().awaitUninterruptibly(timeoutSecs, TimeUnit.SECONDS); mBootstrap.group().shutdownGracefully(quietPeriodSecs, timeoutSecs, TimeUnit.SECONDS); mBootstrap.childGroup().shutdownGracefully(quietPeriodSecs, timeoutSecs, TimeUnit.SECONDS); }
/**
 * Helper method to create a {@link org.apache.thrift.server.TThreadPoolServer} for handling
 * incoming RPC requests.
 *
 * @return a thrift server
 */
private TThreadPoolServer createThriftServer() {
  int minWorkerThreads = mTachyonConf.getInt(Constants.WORKER_WORKER_BLOCK_THREADS_MIN);
  int maxWorkerThreads = mTachyonConf.getInt(Constants.WORKER_WORKER_BLOCK_THREADS_MAX);
  BlockWorkerClientService.Processor<BlockWorkerClientServiceHandler> processor =
      new BlockWorkerClientService.Processor<BlockWorkerClientServiceHandler>(mServiceHandler);
  // Pick a TTransportFactory based on the configured authentication type.
  TTransportFactory tTransportFactory;
  try {
    tTransportFactory = AuthenticationUtils.getServerTransportFactory(mTachyonConf);
  } catch (IOException e) {
    throw Throwables.propagate(e);
  }
  // Use the fully-qualified Args type and the catch variable name 'e' for consistency with the
  // sibling thrift-server factory in this codebase.
  TThreadPoolServer.Args args = new TThreadPoolServer.Args(mThriftServerSocket)
      .minWorkerThreads(minWorkerThreads)
      .maxWorkerThreads(maxWorkerThreads)
      .processor(processor)
      .transportFactory(tTransportFactory)
      .protocolFactory(new TBinaryProtocol.Factory(true, true));
  // In test mode skip the stop timeout so server shutdown does not slow test teardown.
  if (WorkerContext.getConf().getBoolean(Constants.IN_TEST_MODE)) {
    args.stopTimeoutVal = 0;
  } else {
    args.stopTimeoutVal = Constants.THRIFT_STOP_TIMEOUT_SECONDS;
  }
  return new TThreadPoolServer(args);
}
/** * Helper method to create a {@link org.apache.thrift.server.TThreadPoolServer} for handling * incoming RPC requests. * * @return a thrift server */ private TThreadPoolServer createThriftServer() { int minWorkerThreads = mTachyonConf.getInt(Constants.WORKER_WORKER_BLOCK_THREADS_MIN); int maxWorkerThreads = mTachyonConf.getInt(Constants.WORKER_WORKER_BLOCK_THREADS_MAX); TMultiplexedProcessor processor = new TMultiplexedProcessor(); registerServices(processor, mBlockWorker.getServices()); registerServices(processor, mFileSystemWorker.getServices()); // register additional workers for RPC service for (Worker worker : mAdditionalWorkers) { registerServices(processor, worker.getServices()); } // Return a TTransportFactory based on the authentication type TTransportFactory tTransportFactory; try { tTransportFactory = AuthenticationUtils.getServerTransportFactory(mTachyonConf); } catch (IOException e) { throw Throwables.propagate(e); } TThreadPoolServer.Args args = new TThreadPoolServer.Args(mThriftServerSocket) .minWorkerThreads(minWorkerThreads) .maxWorkerThreads(maxWorkerThreads) .processor(processor) .transportFactory(tTransportFactory) .protocolFactory(new TBinaryProtocol.Factory(true, true)); if (WorkerContext.getConf().getBoolean(Constants.IN_TEST_MODE)) { args.stopTimeoutVal = 0; } else { args.stopTimeoutVal = Constants.THRIFT_STOP_TIMEOUT_SECONDS; } return new TThreadPoolServer(args); }
/** * Creates a default {@link io.netty.bootstrap.ServerBootstrap} where the channel and groups are * preset. * * @param type the channel type; current channel types supported are nio and epoll * @return an instance of {@code ServerBootstrap} */ private ServerBootstrap createBootstrapOfType(final ChannelType type) { final ServerBootstrap boot = new ServerBootstrap(); final int bossThreadCount = mTachyonConf.getInt(Constants.WORKER_NETWORK_NETTY_BOSS_THREADS); // If number of worker threads is 0, Netty creates (#processors * 2) threads by default. final int workerThreadCount = mTachyonConf.getInt(Constants.WORKER_NETWORK_NETTY_WORKER_THREADS); final EventLoopGroup bossGroup = NettyUtils.createEventLoop(type, bossThreadCount, "data-server-boss-%d", false); final EventLoopGroup workerGroup = NettyUtils.createEventLoop(type, workerThreadCount, "data-server-worker-%d", false); final Class<? extends ServerChannel> socketChannelClass = NettyUtils.getServerChannelClass(type); boot.group(bossGroup, workerGroup).channel(socketChannelClass); return boot; }
/** Starts the lineage worker service. */
public void start() {
  // Periodically syncs file persistence state with the file system master.
  int heartbeatIntervalMs =
      mTachyonConf.getInt(Constants.WORKER_FILESYSTEM_HEARTBEAT_INTERVAL_MS);
  mFilePersistenceService = getExecutorService().submit(new HeartbeatThread(
      HeartbeatContext.WORKER_FILESYSTEM_MASTER_SYNC,
      new FileWorkerMasterSyncExecutor(mFileDataManager, mFileSystemMasterWorkerClient),
      heartbeatIntervalMs));
}
@Test
public void freeTest() throws IOException, TException {
  TachyonFile testFile = TachyonFSTestUtils.createByteFile(
      mTfs, "/testFile", TachyonStorageType.STORE, UnderStorageType.NO_PERSIST, 10);
  mFsShell.run(new String[] {"free", "/testFile"});
  // Wait one worker-to-master heartbeat so the free is reflected on the master.
  TachyonConf masterConf = mLocalTachyonCluster.getMasterTachyonConf();
  CommonUtils.sleepMs(masterConf.getInt(Constants.WORKER_TO_MASTER_HEARTBEAT_INTERVAL_MS));
  // The freed file should no longer be fully in memory.
  Assert.assertFalse(mTfs.getInfo(testFile).getInMemoryPercentage() == 100);
}
/**
 * Open the connection to the worker. And start the heartbeat thread.
 *
 * Tries a worker on the local host first; if none is registered (or the host cannot be
 * resolved), falls back to any worker in the cluster. On success it also starts the periodic
 * client heartbeat.
 *
 * @return true if succeed, false otherwise
 * @throws IOException if communication with the master fails (propagated from the master client)
 */
private synchronized boolean connect() throws IOException {
  if (!mConnected) {
    NetAddress workerNetAddress = null;
    try {
      // Prefer a worker registered on this host so reads/writes stay local.
      String localHostName = NetworkAddressUtils.getLocalHostName(mTachyonConf);
      LOG.info("Trying to get local worker host : " + localHostName);
      workerNetAddress = mMasterClient.user_getWorker(false, localHostName);
      // mIsLocal is true only when the returned worker's host resolves to this machine.
      mIsLocal = workerNetAddress.getMHost().equals(
          InetAddress.getByName(localHostName).getHostAddress());
    } catch (NoWorkerException e) {
      // No local worker; fall through to the any-worker lookup below.
      LOG.info(e.getMessage());
      workerNetAddress = null;
    } catch (UnknownHostException e) {
      // Local hostname could not be resolved; also fall back to any worker.
      LOG.info(e.getMessage());
      workerNetAddress = null;
    }
    if (workerNetAddress == null) {
      try {
        // Second attempt: ask the master for any worker (random=true, no host constraint).
        workerNetAddress = mMasterClient.user_getWorker(true, "");
      } catch (NoWorkerException e) {
        // Cluster has no workers at all; report failure without throwing.
        LOG.info("No worker running in the system: " + e.getMessage());
        mClient = null;
        return false;
      }
    }
    String host = NetworkAddressUtils.getFqdnHost(workerNetAddress);
    int port = workerNetAddress.mPort;
    mWorkerAddress = new InetSocketAddress(host, port);
    // The data server listens on the worker's secondary port.
    mWorkerDataServerAddress = new InetSocketAddress(host, workerNetAddress.mSecondaryPort);
    LOG.info("Connecting " + (mIsLocal ? "local" : "remote") + " worker @ " + mWorkerAddress);
    mProtocol = new TBinaryProtocol(new TFramedTransport(new TSocket(host, port)));
    mClient = new WorkerService.Client(mProtocol);
    // Start heartbeating to the worker before opening the transport, so the worker keeps
    // this client's session alive.
    mHeartbeatExecutor = new WorkerClientHeartbeatExecutor(this, mMasterClient.getUserId());
    String threadName = "worker-heartbeat-" + mWorkerAddress;
    int interval = mTachyonConf.getInt(Constants.USER_HEARTBEAT_INTERVAL_MS, Constants.SECOND_MS);
    mHeartbeat =
        mExecutorService.submit(new HeartbeatThread(threadName, mHeartbeatExecutor, interval));
    try {
      mProtocol.getTransport().open();
    } catch (TTransportException e) {
      // Transport failed to open; leave mConnected false and report failure.
      LOG.error(e.getMessage(), e);
      return false;
    }
    mConnected = true;
  }
  return mConnected;
}
private void initBlockMetadataManager(TachyonConf tachyonConf) throws AlreadyExistsException, IOException, OutOfSpaceException { // Initialize storage tiers int totalTiers = tachyonConf.getInt(Constants.WORKER_MAX_TIERED_STORAGE_LEVEL, 1); mAliasToTiers = new HashMap<Integer, StorageTier>(totalTiers); mTiers = new ArrayList<StorageTier>(totalTiers); for (int level = 0; level < totalTiers; level++) { StorageTier tier = StorageTier.newStorageTier(tachyonConf, level); mTiers.add(tier); mAliasToTiers.put(tier.getTierAlias(), tier); } }
/** Starts a local cluster with a small user buffer and prepares pin/unpin options. */
@Before
public final void before() throws Exception {
  TachyonConf masterConf = MasterContext.getConf();
  masterConf.set(Constants.USER_FILE_BUFFER_BYTES, String.valueOf(100));

  mLocalTachyonCluster =
      new LocalTachyonCluster(MEM_CAPACITY_BYTES, USER_QUOTA_UNIT_BYTES, Constants.GB);
  mLocalTachyonCluster.start();

  mTFS = mLocalTachyonCluster.getClient();
  mWorkerConf = mLocalTachyonCluster.getWorkerTachyonConf();
  mWorkerToMasterHeartbeatIntervalMs =
      mWorkerConf.getInt(Constants.WORKER_BLOCK_HEARTBEAT_INTERVAL_MS);

  mSetPinned = new SetStateOptions.Builder(mWorkerConf).setPinned(true).build();
  mSetUnpinned = new SetStateOptions.Builder(mWorkerConf).setPinned(false).build();
}
/**
 * Starts the Tachyon worker server.
 *
 * @throws Exception if the workers fail to start
 */
public void start() throws Exception {
  // NOTE: the order to start different services is sensitive. If you change it, do it cautiously.

  // Start serving metrics system, this will not block
  mWorkerMetricsSystem.start();

  // Start serving the web server, this will not block
  // Requirement: metrics system started so we could add the metrics servlet to the web server
  // Consequence: when starting webserver, the webport will be updated.
  mWebServer.addHandler(mWorkerMetricsSystem.getServletHandler());
  mWebServer.startWebServer();

  // Set updated net address for this worker in context
  // Requirement: RPC, web, and dataserver ports are updated
  // Consequence: create a NetAddress object and set it into WorkerContext
  mNetAddress = new NetAddress(
      NetworkAddressUtils.getConnectHost(ServiceType.WORKER_RPC, mTachyonConf),
      mTachyonConf.getInt(Constants.WORKER_RPC_PORT),
      getDataLocalPort(),
      mTachyonConf.getInt(Constants.WORKER_WEB_PORT));
  WorkerContext.setWorkerNetAddress(mNetAddress);

  // Start each worker
  // Requirement: NetAddress set in WorkerContext, so block worker can initialize BlockMasterSync
  // Consequence: worker id is granted
  startWorkers();
  LOG.info("Started worker with id {}", WorkerIdRegistry.getWorkerId());

  mIsServingRPC = true;

  // Start serving RPC, this will block until serve() returns (i.e. until the server stops)
  LOG.info("Tachyon Worker version {} started @ {}", Version.VERSION, mWorkerAddress);
  mThriftServer.serve();
  LOG.info("Tachyon Worker version {} ended @ {}", Version.VERSION, mWorkerAddress);
}
private ServerBootstrap createBootstrap() { final ServerBootstrap boot = createBootstrapOfType( mTachyonConf.getEnum(Constants.WORKER_NETWORK_NETTY_CHANNEL, ChannelType.class)); // use pooled buffers boot.option(ChannelOption.ALLOCATOR, PooledByteBufAllocator.DEFAULT); boot.childOption(ChannelOption.ALLOCATOR, PooledByteBufAllocator.DEFAULT); // set write buffer // this is the default, but its recommended to set it in case of change in future netty. boot.childOption( ChannelOption.WRITE_BUFFER_HIGH_WATER_MARK, (int) mTachyonConf.getBytes(Constants.WORKER_NETWORK_NETTY_WATERMARK_HIGH)); boot.childOption( ChannelOption.WRITE_BUFFER_LOW_WATER_MARK, (int) mTachyonConf.getBytes(Constants.WORKER_NETWORK_NETTY_WATERMARK_LOW)); // more buffer settings on Netty socket option, one can tune them by specifying // properties, e.g.: // tachyon.worker.network.netty.backlog=50 // tachyon.worker.network.netty.buffer.send=64KB // tachyon.worker.network.netty.buffer.receive=64KB if (mTachyonConf.containsKey(Constants.WORKER_NETWORK_NETTY_BACKLOG)) { boot.option( ChannelOption.SO_BACKLOG, mTachyonConf.getInt(Constants.WORKER_NETWORK_NETTY_BACKLOG)); } if (mTachyonConf.containsKey(Constants.WORKER_NETWORK_NETTY_BUFFER_SEND)) { boot.option( ChannelOption.SO_SNDBUF, (int) mTachyonConf.getBytes(Constants.WORKER_NETWORK_NETTY_BUFFER_SEND)); } if (mTachyonConf.containsKey(Constants.WORKER_NETWORK_NETTY_BUFFER_RECEIVE)) { boot.option( ChannelOption.SO_RCVBUF, (int) mTachyonConf.getBytes(Constants.WORKER_NETWORK_NETTY_BUFFER_RECEIVE)); } return boot; }
/**
 * Periodic check for lost workers: any worker whose last update is older than the configured
 * master-worker timeout is moved to the lost-worker set and processed.
 */
@Override
public void heartbeat() {
  LOG.debug("System status checking.");
  TachyonConf conf = MasterContext.getConf();
  int workerTimeoutMs = conf.getInt(Constants.MASTER_WORKER_TIMEOUT_MS);
  synchronized (mBlocks) {
    synchronized (mWorkers) {
      // Iterator is required here: we remove entries while walking the set.
      Iterator<MasterWorkerInfo> workerIter = mWorkers.iterator();
      while (workerIter.hasNext()) {
        MasterWorkerInfo worker = workerIter.next();
        final long sinceLastUpdateMs = CommonUtils.getCurrentMs() - worker.getLastUpdatedTimeMs();
        if (sinceLastUpdateMs <= workerTimeoutMs) {
          continue;
        }
        LOG.error("The worker {} got timed out!", worker);
        mLostWorkers.add(worker);
        workerIter.remove();
        processLostWorker(worker);
      }
    }
  }
}
@Test
public void readMultiFiles() throws IOException, TException {
  // Each file takes just over half the worker capacity, so the second read evicts the first.
  final int length = WORKER_CAPACITY_BYTES / 2 + 1;

  TachyonFile firstFile = TachyonFSTestUtils.createByteFile(
      mTFS, "/readFile1", TachyonStorageType.STORE, UnderStorageType.NO_PERSIST, length);
  BlockInfo firstBlock = getFirstBlockInfo(firstFile);
  DataServerMessage firstResponse = request(firstBlock);
  assertValid(firstResponse, length, firstBlock.getBlockId(), 0, length);

  TachyonFile secondFile = TachyonFSTestUtils.createByteFile(
      mTFS, "/readFile2", TachyonStorageType.STORE, UnderStorageType.NO_PERSIST, length);
  BlockInfo secondBlock = getFirstBlockInfo(secondFile);
  DataServerMessage secondResponse = request(secondBlock);
  assertValid(secondResponse, length, secondBlock.getBlockId(), 0, length);

  // Give the worker two heartbeats (plus slack) to report the eviction to the master.
  CommonUtils.sleepMs(
      mWorkerTachyonConf.getInt(Constants.WORKER_TO_MASTER_HEARTBEAT_INTERVAL_MS) * 2 + 10);

  FileInfo fileInfo = mTFS.getInfo(mTFS.open(new TachyonURI("/readFile1")));
  Assert.assertEquals(0, fileInfo.inMemoryPercentage);
}
/**
 * Validates the path, verifying that it contains the {@link Constants#HEADER} or {@link
 * Constants#HEADER_FT} and a hostname:port specified.
 *
 * @param path the path to be verified
 * @param tachyonConf the instance of {@link tachyon.conf.TachyonConf} to be used
 * @return the verified path in a form like tachyon://host:port/dir. If only the "/dir" or "dir"
 *         part is provided, the host and port are retrieved from property,
 *         tachyon.master.hostname and tachyon.master.port, respectively.
 * @throws IOException if the given path is not valid
 */
public static String validatePath(String path, TachyonConf tachyonConf) throws IOException {
  boolean hasScheme =
      path.startsWith(Constants.HEADER) || path.startsWith(Constants.HEADER_FT);
  if (hasScheme) {
    // NOTE(review): a "scheme://" prefix already contains ':', so this contains(":") check
    // looks vacuous for any path that passed the startsWith test above — confirm whether a
    // host:port check on the authority part was intended.
    if (!path.contains(":")) {
      throw new IOException("Invalid Path: " + path + ". Use " + Constants.HEADER
          + "host:port/ ," + Constants.HEADER_FT + "host:port/" + " , or /file");
    }
    return path;
  }
  // No scheme given: build the authority from the configured master host/port, choosing the
  // fault-tolerant header when ZooKeeper is enabled.
  String hostname = NetworkAddressUtils.getConnectHost(ServiceType.MASTER_RPC, tachyonConf);
  int port = tachyonConf.getInt(Constants.MASTER_PORT);
  String header =
      tachyonConf.getBoolean(Constants.ZOOKEEPER_ENABLED) ? Constants.HEADER_FT : Constants.HEADER;
  return PathUtils.concatPath(header + hostname + ":" + port, path);
}