/** * Creates a new instance of {@link FileSystemWorker}. * * @param blockDataManager a block data manager handle * @throws IOException if an I/O error occurs */ public FileSystemWorker(BlockDataManager blockDataManager) throws IOException { super( Executors.newFixedThreadPool( 3, ThreadFactoryUtils.build("file-system-worker-heartbeat-%d", true))); Preconditions.checkState(WorkerIdRegistry.getWorkerId() != 0, "Failed to register worker"); mTachyonConf = WorkerContext.getConf(); mFileDataManager = new FileDataManager(Preconditions.checkNotNull(blockDataManager)); // Setup MasterClientBase mFileSystemMasterWorkerClient = new FileSystemMasterClient( NetworkAddressUtils.getConnectAddress(ServiceType.MASTER_RPC, mTachyonConf), mTachyonConf); }
/** * Commits a block to Tachyon managed space. The block must be temporary. The block is persisted * after {@link BlockStore#commitBlock(long, long)}. The block will not be accessible until {@link * WorkerBlockMasterClient#commitBlock} succeeds. * * @param sessionId The id of the client * @param blockId The id of the block to commit * @throws BlockAlreadyExistsException if blockId already exists in committed blocks * @throws BlockDoesNotExistException if the temporary block cannot be found * @throws InvalidWorkerStateException if blockId does not belong to sessionId * @throws IOException if the block cannot be moved from temporary path to committed path * @throws WorkerOutOfSpaceException if there is no more space left to hold the block */ public void commitBlock(long sessionId, long blockId) throws BlockAlreadyExistsException, BlockDoesNotExistException, InvalidWorkerStateException, IOException, WorkerOutOfSpaceException { mBlockStore.commitBlock(sessionId, blockId); // TODO(calvin): Reconsider how to do this without heavy locking. // Block successfully committed, update master with new block metadata Long lockId = mBlockStore.lockBlock(sessionId, blockId); try { BlockMeta meta = mBlockStore.getBlockMeta(sessionId, blockId, lockId); BlockStoreLocation loc = meta.getBlockLocation(); Long length = meta.getBlockSize(); BlockStoreMeta storeMeta = mBlockStore.getBlockStoreMeta(); Long bytesUsedOnTier = storeMeta.getUsedBytesOnTiers().get(loc.tierAlias()); mBlockMasterClient.commitBlock( WorkerIdRegistry.getWorkerId(), bytesUsedOnTier, loc.tierAlias(), blockId, length); } catch (IOException ioe) { throw new IOException("Failed to commit block to master.", ioe); } finally { mBlockStore.unlockBlock(lockId); } }
public void start() throws IOException { int numLevels = 1; mTachyonHome = File.createTempFile("Tachyon", "U" + System.currentTimeMillis()).getAbsolutePath(); mWorkerDataFolder = "/datastore"; mHostname = NetworkAddressUtils.getLocalHostName(100); mMasterConf = MasterContext.getConf(); mMasterConf.set(Constants.IN_TEST_MODE, "true"); mMasterConf.set(Constants.TACHYON_HOME, mTachyonHome); mMasterConf.set(Constants.ZOOKEEPER_ENABLED, "true"); mMasterConf.set(Constants.MASTER_HOSTNAME, mHostname); mMasterConf.set(Constants.MASTER_BIND_HOST, mHostname); mMasterConf.set(Constants.MASTER_PORT, "0"); mMasterConf.set(Constants.MASTER_WEB_BIND_HOST, mHostname); mMasterConf.set(Constants.MASTER_WEB_PORT, "0"); mMasterConf.set(Constants.ZOOKEEPER_ADDRESS, mCuratorServer.getConnectString()); mMasterConf.set(Constants.ZOOKEEPER_ELECTION_PATH, "/election"); mMasterConf.set(Constants.ZOOKEEPER_LEADER_PATH, "/leader"); mMasterConf.set(Constants.USER_QUOTA_UNIT_BYTES, "10000"); mMasterConf.set(Constants.USER_BLOCK_SIZE_BYTES_DEFAULT, Integer.toString(mUserBlockSize)); mMasterConf.set(Constants.MASTER_TTLCHECKER_INTERVAL_MS, Integer.toString(1000)); // Since tests are always running on a single host keep the resolution timeout low as otherwise // people running with strange network configurations will see very slow tests mMasterConf.set(Constants.NETWORK_HOST_RESOLUTION_TIMEOUT_MS, "250"); // Disable hdfs client caching to avoid file system close() affecting other clients System.setProperty("fs.hdfs.impl.disable.cache", "true"); // re-build the dir to set permission to 777 deleteDir(mTachyonHome); mkdir(mTachyonHome); for (int k = 0; k < mNumOfMasters; k++) { final LocalTachyonMaster master = LocalTachyonMaster.create(mTachyonHome); master.start(); LOG.info( "master NO.{} started, isServing: {}, address: {}", k, master.isServing(), master.getAddress()); mMasters.add(master); // Each master should generate a new port for binding mMasterConf.set(Constants.MASTER_PORT, "0"); } // Create the UFS directory after LocalTachyonMaster construction, because LocalTachyonMaster // sets UNDERFS_ADDRESS. mkdir(mMasterConf.get(Constants.UNDERFS_ADDRESS)); LOG.info("all {} masters started.", mNumOfMasters); LOG.info("waiting for a leader."); boolean hasLeader = false; while (!hasLeader) { for (int i = 0; i < mMasters.size(); i++) { if (mMasters.get(i).isServing()) { LOG.info( "master NO.{} is selected as leader. address: {}", i, mMasters.get(i).getAddress()); hasLeader = true; break; } } } // Use first master port mMasterConf.set(Constants.MASTER_PORT, getMasterPort() + ""); CommonUtils.sleepMs(10); mWorkerConf = WorkerContext.getConf(); mWorkerConf.merge(mMasterConf); mWorkerConf.set(Constants.WORKER_DATA_FOLDER, mWorkerDataFolder); mWorkerConf.set(Constants.WORKER_MEMORY_SIZE, mWorkerCapacityBytes + ""); mWorkerConf.set(Constants.WORKER_BLOCK_HEARTBEAT_INTERVAL_MS, 15 + ""); // Setup conf for worker mWorkerConf.set(Constants.WORKER_TIERED_STORE_LEVELS, Integer.toString(numLevels)); mWorkerConf.set(String.format(Constants.WORKER_TIERED_STORE_LEVEL_ALIAS_FORMAT, 0), "MEM"); mWorkerConf.set( String.format(Constants.WORKER_TIERED_STORE_LEVEL_DIRS_PATH_FORMAT, 0), mTachyonHome + "/ramdisk"); mWorkerConf.set( String.format(Constants.WORKER_TIERED_STORE_LEVEL_DIRS_QUOTA_FORMAT, 0), mWorkerCapacityBytes + ""); // Since tests are always running on a single host keep the resolution timeout low as otherwise // people running with strange network configurations will see very slow tests mWorkerConf.set(Constants.NETWORK_HOST_RESOLUTION_TIMEOUT_MS, "250"); for (int level = 1; level < numLevels; level++) { String tierLevelDirPath = String.format(Constants.WORKER_TIERED_STORE_LEVEL_DIRS_PATH_FORMAT, level); String[] dirPaths = mWorkerConf.get(tierLevelDirPath).split(","); String newPath = ""; for (String dirPath : dirPaths) { newPath += mTachyonHome + dirPath + ","; } mWorkerConf.set( String.format(Constants.WORKER_TIERED_STORE_LEVEL_DIRS_PATH_FORMAT, level), newPath.substring(0, newPath.length() - 1)); } mWorkerConf.set(Constants.WORKER_BIND_HOST, mHostname); mWorkerConf.set(Constants.WORKER_PORT, "0"); mWorkerConf.set(Constants.WORKER_DATA_BIND_HOST, mHostname); mWorkerConf.set(Constants.WORKER_DATA_PORT, "0"); mWorkerConf.set(Constants.WORKER_WEB_BIND_HOST, mHostname); mWorkerConf.set(Constants.WORKER_WEB_PORT, "0"); mWorkerConf.set(Constants.WORKER_WORKER_BLOCK_THREADS_MIN, "1"); mWorkerConf.set(Constants.WORKER_WORKER_BLOCK_THREADS_MAX, "100"); // Perform immediate shutdown of data server. Graceful shutdown is unnecessary and slow mWorkerConf.set(Constants.WORKER_NETWORK_NETTY_SHUTDOWN_QUIET_PERIOD, Integer.toString(0)); mWorkerConf.set(Constants.WORKER_NETWORK_NETTY_SHUTDOWN_TIMEOUT, Integer.toString(0)); mWorker = new BlockWorker(); if (LineageUtils.isLineageEnabled(WorkerContext.getConf())) { // Setup the lineage worker LOG.info("Started lineage worker at worker with ID {}", WorkerIdRegistry.getWorkerId()); mLineageWorker = new LineageWorker(mWorker.getBlockDataManager()); } Runnable runWorker = new Runnable() { @Override public void run() { try { // Start the lineage worker if (LineageUtils.isLineageEnabled(WorkerContext.getConf())) { mLineageWorker.start(); } mWorker.process(); } catch (Exception e) { throw new RuntimeException(e + " \n Start Master Error \n" + e.getMessage(), e); } } }; mWorkerThread = new Thread(runWorker); mWorkerThread.start(); // The client context should reflect the updates to the conf. ClientContext.reset(mWorkerConf); }
/** * Creates a new instance of {@link BlockWorker}. * * @throws ConnectionFailedException if network connection failed * @throws IOException for other exceptions */ public BlockWorker() throws IOException, ConnectionFailedException { super( Executors.newFixedThreadPool( 4, ThreadFactoryUtils.build("block-worker-heartbeat-%d", true))); mTachyonConf = WorkerContext.getConf(); mStartTimeMs = System.currentTimeMillis(); // Setup MasterClientBase mBlockMasterClient = new BlockMasterClient( NetworkAddressUtils.getConnectAddress(ServiceType.MASTER_RPC, mTachyonConf), mTachyonConf); mFileSystemMasterClient = new FileSystemMasterClient( NetworkAddressUtils.getConnectAddress(ServiceType.MASTER_RPC, mTachyonConf), mTachyonConf); // Set up BlockDataManager WorkerSource workerSource = new WorkerSource(); mBlockDataManager = new BlockDataManager( workerSource, mBlockMasterClient, mFileSystemMasterClient, new TieredBlockStore()); // Setup metrics collection mWorkerMetricsSystem = new MetricsSystem("worker", mTachyonConf); workerSource.registerGauges(mBlockDataManager); mWorkerMetricsSystem.registerSource(workerSource); // Setup DataServer mDataServer = DataServer.Factory.create( NetworkAddressUtils.getBindAddress(ServiceType.WORKER_DATA, mTachyonConf), mBlockDataManager, mTachyonConf); // Reset data server port mTachyonConf.set(Constants.WORKER_DATA_PORT, Integer.toString(mDataServer.getPort())); // Setup RPC Server mServiceHandler = new BlockWorkerClientServiceHandler(mBlockDataManager); mThriftServerSocket = createThriftServerSocket(); mPort = NetworkAddressUtils.getThriftPort(mThriftServerSocket); // Reset worker RPC port mTachyonConf.set(Constants.WORKER_RPC_PORT, Integer.toString(mPort)); mThriftServer = createThriftServer(); // Setup web server mWebServer = new WorkerUIWebServer( ServiceType.WORKER_WEB, NetworkAddressUtils.getBindAddress(ServiceType.WORKER_WEB, mTachyonConf), mBlockDataManager, NetworkAddressUtils.getConnectAddress(ServiceType.WORKER_RPC, mTachyonConf), mStartTimeMs, mTachyonConf); mWorkerMetricsSystem.start(); // Add the metrics servlet to the web server, this must be done after the metrics system starts mWebServer.addHandler(mWorkerMetricsSystem.getServletHandler()); mWebServer.startWebServer(); int webPort = mWebServer.getLocalPort(); // Get the worker id mWorkerNetAddress = new NetAddress( NetworkAddressUtils.getConnectHost(ServiceType.WORKER_RPC, mTachyonConf), mPort, mDataServer.getPort(), webPort); WorkerIdRegistry.registerWithBlockMaster(mBlockMasterClient, mWorkerNetAddress); mBlockMasterSync = new BlockMasterSync(mBlockDataManager, mWorkerNetAddress, mBlockMasterClient); // Setup PinListSyncer mPinListSync = new PinListSync(mBlockDataManager, mFileSystemMasterClient); // Setup session cleaner mSessionCleanerThread = new SessionCleaner(mBlockDataManager); // Setup space reserver if (mTachyonConf.getBoolean(Constants.WORKER_TIERED_STORE_RESERVER_ENABLED)) { mSpaceReserver = new SpaceReserver(mBlockDataManager); } }