예제 #1
0
  /**
   * Creates a new instance of {@link FileSystemWorker}.
   *
   * @param blockDataManager a block data manager handle
   * @throws IOException if an I/O error occurs
   */
  public FileSystemWorker(BlockDataManager blockDataManager) throws IOException {
    super(
        Executors.newFixedThreadPool(
            3, ThreadFactoryUtils.build("file-system-worker-heartbeat-%d", true)));
    Preconditions.checkState(WorkerIdRegistry.getWorkerId() != 0, "Failed to register worker");

    mTachyonConf = WorkerContext.getConf();
    mFileDataManager = new FileDataManager(Preconditions.checkNotNull(blockDataManager));

    // Setup MasterClientBase
    mFileSystemMasterWorkerClient =
        new FileSystemMasterClient(
            NetworkAddressUtils.getConnectAddress(ServiceType.MASTER_RPC, mTachyonConf),
            mTachyonConf);
  }
예제 #2
0
  /**
   * Commits a temporary block to Tachyon managed space and then reports it to the master.
   *
   * <p>The block is first committed in the local block store. Afterwards the block is locked, its
   * location and size are read, and the commit is sent to the block master together with the
   * number of bytes currently used on the block's tier. The lock is always released, whether or
   * not the master call succeeds.
   *
   * @param sessionId The id of the client
   * @param blockId The id of the block to commit
   * @throws BlockAlreadyExistsException if blockId already exists in committed blocks
   * @throws BlockDoesNotExistException if the temporary block cannot be found
   * @throws InvalidWorkerStateException if blockId does not belong to sessionId
   * @throws IOException if the block cannot be moved from temporary path to committed path, or if
   *     reporting the committed block to the master fails
   * @throws WorkerOutOfSpaceException if there is no more space left to hold the block
   */
  public void commitBlock(long sessionId, long blockId)
      throws BlockAlreadyExistsException, BlockDoesNotExistException, InvalidWorkerStateException,
          IOException, WorkerOutOfSpaceException {
    // Step 1: commit locally.
    mBlockStore.commitBlock(sessionId, blockId);

    // TODO(calvin): Reconsider how to do this without heavy locking.
    // Step 2: under a block lock, gather the metadata the master needs and report the commit.
    Long blockLock = mBlockStore.lockBlock(sessionId, blockId);
    try {
      BlockMeta committedMeta = mBlockStore.getBlockMeta(sessionId, blockId, blockLock);
      BlockStoreLocation location = committedMeta.getBlockLocation();
      Long blockSize = committedMeta.getBlockSize();
      Long usedOnTier =
          mBlockStore.getBlockStoreMeta().getUsedBytesOnTiers().get(location.tierAlias());
      mBlockMasterClient.commitBlock(
          WorkerIdRegistry.getWorkerId(), usedOnTier, location.tierAlias(), blockId, blockSize);
    } catch (IOException cause) {
      // Re-wrap so callers can tell the failure happened while updating the master.
      throw new IOException("Failed to commit block to master.", cause);
    } finally {
      mBlockStore.unlockBlock(blockLock);
    }
  }
  /**
   * Starts the local multi-master cluster together with a single worker.
   *
   * <p>The method performs the following steps in order:
   *
   * <ol>
   *   <li>picks a fresh temporary path as the Tachyon home and re-creates it as a directory;
   *   <li>fills the master configuration (ZooKeeper enabled, ports set to "0") and starts
   *       {@code mNumOfMasters} masters;
   *   <li>polls the masters until one of them reports that it is serving;
   *   <li>derives the worker configuration from the master configuration with a single "MEM"
   *       storage tier;
   *   <li>creates the block worker (and the lineage worker when lineage is enabled) and runs it
   *       on a new thread;
   *   <li>resets the client context so that it reflects the final configuration.
   * </ol>
   *
   * <p>NOTE(review): the leader wait has no timeout, so this method does not return if no master
   * ever starts serving.
   *
   * @throws IOException if the temporary home cannot be created or a component fails to start
   */
  public void start() throws IOException {
    int numLevels = 1;
    mTachyonHome =
        File.createTempFile("Tachyon", "U" + System.currentTimeMillis()).getAbsolutePath();
    mWorkerDataFolder = "/datastore";

    mHostname = NetworkAddressUtils.getLocalHostName(100);

    mMasterConf = MasterContext.getConf();
    mMasterConf.set(Constants.IN_TEST_MODE, "true");
    mMasterConf.set(Constants.TACHYON_HOME, mTachyonHome);
    mMasterConf.set(Constants.ZOOKEEPER_ENABLED, "true");
    mMasterConf.set(Constants.MASTER_HOSTNAME, mHostname);
    mMasterConf.set(Constants.MASTER_BIND_HOST, mHostname);
    mMasterConf.set(Constants.MASTER_PORT, "0");
    mMasterConf.set(Constants.MASTER_WEB_BIND_HOST, mHostname);
    mMasterConf.set(Constants.MASTER_WEB_PORT, "0");
    mMasterConf.set(Constants.ZOOKEEPER_ADDRESS, mCuratorServer.getConnectString());
    mMasterConf.set(Constants.ZOOKEEPER_ELECTION_PATH, "/election");
    mMasterConf.set(Constants.ZOOKEEPER_LEADER_PATH, "/leader");
    mMasterConf.set(Constants.USER_QUOTA_UNIT_BYTES, "10000");
    mMasterConf.set(Constants.USER_BLOCK_SIZE_BYTES_DEFAULT, Integer.toString(mUserBlockSize));
    mMasterConf.set(Constants.MASTER_TTLCHECKER_INTERVAL_MS, "1000");
    // Since tests are always running on a single host keep the resolution timeout low as otherwise
    // people running with strange network configurations will see very slow tests
    mMasterConf.set(Constants.NETWORK_HOST_RESOLUTION_TIMEOUT_MS, "250");

    // Disable hdfs client caching to avoid file system close() affecting other clients
    System.setProperty("fs.hdfs.impl.disable.cache", "true");

    // createTempFile produced a regular file; replace it with a directory (permission 777)
    deleteDir(mTachyonHome);
    mkdir(mTachyonHome);

    for (int k = 0; k < mNumOfMasters; k++) {
      final LocalTachyonMaster master = LocalTachyonMaster.create(mTachyonHome);
      master.start();
      LOG.info(
          "master NO.{} started, isServing: {}, address: {}",
          k,
          master.isServing(),
          master.getAddress());
      mMasters.add(master);
      // Each master should generate a new port for binding
      mMasterConf.set(Constants.MASTER_PORT, "0");
    }

    // Create the UFS directory after LocalTachyonMaster construction, because LocalTachyonMaster
    // sets UNDERFS_ADDRESS.
    mkdir(mMasterConf.get(Constants.UNDERFS_ADDRESS));

    LOG.info("all {} masters started.", mNumOfMasters);
    LOG.info("waiting for a leader.");
    boolean hasLeader = false;
    while (!hasLeader) {
      for (int i = 0; i < mMasters.size(); i++) {
        if (mMasters.get(i).isServing()) {
          LOG.info(
              "master NO.{} is selected as leader. address: {}", i, mMasters.get(i).getAddress());
          hasLeader = true;
          break;
        }
      }
      if (!hasLeader) {
        // Pause between polls instead of spinning, so the election threads are not starved of CPU.
        CommonUtils.sleepMs(10);
      }
    }
    // Use first master port
    mMasterConf.set(Constants.MASTER_PORT, String.valueOf(getMasterPort()));

    CommonUtils.sleepMs(10);

    mWorkerConf = WorkerContext.getConf();
    mWorkerConf.merge(mMasterConf);
    mWorkerConf.set(Constants.WORKER_DATA_FOLDER, mWorkerDataFolder);
    mWorkerConf.set(Constants.WORKER_MEMORY_SIZE, String.valueOf(mWorkerCapacityBytes));
    mWorkerConf.set(Constants.WORKER_BLOCK_HEARTBEAT_INTERVAL_MS, "15");

    // Setup conf for worker
    mWorkerConf.set(Constants.WORKER_TIERED_STORE_LEVELS, Integer.toString(numLevels));
    mWorkerConf.set(String.format(Constants.WORKER_TIERED_STORE_LEVEL_ALIAS_FORMAT, 0), "MEM");
    mWorkerConf.set(
        String.format(Constants.WORKER_TIERED_STORE_LEVEL_DIRS_PATH_FORMAT, 0),
        mTachyonHome + "/ramdisk");
    mWorkerConf.set(
        String.format(Constants.WORKER_TIERED_STORE_LEVEL_DIRS_QUOTA_FORMAT, 0),
        String.valueOf(mWorkerCapacityBytes));

    // Since tests are always running on a single host keep the resolution timeout low as otherwise
    // people running with strange network configurations will see very slow tests
    mWorkerConf.set(Constants.NETWORK_HOST_RESOLUTION_TIMEOUT_MS, "250");

    // Prefix the configured directories of every additional tier with the Tachyon home. With
    // numLevels == 1 this loop does not execute; it only matters if more tiers are configured.
    for (int level = 1; level < numLevels; level++) {
      String tierLevelDirPath =
          String.format(Constants.WORKER_TIERED_STORE_LEVEL_DIRS_PATH_FORMAT, level);
      String[] dirPaths = mWorkerConf.get(tierLevelDirPath).split(",");
      StringBuilder newPath = new StringBuilder();
      for (int i = 0; i < dirPaths.length; i++) {
        if (i > 0) {
          newPath.append(',');
        }
        newPath.append(mTachyonHome).append(dirPaths[i]);
      }
      mWorkerConf.set(tierLevelDirPath, newPath.toString());
    }

    mWorkerConf.set(Constants.WORKER_BIND_HOST, mHostname);
    mWorkerConf.set(Constants.WORKER_PORT, "0");
    mWorkerConf.set(Constants.WORKER_DATA_BIND_HOST, mHostname);
    mWorkerConf.set(Constants.WORKER_DATA_PORT, "0");
    mWorkerConf.set(Constants.WORKER_WEB_BIND_HOST, mHostname);
    mWorkerConf.set(Constants.WORKER_WEB_PORT, "0");
    mWorkerConf.set(Constants.WORKER_WORKER_BLOCK_THREADS_MIN, "1");
    mWorkerConf.set(Constants.WORKER_WORKER_BLOCK_THREADS_MAX, "100");

    // Perform immediate shutdown of data server. Graceful shutdown is unnecessary and slow
    mWorkerConf.set(Constants.WORKER_NETWORK_NETTY_SHUTDOWN_QUIET_PERIOD, "0");
    mWorkerConf.set(Constants.WORKER_NETWORK_NETTY_SHUTDOWN_TIMEOUT, "0");

    mWorker = new BlockWorker();
    if (LineageUtils.isLineageEnabled(WorkerContext.getConf())) {
      // Setup the lineage worker
      LOG.info("Started lineage worker at worker with ID {}", WorkerIdRegistry.getWorkerId());
      mLineageWorker = new LineageWorker(mWorker.getBlockDataManager());
    }
    Runnable runWorker =
        new Runnable() {
          @Override
          public void run() {
            try {
              // Start the lineage worker
              if (LineageUtils.isLineageEnabled(WorkerContext.getConf())) {
                mLineageWorker.start();
              }
              mWorker.process();
            } catch (Exception e) {
              // This thread runs the worker, so report the failure as a worker start error.
              throw new RuntimeException(e + " \n Start Worker Error \n" + e.getMessage(), e);
            }
          }
        };
    mWorkerThread = new Thread(runWorker);
    mWorkerThread.start();
    // The client context should reflect the updates to the conf.
    ClientContext.reset(mWorkerConf);
  }
예제 #4
0
  /**
   * Constructs a {@link BlockWorker} and wires up all of its components.
   *
   * <p>In order, the constructor creates the master clients, the block data manager, the metrics
   * system, the data server, the RPC (Thrift) server and the web server. It starts the metrics
   * system and the web server, registers this worker's address with the block master through
   * {@link WorkerIdRegistry}, and finally creates the sync, session-cleaner and (if enabled)
   * space-reserver helpers. The actual data and RPC ports are written back into the worker
   * configuration once they are known.
   *
   * @throws ConnectionFailedException if network connection failed
   * @throws IOException for other exceptions
   */
  public BlockWorker() throws IOException, ConnectionFailedException {
    super(
        Executors.newFixedThreadPool(
            4, ThreadFactoryUtils.build("block-worker-heartbeat-%d", true)));
    mTachyonConf = WorkerContext.getConf();
    mStartTimeMs = System.currentTimeMillis();

    // Clients for the block master and the file system master, both reached via the master RPC
    // address.
    mBlockMasterClient =
        new BlockMasterClient(
            NetworkAddressUtils.getConnectAddress(ServiceType.MASTER_RPC, mTachyonConf),
            mTachyonConf);
    mFileSystemMasterClient =
        new FileSystemMasterClient(
            NetworkAddressUtils.getConnectAddress(ServiceType.MASTER_RPC, mTachyonConf),
            mTachyonConf);

    // Block data manager on top of a tiered block store.
    WorkerSource metricsSource = new WorkerSource();
    TieredBlockStore blockStore = new TieredBlockStore();
    mBlockDataManager =
        new BlockDataManager(
            metricsSource, mBlockMasterClient, mFileSystemMasterClient, blockStore);

    // Metrics: the source exposes gauges backed by the block data manager.
    mWorkerMetricsSystem = new MetricsSystem("worker", mTachyonConf);
    metricsSource.registerGauges(mBlockDataManager);
    mWorkerMetricsSystem.registerSource(metricsSource);

    // Data server; publish the port it actually bound to.
    mDataServer =
        DataServer.Factory.create(
            NetworkAddressUtils.getBindAddress(ServiceType.WORKER_DATA, mTachyonConf),
            mBlockDataManager,
            mTachyonConf);
    mTachyonConf.set(Constants.WORKER_DATA_PORT, Integer.toString(mDataServer.getPort()));

    // RPC server; publish the port of the server socket before creating the server itself.
    mServiceHandler = new BlockWorkerClientServiceHandler(mBlockDataManager);
    mThriftServerSocket = createThriftServerSocket();
    mPort = NetworkAddressUtils.getThriftPort(mThriftServerSocket);
    mTachyonConf.set(Constants.WORKER_RPC_PORT, Integer.toString(mPort));
    mThriftServer = createThriftServer();

    // Web UI server.
    mWebServer =
        new WorkerUIWebServer(
            ServiceType.WORKER_WEB,
            NetworkAddressUtils.getBindAddress(ServiceType.WORKER_WEB, mTachyonConf),
            mBlockDataManager,
            NetworkAddressUtils.getConnectAddress(ServiceType.WORKER_RPC, mTachyonConf),
            mStartTimeMs,
            mTachyonConf);
    mWorkerMetricsSystem.start();
    // The metrics servlet can only be attached once the metrics system is running.
    mWebServer.addHandler(mWorkerMetricsSystem.getServletHandler());
    mWebServer.startWebServer();
    int webUiPort = mWebServer.getLocalPort();

    // Register this worker (RPC, data and web ports) with the block master to obtain its id.
    mWorkerNetAddress =
        new NetAddress(
            NetworkAddressUtils.getConnectHost(ServiceType.WORKER_RPC, mTachyonConf),
            mPort,
            mDataServer.getPort(),
            webUiPort);
    WorkerIdRegistry.registerWithBlockMaster(mBlockMasterClient, mWorkerNetAddress);

    // Helper tasks: master sync, pin list sync and session cleanup.
    mBlockMasterSync =
        new BlockMasterSync(mBlockDataManager, mWorkerNetAddress, mBlockMasterClient);
    mPinListSync = new PinListSync(mBlockDataManager, mFileSystemMasterClient);
    mSessionCleanerThread = new SessionCleaner(mBlockDataManager);

    // The space reserver is optional and controlled by configuration.
    boolean reserverEnabled =
        mTachyonConf.getBoolean(Constants.WORKER_TIERED_STORE_RESERVER_ENABLED);
    if (reserverEnabled) {
      mSpaceReserver = new SpaceReserver(mBlockDataManager);
    }
  }