Пример #1
0
  /**
   * Opens the connection to the worker and, once the transport is open, starts the heartbeat
   * thread.
   *
   * <p>A worker on the local host is asked for first. If the master reports no such worker, or the
   * local host name cannot be resolved, any worker known to the master is used instead. When the
   * client is already connected this method does nothing and returns true.
   *
   * @return true if the client is connected when this method returns; false if no worker is
   *     running or the transport to the selected worker could not be opened
   * @throws IOException if looking up a worker fails with an I/O error
   */
  private synchronized boolean connect() throws IOException {
    if (mConnected) {
      return true;
    }

    NetAddress workerNetAddress = null;
    try {
      String localHostName = NetworkAddressUtils.getLocalHostName(mTachyonConf);
      LOG.info("Trying to get local worker host : " + localHostName);
      workerNetAddress = mMasterClient.user_getWorker(false, localHostName);
      mIsLocal =
          workerNetAddress
              .getMHost()
              .equals(InetAddress.getByName(localHostName).getHostAddress());
    } catch (NoWorkerException e) {
      LOG.info(e.getMessage());
      workerNetAddress = null;
    } catch (UnknownHostException e) {
      LOG.info(e.getMessage());
      workerNetAddress = null;
    }

    if (workerNetAddress == null) {
      // No usable local worker: fall back to whichever worker the master hands out.
      try {
        workerNetAddress = mMasterClient.user_getWorker(true, "");
      } catch (NoWorkerException e) {
        LOG.info("No worker running in the system: " + e.getMessage());
        mClient = null;
        return false;
      }
    }

    String host = NetworkAddressUtils.getFqdnHost(workerNetAddress);
    int port = workerNetAddress.mPort;
    mWorkerAddress = new InetSocketAddress(host, port);
    mWorkerDataServerAddress = new InetSocketAddress(host, workerNetAddress.mSecondaryPort);
    LOG.info("Connecting " + (mIsLocal ? "local" : "remote") + " worker @ " + mWorkerAddress);

    mProtocol = new TBinaryProtocol(new TFramedTransport(new TSocket(host, port)));
    mClient = new WorkerService.Client(mProtocol);

    mHeartbeatExecutor = new WorkerClientHeartbeatExecutor(this, mMasterClient.getUserId());
    String threadName = "worker-heartbeat-" + mWorkerAddress;
    int interval = mTachyonConf.getInt(Constants.USER_HEARTBEAT_INTERVAL_MS, Constants.SECOND_MS);

    try {
      mProtocol.getTransport().open();
    } catch (TTransportException e) {
      LOG.error(e.getMessage(), e);
      return false;
    }

    // Submit the heartbeat only after the transport is open. Submitting it earlier left a
    // heartbeat task running when the open failed, and every retry added another one.
    mHeartbeat =
        mExecutorService.submit(new HeartbeatThread(threadName, mHeartbeatExecutor, interval));
    mConnected = true;

    return mConnected;
  }
Пример #2
0
  /**
   * Constructs a {@link TachyonWorker}.
   *
   * <p>Creates the block worker and the file system worker, loads any additional workers offered
   * by {@link WorkerFactory} implementations found through {@link ServiceLoader}, and sets up the
   * metrics system, the web server and the Thrift RPC server. If any step throws, the failure is
   * logged and the JVM exits with status -1.
   */
  public TachyonWorker() {
    try {
      mStartTimeMs = System.currentTimeMillis();
      mTachyonConf = WorkerContext.getConf();

      mBlockWorker = new BlockWorker();
      mFileSystemWorker = new FileSystemWorker(mBlockWorker);

      mAdditionalWorkers = Lists.newArrayList();
      List<? extends Worker> coreWorkers = Lists.newArrayList(mBlockWorker, mFileSystemWorker);
      // Look up worker factories through the service loader. The class loader is passed
      // explicitly so the context class loader does not have to be set.
      ServiceLoader<WorkerFactory> workerFactories =
          ServiceLoader.load(WorkerFactory.class, WorkerFactory.class.getClassLoader());
      for (WorkerFactory workerFactory : workerFactories) {
        Worker extraWorker = workerFactory.create(coreWorkers);
        if (extraWorker == null) {
          // This factory produced no worker; nothing to register.
          continue;
        }
        mAdditionalWorkers.add(extraWorker);
      }

      // Metrics: register the worker source with gauges backed by the block worker
      mWorkerMetricsSystem = new MetricsSystem("worker", mTachyonConf);
      WorkerSource metricsSource = WorkerContext.getWorkerSource();
      metricsSource.registerGauges(mBlockWorker);
      mWorkerMetricsSystem.registerSource(metricsSource);

      // Web UI server
      mWebServer =
          new WorkerUIWebServer(
              ServiceType.WORKER_WEB,
              NetworkAddressUtils.getBindAddress(ServiceType.WORKER_WEB, mTachyonConf),
              mBlockWorker,
              NetworkAddressUtils.getConnectAddress(ServiceType.WORKER_RPC, mTachyonConf),
              mStartTimeMs,
              mTachyonConf);

      // Thrift RPC server: the port actually assigned to the socket is written back into the
      // configuration before the connect address is resolved below.
      mThriftServerSocket = createThriftServerSocket();
      mRPCPort = NetworkAddressUtils.getThriftPort(mThriftServerSocket);
      mTachyonConf.set(Constants.WORKER_RPC_PORT, String.valueOf(mRPCPort));
      mThriftServer = createThriftServer();

      mWorkerAddress = NetworkAddressUtils.getConnectAddress(ServiceType.WORKER_RPC, mTachyonConf);

    } catch (Exception e) {
      LOG.error("Failed to initialize {}", getClass().getName(), e);
      System.exit(-1);
    }
  }
Пример #3
0
 /**
  * Constructs a {@link BlockWorkerClient} for the given worker.
  *
  * @param workerNetAddress the worker's address; its RPC and data port socket addresses are
  *     derived from it
  * @param executorService the executor service, must not be null
  * @param conf Tachyon configuration
  * @param sessionId the id of the session
  * @param isLocal true if it is a local client, false otherwise
  * @param clientMetrics metrics of the client, must not be null
  */
 public BlockWorkerClient(
     NetAddress workerNetAddress, ExecutorService executorService, TachyonConf conf,
     long sessionId, boolean isLocal, ClientMetrics clientMetrics) {
   super(NetworkAddressUtils.getRpcPortSocketAddress(workerNetAddress), conf, "blockWorker");
   // Plain value fields first
   mSessionId = sessionId;
   mIsLocal = isLocal;
   // Derived address and null-checked collaborators
   mWorkerDataServerAddress = NetworkAddressUtils.getDataPortSocketAddress(workerNetAddress);
   mExecutorService = Preconditions.checkNotNull(executorService);
   mClientMetrics = Preconditions.checkNotNull(clientMetrics);
   // Created last because it receives a reference to this client
   mHeartbeatExecutor = new BlockWorkerClientHeartbeatExecutor(this);
 }
Пример #4
0
 /**
  * Creates the {@link org.apache.thrift.transport.TServerSocket} used by the RPC server, bound to
  * the worker RPC bind address taken from the configuration.
  *
  * @return a thrift server socket
  */
 private TServerSocket createThriftServerSocket() {
   final TServerSocket serverSocket;
   try {
     serverSocket =
         new TServerSocket(
             NetworkAddressUtils.getBindAddress(ServiceType.WORKER_RPC, mTachyonConf));
   } catch (TTransportException e) {
     // Log the failure, then rethrow it through Throwables.propagate
     LOG.error(e.getMessage(), e);
     throw Throwables.propagate(e);
   }
   return serverSocket;
 }
Пример #5
0
  /**
   * Test constructor for TachyonConfTest class.
   *
   * <p>Properties are combined from the following sources, later sources overriding earlier ones:
   *
   * <ol>
   *   <li>Default properties: runtime defaults computed here (master hostname, minimum worker
   *       thread counts), then the default properties file from the classpath. Keys defined in
   *       that file replace the runtime defaults.
   *   <li>Site specific properties from the site properties file on the classpath, if present.
   *   <li>System properties, only when {@code includeSystemProperties} is true.
   * </ol>
   *
   * <p>Finally the master address property is derived from the resulting master hostname, master
   * port and ZooKeeper flag.
   *
   * @param includeSystemProperties whether JVM system properties are merged in
   * @throws RuntimeException if the default properties file is missing or cannot be read
   */
  TachyonConf(boolean includeSystemProperties) {
    // Load default
    Properties defaultProps = new Properties();

    // Override runtime default
    defaultProps.setProperty(Constants.MASTER_HOSTNAME, NetworkAddressUtils.getLocalHostName(250));
    defaultProps.setProperty(
        Constants.WORKER_MIN_WORKER_THREADS,
        String.valueOf(Runtime.getRuntime().availableProcessors()));
    defaultProps.setProperty(
        Constants.MASTER_MIN_WORKER_THREADS,
        String.valueOf(Runtime.getRuntime().availableProcessors()));

    InputStream defaultInputStream =
        TachyonConf.class.getClassLoader().getResourceAsStream(DEFAULT_PROPERTIES);
    if (defaultInputStream == null) {
      throw new RuntimeException("The default Tachyon properties file does not exist.");
    }
    try {
      defaultProps.load(defaultInputStream);
    } catch (IOException e) {
      throw new RuntimeException("Unable to load default Tachyon properties file.", e);
    } finally {
      // Properties.load leaves the stream open, so it is closed here to avoid leaking it.
      try {
        defaultInputStream.close();
      } catch (IOException e) {
        LOG.warn("Unable to close default Tachyon properties file.", e);
      }
    }

    // Load site specific properties file
    Properties siteProps = new Properties();
    InputStream siteInputStream =
        TachyonConf.class.getClassLoader().getResourceAsStream(SITE_PROPERTIES);
    if (siteInputStream != null) {
      try {
        siteProps.load(siteInputStream);
      } catch (IOException e) {
        // The site file is optional, so a read failure is only a warning
        LOG.warn("Unable to load site Tachyon configuration file.", e);
      } finally {
        try {
          siteInputStream.close();
        } catch (IOException e) {
          LOG.warn("Unable to close site Tachyon configuration file.", e);
        }
      }
    }

    // Load system properties
    Properties systemProps = new Properties();
    if (includeSystemProperties) {
      systemProps.putAll(System.getProperties());
    }

    // Now lets combine; each putAll overrides keys set by the previous ones
    mProperties.putAll(defaultProps);
    mProperties.putAll(siteProps);
    mProperties.putAll(systemProps);

    // Update tachyon.master_address
    String masterHostname = mProperties.getProperty(Constants.MASTER_HOSTNAME);
    String masterPort = mProperties.getProperty(Constants.MASTER_PORT);
    boolean useZk = Boolean.parseBoolean(mProperties.getProperty(Constants.USE_ZOOKEEPER));
    String masterAddress =
        (useZk ? Constants.HEADER_FT : Constants.HEADER) + masterHostname + ":" + masterPort;
    mProperties.setProperty(Constants.MASTER_ADDRESS, masterAddress);
  }
Пример #6
0
 /**
  * Runs the shell {@code mkdir} command on a fully qualified path and checks that the directory
  * exists afterwards and that the shell output matches the expected command output.
  */
 @Test
 public void mkdirTest() throws IOException {
   String localHost =
       NetworkAddressUtils.getLocalHostName(Constants.DEFAULT_HOST_RESOLUTION_TIMEOUT_MS);
   String authority = localHost + ":" + mLocalTachyonCluster.getMasterPort();
   String qualifiedPath = "tachyon://" + authority + "/root/testFile1";

   mFsShell.mkdir(new String[] {"mkdir", qualifiedPath});

   TachyonFile createdDir = mTfs.getFile(new TachyonURI("/root/testFile1"));
   Assert.assertNotNull(createdDir);
   Assert.assertEquals(
       getCommandOutput(new String[] {"mkdir", qualifiedPath}), mOutput.toString());
   Assert.assertTrue(createdDir.isDirectory());
 }
Пример #7
0
  /**
   * Creates a {@link TachyonFS} from the given configuration: resolves the master RPC address,
   * creates the heartbeat thread pool, acquires the master and worker clients (registering each
   * with the closer) and builds the root URI.
   *
   * @param tachyonConf the Tachyon configuration
   */
  private TachyonFS(TachyonConf tachyonConf) {
    super(tachyonConf);

    mMasterAddress = NetworkAddressUtils.getConnectAddress(ServiceType.MASTER_RPC, tachyonConf);
    mZookeeperMode = mTachyonConf.getBoolean(Constants.USE_ZOOKEEPER);
    mExecutorService =
        Executors.newFixedThreadPool(2, ThreadFactoryUtils.build("client-heartbeat-%d", true));

    // Clients are registered with the closer in acquisition order
    mFSMasterClient = mCloser.register(FileSystemContext.INSTANCE.acquireMasterClient());
    mBlockMasterClient = mCloser.register(BlockStoreContext.INSTANCE.acquireMasterClient());
    mWorkerClient = mCloser.register(BlockStoreContext.INSTANCE.acquireWorkerClient());

    mUserFailedSpaceRequestLimits = mTachyonConf.getInt(Constants.USER_FAILED_SPACE_REQUEST_LIMITS);

    // Root URI: the fault-tolerant scheme is used when ZooKeeper mode is on
    final String scheme;
    if (mZookeeperMode) {
      scheme = Constants.SCHEME_FT;
    } else {
      scheme = Constants.SCHEME;
    }
    mRootUri =
        new TachyonURI(
            scheme,
            mMasterAddress.getHostName() + ":" + mMasterAddress.getPort(),
            TachyonURI.SEPARATOR);
  }
Пример #8
0
  /**
   * Creates a new instance of {@link FileSystemWorker}.
   *
   * @param blockDataManager a block data manager handle
   * @throws IOException if an I/O error occurs
   */
  public FileSystemWorker(BlockDataManager blockDataManager) throws IOException {
    super(
        Executors.newFixedThreadPool(
            3, ThreadFactoryUtils.build("file-system-worker-heartbeat-%d", true)));
    Preconditions.checkState(WorkerIdRegistry.getWorkerId() != 0, "Failed to register worker");

    mTachyonConf = WorkerContext.getConf();
    mFileDataManager = new FileDataManager(Preconditions.checkNotNull(blockDataManager));

    // Setup MasterClientBase
    mFileSystemMasterWorkerClient =
        new FileSystemMasterClient(
            NetworkAddressUtils.getConnectAddress(ServiceType.MASTER_RPC, mTachyonConf),
            mTachyonConf);
  }
Пример #9
0
  /**
   * Returns the {@link InetSocketAddress} of the master. Without zookeeper the superclass address
   * is returned; with zookeeper the leader's address is looked up through a
   * {@link LeaderInquireClient}.
   *
   * @return the {@link InetSocketAddress} of the master
   * @throws IllegalStateException if zookeeper is used but the zookeeper address or leader path
   *     is missing from the configuration
   */
  @Override
  protected synchronized InetSocketAddress getAddress() {
    if (mUseZookeeper) {
      // Both zookeeper settings are required before the leader can be queried
      Preconditions.checkState(mTachyonConf.containsKey(Constants.ZOOKEEPER_ADDRESS));
      Preconditions.checkState(mTachyonConf.containsKey(Constants.ZOOKEEPER_LEADER_PATH));
      LeaderInquireClient inquirer =
          LeaderInquireClient.getClient(
              mTachyonConf.get(Constants.ZOOKEEPER_ADDRESS),
              mTachyonConf.get(Constants.ZOOKEEPER_LEADER_PATH));
      try {
        String leaderAddress = inquirer.getMasterAddress();
        return NetworkAddressUtils.parseInetSocketAddress(leaderAddress);
      } catch (IOException e) {
        LOG.error(e.getMessage(), e);
        throw Throwables.propagate(e);
      }
    }

    return super.getAddress();
  }
Пример #10
0
  /**
   * Creates a new local block input stream by locking the block through a worker client acquired
   * for the local host and opening the returned block file read-only.
   *
   * @param blockId the block id
   * @param blockSize the block size, forwarded to the superclass constructor
   * @throws IOException if the block has no local path, or if locking the block or opening its
   *     file fails; the worker client is released before the exception is rethrown
   */
  public LocalBlockInStream(long blockId, long blockSize) throws IOException {
    super(blockId, blockSize);
    mContext = BlockStoreContext.INSTANCE;

    mCloser = Closer.create();
    String localHostName = NetworkAddressUtils.getLocalHostName(ClientContext.getConf());
    mWorkerClient = mContext.acquireWorkerClient(localHostName);

    final FileChannel channel;
    try {
      String lockedPath = mWorkerClient.lockBlock(blockId);
      if (lockedPath == null) {
        throw new IOException(ExceptionMessage.BLOCK_NOT_LOCALLY_AVAILABLE.getMessage(mBlockId));
      }
      // Both the file and its channel are registered with the closer
      RandomAccessFile blockFile = mCloser.register(new RandomAccessFile(lockedPath, "r"));
      channel = mCloser.register(blockFile.getChannel());
    } catch (IOException e) {
      // Hand the worker client back before propagating the failure
      mContext.releaseWorkerClient(mWorkerClient);
      throw e;
    }

    mLocalFileChannel = channel;
  }
Пример #11
0
  /**
   * Starts the Tachyon worker server.
   *
   * <p>In order: starts the metrics system, attaches the metrics servlet to the web server and
   * starts it, publishes this worker's {@link NetAddress} to {@link WorkerContext}, starts the
   * individual workers, and finally serves RPC requests. Per the comments below, serving RPC
   * blocks, so this method is not expected to return until the RPC server stops.
   *
   * @throws Exception if the workers fail to start
   */
  public void start() throws Exception {
    // NOTE: the order to start different services is sensitive. If you change it, do it cautiously.

    // Start serving metrics system, this will not block
    mWorkerMetricsSystem.start();

    // Start serving the web server, this will not block
    // Requirement: metrics system started so we could add the metrics servlet to the web server
    // Consequence: when starting webserver, the webport will be updated.
    mWebServer.addHandler(mWorkerMetricsSystem.getServletHandler());
    mWebServer.startWebServer();

    // Set updated net address for this worker in context
    // Requirement: RPC, web, and dataserver ports are updated
    // Consequence: create a NetAddress object and set it into WorkerContext
    mNetAddress =
        new NetAddress(
            NetworkAddressUtils.getConnectHost(ServiceType.WORKER_RPC, mTachyonConf),
            mTachyonConf.getInt(Constants.WORKER_RPC_PORT),
            getDataLocalPort(),
            mTachyonConf.getInt(Constants.WORKER_WEB_PORT));
    WorkerContext.setWorkerNetAddress(mNetAddress);

    // Start each worker
    // Requirement: NetAddress set in WorkerContext, so block worker can initialize BlockMasterSync
    // Consequence: worker id is granted
    startWorkers();
    LOG.info("Started worker with id {}", WorkerIdRegistry.getWorkerId());

    // Flag is raised just before the blocking serve() call below
    mIsServingRPC = true;

    // Start serving RPC, this will block
    LOG.info("Tachyon Worker version {} started @ {}", Version.VERSION, mWorkerAddress);
    mThriftServer.serve();
    // Reached only after serve() returns
    LOG.info("Tachyon Worker version {} ended @ {}", Version.VERSION, mWorkerAddress);
  }
Пример #12
0
 /**
  * Validates the path, verifying that it contains the {@link Constants#HEADER} or {@link
  * Constants#HEADER_FT} and a hostname:port specified.
  *
  * <p>A path that does not start with either header is prefixed with a header, the master RPC
  * connect host and the configured master port; the fault-tolerant header is chosen when
  * zookeeper is enabled in the configuration.
  *
  * @param path the path to be verified
  * @param tachyonConf the instance of {@link tachyon.conf.TachyonConf} to be used
  * @return the verified path in a form like tachyon://host:port/dir
  * @throws IOException if the path starts with a header but contains no ':'
  */
 public static String validatePath(String path, TachyonConf tachyonConf) throws IOException {
   boolean hasHeader = path.startsWith(Constants.HEADER) || path.startsWith(Constants.HEADER_FT);

   if (!hasHeader) {
     // Bare path: build the authority from the configuration
     String hostname = NetworkAddressUtils.getConnectHost(ServiceType.MASTER_RPC, tachyonConf);
     int port = tachyonConf.getInt(Constants.MASTER_PORT);
     String header =
         tachyonConf.getBoolean(Constants.ZOOKEEPER_ENABLED)
             ? Constants.HEADER_FT
             : Constants.HEADER;
     return PathUtils.concatPath(header + hostname + ":" + port, path);
   }

   // NOTE(review): if the headers themselves contain ':' (e.g. "tachyon://"), this check can
   // never fail for a path that starts with one of them - confirm against Constants.
   if (path.contains(":")) {
     return path;
   }

   throw new IOException(
       "Invalid Path: "
           + path
           + ". Use "
           + Constants.HEADER
           + "host:port/ ,"
           + Constants.HEADER_FT
           + "host:port/"
           + " , or /file");
 }
Пример #13
0
 /**
  * Constructs a {@link LocalFirstPolicy}, recording the local host name resolved with the client
  * configuration.
  */
 public LocalFirstPolicy() {
   this.mLocalHostName = NetworkAddressUtils.getLocalHostName(ClientContext.getConf());
 }
Пример #14
0
  /**
   * Prepares a local Tachyon master: sets up the under file system cluster and journal folders,
   * adjusts the master configuration for testing, creates the master and the thread that will run
   * it. The thread is created here but not started.
   *
   * @param tachyonHome the Tachyon home directory; the under file system cluster is created under
   *     {@code tachyonHome + "/dfs"}
   * @throws IOException if an I/O error occurs while preparing the under file system folders
   */
  private LocalTachyonMaster(final String tachyonHome) throws IOException {
    mTachyonHome = tachyonHome;

    TachyonConf conf = MasterContext.getConf();
    mHostname = NetworkAddressUtils.getConnectHost(ServiceType.MASTER_RPC, conf);

    // Bring up the under file system for the test. According to the original notes this is either
    // the local file system (under "mTachyonHome/tachyon*") or a distributed file system cluster
    // such as a mini DFS cluster ("hdfs://xxx:xxx/tachyon*").
    mUnderFSCluster = UnderFileSystemCluster.get(mTachyonHome + "/dfs", conf);
    mUnderFSFolder = mUnderFSCluster.getUnderFilesystemAddress() + "/tachyon_underfs_folder";
    // The journal folder lives in the same under file system
    mJournalFolder = mUnderFSCluster.getUnderFilesystemAddress() + "/journal";

    // Create the journal root, one sub-folder per master service, and a format marker file
    UnderFileSystemUtils.mkdirIfNotExists(mJournalFolder, conf);
    for (String serviceName :
        new String[] {
          Constants.BLOCK_MASTER_SERVICE_NAME,
          Constants.FILE_SYSTEM_MASTER_SERVICE_NAME,
          Constants.RAW_TABLE_MASTER_SERVICE_NAME,
        }) {
      UnderFileSystemUtils.mkdirIfNotExists(
          PathUtils.concatPath(mJournalFolder, serviceName), conf);
    }
    UnderFileSystemUtils.touch(mJournalFolder + "/_format_" + System.currentTimeMillis(), conf);

    conf.set(Constants.MASTER_JOURNAL_FOLDER, mJournalFolder);
    conf.set(Constants.UNDERFS_ADDRESS, mUnderFSFolder);

    conf.set(Constants.MASTER_MIN_WORKER_THREADS, "1");
    conf.set(Constants.MASTER_MAX_WORKER_THREADS, "100");

    // Tests that cannot connect should fail fast instead of going through the default, very
    // high, number of retries
    conf.set(Constants.MASTER_RETRY_COUNT, "3");

    // Tests run on a single host, so a short resolution timeout avoids very slow tests on
    // machines with unusual network configurations
    conf.set(Constants.HOST_RESOLUTION_TIMEOUT_MS, "250");

    conf.set(Constants.WEB_THREAD_COUNT, "1");
    conf.set(
        Constants.WEB_RESOURCES,
        PathUtils.concatPath(System.getProperty("user.dir"), "../servers/src/main/webapp"));

    mTachyonMaster = TachyonMaster.Factory.createMaster();

    // Record the RPC port the master actually got
    conf.set(Constants.MASTER_PORT, Integer.toString(getRPCLocalPort()));

    // The master runs on its own thread; failures are rethrown as RuntimeException
    mMasterThread =
        new Thread(
            new Runnable() {
              @Override
              public void run() {
                try {
                  mTachyonMaster.start();
                } catch (Exception e) {
                  throw new RuntimeException(e + " \n Start Master Error \n" + e.getMessage(), e);
                }
              }
            });
  }
  /**
   * Starts the local cluster: configures and starts {@code mNumOfMasters} masters in zookeeper
   * mode, waits until one of them is serving, then configures and starts a single block worker on
   * its own thread and reinitializes the client context with the worker configuration.
   *
   * @throws IOException if an I/O error occurs while creating the home directory or the masters
   */
  public void start() throws IOException {
    int maxLevel = 1;
    mTachyonHome =
        File.createTempFile("Tachyon", "U" + System.currentTimeMillis()).getAbsolutePath();
    mWorkerDataFolder = "/datastore";

    mHostname = NetworkAddressUtils.getLocalHostName(100);

    mMasterConf = MasterContext.getConf();
    mMasterConf.set(Constants.IN_TEST_MODE, "true");
    mMasterConf.set(Constants.TACHYON_HOME, mTachyonHome);
    mMasterConf.set(Constants.USE_ZOOKEEPER, "true");
    mMasterConf.set(Constants.MASTER_HOSTNAME, mHostname);
    mMasterConf.set(Constants.MASTER_BIND_HOST, mHostname);
    mMasterConf.set(Constants.MASTER_PORT, "0");
    mMasterConf.set(Constants.MASTER_WEB_BIND_HOST, mHostname);
    mMasterConf.set(Constants.MASTER_WEB_PORT, "0");
    mMasterConf.set(Constants.ZOOKEEPER_ADDRESS, mCuratorServer.getConnectString());
    mMasterConf.set(Constants.ZOOKEEPER_ELECTION_PATH, "/election");
    mMasterConf.set(Constants.ZOOKEEPER_LEADER_PATH, "/leader");
    mMasterConf.set(Constants.USER_QUOTA_UNIT_BYTES, "10000");
    mMasterConf.set(Constants.USER_DEFAULT_BLOCK_SIZE_BYTE, Integer.toString(mUserBlockSize));

    // Since tests are always running on a single host keep the resolution timeout low as otherwise
    // people running with strange network configurations will see very slow tests
    mMasterConf.set(Constants.HOST_RESOLUTION_TIMEOUT_MS, "250");

    // Disable hdfs client caching to avoid file system close() affecting other clients
    System.setProperty("fs.hdfs.impl.disable.cache", "true");

    // re-build the dir to set permission to 777
    deleteDir(mTachyonHome);
    mkdir(mTachyonHome);

    for (int k = 0; k < mNumOfMasters; k++) {
      final LocalTachyonMaster master = LocalTachyonMaster.create(mTachyonHome);
      master.start();
      LOG.info(
          "master NO."
              + k
              + " started, isServing: "
              + master.isServing()
              + ", address: "
              + master.getAddress());
      mMasters.add(master);
      // Each master should generate a new port for binding
      mMasterConf.set(Constants.MASTER_PORT, "0");
    }

    // Create the directories for the data and workers after LocalTachyonMaster construction,
    // because LocalTachyonMaster sets the UNDERFS_DATA_FOLDER and UNDERFS_WORKERS_FOLDER.
    mkdir(mMasterConf.get(Constants.UNDERFS_DATA_FOLDER));
    mkdir(mMasterConf.get(Constants.UNDERFS_WORKERS_FOLDER));

    LOG.info("all " + mNumOfMasters + " masters started.");
    LOG.info("waiting for a leader.");
    boolean hasLeader = false;
    while (!hasLeader) {
      for (int i = 0; i < mMasters.size(); i++) {
        if (mMasters.get(i).isServing()) {
          LOG.info(
              "master NO."
                  + i
                  + " is selected as leader. address: "
                  + mMasters.get(i).getAddress());
          hasLeader = true;
          break;
        }
      }
      if (!hasLeader) {
        // Pause between polls instead of spinning a CPU core while the election is in progress
        CommonUtils.sleepMs(10);
      }
    }
    // Use first master port
    mMasterConf.set(Constants.MASTER_PORT, getMasterPort() + "");

    CommonUtils.sleepMs(10);

    mWorkerConf = WorkerContext.getConf();
    mWorkerConf.merge(mMasterConf);
    mWorkerConf.set(Constants.WORKER_DATA_FOLDER, mWorkerDataFolder);
    mWorkerConf.set(Constants.WORKER_MEMORY_SIZE, mWorkerCapacityBytes + "");
    mWorkerConf.set(Constants.WORKER_TO_MASTER_HEARTBEAT_INTERVAL_MS, 15 + "");

    // Setup conf for worker
    mWorkerConf.set(Constants.WORKER_MAX_TIERED_STORAGE_LEVEL, Integer.toString(maxLevel));
    mWorkerConf.set(String.format(Constants.WORKER_TIERED_STORAGE_LEVEL_ALIAS_FORMAT, 0), "MEM");
    mWorkerConf.set(
        String.format(Constants.WORKER_TIERED_STORAGE_LEVEL_DIRS_PATH_FORMAT, 0),
        mTachyonHome + "/ramdisk");
    mWorkerConf.set(
        String.format(Constants.WORKER_TIERED_STORAGE_LEVEL_DIRS_QUOTA_FORMAT, 0),
        mWorkerCapacityBytes + "");

    // Since tests are always running on a single host keep the resolution timeout low as otherwise
    // people running with strange network configurations will see very slow tests
    mWorkerConf.set(Constants.HOST_RESOLUTION_TIMEOUT_MS, "250");

    // Prefix the directories of every tier above the first with the Tachyon home. With
    // maxLevel == 1 this loop does not execute.
    for (int level = 1; level < maxLevel; level++) {
      String tierLevelDirPath =
          String.format(Constants.WORKER_TIERED_STORAGE_LEVEL_DIRS_PATH_FORMAT, level);
      String[] dirPaths = mWorkerConf.get(tierLevelDirPath).split(",");
      String newPath = "";
      for (String dirPath : dirPaths) {
        newPath += mTachyonHome + dirPath + ",";
      }
      mWorkerConf.set(
          String.format(Constants.WORKER_TIERED_STORAGE_LEVEL_DIRS_PATH_FORMAT, level),
          newPath.substring(0, newPath.length() - 1));
    }

    mWorkerConf.set(Constants.WORKER_BIND_HOST, mHostname);
    mWorkerConf.set(Constants.WORKER_PORT, "0");
    mWorkerConf.set(Constants.WORKER_DATA_BIND_HOST, mHostname);
    mWorkerConf.set(Constants.WORKER_DATA_PORT, "0");
    mWorkerConf.set(Constants.WORKER_WEB_BIND_HOST, mHostname);
    mWorkerConf.set(Constants.WORKER_WEB_PORT, "0");
    mWorkerConf.set(Constants.WORKER_MIN_WORKER_THREADS, "1");
    mWorkerConf.set(Constants.WORKER_MAX_WORKER_THREADS, "100");

    // Perform immediate shutdown of data server. Graceful shutdown is unnecessary and slow
    mWorkerConf.set(Constants.WORKER_NETWORK_NETTY_SHUTDOWN_QUIET_PERIOD, Integer.toString(0));
    mWorkerConf.set(Constants.WORKER_NETWORK_NETTY_SHUTDOWN_TIMEOUT, Integer.toString(0));

    mWorker = new BlockWorker();
    Runnable runWorker =
        new Runnable() {
          @Override
          public void run() {
            try {
              mWorker.process();
            } catch (Exception e) {
              // This thread runs the worker, so the message names the worker, not the master
              throw new RuntimeException(e + " \n Start Worker Error \n" + e.getMessage(), e);
            }
          }
        };
    mWorkerThread = new Thread(runWorker);
    mWorkerThread.start();
    // The client context should reflect the updates to the conf.
    if (sReinitializer == null) {
      ClientContext.accessReinitializer(sReinitializerAccesser);
    }
    sReinitializer.reinitializeWithConf(mWorkerConf);
  }
Пример #16
0
 /**
  * Returns a single-element list holding the worker RPC connect host from the configuration.
  * The {@code path} argument is not consulted.
  */
 @Override
 public List<String> getFileLocations(String path) throws IOException {
   String workerHost = NetworkAddressUtils.getConnectHost(ServiceType.WORKER_RPC, mTachyonConf);
   List<String> locations = new ArrayList<String>();
   locations.add(workerHost);
   return locations;
 }
Пример #17
0
  /**
   * Creates a new instance of {@link BlockWorker}.
   *
   * <p>The constructor creates the master clients, the block data manager, the metrics system,
   * the data server, the Thrift RPC server and the web server, starts the metrics system and the
   * web server, registers the worker's net address with the block master, and creates the sync,
   * session cleaner and (if enabled) space reserver components. The data and RPC ports that were
   * actually assigned are written back into the configuration along the way, so the order of the
   * statements matters.
   *
   * @throws ConnectionFailedException if network connection failed
   * @throws IOException for other exceptions
   */
  public BlockWorker() throws IOException, ConnectionFailedException {
    super(
        Executors.newFixedThreadPool(
            4, ThreadFactoryUtils.build("block-worker-heartbeat-%d", true)));
    mTachyonConf = WorkerContext.getConf();
    mStartTimeMs = System.currentTimeMillis();

    // Setup MasterClientBase
    mBlockMasterClient =
        new BlockMasterClient(
            NetworkAddressUtils.getConnectAddress(ServiceType.MASTER_RPC, mTachyonConf),
            mTachyonConf);

    mFileSystemMasterClient =
        new FileSystemMasterClient(
            NetworkAddressUtils.getConnectAddress(ServiceType.MASTER_RPC, mTachyonConf),
            mTachyonConf);

    // Set up BlockDataManager
    WorkerSource workerSource = new WorkerSource();
    mBlockDataManager =
        new BlockDataManager(
            workerSource, mBlockMasterClient, mFileSystemMasterClient, new TieredBlockStore());

    // Setup metrics collection
    mWorkerMetricsSystem = new MetricsSystem("worker", mTachyonConf);
    workerSource.registerGauges(mBlockDataManager);
    mWorkerMetricsSystem.registerSource(workerSource);

    // Setup DataServer
    mDataServer =
        DataServer.Factory.create(
            NetworkAddressUtils.getBindAddress(ServiceType.WORKER_DATA, mTachyonConf),
            mBlockDataManager,
            mTachyonConf);
    // Reset data server port to the port the data server reports
    mTachyonConf.set(Constants.WORKER_DATA_PORT, Integer.toString(mDataServer.getPort()));

    // Setup RPC Server
    mServiceHandler = new BlockWorkerClientServiceHandler(mBlockDataManager);
    mThriftServerSocket = createThriftServerSocket();
    mPort = NetworkAddressUtils.getThriftPort(mThriftServerSocket);
    // Reset worker RPC port to the port of the Thrift server socket
    mTachyonConf.set(Constants.WORKER_RPC_PORT, Integer.toString(mPort));
    mThriftServer = createThriftServer();

    // Setup web server
    mWebServer =
        new WorkerUIWebServer(
            ServiceType.WORKER_WEB,
            NetworkAddressUtils.getBindAddress(ServiceType.WORKER_WEB, mTachyonConf),
            mBlockDataManager,
            NetworkAddressUtils.getConnectAddress(ServiceType.WORKER_RPC, mTachyonConf),
            mStartTimeMs,
            mTachyonConf);
    mWorkerMetricsSystem.start();
    // Add the metrics servlet to the web server, this must be done after the metrics system starts
    mWebServer.addHandler(mWorkerMetricsSystem.getServletHandler());
    mWebServer.startWebServer();
    // Read the web port only after the web server has been started
    int webPort = mWebServer.getLocalPort();

    // Get the worker id: build this worker's net address (host, RPC port, data port, web port)
    // and register it with the block master
    mWorkerNetAddress =
        new NetAddress(
            NetworkAddressUtils.getConnectHost(ServiceType.WORKER_RPC, mTachyonConf),
            mPort,
            mDataServer.getPort(),
            webPort);
    WorkerIdRegistry.registerWithBlockMaster(mBlockMasterClient, mWorkerNetAddress);

    mBlockMasterSync =
        new BlockMasterSync(mBlockDataManager, mWorkerNetAddress, mBlockMasterClient);

    // Setup PinListSyncer
    mPinListSync = new PinListSync(mBlockDataManager, mFileSystemMasterClient);

    // Setup session cleaner
    mSessionCleanerThread = new SessionCleaner(mBlockDataManager);

    // Setup space reserver, only when enabled in the configuration
    if (mTachyonConf.getBoolean(Constants.WORKER_TIERED_STORE_RESERVER_ENABLED)) {
      mSpaceReserver = new SpaceReserver(mBlockDataManager);
    }
  }
Пример #18
0
 /**
  * Returns the host address that the Thrift RPC server socket is bound to.
  *
  * @return the worker RPC service bind host
  */
 public String getRPCBindHost() {
   final String bindHost =
       NetworkAddressUtils.getThriftSocket(mThriftServerSocket).getInetAddress().getHostAddress();
   return bindHost;
 }
Пример #19
0
  /**
   * Reads part of a block from one of its remote locations. The locations are tried in order and
   * the first non-null buffer is returned; a failure on one location is logged and the next one
   * is tried.
   *
   * @param tachyonFS a TachyonFS (not used by this method)
   * @param blockInfo the block information
   * @param offset offset to start the read at
   * @param len number of bytes to read
   * @param conf Tachyon configuration
   * @return <code>ByteBuffer</code> containing the bytes read, or null if no location could
   *     provide the data
   */
  public ByteBuffer readRemoteByteBuffer(
      TachyonFS tachyonFS, ClientBlockInfo blockInfo, long offset, long len, TachyonConf conf) {
    ByteBuffer result = null;

    try {
      List<NetAddress> locations = blockInfo.getLocations();
      LOG.info("Block locations:" + locations);
      String localhost = NetworkAddressUtils.getConnectHost(ServiceType.WORKER_RPC, conf);

      for (NetAddress location : locations) {
        String host = location.mHost;
        int port = location.mSecondaryPort;

        // A primary port of -1 means the data is not in the remote machine's memory. The primary
        // port is checked because the secondary (data transfer) port is set even when the data
        // only lives in the under storage.
        if (location.mPort == -1) {
          continue;
        }

        boolean pointsAtLocalMachine =
            host.equals(InetAddress.getLocalHost().getHostName())
                || host.equals(InetAddress.getLocalHost().getHostAddress())
                || host.equals(localhost);
        if (pointsAtLocalMachine) {
          LOG.warn(
              "Master thinks the local machine has data, but not!"
                  + "(or local read is disabled) blockId:{}",
              blockInfo.blockId);
        }
        String hostInfo =
            host
                + ":"
                + port
                + " current host is "
                + localhost
                + " "
                + NetworkAddressUtils.getLocalIpAddress(conf);
        LOG.info(hostInfo);

        try {
          result =
              retrieveByteBufferFromRemoteMachine(
                  new InetSocketAddress(host, port), blockInfo.blockId, offset, len, conf);
        } catch (IOException e) {
          LOG.error(
              "Fail to retrieve byte buffer for block "
                  + blockInfo.blockId
                  + " from remote "
                  + host
                  + ":"
                  + port
                  + " with offset "
                  + offset
                  + " and length "
                  + len,
              e);
          result = null;
        }

        // Stop at the first location that produced data
        if (result != null) {
          break;
        }
      }
    } catch (IOException e) {
      LOG.error("Failed to get read data from remote ", e);
      result = null;
    }

    return result;
  }