コード例 #1
ファイル: TachyonWorker.java プロジェクト: rmetzger/tachyon
  /**
   * Heartbeat loop: runs until {@code mStop} is set, pacing calls to
   * {@code mWorkerStorage.heartbeat()} and logging the command that comes back.
   *
   * <p>NOTE(review): this listing appears to have lost lines (closing braces, and possibly
   * {@code break} statements and a {@code default} label in the switch). As shown, every case
   * falls through to the final {@code throw}; confirm against the real file.
   */
  public void run() {
    long lastHeartbeatMs = System.currentTimeMillis();
    Command cmd = null;
    while (!mStop) {
      // Time elapsed since the last successful heartbeat.
      long diff = System.currentTimeMillis() - lastHeartbeatMs;
      if (diff < WorkerConf.get().TO_MASTER_HEARTBEAT_INTERVAL_MS) {
        LOG.debug("Heartbeat process takes {} ms.", diff);
        // Sleep for the remainder of the configured interval.
        CommonUtils.sleepMs(LOG, WorkerConf.get().TO_MASTER_HEARTBEAT_INTERVAL_MS - diff);
      } else {
        // The previous iteration already used up the whole interval; report it and go on.
        LOG.error("Heartbeat process takes " + diff + " ms.");

      try {
        cmd = mWorkerStorage.heartbeat();

        // Only a heartbeat that returned normally moves the timestamp forward.
        lastHeartbeatMs = System.currentTimeMillis();
      } catch (IOException e) {
        LOG.error(e.getMessage(), e);
        CommonUtils.sleepMs(LOG, Constants.SECOND_MS);
        cmd = null;
        // Give up once no heartbeat has succeeded for HEARTBEAT_TIMEOUT_MS.
        if (System.currentTimeMillis() - lastHeartbeatMs >= WorkerConf.get().HEARTBEAT_TIMEOUT_MS) {
          throw new RuntimeException(
              "Timebeat timeout " + (System.currentTimeMillis() - lastHeartbeatMs) + "ms");

      if (cmd != null) {
        // Dispatch on the command type received from the heartbeat call.
        switch (cmd.mCommandType) {
          case Unknown:
            LOG.error("Unknown command: " + cmd);
          case Nothing:
            LOG.debug("Nothing command: {}", cmd);
          case Register:
            LOG.info("Register command: " + cmd);
          case Free:
            LOG.info("Free command: " + cmd);
          case Delete:
            LOG.info("Delete command: " + cmd);
            throw new RuntimeException("Un-recognized command from master " + cmd.toString());

コード例 #2
ファイル: TachyonWorker.java プロジェクト: rmetzger/tachyon
 private DataServer createDataServer(
     final InetSocketAddress dataAddress, final BlocksLocker blockLocker) {
   switch (WorkerConf.get().NETWORK_TYPE) {
     case NIO:
       return new NIODataServer(dataAddress, blockLocker);
     case NETTY:
       return new NettyDataServer(dataAddress, blockLocker);
       throw new AssertionError("Unknown network type: " + WorkerConf.get().NETWORK_TYPE);
コード例 #3
ファイル: TachyonWorker.java プロジェクト: rmetzger/tachyon
  /**
   * Command-line entry point for the worker.
   *
   * <p>NOTE(review): several statements are incomplete in this listing (the call that consumes the
   * usage string, the expression that builds {@code worker}, and the body of the {@code try}), so
   * only the visible parts are described here.
   *
   * @param args command-line arguments; more than one argument takes the usage branch
   * @throws UnknownHostException declared by the signature; presumably from the local host name
   *     lookup, confirm against {@code NetworkUtils}
   */
  public static void main(String[] args) throws UnknownHostException {
    if (args.length > 1) {
      // Usage text; the statement that emits it is not visible here.
          "Usage: java -cp target/tachyon-"
              + Version.VERSION
              + "-jar-with-dependencies.jar "
              + "tachyon.Worker [<MasterHost:Port>]");

    WorkerConf wConf = WorkerConf.get();

    String resolvedWorkerHost = NetworkUtils.getLocalHostName();
    LOG.info("Resolved local TachyonWorker host to " + resolvedWorkerHost);

    // The worker is built with "<resolved host>:<configured port>" as one of its arguments.
    TachyonWorker worker =
            resolvedWorkerHost + ":" + wConf.PORT,
    try {
    } catch (Exception e) {
      // Any exception from the try body is logged and rethrown unchecked with its cause kept.
      LOG.error("Uncaught exception terminating worker", e);
      throw new RuntimeException(e);
コード例 #4
ファイル: WorkerStorage.java プロジェクト: kinleyma/tachyon
   * Main logic behind the worker process.
   * <p>This object is lazily initialized. Before an object of this call should be used, {@link
   * #initialize()} must be called.
   * @param masterAddress The TachyonMaster's address
   * @param dataFolder This TachyonWorker's local folder's path
   * @param memoryCapacityBytes The maximum memory space this TachyonWorker can use, in bytes
  public WorkerStorage(
      InetSocketAddress masterAddress, String dataFolder, long memoryCapacityBytes) {
    // NOTE(review): the Javadoc text above this constructor links to #initialize() without
    // arguments, while the initialize method shown in this file takes a NetAddress.
    COMMON_CONF = CommonConf.get();

    mMasterAddress = masterAddress;
    // A master client is constructed here; no connect call is visible in this constructor.
    mMasterClient = new MasterClient(mMasterAddress);
    mLocalDataFolder = new File(dataFolder);

    mWorkerSpaceCounter = new WorkerSpaceCounter(memoryCapacityBytes);
    // The user temp folder is resolved relative to the local data folder.
    mLocalUserFolder = new File(mLocalDataFolder, WorkerConf.get().USER_TEMP_RELATIVE_FOLDER);
コード例 #5
ファイル: TachyonWorker.java プロジェクト: rmetzger/tachyon
 // Login step driven by the KEYTAB and PRINCIPAL worker settings.
 // NOTE(review): the listing is cut off: the body of the null check and the call made on the
 // HDFS under file system are not visible, so what the login actually does cannot be stated here.
 private void login() throws IOException {
   WorkerConf wConf = WorkerConf.get();
   // Branch taken when either the keytab or the principal is not configured.
   if (wConf.KEYTAB == null || wConf.PRINCIPAL == null) {
   UnderFileSystem ufs = UnderFileSystem.get(CommonConf.get().UNDERFS_ADDRESS);
   // Only the HDFS-backed under file system is handled in the visible code.
   if (ufs instanceof UnderFileSystemHdfs) {
     ((UnderFileSystemHdfs) ufs)
コード例 #6
ファイル: WorkerStorage.java プロジェクト: kinleyma/tachyon
  /**
   * Stores the worker address, derives the under-file-system folders from the worker id, creates
   * the {@code Users} tracker and creates one {@code CheckpointThread} per configured thread.
   *
   * <p>NOTE(review): {@code mWorkerId} is read here before any visible assignment, and the
   * {@code try} body is empty in this listing; calls such as {@code register()} (which assigns
   * {@code mWorkerId}) were presumably dropped. Confirm against the real file.
   *
   * @param address the address recorded as this worker's address
   */
  public void initialize(final NetAddress address) {
    mWorkerAddress = address;


    mUnderfsWorkerFolder = CommonUtils.concat(COMMON_CONF.UNDERFS_WORKERS_FOLDER, mWorkerId);
    mUnderfsWorkerDataFolder = mUnderfsWorkerFolder + "/data";
    mUnderFs = UnderFileSystem.get(COMMON_CONF.UNDERFS_ADDRESS);
    mUsers = new Users(mLocalUserFolder.toString(), mUnderfsWorkerFolder);

    for (int k = 0; k < WorkerConf.get().WORKER_CHECKPOINT_THREADS; k++) {
      // No start() or registration of the thread is visible in this listing.
      Thread thread = new Thread(new CheckpointThread(k));

    // Every checked exception from the (missing) try body is rethrown via Throwables.propagate.
    try {
    } catch (IOException e) {
      throw Throwables.propagate(e);
    } catch (FileDoesNotExistException e) {
      throw Throwables.propagate(e);
    } catch (SuspectedFileSizeException e) {
      throw Throwables.propagate(e);
    } catch (BlockInfoException e) {
      throw Throwables.propagate(e);
    } catch (TException e) {
      throw Throwables.propagate(e);

    // Summary message; the statement that emits it is not visible here.
        "Current Worker Info: ID "
            + mWorkerId
            + ", ADDRESS: "
            + mWorkerAddress
            + ", MemoryCapacityBytes: "
            + mWorkerSpaceCounter.getCapacityBytes());
コード例 #7
ファイル: TachyonWorker.java プロジェクト: rmetzger/tachyon
 /**
  * Chooses the master location string, preferring the first command-line argument over the
  * configuration file.
  *
  * @param args command-line arguments; {@code args[0]}, when present, is the master location
  * @return {@code MASTER_HOSTNAME:MASTER_PORT} from the configuration when no argument is given,
  *     otherwise the argument, with the configured port appended if it contains no ':'
  */
 private static String getMasterLocation(String[] args) {
   WorkerConf wConf = WorkerConf.get();
   String confFileMasterLoc = wConf.MASTER_HOSTNAME + ":" + wConf.MASTER_PORT;
   String masterLocation;
   if (args.length < 1) {
     masterLocation = confFileMasterLoc;
   } else {
     masterLocation = args[0];
     // A bare host name gets the configured master port appended.
     if (masterLocation.indexOf(":") == -1) {
       masterLocation += ":" + wConf.MASTER_PORT;
     if (!masterLocation.equals(confFileMasterLoc)) {
       // Mismatch message; the statement that emits it is not visible in this listing.
           "Master Address in configuration file("
               + confFileMasterLoc
               + ") is different "
               + "from the command line one("
               + masterLocation
               + ").");
   return masterLocation;
コード例 #8
ファイル: WorkerStorage.java プロジェクト: kinleyma/tachyon
/** The structure that stores a worker's information on the worker node. */
public class WorkerStorage {
  /**
   * The CheckpointThread, used to checkpoint the files belonging to the worker. It is a non-static
   * inner class, so it reads the enclosing WorkerStorage's fields directly.
   */
  public class CheckpointThread implements Runnable {
    private final Logger LOG = Logger.getLogger(Constants.LOGGER_TYPE);
    // Index of this thread; it appears in the thread's log messages.
    private final int ID;
    // Created on first use in run() and then reused by this thread.
    private UnderFileSystem mCheckpointUnderFs = null;

    /** @param id the value stored as this thread's ID */
    public CheckpointThread(int id) {
      ID = id;

    // Walks mPriorityDependencies in list order and returns the first file id that
    // getFileIdFromOneDependency yields for one of them, or -1 when the list is empty or no
    // dependency yields a file.
    // This method assumes the mDependencyLock has been acquired.
    private int getFileIdBasedOnPriorityDependency() throws TException {
      if (mPriorityDependencies.isEmpty()) {
        return -1;
      for (int depId : mPriorityDependencies) {
        int fileId = getFileIdFromOneDependency(depId);
        // -1 is the "nothing found" marker; anything else is returned immediately.
        if (fileId != -1) {
          return fileId;
      return -1;

    // Returns one file id from the set registered under depId in mDepIdToFiles, or -1 when the
    // dependency has no set or the set is empty.
    // NOTE(review): nothing visible removes the chosen id from the set, so the second isEmpty()
    // check can never be true as shown and its body is missing; removal statements were
    // presumably dropped from this listing.
    // This method assumes the mDependencyLock has been acquired.
    private int getFileIdFromOneDependency(int depId) throws TException {
      Set<Integer> fileIds = mDepIdToFiles.get(depId);
      if (fileIds != null && !fileIds.isEmpty()) {
        // Takes whichever element the set's iterator returns first.
        int fileId = fileIds.iterator().next();
        if (fileIds.isEmpty()) {
        return fileId;
      return -1;

    // Returns a file id from any dependency that still has files, or -1 when mUncheckpointFiles is
    // empty or no dependency yields one. The pick follows the iteration order of
    // mDepIdToFiles.keySet(); no random number generator is involved.
    // This method assumes the mDependencyLock has been acquired.
    private int getRandomUncheckpointedFile() throws TException {
      if (mUncheckpointFiles.isEmpty()) {
        return -1;
      for (int depId : mDepIdToFiles.keySet()) {
        int fileId = getFileIdFromOneDependency(depId);
        if (fileId != -1) {
          return fileId;
      return -1;

    private List<Integer> getSortedPriorityDependencyList() throws TException {
      List<Integer> ret = mMasterClient.worker_getPriorityDependencyList();
      for (int i = 0; i < ret.size(); i++) {
        for (int j = i + 1; j < ret.size(); j++) {
          if (ret.get(i) < ret.get(j)) {
            int k = ret.get(i);
            ret.set(i, ret.get(j));
            ret.set(j, k);
      return ret;

    /**
     * Checkpoint loop: repeatedly picks a file id, copies its blocks from the local data folder
     * into the under file system, renames the result into the data folder, reports the checkpoint
     * to the master client and then sleeps to honour the per-thread throughput cap.
     *
     * <p>NOTE(review): this listing has lost lines (closing braces, the openers of several log
     * statements, the start of the {@code shouldTakeMs} expression and the bodies of all catch
     * blocks). Only what is visible is described; confirm the rest against the real file.
     */
    public void run() {
      while (true) {
        try {
          int fileId = -1;
          synchronized (mDependencyLock) {
            // First choice: a file from the current priority dependencies.
            fileId = getFileIdBasedOnPriorityDependency();

            if (fileId == -1) {
              if (mPriorityDependencies.size() == 0) {
                // Nothing cached yet: fetch a fresh, sorted priority list from the master.
                mPriorityDependencies = getSortedPriorityDependencyList();
                if (!mPriorityDependencies.isEmpty()) {
                      "Get new mPriorityDependencies "
                          + CommonUtils.listToString(mPriorityDependencies));
              } else {
                // Re-fetch and only swap the list in when it differs from the cached one.
                List<Integer> tList = getSortedPriorityDependencyList();
                boolean equal = true;
                if (mPriorityDependencies.size() != tList.size()) {
                  equal = false;
                if (equal) {
                  for (int k = 0; k < tList.size(); k++) {
                    // NOTE(review): both operands are Integer objects, so != compares references
                    // rather than values; equal ids outside the Integer cache range would be
                    // reported as different. equals() or intValue() looks intended.
                    if (tList.get(k) != mPriorityDependencies.get(k)) {
                      equal = false;

                if (!equal) {
                  mPriorityDependencies = tList;

              fileId = getFileIdBasedOnPriorityDependency();

            // Last resort: any file that is still uncheckpointed.
            if (fileId == -1) {
              fileId = getRandomUncheckpointedFile();

          if (fileId == -1) {
            LOG.debug("Thread " + ID + " has nothing to checkpoint. Sleep for 1 sec.");
            CommonUtils.sleepMs(LOG, Constants.SECOND_MS);

          // TODO checkpoint process. In future, move from midPath to dstPath should be done by
          // master
          // midPath is under this worker's under-fs data folder; dstPath is under the shared
          // under-fs data folder.
          String midPath = CommonUtils.concat(mUnderfsWorkerDataFolder, fileId);
          String dstPath = CommonUtils.concat(CommonConf.get().UNDERFS_DATA_FOLDER, fileId);
              "Thread "
                  + ID
                  + " is checkpointing file "
                  + fileId
                  + " from "
                  + mLocalDataFolder.toString()
                  + " to "
                  + midPath
                  + " to "
                  + dstPath);

          // The under file system handle is created once per thread and then reused.
          if (mCheckpointUnderFs == null) {
            mCheckpointUnderFs = UnderFileSystem.get(midPath);

          long startCopyTimeMs = System.currentTimeMillis();
          ClientFileInfo fileInfo = mMasterClient.getClientFileInfoById(fileId);
          if (!fileInfo.isComplete) {
            LOG.error("File " + fileInfo + " is not complete!");
          // Lock every block of the file on behalf of the checkpoint user while it is copied.
          for (int k = 0; k < fileInfo.blockIds.size(); k++) {
            lockBlock(fileInfo.blockIds.get(k), Users.sCHECKPOINT_USER_ID);
          // NOTE(review): the block size is narrowed to int here; confirm it cannot exceed
          // Integer.MAX_VALUE.
          OutputStream os = mCheckpointUnderFs.create(midPath, (int) fileInfo.getBlockSizeByte());
          long fileSizeByte = 0;
          for (int k = 0; k < fileInfo.blockIds.size(); k++) {
            File tempFile =
                new File(CommonUtils.concat(mLocalDataFolder.toString(), fileInfo.blockIds.get(k)));
            fileSizeByte += tempFile.length();
            // NOTE(review): no close() for `is` or `os` is visible in this listing; confirm the
            // real file closes both, ideally in a finally block.
            InputStream is = new FileInputStream(tempFile);
            byte[] buf = new byte[16 * Constants.KB];
            int got = is.read(buf);
            // Standard read loop: -1 marks the end of the block file.
            while (got != -1) {
              os.write(buf, 0, got);
              got = is.read(buf);
          if (!mCheckpointUnderFs.rename(midPath, dstPath)) {
            LOG.error("Failed to rename from " + midPath + " to " + dstPath);
          mMasterClient.addCheckpoint(mWorkerId, fileId, fileSizeByte, dstPath);
          for (int k = 0; k < fileInfo.blockIds.size(); k++) {
            unlockBlock(fileInfo.blockIds.get(k), Users.sCHECKPOINT_USER_ID);

          // Minimum duration implied by the per-thread cap in MB/s; the start of this expression
          // is missing from the listing.
          long shouldTakeMs =
                      * fileSizeByte
                      / Constants.MB
                      / WorkerConf.get().WORKER_PER_THREAD_CHECKPOINT_CAP_MB_SEC);
          long currentTimeMs = System.currentTimeMillis();
          // Finished faster than the cap allows: sleep for the difference.
          if (startCopyTimeMs + shouldTakeMs > currentTimeMs) {
            long shouldSleepMs = startCopyTimeMs + shouldTakeMs - currentTimeMs;
                "Checkpointed last file "
                    + fileId
                    + " took "
                    + (currentTimeMs - startCopyTimeMs)
                    + " ms. Need to sleep "
                    + shouldSleepMs
                    + " ms.");
            CommonUtils.sleepMs(LOG, shouldSleepMs);
        // NOTE(review): all five handlers are empty as shown; if that matches the real file, the
        // exceptions are swallowed silently and the blocks locked above stay locked.
        } catch (FileDoesNotExistException e) {
        } catch (SuspectedFileSizeException e) {
        } catch (BlockInfoException e) {
        } catch (IOException e) {
        } catch (TException e) {

  private final Logger LOG = Logger.getLogger(Constants.LOGGER_TYPE);

  private final CommonConf COMMON_CONF;
  // Replaced wholesale by resetMasterClient(); volatile so the new reference is visible to
  // other threads.
  private volatile MasterClient mMasterClient;
  private InetSocketAddress mMasterAddress;
  private NetAddress mWorkerAddress;
  private WorkerSpaceCounter mWorkerSpaceCounter;

  // Id assigned by register().
  private long mWorkerId;
  // Block ids sent to the master during registration.
  private Set<Long> mMemoryData = new HashSet<Long>();
  // Block id -> size in bytes; accessed while holding the mLatestBlockAccessTimeMs monitor.
  private Map<Long, Long> mBlockSizes = new HashMap<Long, Long>();

  // Block id -> last access time in ms; also serves as the monitor for block bookkeeping.
  private Map<Long, Long> mLatestBlockAccessTimeMs = new HashMap<Long, Long>();
  // Block id -> ids of the users locking it; also serves as the monitor for both lock maps.
  private Map<Long, Set<Long>> mUsersPerLockedBlock = new HashMap<Long, Set<Long>>();

  // User id -> ids of the blocks that user has locked.
  private Map<Long, Set<Long>> mLockedBlocksPerUser = new HashMap<Long, Set<Long>>();
  // Bounded queue drained by heartbeat(); presumably holds removed block ids awaiting report.
  private BlockingQueue<Long> mRemovedBlockList =
      new ArrayBlockingQueue<Long>(Constants.WORKER_BLOCKS_QUEUE_SIZE);

  // NOTE(review): no use of this queue is visible in this listing.
  private BlockingQueue<Long> mAddedBlockList =
      new ArrayBlockingQueue<Long>(Constants.WORKER_BLOCKS_QUEUE_SIZE);
  private File mLocalDataFolder;
  private File mLocalUserFolder;
  private String mUnderfsWorkerFolder;
  private String mUnderfsWorkerDataFolder;
  private String mUnderfsOrphansFolder;

  private UnderFileSystem mUnderFs;

  private Users mUsers;
  // Dependency related lock
  private Object mDependencyLock = new Object();
  // Ids of files not yet checkpointed.
  private Set<Integer> mUncheckpointFiles = new HashSet<Integer>();
  // From dependencyId to files in that set.
  private Map<Integer, Set<Integer>> mDepIdToFiles = new HashMap<Integer, Set<Integer>>();

  // Dependency ids in the order produced by getSortedPriorityDependencyList().
  private List<Integer> mPriorityDependencies = new ArrayList<Integer>();

  // Pre-sized for the configured number of checkpoint threads.
  private ArrayList<Thread> mCheckpointThreads =
      new ArrayList<Thread>(WorkerConf.get().WORKER_CHECKPOINT_THREADS);

   * Main logic behind the worker process.
   * <p>This object is lazily initialized. Before an object of this call should be used, {@link
   * #initialize()} must be called.
   * @param masterAddress The TachyonMaster's address
   * @param dataFolder This TachyonWorker's local folder's path
   * @param memoryCapacityBytes The maximum memory space this TachyonWorker can use, in bytes
  public WorkerStorage(
      InetSocketAddress masterAddress, String dataFolder, long memoryCapacityBytes) {
    // NOTE(review): the Javadoc text above this constructor links to #initialize() without
    // arguments, while initialize in this class takes a NetAddress.
    COMMON_CONF = CommonConf.get();

    mMasterAddress = masterAddress;
    // Only constructs the client; no connect call is visible in this constructor.
    mMasterClient = new MasterClient(mMasterAddress);
    mLocalDataFolder = new File(dataFolder);

    mWorkerSpaceCounter = new WorkerSpaceCounter(memoryCapacityBytes);
    // User temp folder lives under the local data folder.
    mLocalUserFolder = new File(mLocalDataFolder, WorkerConf.get().USER_TEMP_RELATIVE_FOLDER);

  /**
   * Records the worker address, derives the under-file-system folders from the worker id, builds
   * the {@code Users} tracker and creates one {@code CheckpointThread} per configured thread.
   *
   * <p>NOTE(review): lines appear to be missing from this listing: {@code mWorkerId} is used
   * before any visible assignment, the created threads are neither stored nor started, and the
   * {@code try} body is empty. Confirm against the real file.
   *
   * @param address the address recorded as this worker's address
   */
  public void initialize(final NetAddress address) {
    mWorkerAddress = address;


    mUnderfsWorkerFolder = CommonUtils.concat(COMMON_CONF.UNDERFS_WORKERS_FOLDER, mWorkerId);
    mUnderfsWorkerDataFolder = mUnderfsWorkerFolder + "/data";
    mUnderFs = UnderFileSystem.get(COMMON_CONF.UNDERFS_ADDRESS);
    mUsers = new Users(mLocalUserFolder.toString(), mUnderfsWorkerFolder);

    for (int k = 0; k < WorkerConf.get().WORKER_CHECKPOINT_THREADS; k++) {
      Thread thread = new Thread(new CheckpointThread(k));

    // Checked exceptions from the try body are rethrown through Throwables.propagate.
    try {
    } catch (IOException e) {
      throw Throwables.propagate(e);
    } catch (FileDoesNotExistException e) {
      throw Throwables.propagate(e);
    } catch (SuspectedFileSizeException e) {
      throw Throwables.propagate(e);
    } catch (BlockInfoException e) {
      throw Throwables.propagate(e);
    } catch (TException e) {
      throw Throwables.propagate(e);

    // Summary of id, address and capacity; the emitting statement is not visible here.
        "Current Worker Info: ID "
            + mWorkerId
            + ", ADDRESS: "
            + mWorkerAddress
            + ", MemoryCapacityBytes: "
            + mWorkerSpaceCounter.getCapacityBytes());

   * Update the latest block access time on the worker.
   * @param blockId The id of the block
  void accessBlock(long blockId) {
    // The access-time map doubles as the monitor guarding it.
    synchronized (mLatestBlockAccessTimeMs) {
      // Overwrites any earlier timestamp for this block with the current time.
      mLatestBlockAccessTimeMs.put(blockId, System.currentTimeMillis());

  // Records a block's size and stamps its access time with the current time, both under the
  // mLatestBlockAccessTimeMs monitor.
  // NOTE(review): further bookkeeping statements may be missing from this listing.
  private void addBlockId(long blockId, long fileSizeBytes) {
    synchronized (mLatestBlockAccessTimeMs) {
      mLatestBlockAccessTimeMs.put(blockId, System.currentTimeMillis());
      mBlockSizes.put(blockId, fileSizeBytes);

   * Add the checkpoint information of a file. The information is from the user <code>userId</code>.
   * <p>This method is normally triggered from {@link tachyon.client.FileOutStream#close()} if and
   * only if {@link tachyon.client.WriteType#isThrough()} is true. The current implementation of
   * checkpointing is that through {@link tachyon.client.WriteType} operations write to {@link
   * tachyon.UnderFileSystem} on the client's write path, but under a user temp directory (temp
   * directory is defined in the worker as {@link #getUserUnderfsTempFolder(long)}).
   * @param userId The user id of the client who send the notification
   * @param fileId The id of the checkpointed file
   * @throws FileDoesNotExistException
   * @throws SuspectedFileSizeException
   * @throws FailedToCheckpointException
   * @throws BlockInfoException
   * @throws TException
  public void addCheckpoint(long userId, int fileId)
      throws FileDoesNotExistException, SuspectedFileSizeException, FailedToCheckpointException,
          BlockInfoException, TException {
    // TODO This part need to be changed.
    // Source is the file under the user's under-fs temp folder; destination is the same file id
    // under the shared under-fs data folder.
    String srcPath = CommonUtils.concat(getUserUnderfsTempFolder(userId), fileId);
    String dstPath = CommonUtils.concat(COMMON_CONF.UNDERFS_DATA_FOLDER, fileId);
    try {
      // A false return from rename is treated as a failed checkpoint.
      if (!mUnderFs.rename(srcPath, dstPath)) {
        throw new FailedToCheckpointException("Failed to rename " + srcPath + " to " + dstPath);
    } catch (IOException e) {
      // NOTE(review): the IOException is neither logged nor attached as a cause, so the
      // underlying failure reason is lost; consider logging it before rethrowing.
      throw new FailedToCheckpointException("Failed to rename " + srcPath + " to " + dstPath);
    long fileSize;
    try {
      // The size is read back from the renamed file rather than supplied by the caller.
      fileSize = mUnderFs.getFileSize(dstPath);
    } catch (IOException e) {
      // NOTE(review): same as above; the cause is dropped.
      throw new FailedToCheckpointException("Failed to getFileSize " + dstPath);
    mMasterClient.addCheckpoint(mWorkerId, fileId, fileSize, dstPath);

  // Registers a block locally via addBlockId and then reports it to the master client together
  // with this worker's id and current used bytes.
  private void addFoundBlock(long blockId, long length)
      throws FileDoesNotExistException, SuspectedFileSizeException, BlockInfoException, TException {
    addBlockId(blockId, length);
    mMasterClient.worker_cacheBlock(mWorkerId, mWorkerSpaceCounter.getUsedBytes(), blockId, length);

   * Notify the worker to checkpoint the file asynchronously.
   * @param fileId The id of the file
   * @return true if succeed, false otherwise
   * @throws IOException
   * @throws TException
  public boolean asyncCheckpoint(int fileId) throws IOException, TException {
    ClientFileInfo fileInfo = mMasterClient.getClientFileInfoById(fileId);

    // Files whose dependency id is -1 are not accepted; the method returns false for them.
    if (fileInfo.getDependencyId() != -1) {
      synchronized (mDependencyLock) {
        // Make sure the dependency has a file set before anything is added to it.
        // NOTE(review): the statements that add fileId to the set (and presumably to
        // mUncheckpointFiles) are not visible in this listing.
        if (!mDepIdToFiles.containsKey(fileInfo.getDependencyId())) {
          mDepIdToFiles.put(fileInfo.getDependencyId(), new HashSet<Integer>());
      return true;

    return false;

   * Notify the worker the block is cached.
   * <p>This call is called remotely from {@link tachyon.client.TachyonFS#cacheBlock(long)} which is
   * only ever called from {@link tachyon.client.BlockOutStream#close()} (though its a public api so
   * anyone could call it). There are a few interesting preconditions for this to work.
   * <p>1) Client process writes to files locally under a tachyon defined temp directory. 2) Worker
   * process is on the same node as the client 3) Client is talking to the local worker directly
   * <p>If all conditions are true, then and only then can this method ever be called; all
   * operations work on local files.
   * @param userId The user id of the client who send the notification
   * @param blockId The id of the block
   * @throws FileDoesNotExistException
   * @throws SuspectedFileSizeException
   * @throws BlockInfoException
   * @throws TException
  public void cacheBlock(long userId, long blockId)
      throws FileDoesNotExistException, SuspectedFileSizeException, BlockInfoException, TException {
    // The block is moved from the user's temp folder into the worker's local data folder.
    File srcFile = new File(CommonUtils.concat(getUserTempFolder(userId), blockId));
    File dstFile = new File(CommonUtils.concat(mLocalDataFolder, blockId));
    // Size is captured before the rename, while the file is still at its source path.
    long fileSizeBytes = srcFile.length();
    if (!srcFile.exists()) {
      throw new FileDoesNotExistException("File " + srcFile + " does not exist.");
    // A failed rename is reported with the same exception type as a missing file.
    if (!srcFile.renameTo(dstFile)) {
      throw new FileDoesNotExistException(
          "Failed to rename file from " + srcFile.getPath() + " to " + dstFile.getPath());
    addBlockId(blockId, fileSizeBytes);
    // The bytes leave the user's own-bytes account once the block is in the data folder.
    mUsers.addOwnBytes(userId, -fileSizeBytes);
    // NOTE(review): the call these arguments belong to is missing from this listing; they match
    // the mMasterClient.worker_cacheBlock(...) call in addFoundBlock, so that call is presumed.
        mWorkerId, mWorkerSpaceCounter.getUsedBytes(), blockId, fileSizeBytes);
    LOG.info(userId + " " + dstFile);

   * Check worker's status. This should be executed periodically.
   * <p>It finds the timeout users and cleans them up.
  public void checkStatus() {
    // Users reported as removed by the Users tracker.
    List<Long> removedUsers = mUsers.checkStatus();

    for (long userId : removedUsers) {
      // mUsersPerLockedBlock is the monitor that lockBlock/unlockBlock also synchronize on.
      synchronized (mUsersPerLockedBlock) {
        Set<Long> blockds = mLockedBlocksPerUser.get(userId);
        if (blockds != null) {
          // NOTE(review): unlockBlock is called while iterating this user's block set; if
          // unlockBlock removes from the same set (its body is not visible here), this loop
          // could throw ConcurrentModificationException. Verify.
          for (long blockId : blockds) {
            try {
              unlockBlock(blockId, userId);
            } catch (TException e) {
              throw Throwables.propagate(e);

   * Remove a block from the memory.
   * @param blockId The block to be removed.
   * @return Removed file size in bytes.
  private long freeBlock(long blockId) {
    // Stays 0 when the block is unknown to this worker.
    long freedFileBytes = 0;
    synchronized (mLatestBlockAccessTimeMs) {
      if (mBlockSizes.containsKey(blockId)) {
        // NOTE(review): srcFile is created but not used in the visible lines; the file deletion
        // and the remaining bookkeeping were presumably dropped from this listing.
        File srcFile = new File(CommonUtils.concat(mLocalDataFolder, blockId));
        // remove() returns the recorded size, which becomes the freed byte count.
        freedFileBytes = mBlockSizes.remove(blockId);
        LOG.info("Removed Data " + blockId);
      } else {
        LOG.warn("File " + blockId + " does not exist in memory.");

    return freedFileBytes;

   * Remove blocks from the memory.
   * <p>This is triggered when the worker heartbeats to the master, which sends a {@link
   * tachyon.thrift.Command} with type {@link tachyon.thrift.CommandType#Free}
   * @param blocks The list of blocks to be removed.
  public void freeBlocks(List<Long> blocks) {
    // NOTE(review): the loop body is missing from this listing; presumably each block is passed
    // to freeBlock(long). Confirm against the real file.
    for (long blockId : blocks) {

   * @return The root local data folder of the worker
   * @throws TException
  public String getDataFolder() throws TException {
    // String form of the File built from the constructor's dataFolder argument.
    return mLocalDataFolder.toString();

  /**
   * @return The orphans' folder in the under file system; the field is assigned in
   *     {@code initializeWorkerStorage()}, so it is {@code null} before that method has run
   */
  public String getUnderfsOrphansFolder() {
    return mUnderfsOrphansFolder;

   * Get the local user temporary folder of the specified user.
   * <p>This method is a wrapper around {@link tachyon.Users#getUserTempFolder(long)}, and as such
   * should be referentially transparent with {@link tachyon.Users#getUserTempFolder(long)}. In the
   * context of {@code this}, this call will output the result of path concat of {@link
   * #mLocalUserFolder} with the provided {@literal userId}.
   * <p>This method differs from {@link #getUserUnderfsTempFolder(long)} in the context of where
   * write operations end up. This temp folder generated lives inside the tachyon file system, and
   * as such, will be stored in memory.
   * @see tachyon.Users#getUserTempFolder(long)
   * @param userId The id of the user
   * @return The local user temporary folder of the specified user
   * @throws TException
  public String getUserTempFolder(long userId) throws TException {
    // Pure delegation to the Users tracker; the result is logged at INFO on every call.
    String ret = mUsers.getUserTempFolder(userId);
    LOG.info("Return UserTempFolder for " + userId + " : " + ret);
    return ret;

   * Get the user temporary folder in the under file system of the specified user.
   * <p>This method is a wrapper around {@link tachyon.Users#getUserUnderfsTempFolder(long)}, and as
   * such should be referentially transparent with {@link Users#getUserUnderfsTempFolder(long)}. In
   * the context of {@code this}, this call will output the result of path concat of {@link
   * #mUnderfsWorkerFolder} with the provided {@literal userId}.
   * <p>This method differs from {@link #getUserTempFolder(long)} in the context of where write
   * operations end up. This temp folder generated lives inside the {@link tachyon.UnderFileSystem},
   * and as such, will be stored remotely, most likely on disk.
   * @param userId The id of the user
   * @return The user temporary folder in the under file system
   * @throws TException
  public String getUserUnderfsTempFolder(long userId) throws TException {
    // Pure delegation to the Users tracker; the result is logged at INFO on every call.
    String ret = mUsers.getUserUnderfsTempFolder(userId);
    LOG.info("Return UserHdfsTempFolder for " + userId + " : " + ret);
    return ret;

   * Heartbeat with the TachyonMaster. Send the removed block list to the Master.
   * @return The Command received from the Master
   * @throws BlockInfoException
   * @throws TException
  public Command heartbeat() throws BlockInfoException, TException {
    ArrayList<Long> sendRemovedPartitionList = new ArrayList<Long>();
    // NOTE(review): the loop body is missing from this listing; presumably it moves entries from
    // mRemovedBlockList into sendRemovedPartitionList. As shown the loop would never terminate
    // on a non-empty queue.
    while (mRemovedBlockList.size() > 0) {
    // Reports the worker id, the used bytes and the collected list to the master client.
    return mMasterClient.worker_heartbeat(
        mWorkerId, mWorkerSpaceCounter.getUsedBytes(), sendRemovedPartitionList);

  /**
   * Prepares the local folders and the under-fs orphans folder, then registers every regular file
   * already present in the local data folder as a block.
   *
   * <p>NOTE(review): several statements are missing from this listing (folder creation, the body
   * of the inner {@code try}, the opener of one log call); only the visible behaviour is
   * described.
   *
   * @throws IllegalArgumentException if the local data folder path is not a directory
   * @throws RuntimeException if space for a pre-existing file cannot be reserved
   */
  private void initializeWorkerStorage()
      throws IOException, FileDoesNotExistException, SuspectedFileSizeException, BlockInfoException,
          TException {
    LOG.info("Initializing the worker storage.");
    if (!mLocalDataFolder.exists()) {
      LOG.info("Local folder " + mLocalDataFolder + " does not exist. Creating a new one.");

      CommonUtils.changeLocalFilePermission(mLocalDataFolder.getPath(), "775");
      CommonUtils.changeLocalFilePermission(mLocalUserFolder.getPath(), "775");

    if (!mLocalDataFolder.isDirectory()) {
      String tmp = "Data folder " + mLocalDataFolder + " is not a folder!";
      throw new IllegalArgumentException(tmp);

    if (mLocalUserFolder.exists()) {
      // An IOException from the (missing) try body is logged and otherwise ignored.
      try {
      } catch (IOException e) {
        LOG.error(e.getMessage(), e);
    CommonUtils.changeLocalFilePermission(mLocalUserFolder.getPath(), "775");

    mUnderfsOrphansFolder = mUnderfsWorkerFolder + "/orphans";
    if (!mUnderFs.exists(mUnderfsOrphansFolder)) {
      mUnderFs.mkdirs(mUnderfsOrphansFolder, true);

    int cnt = 0;
    // NOTE(review): File.listFiles() returns null on an I/O error, which would make this loop
    // throw NullPointerException.
    for (File tFile : mLocalDataFolder.listFiles()) {
      if (tFile.isFile()) {
        LOG.info("File " + cnt + ": " + tFile.getPath() + " with size " + tFile.length() + " Bs.");

        // The block id is derived from the file name.
        long blockId = CommonUtils.getBlockIdFromFileName(tFile.getName());
        // Space is requested first; the result is only checked after the block is registered.
        boolean success = mWorkerSpaceCounter.requestSpaceBytes(tFile.length());
        try {
          addFoundBlock(blockId, tFile.length());
        } catch (FileDoesNotExistException e) {
          // The master does not know the file: move the block to the orphans folder instead.
          LOG.error("BlockId: " + blockId + " becomes orphan for: \"" + e.message + "\"");
              "Swapout File " + cnt + ": blockId: " + blockId + " to " + mUnderfsOrphansFolder);
          swapoutOrphanBlocks(blockId, tFile);
        if (!success) {
          throw new RuntimeException("Pre-existing files exceed the local memory capacity.");

   * Lock the block
   * <p>Used internally to make sure blocks are unmodified, but also used in {@link
   * tachyon.client.TachyonFS} for cacheing blocks locally for users. When a user tries to read a
   * block ({@link tachyon.client.TachyonFile#readByteBuffer()}), the client will attempt to cache
   * the block on the local users's node, while the user is reading from the local block, the given
   * block is locked and unlocked once read.
   * @param blockId The id of the block
   * @param userId The id of the user who locks the block
   * @throws TException
  public void lockBlock(long blockId, long userId) throws TException {
    // Both lock maps are guarded by the mUsersPerLockedBlock monitor.
    synchronized (mUsersPerLockedBlock) {
      // Create the per-block user set on first lock of this block.
      // NOTE(review): the statements that add userId / blockId to the sets are not visible in
      // this listing.
      if (!mUsersPerLockedBlock.containsKey(blockId)) {
        mUsersPerLockedBlock.put(blockId, new HashSet<Long>());

      // Create the per-user block set on first lock by this user.
      if (!mLockedBlocksPerUser.containsKey(userId)) {
        mLockedBlocksPerUser.put(userId, new HashSet<Long>());

   * Use local LRU to evict data, and get <code> requestBytes </code> available space.
   * @param requestBytes The data requested.
   * @return <code> true </code> if the space is granted, <code> false </code> if not.
  private boolean memoryEvictionLRU(long requestBytes) {
    Set<Integer> pinList;

    try {
      pinList = mMasterClient.worker_getPinIdList();
    } catch (TException e) {
      // If the pin list cannot be fetched, eviction proceeds as if nothing were pinned.
      // NOTE(review): the exception is not logged in the visible code.
      pinList = new HashSet<Integer>();

    // Lock order: access-time map first, then the lock map.
    synchronized (mLatestBlockAccessTimeMs) {
      synchronized (mUsersPerLockedBlock) {
        while (mWorkerSpaceCounter.getAvailableBytes() < requestBytes) {
          // Linear scan for the block with the smallest access time that is neither pinned
          // nor locked; -1 means no candidate was found.
          long blockId = -1;
          long latestTimeMs = Long.MAX_VALUE;
          for (Entry<Long, Long> entry : mLatestBlockAccessTimeMs.entrySet()) {
            if (entry.getValue() < latestTimeMs
                && !pinList.contains(BlockInfo.computeInodeId(entry.getKey()))) {
              if (!mUsersPerLockedBlock.containsKey(entry.getKey())) {
                blockId = entry.getKey();
                latestTimeMs = entry.getValue();
          // NOTE(review): the eviction statement for the chosen block is missing from this
          // listing; without it the loop could not make progress.
          if (blockId != -1) {
          } else {
            return false;

    return true;

  /**
   * Register this TachyonWorker with the TachyonMaster, retrying until a non-zero id is obtained,
   * and store that id in {@code mWorkerId}.
   *
   * <p>NOTE(review): the start of the registration call is missing from this listing; only its
   * last argument (a copy of {@code mMemoryData}) is visible.
   */
  public void register() {
    // 0 is the "not registered yet" marker that keeps the loop going.
    long id = 0;
    while (id == 0) {
      try {
        id =
                new ArrayList<Long>(mMemoryData));
      } catch (BlockInfoException e) {
        // Log, wait Constants.SECOND_MS and try again.
        LOG.error(e.getMessage(), e);
        id = 0;
        CommonUtils.sleepMs(LOG, Constants.SECOND_MS);
      } catch (TException e) {
        // Same retry policy as above.
        LOG.error(e.getMessage(), e);
        id = 0;
        CommonUtils.sleepMs(LOG, Constants.SECOND_MS);
    mWorkerId = id;

   * Request space from the worker
   * @param userId The id of the user who send the request
   * @param requestBytes The requested space size, in bytes
   * @return true if succeed, false otherwise
   * @throws TException
  public boolean requestSpace(long userId, long requestBytes) throws TException {
    // NOTE(review): the opener of this log statement is missing from the listing.
            + userId
            + ", "
            + requestBytes
            + "): Current available: "
            + mWorkerSpaceCounter.getAvailableBytes()
            + " requested: "
            + requestBytes);
    // A request larger than the total capacity can never be satisfied, even after eviction.
    if (mWorkerSpaceCounter.getCapacityBytes() < requestBytes) {
          "user_requestSpace(): requested memory size is larger than the total memory on"
              + " the machine.");
      return false;

    // Keep evicting until the reservation succeeds or eviction reports no candidate is left.
    while (!mWorkerSpaceCounter.requestSpaceBytes(requestBytes)) {
      if (!memoryEvictionLRU(requestBytes)) {
        return false;

    // The granted bytes are charged to the requesting user.
    mUsers.addOwnBytes(userId, requestBytes);

    return true;

   * Set a new MasterClient and connect to it.
   * @throws TException
  public void resetMasterClient() throws TException {
    // Built in a local first so the field only ever points at a fully constructed client.
    // NOTE(review): the Javadoc text above says "and connect to it", but no connect call is
    // visible in this listing; confirm against the real file.
    MasterClient tMasterClient = new MasterClient(mMasterAddress);
    mMasterClient = tMasterClient;

   * Return the space which has been requested
   * @param userId The id of the user who wants to return the space
   * @param returnedBytes The returned space size, in bytes
   * @throws TException
  public void returnSpace(long userId, long returnedBytes) throws TException {
    // Captured up front so the log message can show the value before the return.
    long preAvailableBytes = mWorkerSpaceCounter.getAvailableBytes();
    // A user cannot hand back more bytes than are recorded as owned; that case is only logged.
    if (returnedBytes > mUsers.ownBytes(userId)) {
      LOG.error("User " + userId + " does not own " + returnedBytes + " bytes.");
    } else {
      // NOTE(review): only the user's account is adjusted in the visible lines; the matching
      // update of mWorkerSpaceCounter is presumably missing from this listing.
      mUsers.addOwnBytes(userId, -returnedBytes);

    // NOTE(review): the opener of this log statement is missing from the listing.
            + userId
            + ", "
            + returnedBytes
            + ") : "
            + preAvailableBytes
            + " returned: "
            + returnedBytes
            + ". New Available: "
            + mWorkerSpaceCounter.getAvailableBytes());

  /**
   * Disconnect from the Master.
   *
   * <p>NOTE(review): the method body is not visible in this listing, so the description above is
   * taken from the original comment and cannot be checked here.
   */
  public void stop() {

   * Swap out those blocks missing INode information onto underFS which can be retrieved by user
   * later. Its cleanup only happens while formating the TFS.
  private void swapoutOrphanBlocks(long blockId, File file) throws IOException {
    // Map the whole local file into memory, read-only.
    RandomAccessFile localFile = new RandomAccessFile(file, "r");
    ByteBuffer buf = localFile.getChannel().map(MapMode.READ_ONLY, 0, file.length());

    // Destination in the under filesystem: built from mUnderfsOrphansFolder and the block id.
    String ufsOrphanBlock = CommonUtils.concat(mUnderfsOrphansFolder, blockId);
    OutputStream os = mUnderFs.create(ufsOrphanBlock);
    // Copy through one reusable array of 64 * Constants.KB bytes.
    int BULKSIZE = Constants.KB * 64;
    byte[] bulk = new byte[BULKSIZE];
    // Runs ceil(buf.limit() / BULKSIZE) times; only the final chunk can be shorter than BULKSIZE.
    for (int k = 0; k < (buf.limit() + BULKSIZE - 1) / BULKSIZE; k++) {
      int len = BULKSIZE < buf.remaining() ? BULKSIZE : buf.remaining();
      buf.get(bulk, 0, len);
      os.write(bulk, 0, len);
    // NOTE(review): no close() of os or localFile is visible in this excerpt — confirm both are
    // closed in the full method, including when an exception is thrown.


  /**
   * Unlock the block
   *
   * <p>Used internally to make sure blocks are unmodified, but also used in {@link
   * tachyon.client.TachyonFS} for caching blocks locally for users. When a user tries to read a
   * block ({@link tachyon.client.TachyonFile#readByteBuffer()}), the client will attempt to cache
   * the block on the local user's node. While the user is reading from the local block, the given
   * block is locked; it is unlocked once read.
   *
   * @param blockId The id of the block
   * @param userId The id of the user who unlocks the block
   * @throws TException
   */
  public void unlockBlock(long blockId, long userId) throws TException {
    // Both lookups below happen while holding the monitor of mUsersPerLockedBlock.
    synchronized (mUsersPerLockedBlock) {
      // Only act on the per-block map when it has an entry for this block.
      if (mUsersPerLockedBlock.containsKey(blockId)) {
        // Special-case a block whose collection of users is empty.
        if (mUsersPerLockedBlock.get(blockId).size() == 0) {

      // Only act on the per-user map when it has an entry for this user.
      if (mLockedBlocksPerUser.containsKey(userId)) {

  /**
   * Handle the user's heartbeat.
   *
   * @param userId The id of the user
   * @throws TException
   */
  public void userHeartbeat(long userId) throws TException {
コード例 #9
ファイル: WorkerStorage.java プロジェクト: kinleyma/tachyon
    public void run() {
      while (true) {
        try {
          int fileId = -1;
          synchronized (mDependencyLock) {
            fileId = getFileIdBasedOnPriorityDependency();

            if (fileId == -1) {
              if (mPriorityDependencies.size() == 0) {
                mPriorityDependencies = getSortedPriorityDependencyList();
                if (!mPriorityDependencies.isEmpty()) {
                      "Get new mPriorityDependencies "
                          + CommonUtils.listToString(mPriorityDependencies));
              } else {
                List<Integer> tList = getSortedPriorityDependencyList();
                boolean equal = true;
                if (mPriorityDependencies.size() != tList.size()) {
                  equal = false;
                if (equal) {
                  for (int k = 0; k < tList.size(); k++) {
                    if (tList.get(k) != mPriorityDependencies.get(k)) {
                      equal = false;

                if (!equal) {
                  mPriorityDependencies = tList;

              fileId = getFileIdBasedOnPriorityDependency();

            if (fileId == -1) {
              fileId = getRandomUncheckpointedFile();

          if (fileId == -1) {
            LOG.debug("Thread " + ID + " has nothing to checkpoint. Sleep for 1 sec.");
            CommonUtils.sleepMs(LOG, Constants.SECOND_MS);

          // TODO checkpoint process. In future, move from midPath to dstPath should be done by
          // master
          String midPath = CommonUtils.concat(mUnderfsWorkerDataFolder, fileId);
          String dstPath = CommonUtils.concat(CommonConf.get().UNDERFS_DATA_FOLDER, fileId);
              "Thread "
                  + ID
                  + " is checkpointing file "
                  + fileId
                  + " from "
                  + mLocalDataFolder.toString()
                  + " to "
                  + midPath
                  + " to "
                  + dstPath);

          if (mCheckpointUnderFs == null) {
            mCheckpointUnderFs = UnderFileSystem.get(midPath);

          long startCopyTimeMs = System.currentTimeMillis();
          ClientFileInfo fileInfo = mMasterClient.getClientFileInfoById(fileId);
          if (!fileInfo.isComplete) {
            LOG.error("File " + fileInfo + " is not complete!");
          for (int k = 0; k < fileInfo.blockIds.size(); k++) {
            lockBlock(fileInfo.blockIds.get(k), Users.sCHECKPOINT_USER_ID);
          OutputStream os = mCheckpointUnderFs.create(midPath, (int) fileInfo.getBlockSizeByte());
          long fileSizeByte = 0;
          for (int k = 0; k < fileInfo.blockIds.size(); k++) {
            File tempFile =
                new File(CommonUtils.concat(mLocalDataFolder.toString(), fileInfo.blockIds.get(k)));
            fileSizeByte += tempFile.length();
            InputStream is = new FileInputStream(tempFile);
            byte[] buf = new byte[16 * Constants.KB];
            int got = is.read(buf);
            while (got != -1) {
              os.write(buf, 0, got);
              got = is.read(buf);
          if (!mCheckpointUnderFs.rename(midPath, dstPath)) {
            LOG.error("Failed to rename from " + midPath + " to " + dstPath);
          mMasterClient.addCheckpoint(mWorkerId, fileId, fileSizeByte, dstPath);
          for (int k = 0; k < fileInfo.blockIds.size(); k++) {
            unlockBlock(fileInfo.blockIds.get(k), Users.sCHECKPOINT_USER_ID);

          long shouldTakeMs =
                      * fileSizeByte
                      / Constants.MB
                      / WorkerConf.get().WORKER_PER_THREAD_CHECKPOINT_CAP_MB_SEC);
          long currentTimeMs = System.currentTimeMillis();
          if (startCopyTimeMs + shouldTakeMs > currentTimeMs) {
            long shouldSleepMs = startCopyTimeMs + shouldTakeMs - currentTimeMs;
                "Checkpointed last file "
                    + fileId
                    + " took "
                    + (currentTimeMs - startCopyTimeMs)
                    + " ms. Need to sleep "
                    + shouldSleepMs
                    + " ms.");
            CommonUtils.sleepMs(LOG, shouldSleepMs);
        } catch (FileDoesNotExistException e) {
        } catch (SuspectedFileSizeException e) {
        } catch (BlockInfoException e) {
        } catch (IOException e) {
        } catch (TException e) {