Example #1
0
  /**
   * Runs the worker-to-master heartbeat loop until {@code mStop} is set.
   *
   * <p>Each iteration waits out the remainder of the configured heartbeat interval, sends a
   * heartbeat through {@code mWorkerStorage}, executes the command returned by the master (if
   * any) and finally calls {@code mWorkerStorage.checkStatus()}.
   *
   * @throws RuntimeException if heartbeats have been failing for at least {@code
   *     HEARTBEAT_TIMEOUT_MS}, or if the master returns a command type this loop does not handle
   */
  @Override
  public void run() {
    long lastHeartbeatMs = System.currentTimeMillis();
    Command cmd = null;
    while (!mStop) {
      long diff = System.currentTimeMillis() - lastHeartbeatMs;
      if (diff < WorkerConf.get().TO_MASTER_HEARTBEAT_INTERVAL_MS) {
        LOG.debug("Heartbeat process takes {} ms.", diff);
        CommonUtils.sleepMs(LOG, WorkerConf.get().TO_MASTER_HEARTBEAT_INTERVAL_MS - diff);
      } else {
        // The previous iteration already used up the whole interval; heartbeat immediately.
        LOG.error("Heartbeat process takes " + diff + " ms.");
      }

      try {
        cmd = mWorkerStorage.heartbeat();

        lastHeartbeatMs = System.currentTimeMillis();
      } catch (IOException e) {
        LOG.error(e.getMessage(), e);
        mWorkerStorage.resetMasterClient();
        CommonUtils.sleepMs(LOG, Constants.SECOND_MS);
        cmd = null;
        // Read the clock once so the value that is tested is also the value that is reported.
        long sinceLastHeartbeatMs = System.currentTimeMillis() - lastHeartbeatMs;
        if (sinceLastHeartbeatMs >= WorkerConf.get().HEARTBEAT_TIMEOUT_MS) {
          // Keep the last I/O failure as the cause so the reason for the timeout is not lost.
          throw new RuntimeException("Heartbeat timeout " + sinceLastHeartbeatMs + "ms", e);
        }
      }

      if (cmd != null) {
        switch (cmd.mCommandType) {
          case Unknown:
            LOG.error("Unknown command: " + cmd);
            break;
          case Nothing:
            LOG.debug("Nothing command: {}", cmd);
            break;
          case Register:
            LOG.info("Register command: " + cmd);
            mWorkerStorage.register();
            break;
          case Free:
            mWorkerStorage.freeBlocks(cmd.mData);
            LOG.info("Free command: " + cmd);
            break;
          case Delete:
            // Only logged here; no further action is taken for this command type.
            LOG.info("Delete command: " + cmd);
            break;
          default:
            throw new RuntimeException("Un-recognized command from master " + cmd);
        }
      }

      mWorkerStorage.checkStatus();
    }
  }
Example #2
0
 /**
  * Builds the data server implementation selected by {@code WorkerConf.get().NETWORK_TYPE}.
  *
  * @param dataAddress the address handed to the data server
  * @param blockLocker the block locker handed to the data server
  * @return a new NIO or Netty data server
  * @throws AssertionError if the configured network type is neither NIO nor NETTY
  */
 private DataServer createDataServer(
     final InetSocketAddress dataAddress, final BlocksLocker blockLocker) {
   final DataServer server;
   switch (WorkerConf.get().NETWORK_TYPE) {
     case NETTY:
       server = new NettyDataServer(dataAddress, blockLocker);
       break;
     case NIO:
       server = new NIODataServer(dataAddress, blockLocker);
       break;
     default:
       throw new AssertionError("Unknown network type: " + WorkerConf.get().NETWORK_TYPE);
   }
   return server;
 }
Example #3
0
  /**
   * Entry point of the worker process.
   *
   * <p>Accepts at most one argument, the master location. With more than one argument the usage
   * line is logged and the JVM exits with status -1. Otherwise a worker is created from the
   * values in {@link WorkerConf} and started.
   *
   * @param args optional master location
   * @throws UnknownHostException declared for the local host name lookup
   */
  public static void main(String[] args) throws UnknownHostException {
    if (args.length > 1) {
      final String usage =
          "Usage: java -cp target/tachyon-"
              + Version.VERSION
              + "-jar-with-dependencies.jar "
              + "tachyon.Worker [<MasterHost:Port>]";
      LOG.info(usage);
      System.exit(-1);
    }

    final WorkerConf conf = WorkerConf.get();

    final String localHost = NetworkUtils.getLocalHostName();
    LOG.info("Resolved local TachyonWorker host to " + localHost);

    final String masterLocation = getMasterLocation(args);
    final String workerLocation = localHost + ":" + conf.PORT;
    final TachyonWorker worker =
        TachyonWorker.createWorker(
            masterLocation,
            workerLocation,
            conf.DATA_PORT,
            conf.SELECTOR_THREADS,
            conf.QUEUE_SIZE_PER_SELECTOR,
            conf.SERVER_THREADS,
            conf.DATA_FOLDER,
            conf.MEMORY_SIZE);

    try {
      worker.start();
    } catch (Exception failure) {
      // Log first, then rethrow unchecked with the original failure as the cause.
      LOG.error("Uncaught exception terminating worker", failure);
      throw new RuntimeException(failure);
    }
  }
Example #4
0
  /**
   * Main logic behind the worker process.
   *
   * <p>This object is lazily initialized. Before an object of this call should be used, {@link
   * #initialize()} must be called.
   *
   * @param masterAddress The TachyonMaster's address
   * @param dataFolder This TachyonWorker's local folder's path
   * @param memoryCapacityBytes The maximum memory space this TachyonWorker can use, in bytes
   */
  public WorkerStorage(
      InetSocketAddress masterAddress, String dataFolder, long memoryCapacityBytes) {
    COMMON_CONF = CommonConf.get();

    mMasterAddress = masterAddress;
    mMasterClient = new MasterClient(mMasterAddress);
    mLocalDataFolder = new File(dataFolder);

    mWorkerSpaceCounter = new WorkerSpaceCounter(memoryCapacityBytes);
    mLocalUserFolder = new File(mLocalDataFolder, WorkerConf.get().USER_TEMP_RELATIVE_FOLDER);
  }
Example #5
0
 /**
  * Logs in to the under file system with the configured keytab and principal.
  *
  * <p>Does nothing when either the keytab or the principal is not configured, or when the under
  * file system is not an {@code UnderFileSystemHdfs}.
  *
  * @throws IOException declared for the under file system login call
  */
 private void login() throws IOException {
   final WorkerConf conf = WorkerConf.get();
   final boolean credentialsConfigured = conf.KEYTAB != null && conf.PRINCIPAL != null;
   if (!credentialsConfigured) {
     return;
   }

   final UnderFileSystem underFs = UnderFileSystem.get(CommonConf.get().UNDERFS_ADDRESS);
   if (!(underFs instanceof UnderFileSystemHdfs)) {
     return;
   }

   final UnderFileSystemHdfs hdfs = (UnderFileSystemHdfs) underFs;
   hdfs.login(
       conf.KEYTAB_KEY,
       conf.KEYTAB,
       conf.PRINCIPAL_KEY,
       conf.PRINCIPAL,
       NetworkUtils.getFqdnHost(mWorkerAddress));
 }
Example #6
0
  /**
   * Finishes the set-up of this worker storage.
   *
   * <p>Registers with the master, derives the under file system folders from the worker id,
   * starts the checkpoint threads and then initializes the local storage. Any checked exception
   * raised while initializing the local storage is rethrown through {@code
   * Throwables.propagate}.
   *
   * @param address the address this worker registers under
   */
  public void initialize(final NetAddress address) {
    mWorkerAddress = address;

    register();

    final String workerFolder =
        CommonUtils.concat(COMMON_CONF.UNDERFS_WORKERS_FOLDER, mWorkerId);
    mUnderfsWorkerFolder = workerFolder;
    mUnderfsWorkerDataFolder = workerFolder + "/data";
    mUnderFs = UnderFileSystem.get(COMMON_CONF.UNDERFS_ADDRESS);
    mUsers = new Users(mLocalUserFolder.toString(), workerFolder);

    int threadIndex = 0;
    while (threadIndex < WorkerConf.get().WORKER_CHECKPOINT_THREADS) {
      final Thread checkpointer = new Thread(new CheckpointThread(threadIndex));
      mCheckpointThreads.add(checkpointer);
      checkpointer.start();
      threadIndex++;
    }

    try {
      initializeWorkerStorage();
    } catch (IOException ioFailure) {
      throw Throwables.propagate(ioFailure);
    } catch (FileDoesNotExistException missingFile) {
      throw Throwables.propagate(missingFile);
    } catch (SuspectedFileSizeException badSize) {
      throw Throwables.propagate(badSize);
    } catch (BlockInfoException badBlock) {
      throw Throwables.propagate(badBlock);
    } catch (TException rpcFailure) {
      throw Throwables.propagate(rpcFailure);
    }

    final String summary =
        "Current Worker Info: ID "
            + mWorkerId
            + ", ADDRESS: "
            + mWorkerAddress
            + ", MemoryCapacityBytes: "
            + mWorkerSpaceCounter.getCapacityBytes();
    LOG.info(summary);
  }
Example #7
0
 /**
  * Determines the master location as {@code host:port}.
  *
  * <p>Without arguments the location from the configuration is returned. Otherwise the first
  * argument is used, with the configured master port appended when it contains no {@code ':'}.
  * A warning is logged when the resulting location differs from the configured one.
  *
  * @param args the command line arguments of the worker
  * @return the master location to connect to
  */
 private static String getMasterLocation(String[] args) {
   final WorkerConf conf = WorkerConf.get();
   final String configured = conf.MASTER_HOSTNAME + ":" + conf.MASTER_PORT;
   if (args.length < 1) {
     return configured;
   }

   String requested = args[0];
   if (!requested.contains(":")) {
     requested = requested + ":" + conf.MASTER_PORT;
   }

   final boolean sameAsConfigured = requested.equals(configured);
   if (!sameAsConfigured) {
     final String warning =
         "Master Address in configuration file("
             + configured
             + ") is different "
             + "from the command line one("
             + requested
             + ").";
     LOG.warn(warning);
   }
   return requested;
 }
Example #8
0
/** The structure to store a worker's information in worker node. */
public class WorkerStorage {
  /**
   * Background task that checkpoints files belonging to this worker: it copies the locally stored
   * blocks of a file into the under file system and reports the checkpoint to the master.
   *
   * <p>Files are chosen from the priority dependencies first and from any other registered
   * dependency afterwards. All selection state is shared with the enclosing {@code
   * WorkerStorage} and is accessed while holding {@code mDependencyLock}.
   */
  public class CheckpointThread implements Runnable {
    private final Logger LOG = Logger.getLogger(Constants.LOGGER_TYPE);
    private final int ID;
    // Created lazily from the first intermediate path this thread checkpoints to.
    private UnderFileSystem mCheckpointUnderFs = null;

    /**
     * @param id the number of this thread, used in log messages only
     */
    public CheckpointThread(int id) {
      ID = id;
    }

    /**
     * Takes one file id from the first priority dependency that still has files.
     *
     * <p>This method assumes the mDependencyLock has been acquired.
     *
     * @return the file id, or -1 if no priority dependency has a file left
     */
    private int getFileIdBasedOnPriorityDependency() throws TException {
      if (mPriorityDependencies.isEmpty()) {
        return -1;
      }
      for (int depId : mPriorityDependencies) {
        int fileId = getFileIdFromOneDependency(depId);
        if (fileId != -1) {
          return fileId;
        }
      }
      return -1;
    }

    /**
     * Removes one file id from the given dependency and from the set of uncheckpointed files. The
     * dependency itself is dropped once its last file has been taken.
     *
     * <p>This method assumes the mDependencyLock has been acquired.
     *
     * @param depId the dependency to take a file from
     * @return the file id, or -1 if the dependency has no registered files
     */
    private int getFileIdFromOneDependency(int depId) throws TException {
      Set<Integer> fileIds = mDepIdToFiles.get(depId);
      if (fileIds != null && !fileIds.isEmpty()) {
        int fileId = fileIds.iterator().next();
        fileIds.remove(fileId);
        mUncheckpointFiles.remove(fileId);
        if (fileIds.isEmpty()) {
          mDepIdToFiles.remove(depId);
        }
        return fileId;
      }
      return -1;
    }

    /**
     * Takes one file id from any dependency that still has files.
     *
     * <p>This method assumes the mDependencyLock has been acquired.
     *
     * @return the file id, or -1 if there is nothing left to checkpoint
     */
    private int getRandomUncheckpointedFile() throws TException {
      if (mUncheckpointFiles.isEmpty()) {
        return -1;
      }
      // The loop returns right after the first successful removal, so the key set is never
      // iterated again after mDepIdToFiles has been modified.
      for (int depId : mDepIdToFiles.keySet()) {
        int fileId = getFileIdFromOneDependency(depId);
        if (fileId != -1) {
          return fileId;
        }
      }
      return -1;
    }

    /**
     * Fetches the priority dependency list from the master.
     *
     * @return the dependency ids sorted in descending order
     */
    private List<Integer> getSortedPriorityDependencyList() throws TException {
      List<Integer> ret = mMasterClient.worker_getPriorityDependencyList();
      // Fully qualified so that no additional import is required.
      java.util.Collections.sort(ret, java.util.Collections.<Integer>reverseOrder());
      return ret;
    }

    /**
     * Picks the next file to checkpoint, refreshing the priority dependencies from the master
     * when the current ones yield nothing.
     *
     * <p>This method assumes the mDependencyLock has been acquired.
     *
     * @return the file id, or -1 if there is nothing to checkpoint
     */
    private int selectFileToCheckpoint() throws TException {
      int fileId = getFileIdBasedOnPriorityDependency();

      if (fileId == -1) {
        List<Integer> latest = getSortedPriorityDependencyList();
        if (mPriorityDependencies.isEmpty()) {
          mPriorityDependencies = latest;
          if (!mPriorityDependencies.isEmpty()) {
            LOG.info(
                "Get new mPriorityDependencies "
                    + CommonUtils.listToString(mPriorityDependencies));
          }
        } else if (!mPriorityDependencies.equals(latest)) {
          // List.equals compares the boxed ids by value; comparing them with != would compare
          // object identity and misreport equal ids outside the Integer cache as different.
          mPriorityDependencies = latest;
        }

        fileId = getFileIdBasedOnPriorityDependency();
      }

      if (fileId == -1) {
        fileId = getRandomUncheckpointedFile();
      }
      return fileId;
    }

    /**
     * Concatenates the local block files of a file into one under file system file.
     *
     * @param fileInfo the file whose blocks are copied
     * @param midPath the under file system path to write to
     * @return the summed length of the local block files, in bytes
     */
    private long copyBlocksToUnderFs(ClientFileInfo fileInfo, String midPath)
        throws IOException {
      OutputStream os = mCheckpointUnderFs.create(midPath, (int) fileInfo.getBlockSizeByte());
      long fileSizeByte = 0;
      try {
        byte[] buf = new byte[16 * Constants.KB];
        for (int k = 0; k < fileInfo.blockIds.size(); k++) {
          File tempFile =
              new File(CommonUtils.concat(mLocalDataFolder.toString(), fileInfo.blockIds.get(k)));
          fileSizeByte += tempFile.length();
          InputStream is = new FileInputStream(tempFile);
          try {
            int got = is.read(buf);
            while (got != -1) {
              os.write(buf, 0, got);
              got = is.read(buf);
            }
          } finally {
            // Release the file handle even when the copy fails.
            is.close();
          }
        }
      } finally {
        os.close();
      }
      return fileSizeByte;
    }

    /**
     * Checkpoints one file: locks its blocks, copies them to an intermediate under file system
     * path, renames that path to the final one, reports the checkpoint to the master and then
     * sleeps as needed to stay below the configured per-thread throughput cap.
     *
     * @param fileId the file to checkpoint
     */
    private void checkpointFile(int fileId)
        throws FileDoesNotExistException, SuspectedFileSizeException, BlockInfoException,
            IOException, TException {
      // TODO checkpoint process. In future, move from midPath to dstPath should be done by
      // master
      String midPath = CommonUtils.concat(mUnderfsWorkerDataFolder, fileId);
      String dstPath = CommonUtils.concat(CommonConf.get().UNDERFS_DATA_FOLDER, fileId);
      LOG.info(
          "Thread "
              + ID
              + " is checkpointing file "
              + fileId
              + " from "
              + mLocalDataFolder.toString()
              + " to "
              + midPath
              + " to "
              + dstPath);

      if (mCheckpointUnderFs == null) {
        mCheckpointUnderFs = UnderFileSystem.get(midPath);
      }

      long startCopyTimeMs = System.currentTimeMillis();
      ClientFileInfo fileInfo = mMasterClient.getClientFileInfoById(fileId);
      if (!fileInfo.isComplete) {
        LOG.error("File " + fileInfo + " is not complete!");
        return;
      }

      long fileSizeByte = 0;
      int lockedBlocks = 0;
      try {
        for (int k = 0; k < fileInfo.blockIds.size(); k++) {
          lockBlock(fileInfo.blockIds.get(k), Users.sCHECKPOINT_USER_ID);
          lockedBlocks++;
        }
        fileSizeByte = copyBlocksToUnderFs(fileInfo, midPath);
        if (!mCheckpointUnderFs.rename(midPath, dstPath)) {
          LOG.error("Failed to rename from " + midPath + " to " + dstPath);
        }
        mMasterClient.addCheckpoint(mWorkerId, fileId, fileSizeByte, dstPath);
      } finally {
        // Always give back the locks that were taken, otherwise a failed checkpoint would leave
        // the blocks locked for the checkpoint user indefinitely.
        for (int k = 0; k < lockedBlocks; k++) {
          unlockBlock(fileInfo.blockIds.get(k), Users.sCHECKPOINT_USER_ID);
        }
      }

      long shouldTakeMs =
          (long)
              (1000.0
                  * fileSizeByte
                  / Constants.MB
                  / WorkerConf.get().WORKER_PER_THREAD_CHECKPOINT_CAP_MB_SEC);
      long currentTimeMs = System.currentTimeMillis();
      if (startCopyTimeMs + shouldTakeMs > currentTimeMs) {
        long shouldSleepMs = startCopyTimeMs + shouldTakeMs - currentTimeMs;
        LOG.info(
            "Checkpointed last file "
                + fileId
                + " took "
                + (currentTimeMs - startCopyTimeMs)
                + " ms. Need to sleep "
                + shouldSleepMs
                + " ms.");
        CommonUtils.sleepMs(LOG, shouldSleepMs);
      }
    }

    /**
     * Loops forever: selects a file, checkpoints it, and sleeps for one second when there is
     * nothing to do. Failures of a single round are logged and the loop carries on.
     */
    @Override
    public void run() {
      while (true) {
        try {
          int fileId;
          synchronized (mDependencyLock) {
            fileId = selectFileToCheckpoint();
          }

          if (fileId == -1) {
            LOG.debug("Thread " + ID + " has nothing to checkpoint. Sleep for 1 sec.");
            CommonUtils.sleepMs(LOG, Constants.SECOND_MS);
            continue;
          }

          checkpointFile(fileId);
        } catch (FileDoesNotExistException e) {
          LOG.warn(e.getMessage(), e);
        } catch (SuspectedFileSizeException e) {
          LOG.error(e.getMessage(), e);
        } catch (BlockInfoException e) {
          LOG.error(e.getMessage(), e);
        } catch (IOException e) {
          LOG.error(e.getMessage(), e);
        } catch (TException e) {
          LOG.warn(e.getMessage(), e);
        }
      }
    }
  }

  private final Logger LOG = Logger.getLogger(Constants.LOGGER_TYPE);

  // Common configuration, captured once in the constructor.
  private final CommonConf COMMON_CONF;
  // Client used for every call to the master.
  private volatile MasterClient mMasterClient;
  private InetSocketAddress mMasterAddress;
  // Address this worker registers under; set in initialize().
  private NetAddress mWorkerAddress;
  private WorkerSpaceCounter mWorkerSpaceCounter;

  // Id returned by the master in register().
  private long mWorkerId;
  // Ids of the blocks tracked by this worker; added in addBlockId(), removed in freeBlock().
  private Set<Long> mMemoryData = new HashSet<Long>();
  // Block id -> size in bytes, maintained together with mMemoryData.
  private Map<Long, Long> mBlockSizes = new HashMap<Long, Long>();

  // Block id -> time of the latest access in ms. This map is also the monitor that
  // addBlockId() and freeBlock() hold while updating mBlockSizes and mMemoryData.
  private Map<Long, Long> mLatestBlockAccessTimeMs = new HashMap<Long, Long>();
  // Block id -> ids of the users locking it. This map is also the monitor held while
  // mLockedBlocksPerUser is updated.
  private Map<Long, Set<Long>> mUsersPerLockedBlock = new HashMap<Long, Set<Long>>();

  // User id -> ids of the blocks locked by that user.
  private Map<Long, Set<Long>> mLockedBlocksPerUser = new HashMap<Long, Set<Long>>();
  // Ids of freed blocks, filled by freeBlock() and sent to the master by heartbeat().
  private BlockingQueue<Long> mRemovedBlockList =
      new ArrayBlockingQueue<Long>(Constants.WORKER_BLOCKS_QUEUE_SIZE);

  // Ids of the pre-existing blocks found by initializeWorkerStorage().
  private BlockingQueue<Long> mAddedBlockList =
      new ArrayBlockingQueue<Long>(Constants.WORKER_BLOCKS_QUEUE_SIZE);
  // Local folder holding the block files.
  private File mLocalDataFolder;
  // Local folder for user temporary data, located below mLocalDataFolder.
  private File mLocalUserFolder;
  // Under file system folders of this worker, derived from the worker id in initialize().
  private String mUnderfsWorkerFolder;
  private String mUnderfsWorkerDataFolder;
  // Set in initializeWorkerStorage() to mUnderfsWorkerFolder + "/orphans".
  private String mUnderfsOrphansFolder;

  private UnderFileSystem mUnderFs;

  private Users mUsers;
  // Dependency related lock
  private Object mDependencyLock = new Object();
  // Ids of the files registered through asyncCheckpoint() and not yet taken by a checkpoint
  // thread.
  private Set<Integer> mUncheckpointFiles = new HashSet<Integer>();
  // From dependencyId to files in that set.
  private Map<Integer, Set<Integer>> mDepIdToFiles = new HashMap<Integer, Set<Integer>>();

  // Priority dependency ids as last fetched from the master by a checkpoint thread.
  private List<Integer> mPriorityDependencies = new ArrayList<Integer>();

  // Threads started in initialize(), one per configured checkpoint thread.
  private ArrayList<Thread> mCheckpointThreads =
      new ArrayList<Thread>(WorkerConf.get().WORKER_CHECKPOINT_THREADS);

  /**
   * Main logic behind the worker process.
   *
   * <p>This object is lazily initialized. Before an object of this call should be used, {@link
   * #initialize()} must be called.
   *
   * @param masterAddress The TachyonMaster's address
   * @param dataFolder This TachyonWorker's local folder's path
   * @param memoryCapacityBytes The maximum memory space this TachyonWorker can use, in bytes
   */
  public WorkerStorage(
      InetSocketAddress masterAddress, String dataFolder, long memoryCapacityBytes) {
    COMMON_CONF = CommonConf.get();

    mMasterAddress = masterAddress;
    mMasterClient = new MasterClient(mMasterAddress);
    mLocalDataFolder = new File(dataFolder);

    mWorkerSpaceCounter = new WorkerSpaceCounter(memoryCapacityBytes);
    mLocalUserFolder = new File(mLocalDataFolder, WorkerConf.get().USER_TEMP_RELATIVE_FOLDER);
  }

  /**
   * Finishes the set-up of this worker storage.
   *
   * <p>Registers with the master, derives the under file system folders from the worker id,
   * starts the checkpoint threads and then initializes the local storage. Any checked exception
   * raised while initializing the local storage is rethrown through {@code
   * Throwables.propagate}.
   *
   * @param address the address this worker registers under
   */
  public void initialize(final NetAddress address) {
    mWorkerAddress = address;

    register();

    final String workerFolder =
        CommonUtils.concat(COMMON_CONF.UNDERFS_WORKERS_FOLDER, mWorkerId);
    mUnderfsWorkerFolder = workerFolder;
    mUnderfsWorkerDataFolder = workerFolder + "/data";
    mUnderFs = UnderFileSystem.get(COMMON_CONF.UNDERFS_ADDRESS);
    mUsers = new Users(mLocalUserFolder.toString(), workerFolder);

    int threadIndex = 0;
    while (threadIndex < WorkerConf.get().WORKER_CHECKPOINT_THREADS) {
      final Thread checkpointer = new Thread(new CheckpointThread(threadIndex));
      mCheckpointThreads.add(checkpointer);
      checkpointer.start();
      threadIndex++;
    }

    try {
      initializeWorkerStorage();
    } catch (IOException ioFailure) {
      throw Throwables.propagate(ioFailure);
    } catch (FileDoesNotExistException missingFile) {
      throw Throwables.propagate(missingFile);
    } catch (SuspectedFileSizeException badSize) {
      throw Throwables.propagate(badSize);
    } catch (BlockInfoException badBlock) {
      throw Throwables.propagate(badBlock);
    } catch (TException rpcFailure) {
      throw Throwables.propagate(rpcFailure);
    }

    final String summary =
        "Current Worker Info: ID "
            + mWorkerId
            + ", ADDRESS: "
            + mWorkerAddress
            + ", MemoryCapacityBytes: "
            + mWorkerSpaceCounter.getCapacityBytes();
    LOG.info(summary);
  }

  /**
   * Records the current time as the latest access time of a block.
   *
   * @param blockId The id of the block
   */
  void accessBlock(long blockId) {
    synchronized (mLatestBlockAccessTimeMs) {
      final long nowMs = System.currentTimeMillis();
      mLatestBlockAccessTimeMs.put(blockId, nowMs);
    }
  }

  /**
   * Starts tracking a block: stores its size, marks it as held by this worker and stamps it with
   * the current time as its latest access.
   *
   * @param blockId the id of the block
   * @param fileSizeBytes the size of the block file, in bytes
   */
  private void addBlockId(long blockId, long fileSizeBytes) {
    synchronized (mLatestBlockAccessTimeMs) {
      final long nowMs = System.currentTimeMillis();
      mLatestBlockAccessTimeMs.put(blockId, nowMs);
      mMemoryData.add(blockId);
      mBlockSizes.put(blockId, fileSizeBytes);
    }
  }

  /**
   * Add the checkpoint information of a file. The information is from the user <code>userId</code>.
   *
   * <p>This method is normally triggered from {@link tachyon.client.FileOutStream#close()} if and
   * only if {@link tachyon.client.WriteType#isThrough()} is true. The current implementation of
   * checkpointing is that through {@link tachyon.client.WriteType} operations write to {@link
   * tachyon.UnderFileSystem} on the client's write path, but under a user temp directory (temp
   * directory is defined in the worker as {@link #getUserUnderfsTempFolder(long)}).
   *
   * <p>The file is renamed from the user's temp folder to the under file system data folder, its
   * size is read back from there, and the checkpoint is then reported to the master.
   *
   * @param userId The user id of the client who send the notification
   * @param fileId The id of the checkpointed file
   * @throws FileDoesNotExistException
   * @throws SuspectedFileSizeException
   * @throws FailedToCheckpointException if the rename fails or the size of the renamed file
   *     cannot be read
   * @throws BlockInfoException
   * @throws TException
   */
  public void addCheckpoint(long userId, int fileId)
      throws FileDoesNotExistException, SuspectedFileSizeException, FailedToCheckpointException,
          BlockInfoException, TException {
    // TODO This part need to be changed.
    String srcPath = CommonUtils.concat(getUserUnderfsTempFolder(userId), fileId);
    String dstPath = CommonUtils.concat(COMMON_CONF.UNDERFS_DATA_FOLDER, fileId);
    try {
      if (!mUnderFs.rename(srcPath, dstPath)) {
        throw new FailedToCheckpointException("Failed to rename " + srcPath + " to " + dstPath);
      }
    } catch (IOException e) {
      String failure = "Failed to rename " + srcPath + " to " + dstPath;
      // The exception thrown below only carries a message, so record the underlying I/O error
      // here instead of discarding it.
      LOG.error(failure, e);
      throw new FailedToCheckpointException(failure);
    }
    long fileSize;
    try {
      fileSize = mUnderFs.getFileSize(dstPath);
    } catch (IOException e) {
      String failure = "Failed to getFileSize " + dstPath;
      LOG.error(failure, e);
      throw new FailedToCheckpointException(failure);
    }
    mMasterClient.addCheckpoint(mWorkerId, fileId, fileSize, dstPath);
  }

  /**
   * Starts tracking a block and reports it to the master as cached on this worker.
   *
   * @param blockId the id of the block
   * @param length the size of the block file, in bytes
   */
  private void addFoundBlock(long blockId, long length)
      throws FileDoesNotExistException, SuspectedFileSizeException, BlockInfoException, TException {
    addBlockId(blockId, length);
    final long usedBytes = mWorkerSpaceCounter.getUsedBytes();
    mMasterClient.worker_cacheBlock(mWorkerId, usedBytes, blockId, length);
  }

  /**
   * Queues a file for asynchronous checkpointing.
   *
   * <p>The file is only queued when the master reports a dependency id other than -1 for it; it
   * is then filed under that dependency for the checkpoint threads to pick up.
   *
   * @param fileId The id of the file
   * @return true if the file was queued, false if it has no dependency
   * @throws IOException
   * @throws TException
   */
  public boolean asyncCheckpoint(int fileId) throws IOException, TException {
    final ClientFileInfo info = mMasterClient.getClientFileInfoById(fileId);
    final int dependencyId = info.getDependencyId();
    if (dependencyId == -1) {
      return false;
    }

    synchronized (mDependencyLock) {
      mUncheckpointFiles.add(fileId);
      Set<Integer> filesOfDependency = mDepIdToFiles.get(dependencyId);
      if (filesOfDependency == null) {
        filesOfDependency = new HashSet<Integer>();
        mDepIdToFiles.put(dependencyId, filesOfDependency);
      }
      filesOfDependency.add(fileId);
    }
    return true;
  }

  /**
   * Notify the worker the block is cached.
   *
   * <p>This call is called remotely from {@link tachyon.client.TachyonFS#cacheBlock(long)} which is
   * only ever called from {@link tachyon.client.BlockOutStream#close()} (though it is a public api
   * so anyone could call it). There are a few preconditions for this to work:
   *
   * <p>1) Client process writes to files locally under a tachyon defined temp directory. 2) Worker
   * process is on the same node as the client. 3) Client is talking to the local worker directly.
   *
   * <p>The block file is moved from the user's temp folder into the local data folder, the block
   * is tracked, the bytes are taken off the user's own byte count and the master is told about
   * the cached block.
   *
   * @param userId The user id of the client who send the notification
   * @param blockId The id of the block
   * @throws FileDoesNotExistException if the block file is missing or cannot be renamed
   * @throws SuspectedFileSizeException
   * @throws BlockInfoException
   * @throws TException
   */
  public void cacheBlock(long userId, long blockId)
      throws FileDoesNotExistException, SuspectedFileSizeException, BlockInfoException, TException {
    final File userBlockFile = new File(CommonUtils.concat(getUserTempFolder(userId), blockId));
    final File workerBlockFile = new File(CommonUtils.concat(mLocalDataFolder, blockId));
    // Must be read before the rename: afterwards the source file no longer exists.
    final long blockBytes = userBlockFile.length();

    if (!userBlockFile.exists()) {
      throw new FileDoesNotExistException("File " + userBlockFile + " does not exist.");
    }
    final boolean moved = userBlockFile.renameTo(workerBlockFile);
    if (!moved) {
      throw new FileDoesNotExistException(
          "Failed to rename file from "
              + userBlockFile.getPath()
              + " to "
              + workerBlockFile.getPath());
    }

    addBlockId(blockId, blockBytes);
    mUsers.addOwnBytes(userId, -blockBytes);
    final long usedBytes = mWorkerSpaceCounter.getUsedBytes();
    mMasterClient.worker_cacheBlock(mWorkerId, usedBytes, blockId, blockBytes);
    LOG.info(userId + " " + workerBlockFile);
  }

  /**
   * Check worker's status. This should be executed periodically.
   *
   * <p>For every user reported by {@code mUsers.checkStatus()} the user is removed, the bytes
   * returned by that removal are given back to the space counter, and every block the user still
   * had locked is unlocked.
   */
  public void checkStatus() {
    for (long userId : mUsers.checkStatus()) {
      mWorkerSpaceCounter.returnUsedBytes(mUsers.removeUser(userId));
      synchronized (mUsersPerLockedBlock) {
        // Detach the user's lock set first, then release each lock it contained.
        final Set<Long> lockedBlocks = mLockedBlocksPerUser.remove(userId);
        if (lockedBlocks == null) {
          continue;
        }
        for (long blockId : lockedBlocks) {
          try {
            unlockBlock(blockId, userId);
          } catch (TException unlockFailure) {
            throw Throwables.propagate(unlockFailure);
          }
        }
      }
    }
  }

  /**
   * Remove a block from the memory.
   *
   * <p>Gives the block's bytes back to the space counter, deletes its local file, forgets its
   * bookkeeping and queues its id for the next heartbeat. An untracked block only produces a
   * warning.
   *
   * @param blockId The block to be removed.
   * @return Removed file size in bytes, or 0 if the block is not tracked.
   */
  private long freeBlock(long blockId) {
    synchronized (mLatestBlockAccessTimeMs) {
      if (!mBlockSizes.containsKey(blockId)) {
        LOG.warn("File " + blockId + " does not exist in memory.");
        return 0;
      }

      mWorkerSpaceCounter.returnUsedBytes(mBlockSizes.get(blockId));
      final File blockFile = new File(CommonUtils.concat(mLocalDataFolder, blockId));
      blockFile.delete();
      mLatestBlockAccessTimeMs.remove(blockId);
      final long freedBytes = mBlockSizes.remove(blockId);
      mRemovedBlockList.add(blockId);
      mMemoryData.remove(blockId);
      LOG.info("Removed Data " + blockId);
      return freedBytes;
    }
  }

  /**
   * Remove blocks from the memory.
   *
   * <p>This is triggered when the worker heartbeats to the master, which sends a {@link
   * tachyon.thrift.Command} with type {@link tachyon.thrift.CommandType#Free}. Each block is
   * freed individually through {@code freeBlock(long)}.
   *
   * @param blocks The list of blocks to be removed.
   */
  public void freeBlocks(List<Long> blocks) {
    for (Long id : blocks) {
      freeBlock(id);
    }
  }

  /**
   * Returns the path of the worker's local data folder.
   *
   * @return The root local data folder of the worker
   * @throws TException
   */
  public String getDataFolder() throws TException {
    final String localDataPath = mLocalDataFolder.toString();
    return localDataPath;
  }

  /**
   * Returns the folder in the under file system that holds orphaned blocks.
   *
   * @return The orphans' folder in the under file system
   */
  public String getUnderfsOrphansFolder() {
    return this.mUnderfsOrphansFolder;
  }

  /**
   * Get the local user temporary folder of the specified user.
   *
   * <p>Delegates to {@link tachyon.Users#getUserTempFolder(long)} and logs the result.
   *
   * <p>This method differs from {@link #getUserUnderfsTempFolder(long)} in where the folder
   * lives: this one is the local temporary folder, the other one is in the under file system.
   *
   * @see tachyon.Users#getUserTempFolder(long)
   * @param userId The id of the user
   * @return The local user temporary folder of the specified user
   * @throws TException
   */
  public String getUserTempFolder(long userId) throws TException {
    final String tempFolder = mUsers.getUserTempFolder(userId);
    LOG.info("Return UserTempFolder for " + userId + " : " + tempFolder);
    return tempFolder;
  }

  /**
   * Get the user temporary folder in the under file system of the specified user.
   *
   * <p>Delegates to {@link tachyon.Users#getUserUnderfsTempFolder(long)} and logs the result.
   *
   * <p>This method differs from {@link #getUserTempFolder(long)} in where the folder lives: this
   * one is in the {@link tachyon.UnderFileSystem}, the other one is the local temporary folder.
   *
   * @param userId The id of the user
   * @return The user temporary folder in the under file system
   * @throws TException
   */
  public String getUserUnderfsTempFolder(long userId) throws TException {
    final String underfsTempFolder = mUsers.getUserUnderfsTempFolder(userId);
    LOG.info("Return UserHdfsTempFolder for " + userId + " : " + underfsTempFolder);
    return underfsTempFolder;
  }

  /**
   * Heartbeat with the TachyonMaster. Send the removed block list to the Master.
   *
   * <p>Every block id currently queued in the removed block list is taken off the queue and sent
   * along with the worker id and the used bytes.
   *
   * @return The Command received from the Master
   * @throws BlockInfoException
   * @throws TException
   */
  public Command heartbeat() throws BlockInfoException, TException {
    ArrayList<Long> sendRemovedPartitionList = new ArrayList<Long>();
    // drainTo moves all queued ids in one step. Checking size() and then calling poll() could
    // add a null to the list if the queue was emptied in between.
    mRemovedBlockList.drainTo(sendRemovedPartitionList);
    return mMasterClient.worker_heartbeat(
        mWorkerId, mWorkerSpaceCounter.getUsedBytes(), sendRemovedPartitionList);
  }

  /**
   * Prepares the local folders and registers the block files that already exist locally.
   *
   * <p>If the local data folder does not exist, it and the user folder are created and nothing
   * else is done. Otherwise the user folder is recreated empty, the orphans folder in the under
   * file system is created if missing, and every regular file in the data folder is registered
   * with the master as a cached block. A file the master does not know is swapped out to the
   * orphans folder and freed.
   *
   * @throws IOException if the content of the local data folder cannot be listed
   * @throws IllegalArgumentException if the local data folder exists but is not a directory
   * @throws RuntimeException if the pre-existing files exceed the local memory capacity
   */
  private void initializeWorkerStorage()
      throws IOException, FileDoesNotExistException, SuspectedFileSizeException, BlockInfoException,
          TException {
    LOG.info("Initializing the worker storage.");
    if (!mLocalDataFolder.exists()) {
      LOG.info("Local folder " + mLocalDataFolder + " does not exist. Creating a new one.");
      mLocalDataFolder.mkdirs();
      mLocalUserFolder.mkdirs();

      CommonUtils.changeLocalFilePermission(mLocalDataFolder.getPath(), "775");
      CommonUtils.changeLocalFilePermission(mLocalUserFolder.getPath(), "775");
      return;
    }

    if (!mLocalDataFolder.isDirectory()) {
      String tmp = "Data folder " + mLocalDataFolder + " is not a folder!";
      LOG.error(tmp);
      throw new IllegalArgumentException(tmp);
    }

    if (mLocalUserFolder.exists()) {
      try {
        FileUtils.deleteDirectory(mLocalUserFolder);
      } catch (IOException e) {
        // Best effort: a leftover user folder is logged but does not stop the start-up.
        LOG.error(e.getMessage(), e);
      }
    }
    mLocalUserFolder.mkdir();
    CommonUtils.changeLocalFilePermission(mLocalUserFolder.getPath(), "775");

    mUnderfsOrphansFolder = mUnderfsWorkerFolder + "/orphans";
    if (!mUnderFs.exists(mUnderfsOrphansFolder)) {
      mUnderFs.mkdirs(mUnderfsOrphansFolder, true);
    }

    // listFiles() returns null when the folder cannot be read; report that as an I/O failure
    // rather than failing with a NullPointerException in the loop below.
    File[] localFiles = mLocalDataFolder.listFiles();
    if (localFiles == null) {
      throw new IOException("Failed to list the files in local data folder " + mLocalDataFolder);
    }

    int cnt = 0;
    for (File tFile : localFiles) {
      if (!tFile.isFile()) {
        continue;
      }
      cnt++;
      // Read the length once so the space reserved and the size registered always agree.
      long length = tFile.length();
      LOG.info("File " + cnt + ": " + tFile.getPath() + " with size " + length + " Bs.");

      long blockId = CommonUtils.getBlockIdFromFileName(tFile.getName());
      boolean success = mWorkerSpaceCounter.requestSpaceBytes(length);
      try {
        addFoundBlock(blockId, length);
      } catch (FileDoesNotExistException e) {
        LOG.error("BlockId: " + blockId + " becomes orphan for: \"" + e.message + "\"");
        LOG.info(
            "Swapout File " + cnt + ": blockId: " + blockId + " to " + mUnderfsOrphansFolder);
        swapoutOrphanBlocks(blockId, tFile);
        freeBlock(blockId);
        continue;
      }
      mAddedBlockList.add(blockId);
      if (!success) {
        throw new RuntimeException("Pre-existing files exceed the local memory capacity.");
      }
    }
  }

  /**
   * Lock the block
   *
   * <p>Used internally to make sure blocks are unmodified, but also used in {@link
   * tachyon.client.TachyonFS} for caching blocks locally for users. When a user tries to read a
   * block ({@link tachyon.client.TachyonFile#readByteBuffer()}), the client will attempt to cache
   * the block on the local user's node; while the user is reading from the local block, the given
   * block is locked, and it is unlocked once read.
   *
   * <p>The lock is recorded in both directions: the user is added to the block's lockers and the
   * block is added to the user's locked blocks.
   *
   * @param blockId The id of the block
   * @param userId The id of the user who locks the block
   * @throws TException
   */
  public void lockBlock(long blockId, long userId) throws TException {
    synchronized (mUsersPerLockedBlock) {
      Set<Long> lockers = mUsersPerLockedBlock.get(blockId);
      if (lockers == null) {
        lockers = new HashSet<Long>();
        mUsersPerLockedBlock.put(blockId, lockers);
      }
      lockers.add(userId);

      Set<Long> blocksOfUser = mLockedBlocksPerUser.get(userId);
      if (blocksOfUser == null) {
        blocksOfUser = new HashSet<Long>();
        mLockedBlocksPerUser.put(userId, blocksOfUser);
      }
      blocksOfUser.add(blockId);
    }
  }

  /**
   * Use local LRU to evict data, and get <code> requestBytes </code> available space.
   *
   * <p>Repeatedly frees the block with the oldest access time that is neither pinned according
   * to the master nor locked by any user, until enough space is available. If the pin list
   * cannot be fetched, no block is treated as pinned.
   *
   * @param requestBytes The data requested.
   * @return <code> true </code> if the space is granted, <code> false </code> if not.
   */
  private boolean memoryEvictionLRU(long requestBytes) {
    Set<Integer> pinnedIds;
    try {
      pinnedIds = mMasterClient.worker_getPinIdList();
    } catch (TException e) {
      LOG.error(e.getMessage());
      pinnedIds = new HashSet<Integer>();
    }

    synchronized (mLatestBlockAccessTimeMs) {
      synchronized (mUsersPerLockedBlock) {
        while (mWorkerSpaceCounter.getAvailableBytes() < requestBytes) {
          long victimId = -1;
          long oldestAccessMs = Long.MAX_VALUE;
          for (Entry<Long, Long> candidate : mLatestBlockAccessTimeMs.entrySet()) {
            if (candidate.getValue() >= oldestAccessMs) {
              continue;
            }
            if (pinnedIds.contains(BlockInfo.computeInodeId(candidate.getKey()))) {
              continue;
            }
            if (mUsersPerLockedBlock.containsKey(candidate.getKey())) {
              continue;
            }
            victimId = candidate.getKey();
            oldestAccessMs = candidate.getValue();
          }

          if (victimId == -1) {
            // Nothing evictable is left, so the request cannot be satisfied.
            return false;
          }
          freeBlock(victimId);
        }
      }
    }

    return true;
  }

  /**
   * Registers this TachyonWorker with the TachyonMaster.
   *
   * <p>Connects the master client and sends the worker address, capacity, used bytes and a copy
   * of the in-memory block list. On any failure the error is logged, the thread sleeps for
   * <code>Constants.SECOND_MS</code>, and the attempt is repeated. The loop ends only once the
   * master returns a non-zero worker id, which is then stored in <code>mWorkerId</code>.
   */
  public void register() {
    // Stays 0 whenever an attempt throws, because the assignment below never completes.
    long assignedId = 0;
    do {
      try {
        mMasterClient.connect();
        assignedId =
            mMasterClient.worker_register(
                mWorkerAddress,
                mWorkerSpaceCounter.getCapacityBytes(),
                mWorkerSpaceCounter.getUsedBytes(),
                new ArrayList<Long>(mMemoryData));
      } catch (BlockInfoException e) {
        LOG.error(e.getMessage(), e);
        CommonUtils.sleepMs(LOG, Constants.SECOND_MS);
      } catch (TException e) {
        LOG.error(e.getMessage(), e);
        CommonUtils.sleepMs(LOG, Constants.SECOND_MS);
      }
    } while (assignedId == 0);
    mWorkerId = assignedId;
  }

  /**
   * Requests space from the worker on behalf of a user.
   *
   * <p>A request larger than the worker's total capacity is rejected immediately. Otherwise the
   * space counter is asked for the bytes; each time it refuses, LRU eviction is attempted and the
   * request is retried. On success the bytes are credited to the user.
   *
   * @param userId The id of the user who sends the request
   * @param requestBytes The requested space size, in bytes
   * @return true if the space was granted, false if the request exceeds capacity or eviction
   *     could not free enough space
   * @throws TException declared in the signature
   */
  public boolean requestSpace(long userId, long requestBytes) throws TException {
    String requestSummary =
        "requestSpace(" + userId + ", " + requestBytes + "): Current available: "
            + mWorkerSpaceCounter.getAvailableBytes()
            + " requested: " + requestBytes;
    LOG.info(requestSummary);

    if (mWorkerSpaceCounter.getCapacityBytes() < requestBytes) {
      LOG.info(
          "user_requestSpace(): requested memory size is larger than the total memory on"
              + " the machine.");
      return false;
    }

    boolean granted = mWorkerSpaceCounter.requestSpaceBytes(requestBytes);
    while (!granted) {
      // Not enough free space: try to evict, and give up if nothing can be evicted.
      if (!memoryEvictionLRU(requestBytes)) {
        return false;
      }
      granted = mWorkerSpaceCounter.requestSpaceBytes(requestBytes);
    }

    mUsers.addOwnBytes(userId, requestBytes);
    return true;
  }

  /**
   * Replaces the current master client with a newly created, connected one.
   *
   * <p>The new client is only stored in <code>mMasterClient</code> after its
   * <code>connect()</code> call returns; if connecting throws, the existing client is left in
   * place.
   *
   * @throws TException if connecting the new client fails
   */
  public void resetMasterClient() throws TException {
    final MasterClient freshClient = new MasterClient(mMasterAddress);
    freshClient.connect();
    mMasterClient = freshClient;
  }

  /**
   * Returns space which had previously been requested by a user.
   *
   * <p>If the user owns at least <code>returnedBytes</code>, the bytes are handed back to the
   * space counter and deducted from the user's owned bytes. Otherwise an error is logged and
   * nothing is returned. The before/after availability is logged in both cases.
   *
   * @param userId The id of the user who wants to return the space
   * @param returnedBytes The returned space size, in bytes
   * @throws TException declared in the signature
   */
  public void returnSpace(long userId, long returnedBytes) throws TException {
    final long availableBefore = mWorkerSpaceCounter.getAvailableBytes();

    if (returnedBytes <= mUsers.ownBytes(userId)) {
      mWorkerSpaceCounter.returnUsedBytes(returnedBytes);
      mUsers.addOwnBytes(userId, -returnedBytes);
    } else {
      LOG.error("User " + userId + " does not own " + returnedBytes + " bytes.");
    }

    String summary =
        "returnSpace(" + userId + ", " + returnedBytes + ") : " + availableBefore
            + " returned: " + returnedBytes
            + ". New Available: " + mWorkerSpaceCounter.getAvailableBytes();
    LOG.info(summary);
  }

  /** Disconnects from the master by shutting down the current master client. */
  public void stop() {
    mMasterClient.shutdown();
  }

  /**
   * Swaps out a block that is missing INode information onto the under file system, from where
   * it can be retrieved by the user later. Cleanup of these files only happens while formatting
   * the TFS.
   *
   * <p>The local file is memory-mapped read-only and copied in 64 KB chunks to a file named after
   * the block id inside <code>mUnderfsOrphansFolder</code>. Both the local file and the under-FS
   * output stream are closed even if the copy fails.
   *
   * @param blockId The id of the orphan block; used as the destination file name
   * @param file The local file holding the block's data
   * @throws IOException if the local file cannot be read or the under-FS file cannot be written
   */
  private void swapoutOrphanBlocks(long blockId, File file) throws IOException {
    RandomAccessFile localFile = new RandomAccessFile(file, "r");
    try {
      ByteBuffer buf = localFile.getChannel().map(MapMode.READ_ONLY, 0, file.length());

      String ufsOrphanBlock = CommonUtils.concat(mUnderfsOrphansFolder, blockId);
      OutputStream os = mUnderFs.create(ufsOrphanBlock);
      try {
        final int bulkSize = Constants.KB * 64;
        byte[] bulk = new byte[bulkSize];
        // Loop on remaining bytes; a precomputed chunk count could overflow int for
        // mappings close to 2 GB.
        while (buf.hasRemaining()) {
          int len = Math.min(bulkSize, buf.remaining());
          buf.get(bulk, 0, len);
          os.write(bulk, 0, len);
        }
      } finally {
        os.close();
      }
    } finally {
      localFile.close();
    }
  }

  /**
   * Removes the given user's lock on the given block.
   *
   * <p>Used internally to make sure blocks are unmodified, but also used in {@link
   * tachyon.client.TachyonFS} for caching blocks locally for users. When a user tries to read a
   * block ({@link tachyon.client.TachyonFile#readByteBuffer()}), the client will attempt to cache
   * the block on the local user's node; while the user is reading from the local block, the block
   * is locked, and it is unlocked once read.
   *
   * <p>When the last user unlocks a block, the block's entry is removed from
   * <code>mUsersPerLockedBlock</code>. The user's entry in <code>mLockedBlocksPerUser</code> is
   * kept even if it becomes empty. Unlocking a block or user that is not tracked is a no-op.
   *
   * @param blockId The id of the block
   * @param userId The id of the user who unlocks the block
   * @throws TException declared in the signature; nothing in this method body throws it
   */
  public void unlockBlock(long blockId, long userId) throws TException {
    synchronized (mUsersPerLockedBlock) {
      Set<Long> lockingUsers = mUsersPerLockedBlock.get(blockId);
      if (lockingUsers != null) {
        lockingUsers.remove(userId);
        if (lockingUsers.size() == 0) {
          // Nobody holds the block any more; drop its entry entirely.
          mUsersPerLockedBlock.remove(blockId);
        }
      }

      Set<Long> lockedBlocks = mLockedBlocksPerUser.get(userId);
      if (lockedBlocks != null) {
        lockedBlocks.remove(blockId);
      }
    }
  }

  /**
   * Handles the user's heartbeat by delegating to <code>mUsers.userHeartbeat</code>.
   *
   * @param userId The id of the user
   * @throws TException declared in the signature
   */
  public void userHeartbeat(long userId) throws TException {
    mUsers.userHeartbeat(userId);
  }
}
Example #9
0
    /**
     * Checkpoint loop: repeatedly picks a file and copies it to the under file system.
     *
     * <p>Each iteration:
     *
     * <ol>
     *   <li>Chooses a file id under <code>mDependencyLock</code>: first from the priority
     *       dependencies (refreshing the list if needed), then a random un-checkpointed file. If
     *       there is none, sleeps for <code>Constants.SECOND_MS</code> and retries.
     *   <li>Locks all blocks of the file for the checkpoint user, concatenates the local block
     *       files into <code>midPath</code>, renames it to <code>dstPath</code> and reports the
     *       checkpoint to the master.
     *   <li>Unlocks the blocks (always, even on failure) and sleeps as needed to stay within
     *       <code>WORKER_PER_THREAD_CHECKPOINT_CAP_MB_SEC</code>.
     * </ol>
     *
     * <p>Exceptions are logged and the loop continues with the next iteration; the loop never
     * terminates on its own.
     */
    @Override
    public void run() {
      while (true) {
        try {
          int fileId = -1;
          synchronized (mDependencyLock) {
            fileId = getFileIdBasedOnPriorityDependency();

            if (fileId == -1) {
              if (mPriorityDependencies.size() == 0) {
                mPriorityDependencies = getSortedPriorityDependencyList();
                if (!mPriorityDependencies.isEmpty()) {
                  LOG.info(
                      "Get new mPriorityDependencies "
                          + CommonUtils.listToString(mPriorityDependencies));
                }
              } else {
                List<Integer> tList = getSortedPriorityDependencyList();
                // List.equals compares sizes and then elements by value; comparing the boxed
                // Integers with != would compare references and misreport equal ids.
                if (!tList.equals(mPriorityDependencies)) {
                  mPriorityDependencies = tList;
                }
              }

              fileId = getFileIdBasedOnPriorityDependency();
            }

            if (fileId == -1) {
              fileId = getRandomUncheckpointedFile();
            }
          }

          if (fileId == -1) {
            LOG.debug("Thread " + ID + " has nothing to checkpoint. Sleep for 1 sec.");
            CommonUtils.sleepMs(LOG, Constants.SECOND_MS);
            continue;
          }

          // TODO checkpoint process. In future, move from midPath to dstPath should be done by
          // master
          String midPath = CommonUtils.concat(mUnderfsWorkerDataFolder, fileId);
          String dstPath = CommonUtils.concat(CommonConf.get().UNDERFS_DATA_FOLDER, fileId);
          LOG.info(
              "Thread "
                  + ID
                  + " is checkpointing file "
                  + fileId
                  + " from "
                  + mLocalDataFolder.toString()
                  + " to "
                  + midPath
                  + " to "
                  + dstPath);

          if (mCheckpointUnderFs == null) {
            mCheckpointUnderFs = UnderFileSystem.get(midPath);
          }

          long startCopyTimeMs = System.currentTimeMillis();
          ClientFileInfo fileInfo = mMasterClient.getClientFileInfoById(fileId);
          if (!fileInfo.isComplete) {
            LOG.error("File " + fileInfo + " is not complete!");
            continue;
          }

          long fileSizeByte = 0;
          try {
            for (int k = 0; k < fileInfo.blockIds.size(); k++) {
              lockBlock(fileInfo.blockIds.get(k), Users.sCHECKPOINT_USER_ID);
            }

            OutputStream os =
                mCheckpointUnderFs.create(midPath, (int) fileInfo.getBlockSizeByte());
            try {
              byte[] buf = new byte[16 * Constants.KB];
              for (int k = 0; k < fileInfo.blockIds.size(); k++) {
                File tempFile =
                    new File(
                        CommonUtils.concat(
                            mLocalDataFolder.toString(), fileInfo.blockIds.get(k)));
                fileSizeByte += tempFile.length();
                InputStream is = new FileInputStream(tempFile);
                try {
                  int got = is.read(buf);
                  while (got != -1) {
                    os.write(buf, 0, got);
                    got = is.read(buf);
                  }
                } finally {
                  is.close();
                }
              }
            } finally {
              os.close();
            }

            // NOTE(review): a failed rename is only logged and the checkpoint is still
            // registered at dstPath - confirm this is intended.
            if (!mCheckpointUnderFs.rename(midPath, dstPath)) {
              LOG.error("Failed to rename from " + midPath + " to " + dstPath);
            }
            mMasterClient.addCheckpoint(mWorkerId, fileId, fileSizeByte, dstPath);
          } finally {
            // Always release the checkpoint user's locks; otherwise an exception during the
            // copy would leave the blocks locked under the checkpoint user id.
            for (int k = 0; k < fileInfo.blockIds.size(); k++) {
              unlockBlock(fileInfo.blockIds.get(k), Users.sCHECKPOINT_USER_ID);
            }
          }

          // Throttle: sleep for whatever is left of the time the copy should have taken at the
          // configured per-thread rate.
          long shouldTakeMs =
              (long)
                  (1000.0
                      * fileSizeByte
                      / Constants.MB
                      / WorkerConf.get().WORKER_PER_THREAD_CHECKPOINT_CAP_MB_SEC);
          long currentTimeMs = System.currentTimeMillis();
          if (startCopyTimeMs + shouldTakeMs > currentTimeMs) {
            long shouldSleepMs = startCopyTimeMs + shouldTakeMs - currentTimeMs;
            LOG.info(
                "Checkpointed last file "
                    + fileId
                    + " took "
                    + (currentTimeMs - startCopyTimeMs)
                    + " ms. Need to sleep "
                    + shouldSleepMs
                    + " ms.");
            CommonUtils.sleepMs(LOG, shouldSleepMs);
          }
        } catch (FileDoesNotExistException e) {
          LOG.warn(e.getMessage(), e);
        } catch (SuspectedFileSizeException e) {
          LOG.error(e.getMessage(), e);
        } catch (BlockInfoException e) {
          LOG.error(e.getMessage(), e);
        } catch (IOException e) {
          LOG.error(e.getMessage(), e);
        } catch (TException e) {
          LOG.warn(e.getMessage(), e);
        }
      }
    }