Example #1
0
  /**
   * Open the connection to the worker. And start the heartbeat thread.
   *
   * @return true if succeed, false otherwise
   * @throws IOException
   */
  private synchronized boolean connect() throws IOException {
    if (!mConnected) {
      NetAddress workerNetAddress = null;
      try {
        String localHostName = NetworkAddressUtils.getLocalHostName(mTachyonConf);
        LOG.info("Trying to get local worker host : " + localHostName);
        workerNetAddress = mMasterClient.user_getWorker(false, localHostName);
        mIsLocal =
            workerNetAddress
                .getMHost()
                .equals(InetAddress.getByName(localHostName).getHostAddress());
      } catch (NoWorkerException e) {
        LOG.info(e.getMessage());
        workerNetAddress = null;
      } catch (UnknownHostException e) {
        LOG.info(e.getMessage());
        workerNetAddress = null;
      }

      if (workerNetAddress == null) {
        try {
          workerNetAddress = mMasterClient.user_getWorker(true, "");
        } catch (NoWorkerException e) {
          LOG.info("No worker running in the system: " + e.getMessage());
          mClient = null;
          return false;
        }
      }

      String host = NetworkAddressUtils.getFqdnHost(workerNetAddress);
      int port = workerNetAddress.mPort;
      mWorkerAddress = new InetSocketAddress(host, port);
      mWorkerDataServerAddress = new InetSocketAddress(host, workerNetAddress.mSecondaryPort);
      LOG.info("Connecting " + (mIsLocal ? "local" : "remote") + " worker @ " + mWorkerAddress);

      mProtocol = new TBinaryProtocol(new TFramedTransport(new TSocket(host, port)));
      mClient = new WorkerService.Client(mProtocol);

      mHeartbeatExecutor = new WorkerClientHeartbeatExecutor(this, mMasterClient.getUserId());
      String threadName = "worker-heartbeat-" + mWorkerAddress;
      int interval = mTachyonConf.getInt(Constants.USER_HEARTBEAT_INTERVAL_MS, Constants.SECOND_MS);
      mHeartbeat =
          mExecutorService.submit(new HeartbeatThread(threadName, mHeartbeatExecutor, interval));

      try {
        mProtocol.getTransport().open();
      } catch (TTransportException e) {
        LOG.error(e.getMessage(), e);
        return false;
      }
      mConnected = true;
    }

    return mConnected;
  }
Example #2
0
  /**
   * Use local LRU to evict data, and get <code> requestBytes </code> available space.
   *
   * @param requestBytes The data requested.
   * @return <code> true </code> if the space is granted, <code> false </code> if not.
   */
  private boolean memoryEvictionLRU(long requestBytes) {
    Set<Integer> pinList;

    try {
      pinList = mMasterClient.worker_getPinIdList();
    } catch (TException e) {
      LOG.error(e.getMessage());
      pinList = new HashSet<Integer>();
    }

    synchronized (mLatestBlockAccessTimeMs) {
      synchronized (mUsersPerLockedBlock) {
        while (mWorkerSpaceCounter.getAvailableBytes() < requestBytes) {
          long blockId = -1;
          long latestTimeMs = Long.MAX_VALUE;
          for (Entry<Long, Long> entry : mLatestBlockAccessTimeMs.entrySet()) {
            if (entry.getValue() < latestTimeMs
                && !pinList.contains(BlockInfo.computeInodeId(entry.getKey()))) {
              if (!mUsersPerLockedBlock.containsKey(entry.getKey())) {
                blockId = entry.getKey();
                latestTimeMs = entry.getValue();
              }
            }
          }
          if (blockId != -1) {
            freeBlock(blockId);
          } else {
            return false;
          }
        }
      }
    }

    return true;
  }
Example #3
0
 /**
  * Heartbeat with the TachyonMaster. Send the removed block list to the Master.
  *
  * @return The Command received from the Master
  * @throws BlockInfoException
  * @throws TException
  */
 public Command heartbeat() throws BlockInfoException, TException {
   ArrayList<Long> sendRemovedPartitionList = new ArrayList<Long>();
   while (mRemovedBlockList.size() > 0) {
     sendRemovedPartitionList.add(mRemovedBlockList.poll());
   }
   return mMasterClient.worker_heartbeat(
       mWorkerId, mWorkerSpaceCounter.getUsedBytes(), sendRemovedPartitionList);
 }
Example #4
0
  /**
   * Unlock the block
   *
   * @param blockId The id of the block
   * @return true if success, false otherwise
   * @throws IOException
   */
  public synchronized boolean unlockBlock(long blockId) throws IOException {
    mustConnect();

    try {
      return mClient.unlockBlock(blockId, mMasterClient.getUserId());
    } catch (TException e) {
      mConnected = false;
      throw new IOException(e);
    }
  }
Example #5
0
  /**
   * Get the user temporary folder in the under file system of the specified user.
   *
   * @return The user temporary folder in the under file system
   * @throws IOException
   */
  public synchronized String getUserUfsTempFolder() throws IOException {
    mustConnect();

    try {
      return mClient.getUserUfsTempFolder(mMasterClient.getUserId());
    } catch (TException e) {
      mConnected = false;
      throw new IOException(e);
    }
  }
Example #6
0
  /**
   * Notify worker that the block has been cancelled
   *
   * @param blockId The Id of the block to be cancelled
   * @throws IOException
   */
  public synchronized void cancelBlock(long blockId) throws IOException {
    mustConnect();

    try {
      mClient.cancelBlock(mMasterClient.getUserId(), blockId);
    } catch (TException e) {
      mConnected = false;
      throw new IOException(e);
    }
  }
Example #7
0
  /**
   * Lock the block, therefore, the worker will not evict the block from the memory until it is
   * unlocked.
   *
   * @param blockId The id of the block
   * @return the path of the block file locked
   * @throws IOException
   */
  public synchronized String lockBlock(long blockId) throws IOException {
    mustConnect();

    try {
      return mClient.lockBlock(blockId, mMasterClient.getUserId());
    } catch (FileDoesNotExistException e) {
      return null;
    } catch (TException e) {
      mConnected = false;
      throw new IOException(e);
    }
  }
Example #8
0
 /** Register this TachyonWorker to the TachyonMaster */
 public void register() {
   long id = 0;
   while (id == 0) {
     try {
       mMasterClient.connect();
       id =
           mMasterClient.worker_register(
               mWorkerAddress,
               mWorkerSpaceCounter.getCapacityBytes(),
               mWorkerSpaceCounter.getUsedBytes(),
               new ArrayList<Long>(mMemoryData));
     } catch (BlockInfoException e) {
       LOG.error(e.getMessage(), e);
       id = 0;
       CommonUtils.sleepMs(LOG, Constants.SECOND_MS);
     } catch (TException e) {
       LOG.error(e.getMessage(), e);
       id = 0;
       CommonUtils.sleepMs(LOG, Constants.SECOND_MS);
     }
   }
   mWorkerId = id;
 }
Example #9
0
  /**
   * Notify the worker the block is cached.
   *
   * @param blockId The id of the block
   * @throws IOException
   */
  public synchronized void cacheBlock(long blockId) throws IOException {
    mustConnect();

    try {
      mClient.cacheBlock(mMasterClient.getUserId(), blockId);
    } catch (FileDoesNotExistException e) {
      throw new IOException(e);
    } catch (BlockInfoException e) {
      throw new IOException(e);
    } catch (TException e) {
      mConnected = false;
      throw new IOException(e);
    }
  }
Example #10
0
  /**
   * Request space for some block from worker
   *
   * @param blockId The id of the block
   * @param requestBytes The requested space size, in bytes
   * @return true if success, false otherwise
   * @throws IOException
   */
  public synchronized boolean requestSpace(long blockId, long requestBytes) throws IOException {
    mustConnect();

    try {
      return mClient.requestSpace(mMasterClient.getUserId(), blockId, requestBytes);
    } catch (OutOfSpaceException e) {
      return false;
    } catch (FileDoesNotExistException e) {
      throw new IOException(e);
    } catch (TException e) {
      mConnected = false;
      throw new IOException(e);
    }
  }
Example #11
0
  /**
   * Get temporary path for the block from the worker
   *
   * @param blockId The id of the block
   * @param initialBytes The initial size bytes allocated for the block
   * @return the temporary path of the block
   * @throws IOException
   */
  public synchronized String requestBlockLocation(long blockId, long initialBytes)
      throws IOException {
    mustConnect();

    try {
      return mClient.requestBlockLocation(mMasterClient.getUserId(), blockId, initialBytes);
    } catch (OutOfSpaceException e) {
      throw new IOException(e);
    } catch (FileAlreadyExistException e) {
      throw new IOException(e);
    } catch (TException e) {
      mConnected = false;
      throw new IOException(e);
    }
  }
Example #12
0
  /**
   * Notify the worker to checkpoint the file asynchronously.
   *
   * @param fileId The id of the file
   * @return true if succeed, false otherwise
   * @throws IOException
   * @throws TException
   */
  public boolean asyncCheckpoint(int fileId) throws IOException, TException {
    ClientFileInfo fileInfo = mMasterClient.getClientFileInfoById(fileId);

    if (fileInfo.getDependencyId() != -1) {
      synchronized (mDependencyLock) {
        mUncheckpointFiles.add(fileId);
        if (!mDepIdToFiles.containsKey(fileInfo.getDependencyId())) {
          mDepIdToFiles.put(fileInfo.getDependencyId(), new HashSet<Integer>());
        }
        mDepIdToFiles.get(fileInfo.getDependencyId()).add(fileId);
      }
      return true;
    }

    return false;
  }
Example #13
0
 /**
  * Notify the worker the block is cached.
  *
  * <p>This call is called remotely from {@link tachyon.client.TachyonFS#cacheBlock(long)} which is
  * only ever called from {@link tachyon.client.BlockOutStream#close()} (though its a public api so
  * anyone could call it). There are a few interesting preconditions for this to work.
  *
  * <p>1) Client process writes to files locally under a tachyon defined temp directory. 2) Worker
  * process is on the same node as the client 3) Client is talking to the local worker directly
  *
  * <p>If all conditions are true, then and only then can this method ever be called; all
  * operations work on local files.
  *
  * @param userId The user id of the client who send the notification
  * @param blockId The id of the block
  * @throws FileDoesNotExistException
  * @throws SuspectedFileSizeException
  * @throws BlockInfoException
  * @throws TException
  */
 public void cacheBlock(long userId, long blockId)
     throws FileDoesNotExistException, SuspectedFileSizeException, BlockInfoException, TException {
   File srcFile = new File(CommonUtils.concat(getUserTempFolder(userId), blockId));
   File dstFile = new File(CommonUtils.concat(mLocalDataFolder, blockId));
   long fileSizeBytes = srcFile.length();
   if (!srcFile.exists()) {
     throw new FileDoesNotExistException("File " + srcFile + " does not exist.");
   }
   if (!srcFile.renameTo(dstFile)) {
     throw new FileDoesNotExistException(
         "Failed to rename file from " + srcFile.getPath() + " to " + dstFile.getPath());
   }
   addBlockId(blockId, fileSizeBytes);
   mUsers.addOwnBytes(userId, -fileSizeBytes);
   mMasterClient.worker_cacheBlock(
       mWorkerId, mWorkerSpaceCounter.getUsedBytes(), blockId, fileSizeBytes);
   LOG.info(userId + " " + dstFile);
 }
Example #14
0
  /**
   * Notify the worker that the checkpoint file of the file has been added.
   *
   * @param fileId The id of the checkpointed file
   * @throws IOException
   */
  public synchronized void addCheckpoint(int fileId) throws IOException {
    mustConnect();

    try {
      mClient.addCheckpoint(mMasterClient.getUserId(), fileId);
    } catch (FileDoesNotExistException e) {
      throw new IOException(e);
    } catch (SuspectedFileSizeException e) {
      throw new IOException(e);
    } catch (FailedToCheckpointException e) {
      throw new IOException(e);
    } catch (BlockInfoException e) {
      throw new IOException(e);
    } catch (TException e) {
      mConnected = false;
      throw new IOException(e);
    }
  }
Example #15
0
 /**
  * Add the checkpoint information of a file. The information is from the user <code>userId</code>.
  *
  * <p>This method is normally triggered from {@link tachyon.client.FileOutStream#close()} if and
  * only if {@link tachyon.client.WriteType#isThrough()} is true. The current implementation of
  * checkpointing is that through {@link tachyon.client.WriteType} operations write to {@link
  * tachyon.UnderFileSystem} on the client's write path, but under a user temp directory (temp
  * directory is defined in the worker as {@link #getUserUnderfsTempFolder(long)}).
  *
  * @param userId The user id of the client who send the notification
  * @param fileId The id of the checkpointed file
  * @throws FileDoesNotExistException
  * @throws SuspectedFileSizeException
  * @throws FailedToCheckpointException
  * @throws BlockInfoException
  * @throws TException
  */
 public void addCheckpoint(long userId, int fileId)
     throws FileDoesNotExistException, SuspectedFileSizeException, FailedToCheckpointException,
         BlockInfoException, TException {
   // TODO This part need to be changed.
   String srcPath = CommonUtils.concat(getUserUnderfsTempFolder(userId), fileId);
   String dstPath = CommonUtils.concat(COMMON_CONF.UNDERFS_DATA_FOLDER, fileId);
   try {
     if (!mUnderFs.rename(srcPath, dstPath)) {
       throw new FailedToCheckpointException("Failed to rename " + srcPath + " to " + dstPath);
     }
   } catch (IOException e) {
     throw new FailedToCheckpointException("Failed to rename " + srcPath + " to " + dstPath);
   }
   long fileSize;
   try {
     fileSize = mUnderFs.getFileSize(dstPath);
   } catch (IOException e) {
     throw new FailedToCheckpointException("Failed to getFileSize " + dstPath);
   }
   mMasterClient.addCheckpoint(mWorkerId, fileId, fileSize, dstPath);
 }
Example #16
0
 /**
  * Set a new MasterClient and connect to it.
  *
  * @throws TException
  */
 public void resetMasterClient() throws TException {
   MasterClient tMasterClient = new MasterClient(mMasterAddress);
   tMasterClient.connect();
   mMasterClient = tMasterClient;
 }
Example #17
0
 /** Disconnect to the Master. */
 public void stop() {
   mMasterClient.shutdown();
 }
Example #18
0
 private void addFoundBlock(long blockId, long length)
     throws FileDoesNotExistException, SuspectedFileSizeException, BlockInfoException, TException {
   addBlockId(blockId, length);
   mMasterClient.worker_cacheBlock(mWorkerId, mWorkerSpaceCounter.getUsedBytes(), blockId, length);
 }