Example #1
  /**
   * Updates metadata when a worker periodically heartbeats with the master.
   *
   * @param workerId the worker id
   * @param usedBytesOnTiers a mapping from tier alias to the used bytes
   * @param removedBlockIds a list of block ids removed from this worker
   * @param addedBlocksOnTiers a mapping from tier alias to the added blocks
   * @return an optional command for the worker to execute
   */
  public Command workerHeartbeat(
      long workerId,
      Map<String, Long> usedBytesOnTiers,
      List<Long> removedBlockIds,
      Map<String, List<Long>> addedBlocksOnTiers) {
    MasterWorkerInfo worker = mWorkers.getFirstByField(ID_INDEX, workerId);
    if (worker == null) {
      LOG.warn("Could not find worker id: {} for heartbeat.", workerId);
      return new Command(CommandType.Register, new ArrayList<Long>());
    }

    synchronized (worker) {
      // Technically, 'worker' should be confirmed to still be in the data structure. Lost worker
      // detection can remove it. However, we are intentionally ignoring this race, since the worker
      // will just re-register regardless.
      processWorkerRemovedBlocks(worker, removedBlockIds);
      processWorkerAddedBlocks(worker, addedBlocksOnTiers);

      worker.updateUsedBytes(usedBytesOnTiers);
      worker.updateLastUpdatedTimeMs();

      List<Long> toRemoveBlocks = worker.getToRemoveBlocks();
      if (toRemoveBlocks.isEmpty()) {
        return new Command(CommandType.Nothing, new ArrayList<Long>());
      }
      return new Command(CommandType.Free, toRemoveBlocks);
    }
  }
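A hedged caller sketch for the heartbeat path above, assuming java.util imports, a blockMaster reference to the class that owns workerHeartbeat, and a Command#getCommandType() accessor; those names are illustrative, not confirmed Alluxio API.

  // Hypothetical caller sketch (not Alluxio code): builds the heartbeat arguments and
  // reacts to the returned Command. 'blockMaster' and Command#getCommandType() are assumed.
  void heartbeatOnce(long workerId) {
    Map<String, Long> usedBytesOnTiers = new HashMap<>();
    usedBytesOnTiers.put("MEM", 64L * 1024 * 1024);            // bytes used on the MEM tier
    List<Long> removedBlockIds = new ArrayList<>();            // nothing evicted since last heartbeat
    Map<String, List<Long>> addedBlocksOnTiers = new HashMap<>();
    addedBlocksOnTiers.put("MEM", Arrays.asList(100L, 101L));  // blocks newly cached on MEM

    Command cmd = blockMaster.workerHeartbeat(
        workerId, usedBytesOnTiers, removedBlockIds, addedBlocksOnTiers);
    if (cmd.getCommandType() == CommandType.Free) {
      // The master asked the worker to free the listed blocks from local storage.
    }
  }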
Example #2
  // TODO(binfan): check whether the logic is correct when commitBlock is a retry
  public void commitBlock(
      long workerId, long usedBytesOnTier, String tierAlias, long blockId, long length)
      throws NoWorkerException {
    LOG.debug(
        "Commit block from workerId: {}, usedBytesOnTier: {}, blockId: {}, length: {}",
        workerId,
        usedBytesOnTier,
        blockId,
        length);

    long counter = AsyncJournalWriter.INVALID_FLUSH_COUNTER;

    MasterWorkerInfo worker = mWorkers.getFirstByField(ID_INDEX, workerId);
    // TODO(peis): Check lost workers as well.
    if (worker == null) {
      throw new NoWorkerException(ExceptionMessage.NO_WORKER_FOUND.getMessage(workerId));
    }

    // Lock the worker metadata first.
    synchronized (worker) {
      // Loop until block metadata is successfully locked.
      for (;;) {
        boolean newBlock = false;
        MasterBlockInfo block = mBlocks.get(blockId);
        if (block == null) {
          // The block metadata doesn't exist yet.
          block = new MasterBlockInfo(blockId, length);
          newBlock = true;
        }

        // Lock the block metadata.
        synchronized (block) {
          boolean writeJournal = false;
          if (newBlock) {
            if (mBlocks.putIfAbsent(blockId, block) != null) {
              // Another thread already inserted the metadata for this block, so start loop over.
              continue;
            }
            // Successfully added the new block metadata. Append a journal entry for the new
            // metadata.
            writeJournal = true;
          } else if (block.getLength() != length && block.getLength() == Constants.UNKNOWN_SIZE) {
            // The block size was previously unknown. Update the block size with the committed
            // size, and append a journal entry.
            block.updateLength(length);
            writeJournal = true;
          }
          if (writeJournal) {
            BlockInfoEntry blockInfo =
                BlockInfoEntry.newBuilder().setBlockId(blockId).setLength(length).build();
            counter = appendJournalEntry(JournalEntry.newBuilder().setBlockInfo(blockInfo).build());
          }
          // At this point, both the worker and the block metadata are locked.

          // Update the block metadata with the new worker location.
          block.addWorker(workerId, tierAlias);
          // This worker has this block, so it is no longer lost.
          mLostBlocks.remove(blockId);

          // Update the worker information for this new block.
          // TODO(binfan): when retry commitBlock on master is expected, make sure metrics are not
          // double counted.
          worker.addBlock(blockId);
          worker.updateUsedBytes(tierAlias, usedBytesOnTier);
          worker.updateLastUpdatedTimeMs();
        }
        break;
      }
    }

    waitForJournalFlush(counter);
  }
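The for loop above exists because two threads can race to create metadata for the same blockId: the thread that loses the putIfAbsent race must discard its instance, re-read the map, and lock the instance that won. A minimal, self-contained sketch of that insert-or-retry pattern, with an illustrative BlockMeta class standing in for MasterBlockInfo (none of these names are Alluxio API):

import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;

/** Illustrative sketch of the insert-or-retry pattern used in commitBlock. */
final class CommitSketch {
  /** Minimal stand-in for MasterBlockInfo. */
  static final class BlockMeta {
    final long mBlockId;
    long mLength;

    BlockMeta(long blockId, long length) {
      mBlockId = blockId;
      mLength = length;
    }
  }

  private final ConcurrentMap<Long, BlockMeta> mBlocks = new ConcurrentHashMap<>();

  void commit(long blockId, long length) {
    for (;;) {
      boolean newBlock = false;
      BlockMeta block = mBlocks.get(blockId);
      if (block == null) {
        // No metadata yet; create a candidate and try to publish it below.
        block = new BlockMeta(blockId, length);
        newBlock = true;
      }
      synchronized (block) {
        if (newBlock && mBlocks.putIfAbsent(blockId, block) != null) {
          // Another thread published metadata first; retry and lock its instance instead.
          continue;
        }
        // 'block' is now the canonical instance and its lock is held; safe to update.
        block.mLength = length;
      }
      break;
    }
  }
}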