Exemple #1
0
 /**
  * Creates a {@link BlockInfo} from a given {@link MasterBlockInfo}, by populating worker
  * locations.
  *
  * <p>Locations are ordered by the ordinal of their tier alias in {@code mGlobalStorageTierAssoc}.
  * A location whose worker id is not found in {@code mWorkers} is left out of the result.
  *
  * @param masterBlockInfo the {@link MasterBlockInfo}
  * @return a {@link BlockInfo} carrying the block id, the length and the resolved worker locations
  */
 @GuardedBy("masterBlockInfo")
 private BlockInfo generateBlockInfo(MasterBlockInfo masterBlockInfo) {
   // "Join" to get all the addresses of the workers.
   List<BlockLocation> locations = new ArrayList<>();
   List<MasterBlockLocation> blockLocations = masterBlockInfo.getBlockLocations();
   // Sort the block locations by their alias ordinal in the master storage tier mapping
   Collections.sort(
       blockLocations,
       new Comparator<MasterBlockLocation>() {
         @Override
         public int compare(MasterBlockLocation o1, MasterBlockLocation o2) {
           // Integer.compare instead of subtraction: subtraction can overflow and thereby
           // violate the comparator contract.
           return Integer.compare(
               mGlobalStorageTierAssoc.getOrdinal(o1.getTierAlias()),
               mGlobalStorageTierAssoc.getOrdinal(o2.getTierAlias()));
         }
       });
   for (MasterBlockLocation masterBlockLocation : blockLocations) {
     MasterWorkerInfo workerInfo =
         mWorkers.getFirstByField(ID_INDEX, masterBlockLocation.getWorkerId());
     if (workerInfo != null) {
       // worker metadata is intentionally not locked here because:
       // - it would be an incorrect order (correct order is lock worker first, then block)
       // - only uses getters of final variables
       locations.add(
           new BlockLocation()
               .setWorkerId(masterBlockLocation.getWorkerId())
               .setWorkerAddress(workerInfo.getWorkerAddress())
               .setTierAlias(masterBlockLocation.getTierAlias()));
     }
   }
   return new BlockInfo()
       .setBlockId(masterBlockInfo.getBlockId())
       .setLength(masterBlockInfo.getLength())
       .setLocations(locations);
 }
Exemple #2
0
 /**
  * Reconciles the worker and block metadata for blocks that were removed from a worker.
  *
  * <p>For every removed id the block is dropped from the worker. When the master still has
  * metadata for the block, the worker is also dropped from the block's locations, and the block
  * is recorded in {@code mLostBlocks} once it has no locations left.
  *
  * @param workerInfo The worker metadata object
  * @param removedBlockIds A list of block ids removed from the worker
  */
 @GuardedBy("workerInfo")
 private void processWorkerRemovedBlocks(
     MasterWorkerInfo workerInfo, Collection<Long> removedBlockIds) {
   for (long removedBlockId : removedBlockIds) {
     MasterBlockInfo blockMeta = mBlocks.get(removedBlockId);
     // TODO(calvin): Investigate if this branching logic can be simplified.
     if (blockMeta != null) {
       synchronized (blockMeta) {
         LOG.info("Block {} is removed on worker {}.", removedBlockId, workerInfo.getId());
         workerInfo.removeBlock(blockMeta.getBlockId());
         blockMeta.removeWorker(workerInfo.getId());
         boolean noLocationLeft = blockMeta.getNumLocations() == 0;
         if (noLocationLeft) {
           mLostBlocks.add(removedBlockId);
         }
       }
     } else {
       // The master holds no metadata for this block; no warning is logged for this case.
       // TODO(pfxuan): [ALLUXIO-1804] should find a better way to handle the removed blocks.
       // Ideally, the delete/free I/O flow should never reach this point. Because Master may
       // update the block metadata only after receiving the acknowledgement from Workers.
       // Only the worker-side record is cleaned up, then the remaining ids are processed.
       workerInfo.removeBlock(removedBlockId);
     }
   }
 }
Exemple #3
0
  /**
   * Handles the registration of a worker with the master and refreshes the related metadata.
   *
   * <p>While holding the worker's lock, the worker's last-updated time is refreshed, the worker is
   * registered with its reported tiers and blocks, and the block ids returned by that registration
   * are processed as removed blocks before the reported blocks are processed as added blocks.
   *
   * @param workerId the worker id of the worker registering
   * @param storageTiers a list of storage tier aliases in order of their position in the worker's
   *     hierarchy
   * @param totalBytesOnTiers a mapping from storage tier alias to total bytes
   * @param usedBytesOnTiers a mapping from storage tier alias to the used bytes
   * @param currentBlocksOnTiers a mapping from storage tier alias to a list of blocks
   * @throws NoWorkerException if workerId cannot be found
   */
  public void workerRegister(
      long workerId,
      List<String> storageTiers,
      Map<String, Long> totalBytesOnTiers,
      Map<String, Long> usedBytesOnTiers,
      Map<String, List<Long>> currentBlocksOnTiers)
      throws NoWorkerException {
    MasterWorkerInfo registrant = mWorkers.getFirstByField(ID_INDEX, workerId);
    if (registrant == null) {
      String notFound = ExceptionMessage.NO_WORKER_FOUND.getMessage(workerId);
      throw new NoWorkerException(notFound);
    }

    // Flatten the per-tier block lists into a single set of block ids held by this worker.
    HashSet<Long> reportedBlockIds = new HashSet<>();
    for (List<Long> tierBlockIds : currentBlocksOnTiers.values()) {
      reportedBlockIds.addAll(tierBlockIds);
    }

    synchronized (registrant) {
      registrant.updateLastUpdatedTimeMs();
      // The registration call yields the block ids to treat as removed from this worker.
      Set<Long> staleBlockIds =
          registrant.register(
              mGlobalStorageTierAssoc,
              storageTiers,
              totalBytesOnTiers,
              usedBytesOnTiers,
              reportedBlockIds);
      processWorkerRemovedBlocks(registrant, staleBlockIds);
      processWorkerAddedBlocks(registrant, currentBlocksOnTiers);
    }

    LOG.info("registerWorker(): {}", registrant);
  }
Exemple #4
0
  /**
   * Processes a periodic heartbeat sent by a worker to the master.
   *
   * <p>An unknown worker id is answered with a {@code Register} command. Otherwise the reported
   * removed and added blocks are applied, the used bytes and last-updated time are refreshed, and
   * the reply is either {@code Free} with the worker's to-remove blocks or {@code Nothing} when
   * there are none.
   *
   * @param workerId the worker id
   * @param usedBytesOnTiers a mapping from tier alias to the used bytes
   * @param removedBlockIds a list of block ids removed from this worker
   * @param addedBlocksOnTiers a mapping from tier alias to the added blocks
   * @return an optional command for the worker to execute
   */
  public Command workerHeartbeat(
      long workerId,
      Map<String, Long> usedBytesOnTiers,
      List<Long> removedBlockIds,
      Map<String, List<Long>> addedBlocksOnTiers) {
    MasterWorkerInfo sender = mWorkers.getFirstByField(ID_INDEX, workerId);
    if (sender == null) {
      LOG.warn("Could not find worker id: {} for heartbeat.", workerId);
      return new Command(CommandType.Register, new ArrayList<Long>());
    }

    synchronized (sender) {
      // Technically, the worker should be confirmed to still be in the data structure, since lost
      // worker detection can remove it. That race is intentionally ignored: the worker will just
      // re-register regardless.
      processWorkerRemovedBlocks(sender, removedBlockIds);
      processWorkerAddedBlocks(sender, addedBlocksOnTiers);

      sender.updateUsedBytes(usedBytesOnTiers);
      sender.updateLastUpdatedTimeMs();

      List<Long> pendingFrees = sender.getToRemoveBlocks();
      return pendingFrees.isEmpty()
          ? new Command(CommandType.Nothing, new ArrayList<Long>())
          : new Command(CommandType.Free, pendingFrees);
    }
  }
Exemple #5
0
  /**
   * Looks up, or allocates, the worker id for the given worker address.
   *
   * <p>An address already present in {@code mWorkers} keeps its id. An address found in
   * {@code mLostWorkers} is moved back into {@code mWorkers} and keeps its old id. Any other
   * address gets a newly generated id and a new entry in {@code mWorkers}.
   *
   * @param workerNetAddress the worker {@link WorkerNetAddress}
   * @return the worker id for this worker
   */
  public long getWorkerId(WorkerNetAddress workerNetAddress) {
    // TODO(gpang): This NetAddress cloned in case thrift re-uses the object. Does thrift re-use it?
    MasterWorkerInfo activeWorker = mWorkers.getFirstByField(ADDRESS_INDEX, workerNetAddress);
    if (activeWorker != null) {
      // The address is already mapped to a worker id; hand the same id back.
      long knownId = activeWorker.getId();
      LOG.warn("The worker {} already exists as id {}.", workerNetAddress, knownId);
      return knownId;
    }

    MasterWorkerInfo returningWorker =
        mLostWorkers.getFirstByField(ADDRESS_INDEX, workerNetAddress);
    if (returningWorker == null) {
      // Neither active nor lost: generate a new worker id.
      long freshId = mNextWorkerId.getAndIncrement();
      mWorkers.add(new MasterWorkerInfo(freshId, workerNetAddress));

      LOG.info("getWorkerId(): WorkerNetAddress: {} id: {}", workerNetAddress, freshId);
      return freshId;
    }

    // This is one of the lost workers coming back.
    synchronized (returningWorker) {
      final long previousId = returningWorker.getId();
      LOG.warn("A lost worker {} has requested its old id {}.", workerNetAddress, previousId);

      // Refresh the timestamp before the worker is considered an active worker again.
      returningWorker.updateLastUpdatedTimeMs();
      mWorkers.add(returningWorker);
      mLostWorkers.remove(returningWorker);
      return previousId;
    }
  }
Exemple #6
0
 /**
  * Collects client-facing info for every worker currently in {@code mLostWorkers}.
  *
  * @return a set of worker info
  */
 public Set<WorkerInfo> getLostWorkersInfo() {
   Set<WorkerInfo> lostInfos = new HashSet<>(mLostWorkers.size());
   for (MasterWorkerInfo lostWorker : mLostWorkers) {
     // Each worker is locked while its client-facing info is generated.
     synchronized (lostWorker) {
       WorkerInfo info = lostWorker.generateClientWorkerInfo();
       lostInfos.add(info);
     }
   }
   return lostInfos;
 }
Exemple #7
0
 /**
  * Sums the used bytes reported by every worker in {@code mWorkers}.
  *
  * @return the total used bytes on all tiers, on all workers of Alluxio
  */
 public long getUsedBytes() {
   long totalUsedBytes = 0;
   for (MasterWorkerInfo activeWorker : mWorkers) {
     // Each worker is locked while its used bytes are read.
     synchronized (activeWorker) {
       totalUsedBytes = totalUsedBytes + activeWorker.getUsedBytes();
     }
   }
   return totalUsedBytes;
 }
Exemple #8
0
 /**
  * Builds the client-facing info for every worker in {@code mWorkers}.
  *
  * @return a list of {@link WorkerInfo} objects representing the workers in Alluxio
  */
 public List<WorkerInfo> getWorkerInfoList() {
   List<WorkerInfo> infos = new ArrayList<>(mWorkers.size());
   for (MasterWorkerInfo activeWorker : mWorkers) {
     // Each worker is locked while its client-facing info is generated.
     synchronized (activeWorker) {
       WorkerInfo info = activeWorker.generateClientWorkerInfo();
       infos.add(info);
     }
   }
   return infos;
 }
Exemple #9
0
 /**
  * Aggregates, per storage tier alias, the used bytes reported by every worker in
  * {@code mWorkers}.
  *
  * @return the used bytes on each storage tier
  */
 public Map<String, Long> getUsedBytesOnTiers() {
   Map<String, Long> totalsByTier = new HashMap<>();
   for (MasterWorkerInfo activeWorker : mWorkers) {
     // Each worker is locked while its per-tier usage is read.
     synchronized (activeWorker) {
       for (Map.Entry<String, Long> tierUsage : activeWorker.getUsedBytesOnTiers().entrySet()) {
         String tierAlias = tierUsage.getKey();
         Long runningTotal = totalsByTier.get(tierAlias);
         // A tier seen for the first time starts from zero.
         long base = (runningTotal == null) ? 0L : runningTotal;
         totalsByTier.put(tierAlias, base + tierUsage.getValue());
       }
     }
   }
   return totalsByTier;
 }
Exemple #10
0
 /**
  * Scans {@code mWorkers} for workers whose last update is older than the configured timeout.
  * Each such worker is moved to {@code mLostWorkers} and all of its blocks are processed as
  * removed.
  */
 @Override
 public void heartbeat() {
   int timeoutMs = Configuration.getInt(Constants.MASTER_WORKER_TIMEOUT_MS);
   for (MasterWorkerInfo worker : mWorkers) {
     synchronized (worker) {
       final long silentForMs = CommonUtils.getCurrentMs() - worker.getLastUpdatedTimeMs();
       if (silentForMs <= timeoutMs) {
         // Still within the timeout; nothing to do for this worker.
         continue;
       }
       LOG.error(
           "The worker {} timed out after {}ms without a heartbeat!", worker, silentForMs);
       mLostWorkers.add(worker);
       mWorkers.remove(worker);
       processWorkerRemovedBlocks(worker, worker.getBlocks());
     }
   }
 }
Exemple #11
0
 /**
  * Reconciles the worker and block metadata for blocks that were added to a worker.
  *
  * <p>Block ids without metadata in {@code mBlocks} are skipped with a warning. For all others the
  * block is added to the worker, the worker is added to the block under the entry's tier alias,
  * and the block id is removed from {@code mLostBlocks}.
  *
  * @param workerInfo The worker metadata object
  * @param addedBlockIds A mapping from storage tier alias to a list of block ids added
  */
 @GuardedBy("workerInfo")
 private void processWorkerAddedBlocks(
     MasterWorkerInfo workerInfo, Map<String, List<Long>> addedBlockIds) {
   for (Map.Entry<String, List<Long>> tierEntry : addedBlockIds.entrySet()) {
     for (long blockId : tierEntry.getValue()) {
       MasterBlockInfo blockMeta = mBlocks.get(blockId);
       if (blockMeta == null) {
         LOG.warn("Failed to register workerId: {} to blockId: {}", workerInfo.getId(), blockId);
         continue;
       }
       synchronized (blockMeta) {
         workerInfo.addBlock(blockId);
         blockMeta.addWorker(workerInfo.getId(), tierEntry.getKey());
         mLostBlocks.remove(blockId);
       }
     }
   }
 }
Exemple #12
0
  /**
   * Removes blocks from workers.
   *
   * <p>Block ids without metadata in {@code mBlocks} are skipped. For the others, the workers
   * holding the block are captured under the block's lock, and each of those workers that is
   * still in {@code mWorkers} is then updated via {@code updateToRemovedBlock(true, blockId)}.
   *
   * @param blockIds a list of block ids to remove from Alluxio space
   * @param delete whether to delete blocks metadata in Master
   */
  public void removeBlocks(List<Long> blockIds, boolean delete) {
    for (long blockId : blockIds) {
      MasterBlockInfo blockMeta = mBlocks.get(blockId);
      if (blockMeta == null) {
        continue;
      }

      HashSet<Long> holderIds = new HashSet<>();
      synchronized (blockMeta) {
        // Technically, the block should be confirmed to still be in the data structure, because
        // a concurrent removeBlock call can remove it. That race is intentionally ignored, since
        // deleting the same block again is a noop.
        holderIds.addAll(blockMeta.getWorkers());
        // delete == true: drop the block metadata.
        // delete == false (free): keep the block metadata; mLostBlocks will be changed in
        // processWorkerRemovedBlocks.
        if (delete) {
          // The id must leave mLostBlocks together with the metadata, otherwise it would become
          // a dangling index once the metadata is gone.
          mLostBlocks.remove(blockId);
          mBlocks.remove(blockId);
        }
      }

      // Done outside of the block lock. This does not have to be synchronized with the block
      // metadata, since it is essentially an asynchronous signal to the worker to remove the block.
      for (long holderId : holderIds) {
        MasterWorkerInfo holder = mWorkers.getFirstByField(ID_INDEX, holderId);
        if (holder == null) {
          continue;
        }
        synchronized (holder) {
          holder.updateToRemovedBlock(true, blockId);
        }
      }
    }
  }
Exemple #13
0
 /**
  * Supplies the worker's address as the value of this field.
  *
  * @param o the worker metadata object to read from
  * @return the value returned by {@code o.getWorkerAddress()}
  */
 @Override
 public Object getFieldValue(MasterWorkerInfo o) {
   final Object workerAddress = o.getWorkerAddress();
   return workerAddress;
 }
Exemple #14
0
  /**
   * Records that a block has been committed on a specific worker.
   *
   * <p>The worker lock is taken first and the block lock second. If the block has no metadata yet,
   * new metadata is created and inserted; if its length was previously
   * {@code Constants.UNKNOWN_SIZE}, the length is updated. In both of those cases a journal entry
   * is appended. The worker is then added as a location of the block, the block is removed from
   * {@code mLostBlocks}, and the worker's block list, used bytes and last-updated time are
   * updated.
   *
   * @param workerId the id of the worker committing the block
   * @param usedBytesOnTier the used bytes to record for the worker on the given tier
   * @param tierAlias the alias of the storage tier the block is committed to
   * @param blockId the id of the committed block
   * @param length the length of the committed block
   * @throws NoWorkerException if no worker with the given id is found in {@code mWorkers}
   */
  // TODO(binfan): check the logic is correct or not when commitBlock is a retry
  public void commitBlock(
      long workerId, long usedBytesOnTier, String tierAlias, long blockId, long length)
      throws NoWorkerException {
    LOG.debug(
        "Commit block from workerId: {}, usedBytesOnTier: {}, blockId: {}, length: {}",
        workerId,
        usedBytesOnTier,
        blockId,
        length);

    // Stays at the invalid sentinel unless a journal entry is appended below.
    long counter = AsyncJournalWriter.INVALID_FLUSH_COUNTER;

    MasterWorkerInfo worker = mWorkers.getFirstByField(ID_INDEX, workerId);
    // TODO(peis): Check lost workers as well.
    if (worker == null) {
      throw new NoWorkerException(ExceptionMessage.NO_WORKER_FOUND.getMessage(workerId));
    }

    // Lock the worker metadata first.
    synchronized (worker) {
      // Loop until block metadata is successfully locked.
      for (; ; ) {
        boolean newBlock = false;
        MasterBlockInfo block = mBlocks.get(blockId);
        if (block == null) {
          // The block metadata doesn't exist yet.
          block = new MasterBlockInfo(blockId, length);
          newBlock = true;
        }

        // Lock the block metadata.
        synchronized (block) {
          boolean writeJournal = false;
          if (newBlock) {
            // The new metadata object is locked before it is published via putIfAbsent.
            if (mBlocks.putIfAbsent(blockId, block) != null) {
              // Another thread already inserted the metadata for this block, so start loop over.
              continue;
            }
            // Successfully added the new block metadata. Append a journal entry for the new
            // metadata.
            writeJournal = true;
          } else if (block.getLength() != length && block.getLength() == Constants.UNKNOWN_SIZE) {
            // The block size was previously unknown. Update the block size with the committed
            // size, and append a journal entry.
            block.updateLength(length);
            writeJournal = true;
          }
          if (writeJournal) {
            BlockInfoEntry blockInfo =
                BlockInfoEntry.newBuilder().setBlockId(blockId).setLength(length).build();
            counter = appendJournalEntry(JournalEntry.newBuilder().setBlockInfo(blockInfo).build());
          }
          // At this point, both the worker and the block metadata are locked.

          // Update the block metadata with the new worker location.
          block.addWorker(workerId, tierAlias);
          // This worker has this block, so it is no longer lost.
          mLostBlocks.remove(blockId);

          // Update the worker information for this new block.
          // TODO(binfan): when retry commitBlock on master is expected, make sure metrics are not
          // double counted.
          worker.addBlock(blockId);
          worker.updateUsedBytes(tierAlias, usedBytesOnTier);
          worker.updateLastUpdatedTimeMs();
        }
        // The metadata was updated under both locks; leave the retry loop.
        break;
      }
    }

    // Runs after both locks are released. NOTE(review): presumably waits for the appended journal
    // entry (if any) to be flushed - confirm against waitForJournalFlush.
    waitForJournalFlush(counter);
  }