/** * Creates a {@link BlockInfo} form a given {@link MasterBlockInfo}, by populating worker * locations. * * @param masterBlockInfo the {@link MasterBlockInfo} * @return a {@link BlockInfo} from a {@link MasterBlockInfo}. Populates worker locations */ @GuardedBy("masterBlockInfo") private BlockInfo generateBlockInfo(MasterBlockInfo masterBlockInfo) { // "Join" to get all the addresses of the workers. List<BlockLocation> locations = new ArrayList<>(); List<MasterBlockLocation> blockLocations = masterBlockInfo.getBlockLocations(); // Sort the block locations by their alias ordinal in the master storage tier mapping Collections.sort( blockLocations, new Comparator<MasterBlockLocation>() { @Override public int compare(MasterBlockLocation o1, MasterBlockLocation o2) { return mGlobalStorageTierAssoc.getOrdinal(o1.getTierAlias()) - mGlobalStorageTierAssoc.getOrdinal(o2.getTierAlias()); } }); for (MasterBlockLocation masterBlockLocation : blockLocations) { MasterWorkerInfo workerInfo = mWorkers.getFirstByField(ID_INDEX, masterBlockLocation.getWorkerId()); if (workerInfo != null) { // worker metadata is intentionally not locked here because: // - it would be an incorrect order (correct order is lock worker first, then block) // - only uses getters of final variables locations.add( new BlockLocation() .setWorkerId(masterBlockLocation.getWorkerId()) .setWorkerAddress(workerInfo.getWorkerAddress()) .setTierAlias(masterBlockLocation.getTierAlias())); } } return new BlockInfo() .setBlockId(masterBlockInfo.getBlockId()) .setLength(masterBlockInfo.getLength()) .setLocations(locations); }
/** * Updates the worker and block metadata for blocks removed from a worker. * * @param workerInfo The worker metadata object * @param removedBlockIds A list of block ids removed from the worker */ @GuardedBy("workerInfo") private void processWorkerRemovedBlocks( MasterWorkerInfo workerInfo, Collection<Long> removedBlockIds) { for (long removedBlockId : removedBlockIds) { MasterBlockInfo block = mBlocks.get(removedBlockId); // TODO(calvin): Investigate if this branching logic can be simplified. if (block == null) { // LOG.warn("Worker {} informs the removed block {}, but block metadata does not exist" // + " on Master!", workerInfo.getId(), removedBlockId); // TODO(pfxuan): [ALLUXIO-1804] should find a better way to handle the removed blocks. // Ideally, the delete/free I/O flow should never reach this point. Because Master may // update the block metadata only after receiving the acknowledgement from Workers. workerInfo.removeBlock(removedBlockId); // Continue to remove the remaining blocks. continue; } synchronized (block) { LOG.info("Block {} is removed on worker {}.", removedBlockId, workerInfo.getId()); workerInfo.removeBlock(block.getBlockId()); block.removeWorker(workerInfo.getId()); if (block.getNumLocations() == 0) { mLostBlocks.add(removedBlockId); } } } }
/** * Updates metadata when a worker registers with the master. * * @param workerId the worker id of the worker registering * @param storageTiers a list of storage tier aliases in order of their position in the worker's * hierarchy * @param totalBytesOnTiers a mapping from storage tier alias to total bytes * @param usedBytesOnTiers a mapping from storage tier alias to the used byes * @param currentBlocksOnTiers a mapping from storage tier alias to a list of blocks * @throws NoWorkerException if workerId cannot be found */ public void workerRegister( long workerId, List<String> storageTiers, Map<String, Long> totalBytesOnTiers, Map<String, Long> usedBytesOnTiers, Map<String, List<Long>> currentBlocksOnTiers) throws NoWorkerException { MasterWorkerInfo worker = mWorkers.getFirstByField(ID_INDEX, workerId); if (worker == null) { throw new NoWorkerException(ExceptionMessage.NO_WORKER_FOUND.getMessage(workerId)); } // Gather all blocks on this worker. HashSet<Long> blocks = new HashSet<>(); for (List<Long> blockIds : currentBlocksOnTiers.values()) { blocks.addAll(blockIds); } synchronized (worker) { worker.updateLastUpdatedTimeMs(); // Detect any lost blocks on this worker. Set<Long> removedBlocks = worker.register( mGlobalStorageTierAssoc, storageTiers, totalBytesOnTiers, usedBytesOnTiers, blocks); processWorkerRemovedBlocks(worker, removedBlocks); processWorkerAddedBlocks(worker, currentBlocksOnTiers); } LOG.info("registerWorker(): {}", worker); }
/** * Updates metadata when a worker periodically heartbeats with the master. * * @param workerId the worker id * @param usedBytesOnTiers a mapping from tier alias to the used bytes * @param removedBlockIds a list of block ids removed from this worker * @param addedBlocksOnTiers a mapping from tier alias to the added blocks * @return an optional command for the worker to execute */ public Command workerHeartbeat( long workerId, Map<String, Long> usedBytesOnTiers, List<Long> removedBlockIds, Map<String, List<Long>> addedBlocksOnTiers) { MasterWorkerInfo worker = mWorkers.getFirstByField(ID_INDEX, workerId); if (worker == null) { LOG.warn("Could not find worker id: {} for heartbeat.", workerId); return new Command(CommandType.Register, new ArrayList<Long>()); } synchronized (worker) { // Technically, 'worker' should be confirmed to still be in the data structure. Lost worker // detection can remove it. However, we are intentionally ignoring this race, since the worker // will just re-register regardless. processWorkerRemovedBlocks(worker, removedBlockIds); processWorkerAddedBlocks(worker, addedBlocksOnTiers); worker.updateUsedBytes(usedBytesOnTiers); worker.updateLastUpdatedTimeMs(); List<Long> toRemoveBlocks = worker.getToRemoveBlocks(); if (toRemoveBlocks.isEmpty()) { return new Command(CommandType.Nothing, new ArrayList<Long>()); } return new Command(CommandType.Free, toRemoveBlocks); } }
/** * Returns a worker id for the given worker. * * @param workerNetAddress the worker {@link WorkerNetAddress} * @return the worker id for this worker */ public long getWorkerId(WorkerNetAddress workerNetAddress) { // TODO(gpang): This NetAddress cloned in case thrift re-uses the object. Does thrift re-use it? MasterWorkerInfo existingWorker = mWorkers.getFirstByField(ADDRESS_INDEX, workerNetAddress); if (existingWorker != null) { // This worker address is already mapped to a worker id. long oldWorkerId = existingWorker.getId(); LOG.warn("The worker {} already exists as id {}.", workerNetAddress, oldWorkerId); return oldWorkerId; } MasterWorkerInfo lostWorker = mLostWorkers.getFirstByField(ADDRESS_INDEX, workerNetAddress); if (lostWorker != null) { // this is one of the lost workers synchronized (lostWorker) { final long lostWorkerId = lostWorker.getId(); LOG.warn("A lost worker {} has requested its old id {}.", workerNetAddress, lostWorkerId); // Update the timestamp of the worker before it is considered an active worker. lostWorker.updateLastUpdatedTimeMs(); mWorkers.add(lostWorker); mLostWorkers.remove(lostWorker); return lostWorkerId; } } // Generate a new worker id. long workerId = mNextWorkerId.getAndIncrement(); mWorkers.add(new MasterWorkerInfo(workerId, workerNetAddress)); LOG.info("getWorkerId(): WorkerNetAddress: {} id: {}", workerNetAddress, workerId); return workerId; }
/**
 * Builds client-facing info for every worker currently in the lost-worker set.
 *
 * @return a set of worker info
 */
public Set<WorkerInfo> getLostWorkersInfo() {
  Set<WorkerInfo> lostInfos = new HashSet<>(mLostWorkers.size());
  for (MasterWorkerInfo lost : mLostWorkers) {
    WorkerInfo info;
    // The worker's lock is held only while its client view is generated.
    synchronized (lost) {
      info = lost.generateClientWorkerInfo();
      lostInfos.add(info);
    }
  }
  return lostInfos;
}
/**
 * Sums the used bytes reported by every worker in {@code mWorkers}.
 *
 * @return the total used bytes on all tiers, on all workers of Alluxio
 */
public long getUsedBytes() {
  long totalUsed = 0;
  for (MasterWorkerInfo info : mWorkers) {
    // Read each worker's usage under that worker's lock.
    synchronized (info) {
      totalUsed = totalUsed + info.getUsedBytes();
    }
  }
  return totalUsed;
}
/**
 * Builds the client-facing view of every worker in {@code mWorkers}.
 *
 * @return a list of {@link WorkerInfo} objects representing the workers in Alluxio
 */
public List<WorkerInfo> getWorkerInfoList() {
  List<WorkerInfo> infos = new ArrayList<>(mWorkers.size());
  for (MasterWorkerInfo info : mWorkers) {
    // Generate each worker's client view under that worker's lock.
    synchronized (info) {
      WorkerInfo clientView = info.generateClientWorkerInfo();
      infos.add(clientView);
    }
  }
  return infos;
}
/**
 * Aggregates the per-tier used bytes over every worker in {@code mWorkers}.
 *
 * @return the used bytes on each storage tier
 */
public Map<String, Long> getUsedBytesOnTiers() {
  Map<String, Long> usedByTier = new HashMap<>();
  for (MasterWorkerInfo info : mWorkers) {
    synchronized (info) {
      for (Map.Entry<String, Long> tierUsage : info.getUsedBytesOnTiers().entrySet()) {
        String tierAlias = tierUsage.getKey();
        // Start from the running total for this tier, or zero the first time it is seen.
        Long soFar = usedByTier.get(tierAlias);
        long runningTotal;
        if (soFar == null) {
          runningTotal = 0L;
        } else {
          runningTotal = soFar;
        }
        usedByTier.put(tierAlias, runningTotal + tierUsage.getValue());
      }
    }
  }
  return usedByTier;
}
/**
 * Scans the active workers and moves every worker that has not been updated within the
 * configured timeout to the lost-worker set, treating all of its blocks as removed.
 */
@Override
public void heartbeat() {
  final int timeoutMs = Configuration.getInt(Constants.MASTER_WORKER_TIMEOUT_MS);
  // NOTE(review): mWorkers is modified while being iterated below; presumably its iterator
  // tolerates concurrent removal -- confirm against the collection's implementation.
  for (MasterWorkerInfo candidate : mWorkers) {
    synchronized (candidate) {
      // Time elapsed since this worker's metadata was last refreshed.
      final long silentForMs = CommonUtils.getCurrentMs() - candidate.getLastUpdatedTimeMs();
      if (silentForMs <= timeoutMs) {
        continue;
      }
      LOG.error(
          "The worker {} timed out after {}ms without a heartbeat!", candidate, silentForMs);
      mLostWorkers.add(candidate);
      mWorkers.remove(candidate);
      processWorkerRemovedBlocks(candidate, candidate.getBlocks());
    }
  }
}
/**
 * Reconciles worker and block metadata after blocks have been added to a worker.
 *
 * <p>Blocks the master has no metadata for are skipped with a warning. Every other block is
 * recorded on the worker, gains the worker as a location on the given tier, and is cleared from
 * {@code mLostBlocks}.
 *
 * @param workerInfo The worker metadata object
 * @param addedBlockIds A mapping from storage tier alias to a list of block ids added
 */
@GuardedBy("workerInfo")
private void processWorkerAddedBlocks(
    MasterWorkerInfo workerInfo, Map<String, List<Long>> addedBlockIds) {
  for (Map.Entry<String, List<Long>> tierEntry : addedBlockIds.entrySet()) {
    String tierAlias = tierEntry.getKey();
    for (long blockId : tierEntry.getValue()) {
      MasterBlockInfo blockMeta = mBlocks.get(blockId);
      if (blockMeta == null) {
        LOG.warn("Failed to register workerId: {} to blockId: {}", workerInfo.getId(), blockId);
        continue;
      }
      // Lock order: the caller holds the worker lock, the block lock is taken second.
      synchronized (blockMeta) {
        workerInfo.addBlock(blockId);
        blockMeta.addWorker(workerInfo.getId(), tierAlias);
        mLostBlocks.remove(blockId);
      }
    }
  }
}
/** * Removes blocks from workers. * * @param blockIds a list of block ids to remove from Alluxio space * @param delete whether to delete blocks metadata in Master */ public void removeBlocks(List<Long> blockIds, boolean delete) { for (long blockId : blockIds) { MasterBlockInfo block = mBlocks.get(blockId); if (block == null) { continue; } HashSet<Long> workerIds = new HashSet<>(); synchronized (block) { // Technically, 'block' should be confirmed to still be in the data structure. A // concurrent removeBlock call can remove it. However, we are intentionally ignoring this // race, since deleting the same block again is a noop. workerIds.addAll(block.getWorkers()); // Two cases here: // 1) For delete: delete the block metadata. // 2) For free: keep the block metadata. mLostBlocks will be changed in // processWorkerRemovedBlocks if (delete) { // Make sure blockId is removed from mLostBlocks when the block metadata is deleted. // Otherwise blockId in mLostBlock can be dangling index if the metadata is gone. mLostBlocks.remove(blockId); mBlocks.remove(blockId); } } // Outside of locking the block. This does not have to be synchronized with the block // metadata, since it is essentially an asynchronous signal to the worker to remove the block. for (long workerId : workerIds) { MasterWorkerInfo worker = mWorkers.getFirstByField(ID_INDEX, workerId); if (worker != null) { synchronized (worker) { worker.updateToRemovedBlock(true, blockId); } } } } }
/**
 * Extracts the value this index keys on: the worker's address.
 *
 * @param o the worker metadata object
 * @return the worker's address
 */
@Override
public Object getFieldValue(MasterWorkerInfo o) {
  final Object address = o.getWorkerAddress();
  return address;
}
// TODO(binfan): check the logic is correct or not when commitBlock is a retry public void commitBlock( long workerId, long usedBytesOnTier, String tierAlias, long blockId, long length) throws NoWorkerException { LOG.debug( "Commit block from workerId: {}, usedBytesOnTier: {}, blockId: {}, length: {}", workerId, usedBytesOnTier, blockId, length); long counter = AsyncJournalWriter.INVALID_FLUSH_COUNTER; MasterWorkerInfo worker = mWorkers.getFirstByField(ID_INDEX, workerId); // TODO(peis): Check lost workers as well. if (worker == null) { throw new NoWorkerException(ExceptionMessage.NO_WORKER_FOUND.getMessage(workerId)); } // Lock the worker metadata first. synchronized (worker) { // Loop until block metadata is successfully locked. for (; ; ) { boolean newBlock = false; MasterBlockInfo block = mBlocks.get(blockId); if (block == null) { // The block metadata doesn't exist yet. block = new MasterBlockInfo(blockId, length); newBlock = true; } // Lock the block metadata. synchronized (block) { boolean writeJournal = false; if (newBlock) { if (mBlocks.putIfAbsent(blockId, block) != null) { // Another thread already inserted the metadata for this block, so start loop over. continue; } // Successfully added the new block metadata. Append a journal entry for the new // metadata. writeJournal = true; } else if (block.getLength() != length && block.getLength() == Constants.UNKNOWN_SIZE) { // The block size was previously unknown. Update the block size with the committed // size, and append a journal entry. block.updateLength(length); writeJournal = true; } if (writeJournal) { BlockInfoEntry blockInfo = BlockInfoEntry.newBuilder().setBlockId(blockId).setLength(length).build(); counter = appendJournalEntry(JournalEntry.newBuilder().setBlockInfo(blockInfo).build()); } // At this point, both the worker and the block metadata are locked. // Update the block metadata with the new worker location. 
block.addWorker(workerId, tierAlias); // This worker has this block, so it is no longer lost. mLostBlocks.remove(blockId); // Update the worker information for this new block. // TODO(binfan): when retry commitBlock on master is expected, make sure metrics are not // double counted. worker.addBlock(blockId); worker.updateUsedBytes(tierAlias, usedBytesOnTier); worker.updateLastUpdatedTimeMs(); } break; } } waitForJournalFlush(counter); }