/** * Creates a {@link BlockInfo} form a given {@link MasterBlockInfo}, by populating worker * locations. * * <p>NOTE: {@link #mWorkers} should already be locked before calling this method. * * @param masterBlockInfo the {@link MasterBlockInfo} * @return a {@link BlockInfo} from a {@link MasterBlockInfo}. Populates worker locations */ private BlockInfo generateBlockInfo(MasterBlockInfo masterBlockInfo) { // "Join" to get all the addresses of the workers. List<BlockLocation> locations = new ArrayList<BlockLocation>(); List<MasterBlockLocation> blockLocations = masterBlockInfo.getBlockLocations(); // Sort the block locations by their alias ordinal in the master storage tier mapping Collections.sort( blockLocations, new Comparator<MasterBlockLocation>() { @Override public int compare(MasterBlockLocation o1, MasterBlockLocation o2) { return mGlobalStorageTierAssoc.getOrdinal(o1.getTierAlias()) - mGlobalStorageTierAssoc.getOrdinal(o2.getTierAlias()); } }); for (MasterBlockLocation masterBlockLocation : blockLocations) { MasterWorkerInfo workerInfo = mWorkers.getFirstByField(mIdIndex, masterBlockLocation.getWorkerId()); if (workerInfo != null) { locations.add( new BlockLocation() .setWorkerId(masterBlockLocation.getWorkerId()) .setWorkerAddress(workerInfo.getWorkerAddress()) .setTierAlias(masterBlockLocation.getTierAlias())); } } return new BlockInfo() .setBlockId(masterBlockInfo.getBlockId()) .setLength(masterBlockInfo.getLength()) .setLocations(locations); }
/**
 * Handles a periodic heartbeat from a worker and refreshes the master's metadata for it.
 *
 * <p>Removed blocks are processed before added blocks, then the worker's used bytes and
 * last-updated time are refreshed. Both {@code mBlocks} and {@code mWorkers} are locked, in that
 * order, for the duration of the call.
 *
 * @param workerId the worker id
 * @param usedBytesOnTiers a mapping from tier alias to the used bytes
 * @param removedBlockIds a list of block ids removed from this worker
 * @param addedBlocksOnTiers a mapping from tier alias to the added blocks
 * @return a {@code Register} command when the worker id is unknown, a {@code Free} command
 *         carrying the worker's blocks to remove when there are any, otherwise a {@code Nothing}
 *         command
 */
public Command workerHeartbeat(
    long workerId,
    Map<String, Long> usedBytesOnTiers,
    List<Long> removedBlockIds,
    Map<String, List<Long>> addedBlocksOnTiers) {
  synchronized (mBlocks) {
    synchronized (mWorkers) {
      boolean knownWorker = mWorkers.contains(mIdIndex, workerId);
      if (!knownWorker) {
        // Ask the worker to register again, since the master has no record of this id.
        LOG.warn("Could not find worker id: {} for heartbeat.", workerId);
        return new Command(CommandType.Register, new ArrayList<Long>());
      }

      MasterWorkerInfo worker = mWorkers.getFirstByField(mIdIndex, workerId);
      processWorkerRemovedBlocks(worker, removedBlockIds);
      processWorkerAddedBlocks(worker, addedBlocksOnTiers);
      worker.updateUsedBytes(usedBytesOnTiers);
      worker.updateLastUpdatedTimeMs();

      List<Long> pendingRemovals = worker.getToRemoveBlocks();
      return pendingRemovals.isEmpty()
          ? new Command(CommandType.Nothing, new ArrayList<Long>())
          : new Command(CommandType.Free, pendingRemovals);
    }
  }
}
/** * Returns a worker id for the given worker. * * @param workerNetAddress the worker {@link WorkerNetAddress} * @return the worker id for this worker */ public long getWorkerId(WorkerNetAddress workerNetAddress) { // TODO(gene): This NetAddress cloned in case thrift re-uses the object. Does thrift re-use it? synchronized (mWorkers) { if (mWorkers.contains(mAddressIndex, workerNetAddress)) { // This worker address is already mapped to a worker id. long oldWorkerId = mWorkers.getFirstByField(mAddressIndex, workerNetAddress).getId(); LOG.warn("The worker {} already exists as id {}.", workerNetAddress, oldWorkerId); return oldWorkerId; } if (mLostWorkers.contains(mAddressIndex, workerNetAddress)) { // this is one of the lost workers final MasterWorkerInfo lostWorkerInfo = mLostWorkers.getFirstByField(mAddressIndex, workerNetAddress); final long lostWorkerId = lostWorkerInfo.getId(); LOG.warn("A lost worker {} has requested its old id {}.", workerNetAddress, lostWorkerId); // Update the timestamp of the worker before it is considered an active worker. lostWorkerInfo.updateLastUpdatedTimeMs(); mWorkers.add(lostWorkerInfo); mLostWorkers.remove(lostWorkerInfo); return lostWorkerId; } // Generate a new worker id. long workerId = mNextWorkerId.getAndIncrement(); mWorkers.add(new MasterWorkerInfo(workerId, workerNetAddress)); LOG.info("getWorkerId(): WorkerNetAddress: {} id: {}", workerNetAddress, workerId); return workerId; } }
/** * Updates metadata when a worker registers with the master. * * @param workerId the worker id of the worker registering * @param storageTiers a list of storage tier aliases in order of their position in the worker's * hierarchy * @param totalBytesOnTiers a mapping from storage tier alias to total bytes * @param usedBytesOnTiers a mapping from storage tier alias to the used byes * @param currentBlocksOnTiers a mapping from storage tier alias to a list of blocks * @throws NoWorkerException if workerId cannot be found */ public void workerRegister( long workerId, List<String> storageTiers, Map<String, Long> totalBytesOnTiers, Map<String, Long> usedBytesOnTiers, Map<String, List<Long>> currentBlocksOnTiers) throws NoWorkerException { synchronized (mBlocks) { synchronized (mWorkers) { if (!mWorkers.contains(mIdIndex, workerId)) { throw new NoWorkerException("Could not find worker id: " + workerId + " to register."); } MasterWorkerInfo workerInfo = mWorkers.getFirstByField(mIdIndex, workerId); workerInfo.updateLastUpdatedTimeMs(); // Gather all blocks on this worker. HashSet<Long> blocks = new HashSet<Long>(); for (List<Long> blockIds : currentBlocksOnTiers.values()) { blocks.addAll(blockIds); } // Detect any lost blocks on this worker. Set<Long> removedBlocks = workerInfo.register( mGlobalStorageTierAssoc, storageTiers, totalBytesOnTiers, usedBytesOnTiers, blocks); processWorkerRemovedBlocks(workerInfo, removedBlocks); processWorkerAddedBlocks(workerInfo, currentBlocksOnTiers); LOG.info("registerWorker(): {}", workerInfo); } } }
/**
 * Marks a block as committed on a specific worker.
 *
 * <p>The worker's block list, used bytes for the tier and last-updated time are refreshed. If
 * the block is not yet known to the master, a new block entry is created and a journal entry for
 * it is written and flushed. The worker is then recorded as a location of the block and the
 * block is removed from the set of lost blocks. Both {@code mBlocks} and {@code mWorkers} are
 * locked, in that order, for the duration of the call.
 *
 * @param workerId the worker id committing the block
 * @param usedBytesOnTier the updated used bytes on the tier of the worker
 * @param tierAlias the alias of the storage tier where the worker is committing the block to
 * @param blockId the committing block id
 * @param length the length of the block
 * @throws NullPointerException if no worker with the given id is present in {@code mWorkers}
 */
public void commitBlock(
    long workerId, long usedBytesOnTier, String tierAlias, long blockId, long length) {
  LOG.debug(
      "Commit block from worker: {}",
      FormatUtils.parametersToString(workerId, usedBytesOnTier, blockId, length));
  synchronized (mBlocks) {
    synchronized (mWorkers) {
      MasterWorkerInfo workerInfo = mWorkers.getFirstByField(mIdIndex, workerId);
      if (workerInfo == null) {
        // The lookup yields null for an unknown id. Fail before touching any metadata, with a
        // message that names the worker and block. The exception type is deliberately the same
        // one the unchecked dereference used to raise, so callers observe no new exception type.
        throw new NullPointerException(
            "Could not find worker id: " + workerId + " to commit block " + blockId + ".");
      }
      workerInfo.addBlock(blockId);
      workerInfo.updateUsedBytes(tierAlias, usedBytesOnTier);
      workerInfo.updateLastUpdatedTimeMs();

      MasterBlockInfo masterBlockInfo = mBlocks.get(blockId);
      if (masterBlockInfo == null) {
        // First commit of this block: create its metadata and persist it to the journal.
        masterBlockInfo = new MasterBlockInfo(blockId, length);
        mBlocks.put(blockId, masterBlockInfo);
        BlockInfoEntry blockInfo =
            BlockInfoEntry.newBuilder()
                .setBlockId(masterBlockInfo.getBlockId())
                .setLength(masterBlockInfo.getLength())
                .build();
        writeJournalEntry(JournalEntry.newBuilder().setBlockInfo(blockInfo).build());
        flushJournal();
      }
      masterBlockInfo.addWorker(workerId, tierAlias);
      // The block now has at least one location, so it is no longer lost.
      mLostBlocks.remove(blockId);
    }
  }
}
/**
 * Collects the client-facing info of every worker in {@code mLostWorkers}.
 *
 * <p>The set is built while holding the lock on {@code mWorkers}.
 *
 * @return a set of worker info
 */
public Set<WorkerInfo> getLostWorkersInfo() {
  synchronized (mWorkers) {
    Set<WorkerInfo> lostInfos = new HashSet<WorkerInfo>(mLostWorkers.size());
    for (MasterWorkerInfo lostWorker : mLostWorkers) {
      WorkerInfo clientInfo = lostWorker.generateClientWorkerInfo();
      lostInfos.add(clientInfo);
    }
    return lostInfos;
  }
}
/**
 * Sums the used bytes reported by every worker in {@code mWorkers}, while holding its lock.
 *
 * @return the total used bytes on all tiers, on all workers of Tachyon
 */
public long getUsedBytes() {
  synchronized (mWorkers) {
    long totalUsedBytes = 0;
    for (MasterWorkerInfo info : mWorkers) {
      totalUsedBytes += info.getUsedBytes();
    }
    return totalUsedBytes;
  }
}
/**
 * Converts every worker in {@code mWorkers} into its client-facing {@link WorkerInfo}, while
 * holding the lock on {@code mWorkers}.
 *
 * @return a list of {@link WorkerInfo} objects representing the workers in Tachyon
 */
public List<WorkerInfo> getWorkerInfoList() {
  synchronized (mWorkers) {
    List<WorkerInfo> infos = new ArrayList<WorkerInfo>(mWorkers.size());
    for (MasterWorkerInfo worker : mWorkers) {
      WorkerInfo clientInfo = worker.generateClientWorkerInfo();
      infos.add(clientInfo);
    }
    return infos;
  }
}
/**
 * Aggregates, per storage tier alias, the used bytes reported by every worker in
 * {@code mWorkers}. The workers are read while holding the lock on {@code mWorkers}.
 *
 * @return the used bytes on each storage tier
 */
public Map<String, Long> getUsedBytesOnTiers() {
  Map<String, Long> totalsByTier = new HashMap<String, Long>();
  synchronized (mWorkers) {
    for (MasterWorkerInfo info : mWorkers) {
      for (Map.Entry<String, Long> tierUsage : info.getUsedBytesOnTiers().entrySet()) {
        String tierAlias = tierUsage.getKey();
        long sum = tierUsage.getValue();
        // A tier seen for the first time starts from this worker's value alone.
        Long runningTotal = totalsByTier.get(tierAlias);
        if (runningTotal != null) {
          sum += runningTotal;
        }
        totalsByTier.put(tierAlias, sum);
      }
    }
  }
  return totalsByTier;
}
/**
 * Records, for each tier, the blocks a worker reports as added.
 *
 * <p>For every block id known to {@code mBlocks}, the block is added to the worker's metadata,
 * the worker is recorded as a location of the block on that tier, and the block is removed from
 * {@code mLostBlocks}. Block ids unknown to {@code mBlocks} are only logged with a warning.
 *
 * <p>NOTE: {@link #mBlocks} should already be locked before calling this method.
 *
 * @param workerInfo The worker metadata object
 * @param addedBlockIds A mapping from storage tier alias to a list of block ids added
 */
private void processWorkerAddedBlocks(
    MasterWorkerInfo workerInfo, Map<String, List<Long>> addedBlockIds) {
  for (Map.Entry<String, List<Long>> tierEntry : addedBlockIds.entrySet()) {
    String tierAlias = tierEntry.getKey();
    for (long blockId : tierEntry.getValue()) {
      MasterBlockInfo blockMetadata = mBlocks.get(blockId);
      if (blockMetadata == null) {
        LOG.warn("Failed to register workerId: {} to blockId: {}", workerInfo.getId(), blockId);
        continue;
      }
      workerInfo.addBlock(blockId);
      blockMetadata.addWorker(workerInfo.getId(), tierAlias);
      mLostBlocks.remove(blockId);
    }
  }
}
/** * Updates the worker and block metadata for blocks removed from a worker. * * <p>NOTE: {@link #mBlocks} should already be locked before calling this method. * * @param workerInfo The worker metadata object * @param removedBlockIds A list of block ids removed from the worker */ private void processWorkerRemovedBlocks( MasterWorkerInfo workerInfo, Collection<Long> removedBlockIds) { for (long removedBlockId : removedBlockIds) { MasterBlockInfo masterBlockInfo = mBlocks.get(removedBlockId); if (masterBlockInfo == null) { LOG.warn( "Worker {} removed block {} but block does not exist.", workerInfo.getId(), removedBlockId); // Continue to remove the remaining blocks. continue; } LOG.info("Block {} is removed on worker {}.", removedBlockId, workerInfo.getId()); workerInfo.removeBlock(masterBlockInfo.getBlockId()); masterBlockInfo.removeWorker(workerInfo.getId()); if (masterBlockInfo.getNumLocations() == 0) { mLostBlocks.add(removedBlockId); } } }
/**
 * Removes blocks from workers.
 *
 * <p>For every given block id known to {@code mBlocks}, each worker currently listed for the
 * block is removed from the block's locations and, when the worker is present in
 * {@code mWorkers}, the block is recorded on that worker via {@code updateToRemovedBlock}. The
 * block id is then removed from {@code mLostBlocks}. Unknown block ids are skipped. Both
 * {@code mBlocks} and {@code mWorkers} are locked, in that order, for the duration of the call.
 *
 * @param blockIds a list of block ids to remove from Tachyon space
 */
public void removeBlocks(List<Long> blockIds) {
  synchronized (mBlocks) {
    synchronized (mWorkers) {
      for (long blockId : blockIds) {
        MasterBlockInfo blockMetadata = mBlocks.get(blockId);
        if (blockMetadata != null) {
          // Iterate over a copy, because the block's worker list is modified inside the loop.
          List<Long> holderIds = new ArrayList<Long>(blockMetadata.getWorkers());
          for (long holderId : holderIds) {
            blockMetadata.removeWorker(holderId);
            MasterWorkerInfo holder = mWorkers.getFirstByField(mIdIndex, holderId);
            if (holder != null) {
              holder.updateToRemovedBlock(true, blockId);
            }
          }
          mLostBlocks.remove(blockId);
        }
      }
    }
  }
}
/**
 * Checks every worker in {@code mWorkers} for a timeout.
 *
 * <p>A worker whose time since its last update exceeds the configured
 * {@code Constants.MASTER_WORKER_TIMEOUT_MS} is added to {@code mLostWorkers}, removed from
 * {@code mWorkers} and has its blocks processed as lost. Both {@code mBlocks} and
 * {@code mWorkers} are locked, in that order, while the workers are scanned.
 */
@Override
public void heartbeat() {
  LOG.debug("System status checking.");
  int timeoutMs = MasterContext.getConf().getInt(Constants.MASTER_WORKER_TIMEOUT_MS);
  synchronized (mBlocks) {
    synchronized (mWorkers) {
      for (Iterator<MasterWorkerInfo> it = mWorkers.iterator(); it.hasNext(); ) {
        MasterWorkerInfo candidate = it.next();
        final long sinceLastUpdateMs =
            CommonUtils.getCurrentMs() - candidate.getLastUpdatedTimeMs();
        if (sinceLastUpdateMs <= timeoutMs) {
          // Still within the timeout window.
          continue;
        }
        LOG.error("The worker {} got timed out!", candidate);
        mLostWorkers.add(candidate);
        // Remove through the iterator so the ongoing iteration stays valid.
        it.remove();
        processLostWorker(candidate);
      }
    }
  }
}
// Synchronized on mBlocks by the caller private void processLostWorker(MasterWorkerInfo latest) { final Set<Long> lostBlocks = latest.getBlocks(); processWorkerRemovedBlocks(latest, lostBlocks); }
/**
 * Extracts the value this field index keys on: the worker's address.
 *
 * @param o the worker metadata object to read from
 * @return the result of {@code o.getWorkerAddress()}
 */
@Override
public Object getFieldValue(MasterWorkerInfo o) {
  final Object workerAddress = o.getWorkerAddress();
  return workerAddress;
}