/** * One worker can have multiple running task runners. <code>HostVolumeMapping</code> describes * various information for one worker, including : * * <ul> * <li>host name * <li>rack name * <li>unassigned tasks for each disk volume * <li>last assigned volume id - it can be used for assigning task in a round-robin manner * <li>the number of running tasks for each volume * </ul> * * , each task runner and the concurrency number of running tasks for volumes. * * <p>Here, we identifier a task runner by {@link ContainerId}, and we use volume ids to identify * all disks in this node. Actually, each volume is only used to distinguish disks, and we don't * know a certain volume id indicates a certain disk. If you want to know volume id, please read * the below section. * * <h3>Volume id</h3> * * Volume id is an integer. Each volume id identifies each disk volume. * * <p>This volume id can be obtained from * org.apache.hadoop.fs.BlockStorageLocation#getVolumeIds()}. * HDFS cannot give any volume id due * to unknown reason and disabled config 'dfs.client.file-block-locations.enabled'. In this case, * the volume id will be -1 or other native integer. * * <h3>See Also</h3> * * <ul> * <li>HDFS-3672 (https://issues.apache.org/jira/browse/HDFS-3672). * </ul> */ public class HostVolumeMapping { private final String host; private final String rack; /** A key is disk volume, and a value is a list of tasks to be scheduled. */ private Map<Integer, LinkedHashSet<TaskAttempt>> unassignedTaskForEachVolume = Collections.synchronizedMap(new HashMap<>()); /** A value is last assigned volume id for each task runner */ private HashMap<TaskAttemptId, Integer> lastAssignedVolumeId = Maps.newHashMap(); /** * A key is disk volume id, and a value is the load of this volume. This load is measured by * counting how many number of tasks are running. * * <p>These disk volumes are kept in an order of ascending order of the volume id. In other * words, the head volume ids are likely to -1, meaning no given volume id. */ private SortedMap<Integer, Integer> diskVolumeLoads = new TreeMap<>(); /** The total number of remain tasks in this host */ private AtomicInteger remainTasksNum = new AtomicInteger(0); public static final int REMOTE = -2; public HostVolumeMapping(String host, String rack) { this.host = host; this.rack = rack; } public synchronized void addTaskAttempt(int volumeId, TaskAttempt attemptId) { synchronized (unassignedTaskForEachVolume) { LinkedHashSet<TaskAttempt> list = unassignedTaskForEachVolume.get(volumeId); if (list == null) { list = new LinkedHashSet<>(); unassignedTaskForEachVolume.put(volumeId, list); } list.add(attemptId); } remainTasksNum.incrementAndGet(); if (!diskVolumeLoads.containsKey(volumeId)) diskVolumeLoads.put(volumeId, 0); } /** * Priorities 1. a task list in a volume of host 2. unknown block or Non-splittable task in host * 3. remote tasks. unassignedTaskForEachVolume is only contained local task. so it will be null */ public synchronized TaskAttemptId getLocalTask() { int volumeId = getLowestVolumeId(); TaskAttemptId taskAttemptId = null; if (unassignedTaskForEachVolume.size() > 0) { int retry = unassignedTaskForEachVolume.size(); do { // clean and get a remaining local task taskAttemptId = getAndRemove(volumeId); if (taskAttemptId == null) { // reassign next volume volumeId = getLowestVolumeId(); retry--; } else { lastAssignedVolumeId.put(taskAttemptId, volumeId); break; } } while (retry > 0); } else { this.remainTasksNum.set(0); } return taskAttemptId; } public synchronized TaskAttemptId getTaskAttemptIdByRack(String rack) { TaskAttemptId taskAttemptId = null; if (unassignedTaskForEachVolume.size() > 0 && this.rack.equals(rack)) { int retry = unassignedTaskForEachVolume.size(); do { // clean and get a remaining task int volumeId = getLowestVolumeId(); taskAttemptId = getAndRemove(volumeId); if (taskAttemptId == null) { retry--; } else { break; } } while (retry > 0); } return taskAttemptId; } private synchronized TaskAttemptId getAndRemove(int volumeId) { TaskAttemptId taskAttemptId = null; if (!unassignedTaskForEachVolume.containsKey(volumeId)) { if (volumeId > REMOTE) { diskVolumeLoads.remove(volumeId); } return taskAttemptId; } LinkedHashSet<TaskAttempt> list = unassignedTaskForEachVolume.get(volumeId); if (list != null && !list.isEmpty()) { TaskAttempt taskAttempt; synchronized (unassignedTaskForEachVolume) { Iterator<TaskAttempt> iterator = list.iterator(); taskAttempt = iterator.next(); iterator.remove(); } taskAttemptId = taskAttempt.getId(); for (DataLocation location : taskAttempt.getTask().getDataLocations()) { HostVolumeMapping volumeMapping = scheduledRequests.leafTaskHostMapping.get(location.getHost()); if (volumeMapping != null) { volumeMapping.removeTaskAttempt(location.getVolumeId(), taskAttempt); } } increaseConcurrency(volumeId); } return taskAttemptId; } private synchronized void removeTaskAttempt(int volumeId, TaskAttempt taskAttempt) { if (!unassignedTaskForEachVolume.containsKey(volumeId)) return; LinkedHashSet<TaskAttempt> tasks = unassignedTaskForEachVolume.get(volumeId); if (tasks.remove(taskAttempt)) { remainTasksNum.getAndDecrement(); } if (tasks.isEmpty()) { unassignedTaskForEachVolume.remove(volumeId); if (volumeId > REMOTE) { diskVolumeLoads.remove(volumeId); } } } /** * Increase the count of running tasks and disk loads for a certain task runner. * * @param volumeId Volume identifier * @return the volume load (i.e., how many running tasks use this volume) */ private synchronized int increaseConcurrency(int volumeId) { int concurrency = 1; if (diskVolumeLoads.containsKey(volumeId)) { concurrency = diskVolumeLoads.get(volumeId) + 1; } if (volumeId > -1) { LOG.info( "Assigned host : " + host + ", Volume : " + volumeId + ", Concurrency : " + concurrency); } else if (volumeId == -1) { // this case is disabled namenode block meta or compressed text file or amazon s3 LOG.info( "Assigned host : " + host + ", Unknown Volume : " + volumeId + ", Concurrency : " + concurrency); } else if (volumeId == REMOTE) { // this case has processed all block on host and it will be assigned to remote LOG.info( "Assigned host : " + host + ", Remaining local tasks : " + getRemainingLocalTaskSize() + ", Remote Concurrency : " + concurrency); } diskVolumeLoads.put(volumeId, concurrency); return concurrency; } /** Decrease the count of running tasks of a certain task runner */ private synchronized void decreaseConcurrency(int volumeId) { if (diskVolumeLoads.containsKey(volumeId)) { Integer concurrency = diskVolumeLoads.get(volumeId); if (concurrency > 0) { diskVolumeLoads.put(volumeId, concurrency - 1); } else { if (volumeId > REMOTE && !unassignedTaskForEachVolume.containsKey(volumeId)) { diskVolumeLoads.remove(volumeId); } } } } /** volume of a host : 0 ~ n compressed task, amazon s3, unKnown volume : -1 remote task : -2 */ public int getLowestVolumeId() { Map.Entry<Integer, Integer> volumeEntry = null; for (Map.Entry<Integer, Integer> entry : diskVolumeLoads.entrySet()) { if (volumeEntry == null) volumeEntry = entry; if (volumeEntry.getValue() >= entry.getValue()) { volumeEntry = entry; } } if (volumeEntry != null) { return volumeEntry.getKey(); } else { return REMOTE; } } public int getRemoteConcurrency() { return getVolumeConcurrency(REMOTE); } public int getVolumeConcurrency(int volumeId) { Integer size = diskVolumeLoads.get(volumeId); if (size == null) return 0; else return size; } public int getRemainingLocalTaskSize() { return remainTasksNum.get(); } public String getHost() { return host; } public String getRack() { return rack; } }