Beispiel #1
0
  /**
   * Tracks the scheduling state of one worker host. One worker can have multiple running task
   * runners; <code>HostVolumeMapping</code> keeps the following information for one worker:
   *
   * <ul>
   *   <li>host name
   *   <li>rack name
   *   <li>unassigned tasks for each disk volume
   *   <li>last assigned volume id - it can be used for assigning task in a round-robin manner
   *   <li>the number of running tasks for each volume
   * </ul>
   *
   * <p>Assigned tasks are identified by {@code TaskAttemptId}, and volume ids are used to tell the
   * disks of this node apart. A volume id only distinguishes disks from each other; it is not known
   * which physical disk a given volume id refers to.
   *
   * <h3>Volume id</h3>
   *
   * Volume id is an integer. Each volume id identifies each disk volume.
   *
   * <p>This volume id can be obtained from {@code
   * org.apache.hadoop.fs.BlockStorageLocation#getVolumeIds()}. HDFS may be unable to give a volume
   * id, for an unknown reason or because the config 'dfs.client.file-block-locations.enabled' is
   * disabled. In this case, the volume id will be -1 or another negative integer.
   *
   * <h3>Thread safety</h3>
   *
   * All mutating methods are {@code synchronized} on this instance. NOTE(review): {@link
   * #getLowestVolumeId()} and {@link #getVolumeConcurrency(int)} read the load map without taking
   * the monitor; confirm that callers outside this class only invoke them in a context where no
   * concurrent mutation can happen.
   *
   * <h3>See Also</h3>
   *
   * <ul>
   *   <li>HDFS-3672 (https://issues.apache.org/jira/browse/HDFS-3672).
   * </ul>
   */
  public class HostVolumeMapping {
    /** Volume id returned by {@link #getLowestVolumeId()} when no volume load is tracked. */
    public static final int REMOTE = -2;

    private final String host;
    private final String rack;
    /** A key is disk volume, and a value is a list of tasks to be scheduled. */
    private final Map<Integer, LinkedHashSet<TaskAttempt>> unassignedTaskForEachVolume =
        Collections.synchronizedMap(new HashMap<>());
    /** A key is an assigned task attempt, and a value is the volume id it was taken from. */
    private final Map<TaskAttemptId, Integer> lastAssignedVolumeId = new HashMap<>();
    /**
     * A key is disk volume id, and a value is the load of this volume. This load is measured by
     * counting how many tasks are running.
     *
     * <p>These disk volumes are kept in ascending order of the volume id. In other words, the head
     * volume ids are likely to be -1, meaning no given volume id.
     */
    private final SortedMap<Integer, Integer> diskVolumeLoads = new TreeMap<>();
    /** The total number of remaining (unassigned) tasks in this host. */
    private final AtomicInteger remainTasksNum = new AtomicInteger(0);

    public HostVolumeMapping(String host, String rack) {
      this.host = host;
      this.rack = rack;
    }

    /**
     * Registers a task as unassigned on the given volume and starts tracking the volume's load (at
     * zero) if it was not tracked yet.
     *
     * @param volumeId volume the task's data resides on
     * @param attemptId the task attempt to enqueue
     */
    public synchronized void addTaskAttempt(int volumeId, TaskAttempt attemptId) {
      boolean added;
      synchronized (unassignedTaskForEachVolume) {
        added =
            unassignedTaskForEachVolume
                .computeIfAbsent(volumeId, key -> new LinkedHashSet<>())
                .add(attemptId);
      }

      // Count the task only when the set really grew; a duplicate add must not inflate the
      // remaining-task counter, because it will only ever be removed (and decremented) once.
      if (added) {
        remainTasksNum.incrementAndGet();
      }

      diskVolumeLoads.putIfAbsent(volumeId, 0);
    }

    /**
     * Picks the next local task, preferring the least loaded volume.
     *
     * <p>Priorities: 1. a task list in a volume of host, 2. unknown block or non-splittable task in
     * host, 3. remote tasks. {@code unassignedTaskForEachVolume} only contains local tasks, so the
     * result is {@code null} when none is left.
     *
     * @return the id of the task taken from the queue, or {@code null} if no local task was found
     */
    public synchronized TaskAttemptId getLocalTask() {
      if (unassignedTaskForEachVolume.isEmpty()) {
        remainTasksNum.set(0);
        return null;
      }

      // Bound the search by the number of volumes that had pending tasks when we started.
      int retry = unassignedTaskForEachVolume.size();
      int volumeId = getLowestVolumeId();
      do {
        // clean and get a remaining local task
        TaskAttemptId taskAttemptId = getAndRemove(volumeId);
        if (taskAttemptId != null) {
          lastAssignedVolumeId.put(taskAttemptId, volumeId);
          return taskAttemptId;
        }
        // reassign next volume
        volumeId = getLowestVolumeId();
        retry--;
      } while (retry > 0);

      return null;
    }

    /**
     * Picks a pending task from this host if the host belongs to the given rack.
     *
     * @param rack rack name to match against this host's rack
     * @return the id of the task taken from the queue, or {@code null} if the rack differs or no
     *     task was found
     */
    public synchronized TaskAttemptId getTaskAttemptIdByRack(String rack) {
      if (unassignedTaskForEachVolume.isEmpty() || !this.rack.equals(rack)) {
        return null;
      }

      int retry = unassignedTaskForEachVolume.size();
      do {
        // clean and get a remaining task
        TaskAttemptId taskAttemptId = getAndRemove(getLowestVolumeId());
        if (taskAttemptId != null) {
          return taskAttemptId;
        }
        retry--;
      } while (retry > 0);

      return null;
    }

    /**
     * Takes the first pending task of a volume, removes it from every host mapping that lists it as
     * a data location, and increases the volume's load.
     *
     * <p>If the volume has no pending-task entry at all, its load entry is dropped (for volume ids
     * above {@link #REMOTE}) so that it is no longer offered by {@link #getLowestVolumeId()}.
     *
     * <p>NOTE(review): this method calls {@code removeTaskAttempt} on other {@code
     * HostVolumeMapping} instances while holding this instance's monitor; two hosts doing this
     * concurrently for each other could lock in opposite order. Confirm the callers serialize
     * access.
     *
     * @param volumeId volume to take a task from
     * @return the id of the removed task, or {@code null} if the volume has no pending task
     */
    private synchronized TaskAttemptId getAndRemove(int volumeId) {
      LinkedHashSet<TaskAttempt> tasks = unassignedTaskForEachVolume.get(volumeId);
      if (tasks == null) {
        if (volumeId > REMOTE) {
          diskVolumeLoads.remove(volumeId);
        }
        return null;
      }
      if (tasks.isEmpty()) {
        return null;
      }

      TaskAttempt taskAttempt;
      synchronized (unassignedTaskForEachVolume) {
        Iterator<TaskAttempt> iterator = tasks.iterator();
        taskAttempt = iterator.next();
        iterator.remove();
      }
      // The task has left this host's queue right here. removeTaskAttempt() below will not find it
      // in this volume's set any more and therefore will not decrement for it, so account for it
      // now to keep the remaining-task counter accurate.
      remainTasksNum.getAndDecrement();

      for (DataLocation location : taskAttempt.getTask().getDataLocations()) {
        HostVolumeMapping volumeMapping =
            scheduledRequests.leafTaskHostMapping.get(location.getHost());
        if (volumeMapping != null) {
          volumeMapping.removeTaskAttempt(location.getVolumeId(), taskAttempt);
        }
      }

      increaseConcurrency(volumeId);
      return taskAttempt.getId();
    }

    /**
     * Removes a task from a volume's pending set. When the set becomes empty the volume's pending
     * entry is dropped, and so is its load entry for volume ids above {@link #REMOTE}.
     *
     * @param volumeId volume whose pending set is updated
     * @param taskAttempt task to remove
     */
    private synchronized void removeTaskAttempt(int volumeId, TaskAttempt taskAttempt) {
      LinkedHashSet<TaskAttempt> tasks = unassignedTaskForEachVolume.get(volumeId);
      if (tasks == null) {
        return;
      }

      if (tasks.remove(taskAttempt)) {
        remainTasksNum.getAndDecrement();
      }

      if (tasks.isEmpty()) {
        unassignedTaskForEachVolume.remove(volumeId);
        if (volumeId > REMOTE) {
          diskVolumeLoads.remove(volumeId);
        }
      }
    }

    /**
     * Increase the count of running tasks and disk loads for a certain task runner.
     *
     * @param volumeId Volume identifier
     * @return the volume load (i.e., how many running tasks use this volume)
     */
    private synchronized int increaseConcurrency(int volumeId) {
      // An untracked volume starts at load 1.
      int concurrency = diskVolumeLoads.getOrDefault(volumeId, 0) + 1;

      if (volumeId > -1) {
        LOG.info(
            "Assigned host : "
                + host
                + ", Volume : "
                + volumeId
                + ", Concurrency : "
                + concurrency);
      } else if (volumeId == -1) {
        // this case is disabled namenode block meta or compressed text file or amazon s3
        LOG.info(
            "Assigned host : "
                + host
                + ", Unknown Volume : "
                + volumeId
                + ", Concurrency : "
                + concurrency);
      } else if (volumeId == REMOTE) {
        // this case has processed all block on host and it will be assigned to remote
        LOG.info(
            "Assigned host : "
                + host
                + ", Remaining local tasks : "
                + getRemainingLocalTaskSize()
                + ", Remote Concurrency : "
                + concurrency);
      }

      diskVolumeLoads.put(volumeId, concurrency);
      return concurrency;
    }

    /**
     * Decrease the count of running tasks of a certain task runner. A volume whose load is already
     * zero is dropped from the load map when it is above {@link #REMOTE} and has no pending tasks.
     *
     * @param volumeId Volume identifier
     */
    private synchronized void decreaseConcurrency(int volumeId) {
      Integer concurrency = diskVolumeLoads.get(volumeId);
      if (concurrency == null) {
        return;
      }

      if (concurrency > 0) {
        diskVolumeLoads.put(volumeId, concurrency - 1);
      } else if (volumeId > REMOTE && !unassignedTaskForEachVolume.containsKey(volumeId)) {
        diskVolumeLoads.remove(volumeId);
      }
    }

    /**
     * Returns the id of the least loaded tracked volume.
     *
     * <p>Volume ids: 0 ~ n for a volume of a host; -1 for compressed task, amazon s3 or unknown
     * volume; -2 for remote task.
     *
     * @return the volume id with the lowest load, or {@link #REMOTE} when no volume is tracked
     */
    public int getLowestVolumeId() {
      Map.Entry<Integer, Integer> lowest = null;

      // Entries are visited in ascending volume-id order; ">=" makes the last of several equally
      // loaded volumes win, i.e. ties go to the highest volume id.
      for (Map.Entry<Integer, Integer> entry : diskVolumeLoads.entrySet()) {
        if (lowest == null || lowest.getValue() >= entry.getValue()) {
          lowest = entry;
        }
      }

      return lowest == null ? REMOTE : lowest.getKey();
    }

    /** Returns the load recorded under the {@link #REMOTE} volume id, or 0 if none. */
    public int getRemoteConcurrency() {
      return getVolumeConcurrency(REMOTE);
    }

    /**
     * Returns the load recorded for a volume.
     *
     * @param volumeId Volume identifier
     * @return the recorded load, or 0 when the volume is not tracked
     */
    public int getVolumeConcurrency(int volumeId) {
      Integer size = diskVolumeLoads.get(volumeId);
      return size == null ? 0 : size;
    }

    /** Returns the current value of the remaining-task counter of this host. */
    public int getRemainingLocalTaskSize() {
      return remainTasksNum.get();
    }

    public String getHost() {
      return host;
    }

    public String getRack() {
      return rack;
    }
  }