Пример #1
0
  /**
   * Walks a single application through submit, run, allocate, release and finish on one
   * stand-alone queue (no parent queue is passed) and checks the queue-level metrics source after
   * each step.
   *
   * <p>The queue metrics are created with {@code false} as the fourth argument of
   * {@code QueueMetrics.forQueue} (presumably the "enable user metrics" switch; confirm against
   * its signature). The test ends by asserting that the per-user source looked up right after
   * submission is {@code null}.
   */
  @Test
  public void testDefaultSingleQueueMetrics() {
    final String userName = "******";
    final String queue = "single";

    final QueueMetrics queueMetrics = QueueMetrics.forQueue(ms, queue, null, false, conf);
    final MetricsSource source = queueSource(ms, queue);
    final AppSchedulingInfo appInfo = mockApp(userName);

    // Submit the application, then look up the per-user source once; it is only inspected at the
    // very end of the test.
    queueMetrics.submitApp(userName, 1);
    final MetricsSource perUserSource = userSource(ms, queue, userName);
    checkApps(source, 1, 1, 0, 0, 0, 0);

    // The available amount is pushed in by the test itself because, in a real cluster, it is
    // derived from dynamically configurable cluster/queue resources.
    queueMetrics.setAvailableResourcesToQueue(Resources.createResource(100 * GB));
    queueMetrics.incrPendingResources(userName, 5, Resources.createResource(15 * GB));
    checkResources(source, 0, 0, 0, 0, 100 * GB, 15 * GB, 5, 0, 0);

    queueMetrics.incrAppsRunning(appInfo, userName);
    checkApps(source, 1, 0, 1, 0, 0, 0);

    // Three 2 GB allocations, followed by the release of one of them.
    queueMetrics.allocateResources(userName, 3, Resources.createResource(2 * GB));
    checkResources(source, 6 * GB, 3, 3, 0, 100 * GB, 9 * GB, 2, 0, 0);

    queueMetrics.releaseResources(userName, 1, Resources.createResource(2 * GB));
    checkResources(source, 4 * GB, 2, 3, 1, 100 * GB, 9 * GB, 2, 0, 0);

    queueMetrics.finishApp(appInfo, RMAppAttemptState.FINISHED);
    checkApps(source, 1, 0, 0, 1, 0, 0);
    assertNull(perUserSource);
  }
Пример #2
0
  /**
   * Drives one application through its lifecycle on a leaf queue ({@code "root.leaf"}) whose
   * parent is {@code "root"}, and checks that every update made through the leaf metrics shows up
   * in four sources: the leaf queue, the parent queue, and the per-user source of each.
   *
   * <p>Both {@code QueueMetrics} instances are created with {@code true} as the fourth argument of
   * {@code forQueue} (presumably the "enable user metrics" switch; confirm against its
   * signature). The parent queue is a stub whose {@code getMetrics()} returns the parent metrics.
   */
  @Test
  public void testTwoLevelWithUserMetrics() {
    final String rootName = "root";
    final String leafName = "root.leaf";
    final String userName = "******";

    final QueueMetrics rootMetrics = QueueMetrics.forQueue(ms, rootName, null, true, conf);
    final Queue rootQueue = make(stub(Queue.class).returning(rootMetrics).from.getMetrics());
    final QueueMetrics leafMetrics = QueueMetrics.forQueue(ms, leafName, rootQueue, true, conf);
    final MetricsSource rootSource = queueSource(ms, rootName);
    final MetricsSource leafSource = queueSource(ms, leafName);
    final AppSchedulingInfo appInfo = mockApp(userName);

    // Only the leaf metrics are updated; the parent is expected to follow.
    leafMetrics.submitApp(userName, 1);
    final MetricsSource leafUserSource = userSource(ms, leafName, userName);
    final MetricsSource rootUserSource = userSource(ms, rootName, userName);

    checkApps(leafSource, 1, 1, 0, 0, 0, 0);
    checkApps(rootSource, 1, 1, 0, 0, 0, 0);
    checkApps(leafUserSource, 1, 1, 0, 0, 0, 0);
    checkApps(rootUserSource, 1, 1, 0, 0, 0, 0);

    // Available resources are set explicitly on both levels (queue-wide and per user) because
    // they depend on dynamically configurable cluster/queue resources.
    rootMetrics.setAvailableResourcesToQueue(Resources.createResource(100 * GB));
    leafMetrics.setAvailableResourcesToQueue(Resources.createResource(100 * GB));
    rootMetrics.setAvailableResourcesToUser(userName, Resources.createResource(10 * GB));
    leafMetrics.setAvailableResourcesToUser(userName, Resources.createResource(10 * GB));
    leafMetrics.incrPendingResources(userName, 5, Resources.createResource(15 * GB));
    checkResources(leafSource, 0, 0, 0, 0, 100 * GB, 15 * GB, 5, 0, 0);
    checkResources(rootSource, 0, 0, 0, 0, 100 * GB, 15 * GB, 5, 0, 0);
    checkResources(leafUserSource, 0, 0, 0, 0, 10 * GB, 15 * GB, 5, 0, 0);
    checkResources(rootUserSource, 0, 0, 0, 0, 10 * GB, 15 * GB, 5, 0, 0);

    // After the app starts running, only the leaf-level sources are inspected here.
    leafMetrics.incrAppsRunning(appInfo, userName);
    checkApps(leafSource, 1, 0, 1, 0, 0, 0);
    checkApps(leafUserSource, 1, 0, 1, 0, 0, 0);

    // Allocate three 2 GB containers and reserve 3 GB on top of that.
    leafMetrics.allocateResources(userName, 3, Resources.createResource(2 * GB));
    leafMetrics.reserveResource(userName, Resources.createResource(3 * GB));
    checkResources(leafSource, 6 * GB, 3, 3, 0, 100 * GB, 9 * GB, 2, 3 * GB, 1);
    checkResources(rootSource, 6 * GB, 3, 3, 0, 100 * GB, 9 * GB, 2, 3 * GB, 1);
    checkResources(leafUserSource, 6 * GB, 3, 3, 0, 10 * GB, 9 * GB, 2, 3 * GB, 1);
    checkResources(rootUserSource, 6 * GB, 3, 3, 0, 10 * GB, 9 * GB, 2, 3 * GB, 1);

    // Give one container back and drop the reservation again.
    leafMetrics.releaseResources(userName, 1, Resources.createResource(2 * GB));
    leafMetrics.unreserveResource(userName, Resources.createResource(3 * GB));
    checkResources(leafSource, 4 * GB, 2, 3, 1, 100 * GB, 9 * GB, 2, 0, 0);
    checkResources(rootSource, 4 * GB, 2, 3, 1, 100 * GB, 9 * GB, 2, 0, 0);
    checkResources(leafUserSource, 4 * GB, 2, 3, 1, 10 * GB, 9 * GB, 2, 0, 0);
    checkResources(rootUserSource, 4 * GB, 2, 3, 1, 10 * GB, 9 * GB, 2, 0, 0);

    leafMetrics.finishApp(appInfo, RMAppAttemptState.FINISHED);
    checkApps(leafSource, 1, 0, 0, 1, 0, 0);
    checkApps(rootSource, 1, 0, 0, 1, 0, 0);
    checkApps(leafUserSource, 1, 0, 0, 1, 0, 0);
    checkApps(rootUserSource, 1, 0, 0, 1, 0, 0);
  }
Пример #3
0
/**
 * A Schedulable represents an entity that can be given resources, such as a job or a queue. It
 * provides a common interface so that algorithms such as fair sharing can be applied both within a
 * queue and across queues.
 *
 * <p>A Schedulable is responsible for three roles:
 *
 * <ol>
 *   <li>It can be assigned a container on a node through
 *       {@code assignContainer(FSSchedulerNode, boolean)}.
 *   <li>It provides information about the job/queue to the scheduler:
 *       <ul>
 *         <li>demand ({@link #getDemand()}),
 *         <li>resources currently in use ({@link #getResourceUsage()}),
 *         <li>minimum share ({@link #getMinShare()}),
 *         <li>job/queue weight, for fair sharing ({@link #getWeight()}),
 *         <li>start time and priority, for FIFO ordering ({@link #getStartTime()},
 *             {@link #getPriority()}).
 *       </ul>
 *   <li>It can be assigned a fair share, for use with fair scheduling
 *       ({@link #setFairShare(Resource)} / {@link #getFairShare()}).
 * </ol>
 *
 * <p>{@link #updateDemand()} refreshes the demand of this Schedulable and of its children, if it
 * has any.
 *
 * <p>NOTE(review): earlier revisions of this comment described separate map/reduce Schedulables
 * and the methods {@code assignTask()} and {@code redistributeShare()}; none of those are declared
 * by this class, so the description above sticks to the members that are.
 */
@Private
@Unstable
abstract class Schedulable {
  /** Fair share assigned to this Schedulable; starts out as a zero-sized resource. */
  private Resource fairShare = Resources.createResource(0);

  /**
   * Name of job/queue, used for debugging as well as for breaking ties in scheduling order
   * deterministically.
   *
   * @return the name of this Schedulable
   */
  public abstract String getName();

  /**
   * Maximum number of resources required by this Schedulable. This is defined as number of
   * currently utilized resources + number of unlaunched resources (that are either not yet launched
   * or need to be speculated).
   *
   * @return the current demand
   */
  public abstract Resource getDemand();

  /**
   * Get the aggregate amount of resources consumed by the schedulable.
   *
   * @return the resources currently in use
   */
  public abstract Resource getResourceUsage();

  /**
   * Minimum Resource share assigned to the schedulable.
   *
   * @return the minimum share
   */
  public abstract Resource getMinShare();

  /**
   * Job/queue weight in fair sharing.
   *
   * @return the weight
   */
  public abstract double getWeight();

  /**
   * Start time for jobs in FIFO queues; meaningless for QueueSchedulables.
   *
   * @return the start time
   */
  public abstract long getStartTime();

  /**
   * Job priority for jobs in FIFO queues; meaningless for QueueSchedulables.
   *
   * @return the priority
   */
  public abstract Priority getPriority();

  /** Refresh the Schedulable's demand and those of its children if any. */
  public abstract void updateDemand();

  /**
   * Assign a container on this node if possible, and return the amount of resources assigned. If
   * {@code reserved} is true, it means a reservation already exists on this node, and the
   * schedulable should fulfill that reservation if possible.
   *
   * @param node the node on which a container should be assigned
   * @param reserved whether a reservation already exists on {@code node}
   * @return the amount of resources assigned
   */
  public abstract Resource assignContainer(FSSchedulerNode node, boolean reserved);

  /**
   * Assign a fair share to this Schedulable.
   *
   * @param fairShare the new fair share; the reference is stored as-is, not copied
   */
  public void setFairShare(Resource fairShare) {
    this.fairShare = fairShare;
  }

  /**
   * Get the fair share assigned to this Schedulable.
   *
   * @return the fair share most recently set, or the initial zero-sized resource
   */
  public Resource getFairShare() {
    return fairShare;
  }

  /**
   * Convenient toString implementation for debugging.
   *
   * @return a single bracketed summary of name, demand, usage, fair share and weight
   */
  @Override
  public String toString() {
    // The previous format string contained a stray ",]" after the share field, which produced
    // unbalanced brackets in the output.
    return String.format(
        "[%s, demand=%s, running=%s, share=%s, w=%.1f]",
        getName(), getDemand(), getResourceUsage(), fairShare, getWeight());
  }
}
Пример #4
0
  /**
   * Scripted scenario with two nodes and two applications, checked step by step.
   *
   * <p>The method registers {@code host_0} with {@code 4 * GB} and {@code host_1} with
   * {@code 2 * GB}, submits one application for {@code user_0} and one for {@code user_1}, and
   * then repeatedly adds tasks, calls {@code schedule()} on both applications, sends heartbeats
   * from both nodes, and asserts the expected usage through
   * {@code checkApplicationResourceUsage} and {@code checkNodeResourceUsage}. In the second half
   * the tasks are finished one at a time and the per-application usage is re-checked after each
   * completion. The inline comments name the task each step is expected to affect.
   *
   * <p>NOTE(review): the {@code @Test} annotation directly below is commented out, so a JUnit 4
   * runner would presumably not execute this method. Confirm whether that is intentional.
   *
   * @throws Exception declared so that any failure in the helpers propagates to the caller
   */
  //  @Test
  public void testFifoScheduler() throws Exception {

    LOG.info("--- START: testFifoScheduler ---");

    // Local unit used for all sizes below (presumably megabytes per gigabyte; confirm against
    // Resources.createResource).
    final int GB = 1024;

    // Register node1
    String host_0 = "host_0";
    org.apache.hadoop.yarn.server.resourcemanager.NodeManager nm_0 =
        registerNode(host_0, 1234, 2345, NetworkTopology.DEFAULT_RACK, 4 * GB);
    nm_0.heartbeat();

    // Register node2
    String host_1 = "host_1";
    org.apache.hadoop.yarn.server.resourcemanager.NodeManager nm_1 =
        registerNode(host_1, 1234, 2345, NetworkTopology.DEFAULT_RACK, 2 * GB);
    nm_1.heartbeat();

    // ResourceRequest priorities
    Priority priority_0 = org.apache.hadoop.yarn.server.resourcemanager.resource.Priority.create(0);
    Priority priority_1 = org.apache.hadoop.yarn.server.resourcemanager.resource.Priority.create(1);

    // Submit an application
    Application application_0 = new Application("user_0", resourceManager);
    application_0.submit();

    application_0.addNodeManager(host_0, 1234, nm_0);
    application_0.addNodeManager(host_1, 1234, nm_1);

    // application_0: priority_1 requests are 1 GB, priority_0 requests are 2 GB
    Resource capability_0_0 = Resources.createResource(GB);
    application_0.addResourceRequestSpec(priority_1, capability_0_0);

    Resource capability_0_1 = Resources.createResource(2 * GB);
    application_0.addResourceRequestSpec(priority_0, capability_0_1);

    Task task_0_0 = new Task(application_0, priority_1, new String[] {host_0, host_1});
    application_0.addTask(task_0_0);

    // Submit another application
    Application application_1 = new Application("user_1", resourceManager);
    application_1.submit();

    application_1.addNodeManager(host_0, 1234, nm_0);
    application_1.addNodeManager(host_1, 1234, nm_1);

    // application_1: priority_1 requests are 3 GB, priority_0 requests are 4 GB
    Resource capability_1_0 = Resources.createResource(3 * GB);
    application_1.addResourceRequestSpec(priority_1, capability_1_0);

    Resource capability_1_1 = Resources.createResource(4 * GB);
    application_1.addResourceRequestSpec(priority_0, capability_1_1);

    Task task_1_0 = new Task(application_1, priority_1, new String[] {host_0, host_1});
    application_1.addTask(task_1_0);

    // Send resource requests to the scheduler
    LOG.info("Send resource requests to the scheduler");
    application_0.schedule();
    application_1.schedule();

    // Send a heartbeat to kick the tires on the Scheduler
    LOG.info(
        "Send a heartbeat to kick the tires on the Scheduler... "
            + "nm0 -> task_0_0 and task_1_0 allocated, used=4G "
            + "nm1 -> nothing allocated");
    nm_0.heartbeat(); // task_0_0 and task_1_0 allocated, used=4G
    nm_1.heartbeat(); // nothing allocated

    // Get allocations from the scheduler
    application_0.schedule(); // task_0_0
    checkApplicationResourceUsage(GB, application_0);

    application_1.schedule(); // task_1_0
    checkApplicationResourceUsage(3 * GB, application_1);

    nm_0.heartbeat();
    nm_1.heartbeat();

    checkNodeResourceUsage(4 * GB, nm_0); // task_0_0 (1G) and task_1_0 (3G)
    checkNodeResourceUsage(0 * GB, nm_1); // no tasks, 2G available

    LOG.info("Adding new tasks...");

    // Three more tasks for application_1: two at priority_1 and one at priority_0, all
    // requested for RMNode.ANY rather than for specific hosts.
    Task task_1_1 = new Task(application_1, priority_1, new String[] {RMNode.ANY});
    application_1.addTask(task_1_1);

    Task task_1_2 = new Task(application_1, priority_1, new String[] {RMNode.ANY});
    application_1.addTask(task_1_2);

    Task task_1_3 = new Task(application_1, priority_0, new String[] {RMNode.ANY});
    application_1.addTask(task_1_3);

    application_1.schedule();

    // Three more tasks for application_0: two host-specific at priority_1 and one at
    // priority_0 for RMNode.ANY.
    Task task_0_1 = new Task(application_0, priority_1, new String[] {host_0, host_1});
    application_0.addTask(task_0_1);

    Task task_0_2 = new Task(application_0, priority_1, new String[] {host_0, host_1});
    application_0.addTask(task_0_2);

    Task task_0_3 = new Task(application_0, priority_0, new String[] {RMNode.ANY});
    application_0.addTask(task_0_3);

    application_0.schedule();

    // Send a heartbeat to kick the tires on the Scheduler
    LOG.info("Sending hb from " + nm_0.getHostName());
    nm_0.heartbeat(); // nothing new, used=4G

    LOG.info("Sending hb from " + nm_1.getHostName());
    nm_1.heartbeat(); // task_0_3, used=2G

    // Get allocations from the scheduler
    LOG.info("Trying to allocate...");
    application_0.schedule();
    checkApplicationResourceUsage(3 * GB, application_0);
    application_1.schedule();
    checkApplicationResourceUsage(3 * GB, application_1);
    nm_0.heartbeat();
    nm_1.heartbeat();
    checkNodeResourceUsage(4 * GB, nm_0);
    checkNodeResourceUsage(2 * GB, nm_1);

    // Complete tasks
    LOG.info("Finishing up task_0_0");
    application_0.finishTask(task_0_0); // Now task_0_1
    application_0.schedule();
    application_1.schedule();
    nm_0.heartbeat();
    nm_1.heartbeat();
    checkApplicationResourceUsage(3 * GB, application_0);
    checkApplicationResourceUsage(3 * GB, application_1);
    checkNodeResourceUsage(4 * GB, nm_0);
    checkNodeResourceUsage(2 * GB, nm_1);

    LOG.info("Finishing up task_1_0");
    application_1.finishTask(task_1_0); // Now task_0_2
    application_0.schedule(); // final overcommit for app0 caused here
    application_1.schedule();
    nm_0.heartbeat(); // final overcommit for app0 occurs here
    nm_1.heartbeat();
    checkApplicationResourceUsage(4 * GB, application_0);
    checkApplicationResourceUsage(0 * GB, application_1);
    // NOTE(review): the nm_0 check is disabled here; per the trailing remark, the resource
    // manager's view of the node and the test's view of it disagree after the over-commit.
    // checkNodeResourceUsage(1*GB, nm_0);  // final over-commit -> rm.node->1G, test.node=2G
    checkNodeResourceUsage(2 * GB, nm_1);

    LOG.info("Finishing up task_0_3");
    application_0.finishTask(task_0_3); // No more
    application_0.schedule();
    application_1.schedule();
    nm_0.heartbeat();
    nm_1.heartbeat();
    checkApplicationResourceUsage(2 * GB, application_0);
    checkApplicationResourceUsage(0 * GB, application_1);
    // checkNodeResourceUsage(2*GB, nm_0);  // final over-commit, rm.node->1G, test.node->2G
    checkNodeResourceUsage(0 * GB, nm_1);

    // From here on only per-application usage is asserted; node usage is no longer checked.
    LOG.info("Finishing up task_0_1");
    application_0.finishTask(task_0_1);
    application_0.schedule();
    application_1.schedule();
    nm_0.heartbeat();
    nm_1.heartbeat();
    checkApplicationResourceUsage(1 * GB, application_0);
    checkApplicationResourceUsage(0 * GB, application_1);

    LOG.info("Finishing up task_0_2");
    application_0.finishTask(task_0_2); // now task_1_3 can go!
    application_0.schedule();
    application_1.schedule();
    nm_0.heartbeat();
    nm_1.heartbeat();
    checkApplicationResourceUsage(0 * GB, application_0);
    checkApplicationResourceUsage(4 * GB, application_1);

    LOG.info("Finishing up task_1_3");
    application_1.finishTask(task_1_3); // now task_1_1
    application_0.schedule();
    application_1.schedule();
    nm_0.heartbeat();
    nm_1.heartbeat();
    checkApplicationResourceUsage(0 * GB, application_0);
    checkApplicationResourceUsage(3 * GB, application_1);

    // application_1 is still expected at 3 GB after task_1_1 finishes (presumably because
    // task_1_2 takes its place; confirm). task_1_2 itself is never finished in this test.
    LOG.info("Finishing up task_1_1");
    application_1.finishTask(task_1_1);
    application_0.schedule();
    application_1.schedule();
    nm_0.heartbeat();
    nm_1.heartbeat();
    checkApplicationResourceUsage(0 * GB, application_0);
    checkApplicationResourceUsage(3 * GB, application_1);

    LOG.info("--- END: testFifoScheduler ---");
  }