Example #1
1
 /**
  * JobTracker.submitJob() kicks off a new job.
  *
  * <p>Create a 'JobInProgress' object, which contains both JobProfile and JobStatus. Those two
  * sub-objects are sometimes shipped outside of the JobTracker. But JobInProgress adds info that's
  * useful for the JobTracker alone.
  *
  * <p>We add the JIP to the jobInitQueue, which is processed asynchronously to handle
  * split-computation and build up the right TaskTracker/Block mapping.
  */
 public synchronized JobStatus submitJob(String jobFile) throws IOException {
   totalSubmissions++;
   JobInProgress job = new JobInProgress(jobFile, this, this.conf);
   synchronized (jobs) {
     synchronized (jobsByArrival) {
       synchronized (jobInitQueue) {
         jobs.put(job.getProfile().getJobId(), job);
         jobsByArrival.add(job);
         jobInitQueue.add(job);
         jobInitQueue.notifyAll();
       }
     }
   }
   return job.getStatus();
 }
Example #2
1
  /**
   * We lost the task tracker! All task-tracker structures have already been updated. Just process
   * the contained tasks and any jobs that might be affected.
   */
  void lostTaskTracker(String trackerName) {
    LOG.info("Lost tracker '" + trackerName + "'");
    TreeSet lostTasks = (TreeSet) trackerToTaskMap.get(trackerName);
    trackerToTaskMap.remove(trackerName);

    if (lostTasks != null) {
      for (Iterator it = lostTasks.iterator(); it.hasNext(); ) {
        String taskId = (String) it.next();
        TaskInProgress tip = (TaskInProgress) taskidToTIPMap.get(taskId);

        // Tell the job to fail the relevant task
        JobInProgress job = tip.getJob();
        job.failedTask(tip, taskId, trackerName);
      }
    }
  }
Example #3
0
    /**
     * The run method lives for the life of the JobTracker, and removes Jobs that are not still
     * running, but which finished a long time ago.
     */
    public void run() {
      while (shouldRun) {
        try {
          Thread.sleep(RETIRE_JOB_CHECK_INTERVAL);
        } catch (InterruptedException ie) {
        }

        synchronized (jobs) {
          synchronized (jobInitQueue) {
            synchronized (jobsByArrival) {
              for (Iterator it = jobs.keySet().iterator(); it.hasNext(); ) {
                String jobid = (String) it.next();
                JobInProgress job = (JobInProgress) jobs.get(jobid);

                if (job.getStatus().getRunState() != JobStatus.RUNNING
                    && job.getStatus().getRunState() != JobStatus.PREP
                    && (job.getFinishTime() + RETIRE_JOB_INTERVAL < System.currentTimeMillis())) {
                  it.remove();

                  jobInitQueue.remove(job);
                  jobsByArrival.remove(job);
                }
              }
            }
          }
        }
      }
    }
Example #4
0
 public synchronized JobStatus getJobStatus(String jobid) {
   JobInProgress job = (JobInProgress) jobs.get(jobid);
   if (job != null) {
     return job.getStatus();
   } else {
     return null;
   }
 }
Example #5
0
 public synchronized JobProfile getJobProfile(String jobid) {
   JobInProgress job = (JobInProgress) jobs.get(jobid);
   if (job != null) {
     return job.getProfile();
   } else {
     return null;
   }
 }
  private void printFailures(
      JspWriter out, JobTracker tracker, JobID jobId, String kind, String cause)
      throws IOException {
    JobInProgress job = (JobInProgress) tracker.getJob(jobId);
    if (job == null) {
      out.print("<b>Job " + jobId + " not found.</b><br>\n");
      return;
    }

    boolean includeMap = false;
    boolean includeReduce = false;
    if (kind == null) {
      includeMap = true;
      includeReduce = true;
    } else if ("map".equals(kind)) {
      includeMap = true;
    } else if ("reduce".equals(kind)) {
      includeReduce = true;
    } else if ("all".equals(kind)) {
      includeMap = true;
      includeReduce = true;
    } else {
      out.print("<b>Kind " + kind + " not supported.</b><br>\n");
      return;
    }

    TaskStatus.State state = null;
    try {
      if (cause != null) {
        state = TaskStatus.State.valueOf(cause.toUpperCase());
        if (state != TaskStatus.State.FAILED && state != TaskStatus.State.KILLED) {
          out.print("<b>Cause '" + cause + "' is not an 'unsuccessful' state.</b><br>\n");
          return;
        }
      }
    } catch (IllegalArgumentException e) {
      out.print("<b>Cause '" + cause + "' not supported.</b><br>\n");
      return;
    }

    out.print("<table border=2 cellpadding=\"5\" cellspacing=\"2\">");
    out.print(
        "<tr><th>Attempt</th><th>Task</th><th>Machine</th><th>State</th>"
            + "<th>Error</th><th>Logs</th></tr>\n");
    if (includeMap) {
      TaskInProgress[] tips = job.getTasks(TaskType.MAP);
      for (int i = 0; i < tips.length; ++i) {
        printFailedAttempts(out, tracker, jobId, tips[i], state);
      }
    }
    if (includeReduce) {
      TaskInProgress[] tips = job.getTasks(TaskType.REDUCE);
      for (int i = 0; i < tips.length; ++i) {
        printFailedAttempts(out, tracker, jobId, tips[i], state);
      }
    }
    out.print("</table>\n");
  }
Example #7
0
 public Vector completedJobs() {
   Vector v = new Vector();
   for (Iterator it = jobs.values().iterator(); it.hasNext(); ) {
     JobInProgress jip = (JobInProgress) it.next();
     JobStatus status = jip.getStatus();
     if (status.getRunState() == JobStatus.SUCCEEDED) {
       v.add(jip);
     }
   }
   return v;
 }
Example #8
0
 public Vector runningJobs() {
   Vector v = new Vector();
   for (Iterator it = jobs.values().iterator(); it.hasNext(); ) {
     JobInProgress jip = (JobInProgress) it.next();
     JobStatus status = jip.getStatus();
     if (status.getRunState() == JobStatus.RUNNING) {
       v.add(jip);
     }
   }
   return v;
 }
Example #9
0
 public synchronized TaskReport[] getReduceTaskReports(String jobid) {
   JobInProgress job = (JobInProgress) jobs.get(jobid);
   if (job == null) {
     return new TaskReport[0];
   } else {
     Vector reports = new Vector();
     Vector completeReduceTasks = job.reportTasksInProgress(false, true);
     for (Iterator it = completeReduceTasks.iterator(); it.hasNext(); ) {
       TaskInProgress tip = (TaskInProgress) it.next();
       reports.add(tip.generateSingleReport());
     }
     Vector incompleteReduceTasks = job.reportTasksInProgress(false, false);
     for (Iterator it = incompleteReduceTasks.iterator(); it.hasNext(); ) {
       TaskInProgress tip = (TaskInProgress) it.next();
       reports.add(tip.generateSingleReport());
     }
     return (TaskReport[]) reports.toArray(new TaskReport[reports.size()]);
   }
 }
Example #10
0
  /**
   * Accept and process a new TaskTracker profile. We might have known about the TaskTracker
   * previously, or it might be brand-new. All task-tracker structures have already been updated.
   * Just process the contained tasks and any jobs that might be affected.
   */
  void updateTaskStatuses(TaskTrackerStatus status) {
    for (Iterator it = status.taskReports(); it.hasNext(); ) {
      TaskStatus report = (TaskStatus) it.next();
      TaskInProgress tip = (TaskInProgress) taskidToTIPMap.get(report.getTaskId());
      if (tip == null) {
        LOG.info(
            "Serious problem.  While updating status, cannot find taskid " + report.getTaskId());
      } else {
        JobInProgress job = tip.getJob();
        job.updateTaskStatus(tip, report);

        if (report.getRunState() == TaskStatus.SUCCEEDED) {
          job.completedTask(tip, report.getTaskId());
        } else if (report.getRunState() == TaskStatus.FAILED) {
          // Tell the job to fail the relevant task
          job.failedTask(tip, report.getTaskId(), status.getTrackerName());
        }
      }
    }
  }
Example #11
0
 public void run() {
   while (shouldRun) {
     JobInProgress job = null;
     synchronized (jobInitQueue) {
       if (jobInitQueue.size() > 0) {
         job = (JobInProgress) jobInitQueue.elementAt(0);
         jobInitQueue.remove(job);
       } else {
         try {
           jobInitQueue.wait(JOBINIT_SLEEP_INTERVAL);
         } catch (InterruptedException iex) {
         }
       }
     }
     try {
       if (job != null) {
         job.initTasks();
       }
     } catch (Exception e) {
       LOG.log(Level.WARNING, "job init failed", e);
       job.kill();
     }
   }
 }
  public void _jspService(HttpServletRequest request, HttpServletResponse response)
      throws java.io.IOException, ServletException {

    PageContext pageContext = null;
    HttpSession session = null;
    ServletContext application = null;
    ServletConfig config = null;
    JspWriter out = null;
    Object page = this;
    JspWriter _jspx_out = null;
    PageContext _jspx_page_context = null;

    try {
      response.setContentType("text/html; charset=UTF-8");
      pageContext = _jspxFactory.getPageContext(this, request, response, null, true, 8192, true);
      _jspx_page_context = pageContext;
      application = pageContext.getServletContext();
      config = pageContext.getServletConfig();
      session = pageContext.getSession();
      out = pageContext.getOut();
      _jspx_out = out;
      _jspx_resourceInjector =
          (org.apache.jasper.runtime.ResourceInjector)
              application.getAttribute("com.sun.appserv.jsp.resource.injector");

      out.write('\n');

      JobTracker tracker = (JobTracker) application.getAttribute("job.tracker");
      ClusterStatus status = tracker.getClusterStatus();
      String trackerName = StringUtils.simpleHostname(tracker.getJobTrackerMachine());

      out.write("\n<html>\n<head>\n<title>");
      out.print(trackerName);
      out.write(
          " Hadoop Locality Statistics</title>\n<link rel=\"stylesheet\" type=\"text/css\" href=\"/static/hadoop.css\">\n</head>\n<body>\n<h1>");
      out.print(trackerName);
      out.write(" Hadoop Locality Statistics</h1>\n\n<b>State:</b> ");
      out.print(status.getJobTrackerState());
      out.write("<br>\n<b>Started:</b> ");
      out.print(new Date(tracker.getStartTime()));
      out.write("<br>\n<b>Version:</b> ");
      out.print(VersionInfo.getVersion());
      out.write(",\n                r");
      out.print(VersionInfo.getRevision());
      out.write("<br>\n<b>Compiled:</b> ");
      out.print(VersionInfo.getDate());
      out.write(" by\n                 ");
      out.print(VersionInfo.getUser());
      out.write("<br>\n<b>Identifier:</b> ");
      out.print(tracker.getTrackerIdentifier());
      out.write("<br>\n\n<hr>\n\n");

      Collection<JobInProgress> jobs = new ArrayList<JobInProgress>();
      jobs.addAll(tracker.completedJobs());
      jobs.addAll(tracker.runningJobs());
      jobs.addAll(tracker.failedJobs());
      int dataLocalMaps = 0;
      int rackLocalMaps = 0;
      int totalMaps = 0;
      int totalReduces = 0;
      for (JobInProgress job : jobs) {
        Counters counters = job.getCounters();
        dataLocalMaps += counters.getCounter(JobInProgress.Counter.DATA_LOCAL_MAPS);
        rackLocalMaps += counters.getCounter(JobInProgress.Counter.RACK_LOCAL_MAPS);
        totalMaps += counters.getCounter(JobInProgress.Counter.TOTAL_LAUNCHED_MAPS);
        totalReduces += counters.getCounter(JobInProgress.Counter.TOTAL_LAUNCHED_REDUCES);
      }
      int dataLocalMapPct = totalMaps == 0 ? 0 : (100 * dataLocalMaps) / totalMaps;
      int rackLocalMapPct = totalMaps == 0 ? 0 : (100 * rackLocalMaps) / totalMaps;
      int dataRackLocalMapPct =
          totalMaps == 0 ? 0 : (100 * (dataLocalMaps + rackLocalMaps)) / totalMaps;

      out.write("\n<p>\n<b>Data Local Maps:</b> ");
      out.print(dataLocalMaps);
      out.write(' ');
      out.write('(');
      out.print(dataLocalMapPct);
      out.write("%) <br>\n<b>Rack Local Maps:</b> ");
      out.print(rackLocalMaps);
      out.write(' ');
      out.write('(');
      out.print(rackLocalMapPct);
      out.write("%) <br>\n<b>Data or Rack Local:</b> ");
      out.print(dataLocalMaps + rackLocalMaps);
      out.write(' ');
      out.write('(');
      out.print(dataRackLocalMapPct);
      out.write("%) <br>\n<b>Total Maps:</b> ");
      out.print(totalMaps);
      out.write(" <br>\n<b>Total Reduces:</b> ");
      out.print(totalReduces);
      out.write(" <br>\n</p>\n\n");

      out.println(ServletUtil.htmlFooter());

      out.write('\n');
    } catch (Throwable t) {
      if (!(t instanceof SkipPageException)) {
        out = _jspx_out;
        if (out != null && out.getBufferSize() != 0) out.clearBuffer();
        if (_jspx_page_context != null) _jspx_page_context.handlePageException(t);
      }
    } finally {
      _jspxFactory.releasePageContext(_jspx_page_context);
    }
  }
Example #13
0
 public synchronized void killJob(String jobid) {
   JobInProgress job = (JobInProgress) jobs.get(jobid);
   job.kill();
 }
Example #14
0
  /**
   * A tracker wants to know if there's a Task to run. Returns a task we'd like the TaskTracker to
   * execute right now.
   *
   * <p>Eventually this function should compute load on the various TaskTrackers, and incorporate
   * knowledge of DFS file placement. But for right now, it just grabs a single item out of the
   * pending task list and hands it back.
   */
  public synchronized Task pollForNewTask(String taskTracker) {
    //
    // Compute average map and reduce task numbers across pool
    //
    int avgMaps = 0;
    int avgReduces = 0;
    int numTaskTrackers;
    TaskTrackerStatus tts;
    synchronized (taskTrackers) {
      numTaskTrackers = taskTrackers.size();
      tts = (TaskTrackerStatus) taskTrackers.get(taskTracker);
    }
    if (numTaskTrackers > 0) {
      avgMaps = totalMaps / numTaskTrackers;
      avgReduces = totalReduces / numTaskTrackers;
    }
    int totalCapacity = numTaskTrackers * maxCurrentTasks;
    //
    // Get map + reduce counts for the current tracker.
    //
    if (tts == null) {
      LOG.warning("Unknown task tracker polling; ignoring: " + taskTracker);
      return null;
    }

    int numMaps = tts.countMapTasks();
    int numReduces = tts.countReduceTasks();

    //
    // In the below steps, we allocate first a map task (if appropriate),
    // and then a reduce task if appropriate.  We go through all jobs
    // in order of job arrival; jobs only get serviced if their
    // predecessors are serviced, too.
    //

    //
    // We hand a task to the current taskTracker if the given machine
    // has a workload that's equal to or less than the averageMaps
    // +/- TASK_ALLOC_EPSILON.  (That epsilon is in place in case
    // there is an odd machine that is failing for some reason but
    // has not yet been removed from the pool, making capacity seem
    // larger than it really is.)
    //
    synchronized (jobsByArrival) {
      if ((numMaps < maxCurrentTasks) && (numMaps <= (avgMaps + TASK_ALLOC_EPSILON))) {

        int totalNeededMaps = 0;
        for (Iterator it = jobsByArrival.iterator(); it.hasNext(); ) {
          JobInProgress job = (JobInProgress) it.next();
          if (job.getStatus().getRunState() != JobStatus.RUNNING) {
            continue;
          }

          Task t = job.obtainNewMapTask(taskTracker, tts);
          if (t != null) {
            return t;
          }

          //
          // Beyond the highest-priority task, reserve a little
          // room for failures and speculative executions; don't
          // schedule tasks to the hilt.
          //
          totalNeededMaps += job.desiredMaps();
          double padding = 0;
          if (totalCapacity > MIN_SLOTS_FOR_PADDING) {
            padding = Math.min(maxCurrentTasks, totalNeededMaps * PAD_FRACTION);
          }
          if (totalNeededMaps + padding >= totalCapacity) {
            break;
          }
        }
      }

      //
      // Same thing, but for reduce tasks
      //
      if ((numReduces < maxCurrentTasks) && (numReduces <= (avgReduces + TASK_ALLOC_EPSILON))) {

        int totalNeededReduces = 0;
        for (Iterator it = jobsByArrival.iterator(); it.hasNext(); ) {
          JobInProgress job = (JobInProgress) it.next();
          if (job.getStatus().getRunState() != JobStatus.RUNNING) {
            continue;
          }

          Task t = job.obtainNewReduceTask(taskTracker, tts);
          if (t != null) {
            return t;
          }

          //
          // Beyond the highest-priority task, reserve a little
          // room for failures and speculative executions; don't
          // schedule tasks to the hilt.
          //
          totalNeededReduces += job.desiredReduces();
          double padding = 0;
          if (totalCapacity > MIN_SLOTS_FOR_PADDING) {
            padding = Math.min(maxCurrentTasks, totalNeededReduces * PAD_FRACTION);
          }
          if (totalNeededReduces + padding >= totalCapacity) {
            break;
          }
        }
      }
    }
    return null;
  }