public void testActionCheck() throws Exception {
    JPAService jpaService = Services.get().get(JPAService.class);
    WorkflowJobBean job =
        this.addRecordToWfJobTable(WorkflowJob.Status.RUNNING, WorkflowInstance.Status.RUNNING);
    WorkflowActionBean action =
        this.addRecordToWfActionTable(job.getId(), "1", WorkflowAction.Status.PREP);
    WorkflowActionGetJPAExecutor wfActionGetCmd = new WorkflowActionGetJPAExecutor(action.getId());

    new ActionStartXCommand(action.getId(), "map-reduce").call();
    action = jpaService.execute(wfActionGetCmd);

    ActionExecutorContext context =
        new ActionXCommand.ActionExecutorContext(job, action, false, false);
    MapReduceActionExecutor actionExecutor = new MapReduceActionExecutor();
    JobConf conf =
        actionExecutor.createBaseHadoopConf(context, XmlUtils.parseXml(action.getConf()));
    String user = conf.get("user.name");
    JobClient jobClient =
        Services.get().get(HadoopAccessorService.class).createJobClient(user, conf);

    String launcherId = action.getExternalId();

    final RunningJob launcherJob = jobClient.getJob(JobID.forName(launcherId));

    waitFor(
        120 * 1000,
        new Predicate() {
          public boolean evaluate() throws Exception {
            return launcherJob.isComplete();
          }
        });
    assertTrue(launcherJob.isSuccessful());
    Map<String, String> actionData =
        LauncherMapperHelper.getActionData(getFileSystem(), context.getActionDir(), conf);
    assertTrue(LauncherMapperHelper.hasIdSwap(actionData));

    new ActionCheckXCommand(action.getId()).call();
    action = jpaService.execute(wfActionGetCmd);
    String mapperId = action.getExternalId();
    String childId = action.getExternalChildIDs();

    assertTrue(launcherId.equals(mapperId));

    final RunningJob mrJob = jobClient.getJob(JobID.forName(childId));

    waitFor(
        120 * 1000,
        new Predicate() {
          public boolean evaluate() throws Exception {
            return mrJob.isComplete();
          }
        });
    assertTrue(mrJob.isSuccessful());

    new ActionCheckXCommand(action.getId()).call();
    action = jpaService.execute(wfActionGetCmd);

    assertEquals("SUCCEEDED", action.getExternalStatus());
  }
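The two wait loops above go through Oozie's waitFor/Predicate test helpers; underneath, the Hadoop side is only a lookup of the job by its external id string and a poll on RunningJob.isComplete(). A minimal, self-contained sketch of that polling pattern (the class, method name, and timeout handling here are illustrative, not part of the test):

import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobID;
import org.apache.hadoop.mapred.RunningJob;

public class WaitForJobSketch {
  // Looks the job up by its external id string and polls until it completes or the timeout expires.
  public static boolean waitForJob(JobClient jobClient, String jobIdString, long timeoutMs)
      throws Exception {
    RunningJob job = jobClient.getJob(JobID.forName(jobIdString));
    if (job == null) {
      return false; // the cluster does not know this job id
    }
    long deadline = System.currentTimeMillis() + timeoutMs;
    while (!job.isComplete() && System.currentTimeMillis() < deadline) {
      Thread.sleep(1000); // poll once per second
    }
    return job.isComplete() && job.isSuccessful();
  }
}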
Example #2
 /**
  * Clears all the logs in the userlog directory.
  *
  * <p>Adds the job directories for deletion with default retain hours. Deletes all other
  * directories, if any. This is usually called on reinit/restart of the TaskTracker.
  *
  * @param conf the configuration to use
  * @throws IOException
  */
 void clearOldUserLogs(Configuration conf) throws IOException {
   File userLogDir = TaskLog.getUserLogDir();
   if (userLogDir.exists()) {
     String[] logDirs = userLogDir.list();
     if (logDirs.length > 0) {
       // add all the log dirs to taskLogsMonitor.
       long now = clock.getTime();
       for (String logDir : logDirs) {
         if (logDir.equals(logAsyncDisk.TOBEDELETED)) {
           // skip this
           continue;
         }
         JobID jobid = null;
         try {
           jobid = JobID.forName(logDir);
         } catch (IllegalArgumentException ie) {
           // if the directory is not a jobid, delete it immediately
           deleteLogPath(new File(userLogDir, logDir).getAbsolutePath());
           continue;
         }
         // add the job log directory with default retain hours, if it is not
         // already added
         if (!completedJobs.containsKey(jobid)) {
           markJobLogsForDeletion(now, conf, jobid);
         }
       }
     }
   }
 }
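The try/catch around JobID.forName is what filters real job log directories from everything else: a name in the job_<jobtracker-id>_<sequence> form parses, anything else throws IllegalArgumentException and is deleted immediately. A small standalone sketch of that behaviour (the directory names are made up):

import org.apache.hadoop.mapred.JobID;

public class JobDirFilterSketch {
  public static void main(String[] args) {
    String[] dirNames = {"job_201301011200_0001", "toBeDeleted", "some-other-dir"};
    for (String name : dirNames) {
      try {
        JobID jobid = JobID.forName(name); // parses "job_<jobtracker id>_<sequence number>"
        System.out.println(name + " -> job " + jobid + ", kept for retain-hours handling");
      } catch (IllegalArgumentException e) {
        System.out.println(name + " -> not a job id, deleted immediately");
      }
    }
  }
}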
  /**
   * Logs job killed event. Closes the job history log file.
   *
   * @param timestamp time when the job kill was issued, in ms.
   * @param finishedMaps no of finished map tasks.
   * @param finishedReduces no of finished reduce tasks.
   * @param counters the job counters at the time the job was killed.
   */
  public void logKilled(long timestamp, int finishedMaps, int finishedReduces, Counters counters) {
    if (disableHistory) {
      return;
    }

    if (null != writers) {
      log(
          writers,
          RecordTypes.Job,
          new Keys[] {
            Keys.JOBID,
            Keys.FINISH_TIME,
            Keys.JOB_STATUS,
            Keys.FINISHED_MAPS,
            Keys.FINISHED_REDUCES,
            Keys.COUNTERS
          },
          new String[] {
            jobId.toString(),
            String.valueOf(timestamp),
            Values.KILLED.name(),
            String.valueOf(finishedMaps),
            String.valueOf(finishedReduces),
            counters.makeEscapedCompactString()
          },
          true);
      closeAndClear(writers);
    }
  }
  public void getTaskAttempt(String job, String taskid) {
    // -logs
    try {
      JobID jobid = JobID.forName(job);
      TaskAttemptID taskAttemptID = TaskAttemptID.forName(taskid);

      StringBuilder sb = new StringBuilder();

      LogParams logParams = client.getLogFileParams(jobid, taskAttemptID);
      LogCLIHelpers logDumper = new LogCLIHelpers();
      logDumper.setConf(conf);

      sb.append(logParams.getApplicationId());
      sb.append("|" + logParams.getContainerId());
      sb.append("|" + logParams.getNodeId());
      sb.append("|" + logParams.getOwner());
      System.out.println(sb.toString());

      logDumper.dumpAContainersLogs(
          logParams.getApplicationId(),
          logParams.getContainerId(),
          logParams.getNodeId(),
          logParams.getOwner());
    } catch (Exception e) {
      System.out.println(e);
    }
  }
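Both forName calls above expect the standard textual id forms. A tiny sketch of those formats (the timestamps and sequence numbers are invented):

import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.TaskAttemptID;

public class IdFormatSketch {
  public static void main(String[] args) {
    // job ids look like "job_<cluster timestamp>_<job sequence number>"
    JobID jobid = JobID.forName("job_1388534400000_0001");
    // task attempt ids add the task type (m/r), task number and attempt number
    TaskAttemptID attemptId = TaskAttemptID.forName("attempt_1388534400000_0001_m_000000_0");
    System.out.println(jobid + " / " + attemptId);
  }
}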
  private RunningJob submitAction(Context context, Namespace ns) throws Exception {
    Hive2ActionExecutor ae = new Hive2ActionExecutor();

    WorkflowAction action = context.getAction();

    ae.prepareActionDir(getFileSystem(), context);
    ae.submitLauncher(getFileSystem(), context, action);

    String jobId = action.getExternalId();
    String jobTracker = action.getTrackerUri();
    String consoleUrl = action.getConsoleUrl();
    assertNotNull(jobId);
    assertNotNull(jobTracker);
    assertNotNull(consoleUrl);
    Element e = XmlUtils.parseXml(action.getConf());
    XConfiguration conf =
        new XConfiguration(
            new StringReader(XmlUtils.prettyPrint(e.getChild("configuration", ns)).toString()));
    conf.set("mapred.job.tracker", e.getChildTextTrim("job-tracker", ns));
    conf.set("fs.default.name", e.getChildTextTrim("name-node", ns));
    conf.set("user.name", context.getProtoActionConf().get("user.name"));
    conf.set("group.name", getTestGroup());

    JobConf jobConf = Services.get().get(HadoopAccessorService.class).createJobConf(jobTracker);
    XConfiguration.copy(conf, jobConf);
    String user = jobConf.get("user.name");
    JobClient jobClient =
        Services.get().get(HadoopAccessorService.class).createJobClient(user, jobConf);
    final RunningJob runningJob = jobClient.getJob(JobID.forName(jobId));
    assertNotNull(runningJob);
    return runningJob;
  }
  public void getTaskDiagnostics(String job) {

    try {
      StringBuilder sb = new StringBuilder();

      JobID jobid = JobID.forName(job);
      JobStatus status = client.getJobStatus(jobid);
      System.out.println(status.getTrackingUrl());
      System.out.println(status.getJobFile());
      System.out.println(status.getHistoryFile());
      System.out.println(status.getQueue());
      System.out.println(status.toString());

      System.out.println("#### QueueAclsInfo ####");
      QueueAclsInfo[] qais = client.getQueueAclsForCurrentUser();
      for (int i = 0; i < qais.length; i++) {
        QueueAclsInfo qai = qais[i];
        System.out.println(qai.getQueueName().toString());
        String[] ops = qai.getOperations();
        for (int j = 0; j < ops.length; j++) {
          System.out.println(ops[j]);
        }
      }

      System.out.println("#### QueueInfo ####");
      QueueInfo[] qis = client.getRootQueues();
      for (int i = 0; i < qis.length; i++) {
        QueueInfo qi = qis[i];
        System.out.println(qi.getQueueName());
        System.out.println(qi.getSchedulingInfo());
        System.out.println(qi.getState());
        System.out.println(qi.getProperties());
      }

      int mapnum = client.getTaskReports(jobid, TaskType.MAP).length;
      int rednum = client.getTaskReports(jobid, TaskType.REDUCE).length;
      float progress = status.getMapProgress();
      sb.append(
          String.format(
              "m %4d %6s",
              mapnum, progress == 1 ? "100%" : String.format("%.2f%%", progress * 100)));
      sb.append("|");
      progress = status.getReduceProgress();
      sb.append(
          String.format(
              "r %4d %6s",
              rednum, progress == 1 ? "100%" : String.format("%.2f%%", progress * 100)));

      System.out.println(sb.toString());
      printTaskAttempt(client.getTaskReports(jobid, TaskType.MAP));
      printTaskAttempt(client.getTaskReports(jobid, TaskType.REDUCE));
      System.out.println(sb.toString());

    } catch (Exception e) {
      System.out.println(e);
    }
  }
Example #7
 /** Get the events list at the tasktracker */
 public MapTaskCompletionEventsUpdate getMapTaskCompletionEventsUpdates(
     int index, JobID jobId, int max) throws IOException {
   String jtId = jobTracker.getJobTracker().getTrackerIdentifier();
   TaskAttemptID dummy = new TaskAttemptID(jtId, jobId.getId(), false, 0, 0);
   return taskTrackerList
       .get(index)
       .getTaskTracker()
       .getMapCompletionEvents(jobId, 0, max, dummy, null);
 }
 private File localizeJob(JobID jobid) throws IOException {
   String user = UserGroupInformation.getCurrentUser().getShortUserName();
   new JobLocalizer(tt.getJobConf(), user, jobid.toString()).initializeJobLogDir();
   File jobUserlog = TaskLog.getJobDir(jobid);
   JobConf conf = new JobConf();
   // localize job log directory
   tt.saveLogDir(jobid, conf);
   assertTrue(jobUserlog + " directory is not created.", jobUserlog.exists());
   return jobUserlog;
 }
  /**
   * Logs the job finished event. Closes the job file in history.
   *
   * @param finishTime finish time of job in ms.
   * @param finishedMaps no of maps successfully finished.
   * @param finishedReduces no of reduces finished successfully.
   * @param failedMaps no of failed map tasks. (includes killed)
   * @param failedReduces no of failed reduce tasks. (includes killed)
   * @param killedMaps no of killed map tasks.
   * @param killedReduces no of killed reduce tasks.
   * @param mapCounters the map counters from the job.
   * @param reduceCounters the reduce counters from the job.
   * @param counters the overall counters from the job.
   */
  public void logFinished(
      long finishTime,
      int finishedMaps,
      int finishedReduces,
      int failedMaps,
      int failedReduces,
      int killedMaps,
      int killedReduces,
      Counters mapCounters,
      Counters reduceCounters,
      Counters counters) {
    if (disableHistory) {
      return;
    }

    if (null != writers) {
      log(
          writers,
          RecordTypes.Job,
          new Keys[] {
            Keys.JOBID,
            Keys.FINISH_TIME,
            Keys.JOB_STATUS,
            Keys.FINISHED_MAPS,
            Keys.FINISHED_REDUCES,
            Keys.FAILED_MAPS,
            Keys.FAILED_REDUCES,
            Keys.KILLED_MAPS,
            Keys.KILLED_REDUCES,
            Keys.MAP_COUNTERS,
            Keys.REDUCE_COUNTERS,
            Keys.COUNTERS
          },
          new String[] {
            jobId.toString(),
            Long.toString(finishTime),
            Values.SUCCESS.name(),
            String.valueOf(finishedMaps),
            String.valueOf(finishedReduces),
            String.valueOf(failedMaps),
            String.valueOf(failedReduces),
            String.valueOf(killedMaps),
            String.valueOf(killedReduces),
            mapCounters.makeEscapedCompactString(),
            reduceCounters.makeEscapedCompactString(),
            counters.makeEscapedCompactString()
          },
          true);

      closeAndClear(writers);
    }

    // NOTE: history cleaning stuff deleted from here. We should do that
    // somewhere else!
  }
  /** Logs job as running */
  public void logStarted() {
    if (disableHistory) {
      return;
    }

    if (null != writers) {
      log(
          writers,
          RecordTypes.Job,
          new Keys[] {Keys.JOBID, Keys.JOB_STATUS},
          new String[] {jobId.toString(), Values.RUNNING.name()});
    }
  }
  /**
   * Logs the job's priority.
   *
   * @param jobid the id of the job.
   * @param priority the job's priority.
   */
  public void logJobPriority(JobID jobid, JobPriority priority) {
    if (disableHistory) {
      return;
    }

    if (null != writers) {
      log(
          writers,
          RecordTypes.Job,
          new Keys[] {Keys.JOBID, Keys.JOB_PRIORITY},
          new String[] {jobId.toString(), priority.toString()});
    }
  }
Example #12
 public static JobStatus downgrade(org.apache.hadoop.mapreduce.JobStatus stat) {
   JobStatus old =
       new JobStatus(
           JobID.downgrade(stat.getJobID()),
           stat.getSetupProgress(),
           stat.getMapProgress(),
           stat.getReduceProgress(),
           stat.getCleanupProgress(),
           stat.getState().getValue(),
           JobPriority.valueOf(stat.getPriority().name()),
           stat.getUsername(),
           stat.getJobName(),
           stat.getJobFile(),
           stat.getTrackingUrl());
   old.setStartTime(stat.getStartTime());
   old.setFinishTime(stat.getFinishTime());
   old.setSchedulingInfo(stat.getSchedulingInfo());
   old.setHistoryFile(stat.getHistoryFile());
   return old;
 }
  /**
   * Logs launch time of job.
   *
   * @param startTime start time of job.
   * @param totalMaps total maps assigned by jobtracker.
   * @param totalReduces total reduces.
   */
  public void logInited(long startTime, int totalMaps, int totalReduces) {
    if (disableHistory) {
      return;
    }

    if (null != writers) {
      log(
          writers,
          RecordTypes.Job,
          new Keys[] {
            Keys.JOBID, Keys.LAUNCH_TIME, Keys.TOTAL_MAPS, Keys.TOTAL_REDUCES, Keys.JOB_STATUS
          },
          new String[] {
            jobId.toString(),
            String.valueOf(startTime),
            String.valueOf(totalMaps),
            String.valueOf(totalReduces),
            Values.PREP.name()
          });
    }
  }
Example #14
  /** Test if {@link CurrentJHParser} can read events from current JH files. */
  @Test
  public void testCurrentJHParser() throws Exception {
    final Configuration conf = new Configuration();
    final FileSystem lfs = FileSystem.getLocal(conf);

    final Path rootTempDir =
        new Path(System.getProperty("test.build.data", "/tmp")).makeQualified(lfs);

    final Path tempDir = new Path(rootTempDir, "TestCurrentJHParser");
    lfs.delete(tempDir, true);

    String queueName = "testQueue";
    // Run a MR job
    // create a MR cluster
    conf.setInt("mapred.tasktracker.map.tasks.maximum", 1);
    conf.setInt("mapred.tasktracker.reduce.tasks.maximum", 1);
    conf.set("mapred.queue.names", queueName);
    MiniMRCluster mrCluster = new MiniMRCluster(1, "file:///", 1, null, null, new JobConf(conf));

    // run a job
    Path inDir = new Path(tempDir, "input");
    Path outDir = new Path(tempDir, "output");
    JobHistoryParser parser = null;
    RewindableInputStream ris = null;
    ArrayList<String> seenEvents = new ArrayList<String>(10);
    RunningJob rJob = null;

    try {
      JobConf jobConf = mrCluster.createJobConf();
      jobConf.setQueueName(queueName);
      // construct a job with 1 map and 1 reduce task.
      rJob = UtilsForTests.runJob(jobConf, inDir, outDir, 1, 1);
      rJob.waitForCompletion();
      assertTrue("Job failed", rJob.isSuccessful());

      JobID id = rJob.getID();

      // get the jobhistory filepath
      Path inputPath =
          new Path(JobHistory.getHistoryFilePath(org.apache.hadoop.mapred.JobID.downgrade(id)));
      // wait for 10 secs for the jobhistory file to move into the done folder
      for (int i = 0; i < 100; ++i) {
        if (lfs.exists(inputPath)) {
          break;
        }
        TimeUnit.MILLISECONDS.sleep(100);
      }

      assertTrue("Missing job history file", lfs.exists(inputPath));

      InputDemuxer inputDemuxer = new DefaultInputDemuxer();
      inputDemuxer.bindTo(inputPath, conf);

      Pair<String, InputStream> filePair = inputDemuxer.getNext();

      assertNotNull(filePair);

      ris = new RewindableInputStream(filePair.second());

      // Test if the JobHistoryParserFactory can detect the parser correctly
      parser = JobHistoryParserFactory.getParser(ris);

      // Get ParsedJob
      String jobId = TraceBuilder.extractJobID(filePair.first());
      JobBuilder builder = new JobBuilder(jobId);

      HistoryEvent e;
      while ((e = parser.nextEvent()) != null) {
        String eventString = e.getEventType().toString();
        System.out.println(eventString);
        seenEvents.add(eventString);
        if (builder != null) {
          builder.process(e);
        }
      }

      ParsedJob parsedJob = builder.build();
      // validate the obtainXXX api of ParsedJob, ParsedTask and
      // ParsedTaskAttempt.
      validateParsedJob(parsedJob, 1, 1, queueName);
    } finally {
      // stop the MR cluster
      mrCluster.shutdown();

      if (ris != null) {
        ris.close();
      }
      if (parser != null) {
        parser.close();
      }

      // cleanup the filesystem
      lfs.delete(tempDir, true);
    }

    // Check against the gold standard
    System.out.println("testCurrentJHParser validating using gold std ");
    String[] goldLines =
        new String[] {
          "JOB_SUBMITTED",
          "JOB_PRIORITY_CHANGED",
          "JOB_STATUS_CHANGED",
          "JOB_INITED",
          "JOB_INFO_CHANGED",
          "TASK_STARTED",
          "MAP_ATTEMPT_STARTED",
          "MAP_ATTEMPT_FINISHED",
          "MAP_ATTEMPT_FINISHED",
          "TASK_UPDATED",
          "TASK_FINISHED",
          "JOB_STATUS_CHANGED",
          "TASK_STARTED",
          "MAP_ATTEMPT_STARTED",
          "MAP_ATTEMPT_FINISHED",
          "MAP_ATTEMPT_FINISHED",
          "TASK_UPDATED",
          "TASK_FINISHED",
          "TASK_STARTED",
          "MAP_ATTEMPT_STARTED",
          "MAP_ATTEMPT_FINISHED",
          "REDUCE_ATTEMPT_FINISHED",
          "TASK_UPDATED",
          "TASK_FINISHED",
          "TASK_STARTED",
          "MAP_ATTEMPT_STARTED",
          "MAP_ATTEMPT_FINISHED",
          "MAP_ATTEMPT_FINISHED",
          "TASK_UPDATED",
          "TASK_FINISHED",
          "JOB_STATUS_CHANGED",
          "JOB_FINISHED"
        };

    // Check the output with gold std
    assertEquals("Size mismatch", goldLines.length, seenEvents.size());

    int index = 0;
    for (String goldLine : goldLines) {
      assertEquals("Content mismatch", goldLine, seenEvents.get(index++));
    }
  }
  /**
   * Logs the job submitted event to history. Creates a new file in history for the job. If history
   * file creation fails, it disables history for all other events.
   *
   * @param jobConfPath path to job conf xml file in HDFS.
   * @param submitTime time when job tracker received the job
   * @throws IOException
   */
  public void logSubmitted(String jobConfPath, long submitTime, String jobTrackerId)
      throws IOException {

    if (disableHistory) {
      return;
    }

    // create output stream for logging in hadoop.job.history.location
    int defaultBufferSize = logDirFs.getConf().getInt("io.file.buffer.size", 4096);

    try {
      FSDataOutputStream out = null;
      PrintWriter writer = null;

      // In case the old JT is still running, but we can't connect to it, we
      // should ensure that it won't write to our (new JT's) job history file.
      if (logDirFs.exists(logFile)) {
        LOG.info("Remove the old history file " + logFile);
        logDirFs.delete(logFile, true);
      }

      out =
          logDirFs.create(
              logFile,
              new FsPermission(HISTORY_FILE_PERMISSION),
              true,
              defaultBufferSize,
              logDirFs.getDefaultReplication(),
              jobHistoryBlockSize,
              null);

      writer = new PrintWriter(out);

      fileManager.addWriter(jobId, writer);

      // cache it ...
      fileManager.setHistoryFile(jobId, logFile);

      writers = fileManager.getWriters(jobId);
      if (null != writers) {
        log(
            writers,
            RecordTypes.Meta,
            new Keys[] {Keys.VERSION},
            new String[] {String.valueOf(JobHistory.VERSION)});
      }

      String jobName = getJobName();
      String user = getUserName();

      // add to writer as well
      log(
          writers,
          RecordTypes.Job,
          new Keys[] {
            Keys.JOBID, Keys.JOBNAME, Keys.USER, Keys.SUBMIT_TIME, Keys.JOBCONF, Keys.JOBTRACKERID
          },
          new String[] {
            jobId.toString(), jobName, user,
            String.valueOf(submitTime), jobConfPath, jobTrackerId
          });

    } catch (IOException e) {
      // Disable history if we have errors other than in the user log.
      disableHistory = true;
    }

    /* Storing the job conf on the log dir */
    Path jobFilePath = new Path(logDir, CoronaJobHistoryFilesManager.getConfFilename(jobId));
    fileManager.setConfFile(jobId, jobFilePath);
    FSDataOutputStream jobFileOut = null;
    try {
      if (!logDirFs.exists(jobFilePath)) {
        jobFileOut =
            logDirFs.create(
                jobFilePath,
                new FsPermission(HISTORY_FILE_PERMISSION),
                true,
                defaultBufferSize,
                logDirFs.getDefaultReplication(),
                logDirFs.getDefaultBlockSize(),
                null);
        conf.writeXml(jobFileOut);
        jobFileOut.close();
      }
    } catch (IOException ioe) {
      LOG.error("Failed to store job conf in the log dir", ioe);
    } finally {
      if (jobFileOut != null) {
        try {
          jobFileOut.close();
        } catch (IOException ie) {
          LOG.info(
              "Failed to close the job configuration file " + StringUtils.stringifyException(ie));
        }
      }
    }
  }
Example #16
 /** @return The jobid of the Job */
 public JobID getJobID() {
   return JobID.downgrade(super.getJobID());
 }
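JobID.downgrade bridges the new org.apache.hadoop.mapreduce.JobID and the old org.apache.hadoop.mapred.JobID; since the old class extends the new one, the reverse direction is just an implicit upcast. A minimal sketch of the conversion (the jobtracker identifier and sequence number are made up):

import org.apache.hadoop.mapred.JobID;

public class JobIdDowngradeSketch {
  public static void main(String[] args) {
    org.apache.hadoop.mapreduce.JobID newApiId =
        new org.apache.hadoop.mapreduce.JobID("1388534400000", 7);
    JobID oldApiId = JobID.downgrade(newApiId); // old-API view of the same id
    org.apache.hadoop.mapreduce.JobID backAgain = oldApiId; // upcast back is implicit
    System.out.println(oldApiId); // job_1388534400000_0007
    System.out.println(backAgain.getJtIdentifier() + " / " + backAgain.getId());
  }
}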
Example #17
 public static JobID read(DataInput in) throws IOException {
   JobID jobId = new JobID();
   jobId.readFields(in);
   return jobId;
 }
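The static read helper above wraps the Writable contract, so a JobID written with write() can be rebuilt from the raw bytes. A self-contained round-trip sketch, assuming the old-API org.apache.hadoop.mapred.JobID (which carries this static read method); the identifier values are made up:

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import org.apache.hadoop.mapred.JobID;

public class JobIdRoundTripSketch {
  public static void main(String[] args) throws Exception {
    JobID original = new JobID("1388534400000", 42);

    // serialize with the Writable write() method
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    original.write(new DataOutputStream(bytes));

    // rebuild from the bytes, exactly what read(DataInput) does
    JobID copy = JobID.read(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));

    System.out.println(original.equals(copy)); // true: same jobtracker id and sequence number
  }
}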
  public void testActionCheckTransientDuringMRAction() throws Exception {
    // When using YARN, skip this test because it relies on shutting down the job tracker, which
    // isn't used in YARN
    if (createJobConf().get("yarn.resourcemanager.address") != null) {
      return;
    }
    services.destroy();
    // Make the max number of retries lower so the test won't take as long
    final int maxRetries = 2;
    setSystemProperty("oozie.action.retries.max", Integer.toString(maxRetries));
    services = new Services();
    // Disable ActionCheckerService so it doesn't interfere by triggering any extra
    // ActionCheckXCommands
    setClassesToBeExcluded(
        services.getConf(), new String[] {"org.apache.oozie.service.ActionCheckerService"});
    services.init();

    final JPAService jpaService = Services.get().get(JPAService.class);
    WorkflowJobBean job0 =
        this.addRecordToWfJobTable(WorkflowJob.Status.RUNNING, WorkflowInstance.Status.RUNNING);
    final String jobId = job0.getId();
    WorkflowActionBean action0 =
        this.addRecordToWfActionTable(jobId, "1", WorkflowAction.Status.PREP);
    final String actionId = action0.getId();
    final WorkflowActionGetJPAExecutor wfActionGetCmd = new WorkflowActionGetJPAExecutor(actionId);

    new ActionStartXCommand(actionId, "map-reduce").call();
    final WorkflowActionBean action1 = jpaService.execute(wfActionGetCmd);
    String originalLauncherId = action1.getExternalId();

    ActionExecutorContext context =
        new ActionXCommand.ActionExecutorContext(job0, action1, false, false);
    MapReduceActionExecutor actionExecutor = new MapReduceActionExecutor();
    JobConf conf =
        actionExecutor.createBaseHadoopConf(context, XmlUtils.parseXml(action1.getConf()));
    String user = conf.get("user.name");
    JobClient jobClient =
        Services.get().get(HadoopAccessorService.class).createJobClient(user, conf);

    final RunningJob launcherJob = jobClient.getJob(JobID.forName(originalLauncherId));

    waitFor(
        120 * 1000,
        new Predicate() {
          @Override
          public boolean evaluate() throws Exception {
            return launcherJob.isComplete();
          }
        });
    assertTrue(launcherJob.isSuccessful());
    Map<String, String> actionData =
        LauncherMapperHelper.getActionData(getFileSystem(), context.getActionDir(), conf);
    assertTrue(LauncherMapperHelper.hasIdSwap(actionData));

    new ActionCheckXCommand(action1.getId()).call();
    WorkflowActionBean action2 = jpaService.execute(wfActionGetCmd);
    String originalMapperId = action2.getExternalChildIDs();

    assertFalse(originalLauncherId.equals(originalMapperId));

    // At this point, the launcher job has finished and the map-reduce action has started (but not
    // finished). Now, shut down the job tracker to pretend it has gone down during the map-reduce
    // job.
    executeWhileJobTrackerIsShutdown(
        new ShutdownJobTrackerExecutable() {
          @Override
          public void execute() throws Exception {
            assertEquals(0, action1.getRetries());
            new ActionCheckXCommand(actionId).call();

            waitFor(
                30 * 1000,
                new Predicate() {
                  @Override
                  public boolean evaluate() throws Exception {
                    WorkflowActionBean action1a = jpaService.execute(wfActionGetCmd);
                    return (action1a.getRetries() > 0);
                  }
                });
            waitFor(
                180 * 1000,
                new Predicate() {
                  @Override
                  public boolean evaluate() throws Exception {
                    WorkflowActionBean action1a = jpaService.execute(wfActionGetCmd);
                    return (action1a.getRetries() == 0);
                  }
                });
            WorkflowActionBean action1b = jpaService.execute(wfActionGetCmd);
            assertEquals(0, action1b.getRetries());
            assertEquals("START_MANUAL", action1b.getStatusStr());

            WorkflowJobBean job1 = jpaService.execute(new WorkflowJobGetJPAExecutor(jobId));
            assertEquals("SUSPENDED", job1.getStatusStr());

            // At this point, the action has gotten a transient error, even after maxRetries
            // tries, so the workflow has been SUSPENDED.
          }
        });
    // Now, let's bring the job tracker back up and resume the workflow (which will restart the
    // current action). It should now continue and finish with SUCCEEDED.
    new ResumeXCommand(jobId).call();
    WorkflowJobBean job2 = jpaService.execute(new WorkflowJobGetJPAExecutor(jobId));
    assertEquals("RUNNING", job2.getStatusStr());

    sleep(500);

    new ActionCheckXCommand(actionId).call();
    WorkflowActionBean action3 = jpaService.execute(wfActionGetCmd);
    String launcherId = action3.getExternalId();

    assertFalse(originalLauncherId.equals(launcherId));

    final RunningJob launcherJob2 = jobClient.getJob(JobID.forName(launcherId));

    waitFor(
        120 * 1000,
        new Predicate() {
          @Override
          public boolean evaluate() throws Exception {
            return launcherJob2.isComplete();
          }
        });

    assertTrue(launcherJob2.isSuccessful());
    actionData = LauncherMapperHelper.getActionData(getFileSystem(), context.getActionDir(), conf);
    assertTrue(LauncherMapperHelper.hasIdSwap(actionData));

    new ActionCheckXCommand(actionId).call();
    WorkflowActionBean action4 = jpaService.execute(wfActionGetCmd);
    String mapperId = action4.getExternalChildIDs();
    assertFalse(originalMapperId.equals(mapperId));

    final RunningJob mrJob = jobClient.getJob(JobID.forName(mapperId));

    waitFor(
        120 * 1000,
        new Predicate() {
          @Override
          public boolean evaluate() throws Exception {
            return mrJob.isComplete();
          }
        });
    assertTrue(mrJob.isSuccessful());

    new ActionCheckXCommand(actionId).call();
    WorkflowActionBean action5 = jpaService.execute(wfActionGetCmd);

    assertEquals("SUCCEEDED", action5.getExternalStatus());
  }
Example #19
  /**
   * Test job retire with tasks that report their *first* status only after the job retires.
   *
   * <p>Steps:
   *
   * <ul>
   *   <li>Start a mini-mr cluster with 1 task-tracker having only map slots. Note that this
   *       task-tracker will take care of setup/cleanup and the map tasks.
   *   <li>Submit a job with 1 map task and 1 reduce task.
   *   <li>Wait for the job to finish the map task.
   *   <li>Start a 2nd tracker that waits for a long time after contacting the JT.
   *   <li>Wait for the 2nd tracker to get stuck.
   *   <li>Kill the job.
   *   <li>Wait for the job to retire.
   *   <li>Check if the tip mappings are cleaned up.
   * </ul>
   */
  public void testJobRetireWithUnreportedTasks() throws Exception {
    MiniMRCluster mr = null;
    try {
      JobConf conf = new JobConf();
      // set reduce slots to 0 and map slots to 1 so that only map tasks and setup/cleanup
      // tasks, but no reduce tasks, can run on this tracker
      conf.setInt("mapred.tasktracker.map.tasks.maximum", 1);
      conf.setInt("mapred.tasktracker.reduce.tasks.maximum", 0);

      mr = startCluster(conf, 1);
      JobTracker jobtracker = mr.getJobTrackerRunner().getJobTracker();

      RunningJob job =
          UtilsForTests.runJob(
              mr.createJobConf(), new Path(testDir, "in-1"), new Path(testDir, "out-1"), 1, 1);
      JobID id = JobID.downgrade(job.getID());
      JobInProgress jip = jobtracker.getJob(id);

      // wait 100 secs for the job to complete its map task
      for (int i = 0; i < 1000 && jip.finishedMaps() < 1; i++) {
        UtilsForTests.waitFor(100);
      }
      assertEquals(1, jip.finishedMaps());

      // start a tracker that will wait
      LOG.info("Adding a waiting tracker");
      TaskTrackerRunner testTrackerRunner =
          mr.new TaskTrackerRunner(1, 1, null, mr.createJobConf()) {
            @Override
            TaskTracker createTaskTracker(JobConf conf) throws InterruptedException, IOException {
              return new WaitingTaskTracker(conf);
            }
          };
      mr.addTaskTracker(testTrackerRunner);
      LOG.info("Waiting tracker added");

      WaitingTaskTracker testTT = (WaitingTaskTracker) testTrackerRunner.getTaskTracker();

      // wait 100 secs for the newly started task-tracker to join
      for (int i = 0; i < 1000 && jobtracker.taskTrackers().size() < 2; i++) {
        UtilsForTests.waitFor(100);
      }
      assertEquals(2, jobtracker.taskTrackers().size());
      LOG.info("Cluster is now up with 2 trackers");
      // stop the test-tt as it's no longer required
      mr.stopTaskTracker(mr.getTaskTrackerID(testTT.getName()));

      // 1 reduce task should be scheduled
      assertEquals("TestTT contacted but no reduce task scheduled on it", 1, jip.runningReduces());

      // kill the job
      LOG.info("Killing job " + id);
      job.killJob();

      // check if the reduce task attempt status is missing
      TaskInProgress tip = jip.getTasks(TaskType.REDUCE)[0];
      assertNull(tip.getTaskStatus(tip.getAllTaskAttemptIDs()[0]));

      // wait for the job to retire
      waitTillRetire(id, jobtracker);

      // check the taskidToTIPMap
      for (TaskAttemptID tid : jobtracker.taskidToTIPMap.keySet()) {
        LOG.info("TaskidToTIP : " + tid);
      }
      assertEquals("'taskid' to TIP mapping still exists", 0, jobtracker.taskidToTIPMap.size());
    } finally {
      if (mr != null) {
        mr.shutdown();
      }
      // cleanup
      FileUtil.fullyDelete(new File(testDir.toString()));
    }
  }
  @SuppressWarnings("deprecation")
  public SimulatorJobInProgress(
      JobID jobid, JobTracker jobtracker, JobConf default_conf, JobStory jobStory) {
    super(jobid, jobStory.getJobConf(), jobtracker);
    // jobSetupCleanupNeeded is set to false in the parent constructor, though the default is true

    restartCount = 0;
    jobSetupCleanupNeeded = false;

    this.memoryPerMap = conf.getMemoryForMapTask();
    this.memoryPerReduce = conf.getMemoryForReduceTask();
    this.maxTaskFailuresPerTracker = conf.getMaxTaskFailuresPerTracker();

    this.jobId = jobid;
    String url =
        "http://"
            + jobtracker.getJobTrackerMachine()
            + ":"
            + jobtracker.getInfoPort()
            + "/jobdetails.jsp?jobid="
            + jobid;
    this.jobtracker = jobtracker;
    this.conf = jobStory.getJobConf();
    this.priority = conf.getJobPriority();
    Path jobDir = jobtracker.getSystemDirectoryForJob(jobid);
    this.jobFile = new Path(jobDir, "job.xml");
    this.status =
        new JobStatus(jobid, 0.0f, 0.0f, 0.0f, 0.0f, JobStatus.PREP, priority, conf.getUser());
    this.profile =
        new JobProfile(
            jobStory.getUser(),
            jobid,
            this.jobFile.toString(),
            url,
            jobStory.getName(),
            conf.getQueueName());
    this.startTime = JobTracker.getClock().getTime();
    status.setStartTime(startTime);
    this.resourceEstimator = new ResourceEstimator(this);

    this.numMapTasks = jobStory.getNumberMaps();
    this.numReduceTasks = jobStory.getNumberReduces();
    this.taskCompletionEvents =
        new ArrayList<TaskCompletionEvent>(numMapTasks + numReduceTasks + 10);

    this.mapFailuresPercent = conf.getMaxMapTaskFailuresPercent();
    this.reduceFailuresPercent = conf.getMaxReduceTaskFailuresPercent();
    MetricsContext metricsContext = MetricsUtil.getContext("mapred");
    this.jobMetrics = MetricsUtil.createRecord(metricsContext, "job");
    this.jobMetrics.setTag("user", conf.getUser());
    this.jobMetrics.setTag("sessionId", conf.getSessionId());
    this.jobMetrics.setTag("jobName", conf.getJobName());
    this.jobMetrics.setTag("jobId", jobid.toString());

    this.maxLevel = jobtracker.getNumTaskCacheLevels();
    this.anyCacheLevel = this.maxLevel + 1;
    this.nonLocalMaps = new LinkedList<TaskInProgress>();
    this.nonLocalRunningMaps = new LinkedHashSet<TaskInProgress>();
    this.runningMapCache = new IdentityHashMap<Node, Set<TaskInProgress>>();
    this.nonRunningReduces = new LinkedList<TaskInProgress>();
    this.runningReduces = new LinkedHashSet<TaskInProgress>();
    this.slowTaskThreshold =
        Math.max(0.0f, conf.getFloat("mapred.speculative.execution.slowTaskThreshold", 1.0f));
    this.speculativeCap = conf.getFloat("mapred.speculative.execution.speculativeCap", 0.1f);
    this.slowNodeThreshold = conf.getFloat("mapred.speculative.execution.slowNodeThreshold", 1.0f);

    this.jobStory = jobStory;
    //    this.jobHistory = this.jobtracker.getJobHistory();
  }
  public void _jspService(HttpServletRequest request, HttpServletResponse response)
      throws java.io.IOException, ServletException {

    PageContext pageContext = null;
    HttpSession session = null;
    ServletContext application = null;
    ServletConfig config = null;
    JspWriter out = null;
    Object page = this;
    JspWriter _jspx_out = null;
    PageContext _jspx_page_context = null;

    try {
      response.setContentType("text/html; charset=UTF-8");
      pageContext = _jspxFactory.getPageContext(this, request, response, null, true, 8192, true);
      _jspx_page_context = pageContext;
      application = pageContext.getServletContext();
      config = pageContext.getServletConfig();
      session = pageContext.getSession();
      out = pageContext.getOut();
      _jspx_out = out;
      _jspx_resourceInjector =
          (org.apache.jasper.runtime.ResourceInjector)
              application.getAttribute("com.sun.appserv.jsp.resource.injector");

      out.write('\n');
      out.write('\n');

      JobTracker tracker = (JobTracker) application.getAttribute("job.tracker");
      String trackerName = StringUtils.simpleHostname(tracker.getJobTrackerMachine());

      out.write('\n');
      out.write('\n');
      out.write('\n');

      String jobId = request.getParameter("jobid");
      if (jobId == null) {
        out.println("<h2>Missing 'jobid'!</h2>");
        return;
      }
      JobID jobIdObj = JobID.forName(jobId);
      String kind = request.getParameter("kind");
      String cause = request.getParameter("cause");

      out.write("\n\n<html>\n<title>Hadoop ");
      out.print(jobId);
      out.write(" failures on ");
      out.print(trackerName);
      out.write("</title>\n<body>\n<h1>Hadoop <a href=\"jobdetails.jsp?jobid=");
      out.print(jobId);
      out.write('"');
      out.write('>');
      out.print(jobId);
      out.write("</a>\nfailures on <a href=\"jobtracker.jsp\">");
      out.print(trackerName);
      out.write("</a></h1>\n\n");

      printFailures(out, tracker, jobIdObj, kind, cause);

      out.write("\n\n<hr>\n<a href=\"jobtracker.jsp\">Go back to JobTracker</a><br>\n");

      out.println(ServletUtil.htmlFooter());

      out.write('\n');
    } catch (Throwable t) {
      if (!(t instanceof SkipPageException)) {
        out = _jspx_out;
        if (out != null && out.getBufferSize() != 0) out.clearBuffer();
        if (_jspx_page_context != null) _jspx_page_context.handlePageException(t);
      }
    } finally {
      _jspxFactory.releasePageContext(_jspx_page_context);
    }
  }