Exemplo n.º 1
0
  /**
   * Unregisters a finished or aborted task.
   *
   * @param id the ID of the task to be unregistered
   */
  private void unregisterTask(final ExecutionVertexID id) {

    // Task de-registration must be atomic
    synchronized (this) {
      final Task task = this.runningTasks.remove(id);
      if (task == null) {
        LOG.error("Cannot find task with ID " + id + " to unregister");
        return;
      }

      // remove the local tmp file for unregistered tasks.
      for (Entry<String, DistributedCacheEntry> e :
          DistributedCache.readFileInfoFromConfig(task.getEnvironment().getJobConfiguration())) {
        this.fileCache.deleteTmpFile(e.getKey(), e.getValue(), task.getJobID());
      }
      // Unregister task from the byte buffered channel manager
      this.channelManager.unregister(id, task);

      // Unregister task from profiling
      task.unregisterProfiler(this.profiler);

      // Unregister task from memory manager
      task.unregisterMemoryManager(this.memoryManager);

      // Unregister task from library cache manager
      try {
        LibraryCacheManager.unregister(task.getJobID());
      } catch (IOException e) {
        if (LOG.isDebugEnabled()) {
          LOG.debug("Unregistering the job vertex ID " + id + " caused an IOException");
        }
      }
    }
  }
  @SuppressWarnings("deprecation")
  public CoronaJobTrackerRunner(
      TaskTracker.TaskInProgress tip,
      Task task,
      TaskTracker tracker,
      JobConf ttConf,
      CoronaSessionInfo info,
      String originalPath,
      String releasePath)
      throws IOException {
    super(tip, task, tracker, ttConf);
    this.coronaSessionInfo = info;
    this.originalPath = originalPath;
    this.releasePath = releasePath;
    LocalDirAllocator lDirAlloc = new LocalDirAllocator("mapred.local.dir");

    workDir =
        new File(
            lDirAlloc
                .getLocalPathForWrite(
                    TaskTracker.getLocalTaskDir(
                            task.getJobID().toString(),
                            task.getTaskID().toString(),
                            task.isTaskCleanupTask())
                        + Path.SEPARATOR
                        + MRConstants.WORKDIR,
                    conf)
                .toString());
    if (!workDir.mkdirs()) {
      if (!workDir.isDirectory()) {
        throw new IOException("Mkdirs failed to create " + workDir.toString());
      }
    }
    localizeTaskConfiguration(tracker, ttConf, workDir.toString(), task, task.getJobID());
  }
Exemplo n.º 3
0
 public TaskRunner(TaskTracker.TaskInProgress tip, TaskTracker tracker, JobConf conf) {
   this.tip = tip;
   this.t = tip.getTask();
   this.tracker = tracker;
   this.conf = conf;
   this.mapOutputFile = new MapOutputFile(t.getJobID());
   this.mapOutputFile.setConf(conf);
   this.jvmManager = tracker.getJvmManagerInstance();
 }
Exemplo n.º 4
0
  /**
   * Write the child's configuration to the disk and set it in configuration so that the child can
   * pick it up from there.
   *
   * @param lDirAlloc
   * @throws IOException
   */
  void setupChildTaskConfiguration(LocalDirAllocator lDirAlloc) throws IOException {

    Path localTaskFile =
        lDirAlloc.getLocalPathForWrite(
            TaskTracker.getTaskConfFile(
                t.getUser(),
                t.getJobID().toString(),
                t.getTaskID().toString(),
                t.isTaskCleanupTask()),
            conf);

    // write the child's task configuration file to the local disk
    JobLocalizer.writeLocalJobFile(localTaskFile, conf);

    // Set the final job file in the task. The child needs to know the correct
    // path to job.xml. So set this path accordingly.
    t.setJobFile(localTaskFile.toString());
  }
Exemplo n.º 5
0
 /**
  * Prepare the mapred.local.dir for the child. The child is sand-boxed now. Whenever it uses
  * LocalDirAllocator from now on inside the child, it will only see files inside the
  * attempt-directory. This is done in the Child's process space.
  */
 static void setupChildMapredLocalDirs(Task t, JobConf conf) {
   String[] localDirs = conf.getTrimmedStrings(JobConf.MAPRED_LOCAL_DIR_PROPERTY);
   String jobId = t.getJobID().toString();
   String taskId = t.getTaskID().toString();
   boolean isCleanup = t.isTaskCleanupTask();
   String user = t.getUser();
   StringBuffer childMapredLocalDir =
       new StringBuffer(
           localDirs[0]
               + Path.SEPARATOR
               + TaskTracker.getLocalTaskDir(user, jobId, taskId, isCleanup));
   for (int i = 1; i < localDirs.length; i++) {
     childMapredLocalDir.append(
         ","
             + localDirs[i]
             + Path.SEPARATOR
             + TaskTracker.getLocalTaskDir(user, jobId, taskId, isCleanup));
   }
   LOG.debug("mapred.local.dir for child : " + childMapredLocalDir);
   conf.set("mapred.local.dir", childMapredLocalDir.toString());
 }
  @SuppressWarnings("deprecation")
  @Override
  public void run() {
    Task task = getTask();
    TaskAttemptID taskid = task.getTaskID();
    try {
      if (!prepare()) {
        return;
      }

      String sep = System.getProperty("path.separator");
      StringBuffer classPath = new StringBuffer();
      // The alternate runtime can be used to debug tasks by putting a
      // custom version of the mapred libraries. This will get loaded before
      // the TT's jars.
      String debugRuntime = conf.get("mapred.task.debug.runtime.classpath");
      if (debugRuntime != null) {
        classPath.append(debugRuntime);
        classPath.append(sep);
      }
      // start with same classpath as parent process

      String systemClassPath = System.getProperty("java.class.path");
      if (releasePath != null
          && !releasePath.isEmpty()
          && originalPath != null
          && !releasePath.isEmpty()) {
        systemClassPath = systemClassPath.replaceAll(originalPath, releasePath);
      }
      classPath.append(systemClassPath);
      classPath.append(sep);
      //  Build exec child jmv args.
      Vector<String> vargs = new Vector<String>(8);
      File jvm = // use same jvm as parent
          new File(new File(System.getProperty("java.home"), "bin"), "java");

      vargs.add(jvm.toString());

      // Add child (task) java-vm options.
      String javaOpts = getCJTJavaOpts(conf);
      javaOpts = javaOpts.replace("@taskid@", taskid.toString());
      String[] javaOptsSplit = javaOpts.split(" ");
      for (int i = 0; i < javaOptsSplit.length; i++) {
        vargs.add(javaOptsSplit[i]);
      }

      // add java.io.tmpdir given by mapred.child.tmp
      String tmp = conf.get("mapred.child.tmp", "./tmp");
      Path tmpDir = new Path(tmp);
      // if temp directory path is not absolute
      // prepend it with workDir.
      if (!tmpDir.isAbsolute()) {
        tmpDir = new Path(workDir.toString(), tmp);
      }
      FileSystem localFs = FileSystem.getLocal(conf);
      if (!localFs.mkdirs(tmpDir) && !localFs.getFileStatus(tmpDir).isDir()) {
        throw new IOException("Mkdirs failed to create " + tmpDir.toString());
      }
      vargs.add("-Djava.io.tmpdir=" + tmpDir.toString());

      // Add classpath.
      vargs.add("-classpath");
      vargs.add(classPath.toString());

      // Setup the log4j prop
      long logSize = TaskLog.getTaskLogLength(conf);
      vargs.add("-Dhadoop.log.dir=" + CoronaTaskTracker.jobTrackerLogDir());
      boolean logToScribe = conf.getBoolean("mapred.task.log.scribe", false);
      if (logToScribe) {
        vargs.addAll(conf.getStringCollection("mapred.task.log.scribe.conf"));
      }
      String logger = logToScribe ? "INFO,TLA,scribe" : "INFO,TLA";

      vargs.add("-Dhadoop.root.logger=" + logger);
      vargs.add("-Dhadoop.tasklog.taskid=" + taskid);
      vargs.add("-Dhadoop.tasklog.totalLogFileSize=" + logSize);

      Path systemDirectory = tracker.systemDirectory;
      if (!systemDirectory.isAbsolute()) {
        systemDirectory = new Path(tracker.systemFS.getWorkingDirectory(), systemDirectory);
      }
      systemDirectory = systemDirectory.makeQualified(tracker.systemFS);
      vargs.add("-Dmapred.system.dir=" + systemDirectory);

      // Add main class and its arguments
      vargs.add(CoronaJobTracker.class.getName()); // main of CJT
      vargs.add(task.getJobID().toString()); // Pass job id.
      vargs.add(task.getTaskID().toString()); // Pass attempt id.
      vargs.add(coronaSessionInfo.getJobTrackerAddr().getHostName());
      vargs.add(Integer.toString(coronaSessionInfo.getJobTrackerAddr().getPort()));

      tracker.addToMemoryManager(task.getTaskID(), task.isMapTask(), conf);

      // set memory limit using ulimit if feasible and necessary ...
      String[] ulimitCmd = Shell.getUlimitMemoryCommand(getChildUlimit(conf));
      List<String> setup = null;
      if (ulimitCmd != null) {
        setup = new ArrayList<String>();
        for (String arg : ulimitCmd) {
          setup.add(arg);
        }
      }

      // Set up the redirection of the task's stdout and stderr streams
      File stdout = TaskLog.getTaskLogFile(taskid, TaskLog.LogName.STDOUT);
      File stderr = TaskLog.getTaskLogFile(taskid, TaskLog.LogName.STDERR);
      stdout.getParentFile().mkdirs();

      Map<String, String> env = new HashMap<String, String>();
      StringBuffer ldLibraryPath = new StringBuffer();
      ldLibraryPath.append(workDir.toString());
      String oldLdLibraryPath = null;
      oldLdLibraryPath = System.getenv("LD_LIBRARY_PATH");
      if (oldLdLibraryPath != null) {
        ldLibraryPath.append(sep);
        ldLibraryPath.append(oldLdLibraryPath);
      }
      env.put("LD_LIBRARY_PATH", ldLibraryPath.toString());

      LOG.info(
          "Launching CJT "
              + taskid
              + " in working directory "
              + workDir
              + " Command line "
              + vargs);

      jvmManager.launchJvm(
          this,
          jvmManager.constructJvmEnv(setup, vargs, stdout, stderr, logSize, workDir, env, conf));
      synchronized (lock) {
        while (!done) {
          lock.wait();
        }
      }
    } catch (IOException e) {
      LOG.error("Error while launching CJT ", e);
    } catch (InterruptedException e) {
      LOG.warn("Error while launching CJT ", e);
    }
  }
Exemplo n.º 7
0
  @Override
  public final void run() {
    try {

      // before preparing the job localize
      // all the archives
      TaskAttemptID taskid = t.getTaskID();
      LocalDirAllocator lDirAlloc = new LocalDirAllocator("mapred.local.dir");
      File jobCacheDir = null;
      if (conf.getJar() != null) {
        jobCacheDir = new File(new Path(conf.getJar()).getParent().toString());
      }
      File workDir =
          new File(
              lDirAlloc
                  .getLocalPathToRead(
                      TaskTracker.getJobCacheSubdir()
                          + Path.SEPARATOR
                          + t.getJobID()
                          + Path.SEPARATOR
                          + t.getTaskID()
                          + Path.SEPARATOR
                          + MRConstants.WORKDIR,
                      conf)
                  .toString());

      URI[] archives = DistributedCache.getCacheArchives(conf);
      URI[] files = DistributedCache.getCacheFiles(conf);
      FileStatus fileStatus;
      FileSystem fileSystem;
      Path localPath;
      String baseDir;

      if ((archives != null) || (files != null)) {
        if (archives != null) {
          String[] archivesTimestamps = DistributedCache.getArchiveTimestamps(conf);
          Path[] p = new Path[archives.length];
          for (int i = 0; i < archives.length; i++) {
            fileSystem = FileSystem.get(archives[i], conf);
            fileStatus = fileSystem.getFileStatus(new Path(archives[i].getPath()));
            String cacheId = DistributedCache.makeRelative(archives[i], conf);
            String cachePath = TaskTracker.getCacheSubdir() + Path.SEPARATOR + cacheId;
            if (lDirAlloc.ifExists(cachePath, conf)) {
              localPath = lDirAlloc.getLocalPathToRead(cachePath, conf);
            } else {
              localPath = lDirAlloc.getLocalPathForWrite(cachePath, fileStatus.getLen(), conf);
            }
            baseDir = localPath.toString().replace(cacheId, "");
            p[i] =
                DistributedCache.getLocalCache(
                    archives[i],
                    conf,
                    new Path(baseDir),
                    fileStatus,
                    true,
                    Long.parseLong(archivesTimestamps[i]),
                    new Path(workDir.getAbsolutePath()),
                    false);
          }
          DistributedCache.setLocalArchives(conf, stringifyPathArray(p));
        }
        if ((files != null)) {
          String[] fileTimestamps = DistributedCache.getFileTimestamps(conf);
          Path[] p = new Path[files.length];
          for (int i = 0; i < files.length; i++) {
            fileSystem = FileSystem.get(files[i], conf);
            fileStatus = fileSystem.getFileStatus(new Path(files[i].getPath()));
            String cacheId = DistributedCache.makeRelative(files[i], conf);
            String cachePath = TaskTracker.getCacheSubdir() + Path.SEPARATOR + cacheId;
            if (lDirAlloc.ifExists(cachePath, conf)) {
              localPath = lDirAlloc.getLocalPathToRead(cachePath, conf);
            } else {
              localPath = lDirAlloc.getLocalPathForWrite(cachePath, fileStatus.getLen(), conf);
            }
            baseDir = localPath.toString().replace(cacheId, "");
            p[i] =
                DistributedCache.getLocalCache(
                    files[i],
                    conf,
                    new Path(baseDir),
                    fileStatus,
                    false,
                    Long.parseLong(fileTimestamps[i]),
                    new Path(workDir.getAbsolutePath()),
                    false);
          }
          DistributedCache.setLocalFiles(conf, stringifyPathArray(p));
        }
        Path localTaskFile = new Path(t.getJobFile());
        FileSystem localFs = FileSystem.getLocal(conf);
        localFs.delete(localTaskFile, true);
        OutputStream out = localFs.create(localTaskFile);
        try {
          conf.writeXml(out);
        } finally {
          out.close();
        }
      }

      if (!prepare()) {
        return;
      }

      String sep = System.getProperty("path.separator");
      StringBuffer classPath = new StringBuffer();
      // start with same classpath as parent process
      classPath.append(System.getProperty("java.class.path"));
      classPath.append(sep);
      if (!workDir.mkdirs()) {
        if (!workDir.isDirectory()) {
          LOG.fatal("Mkdirs failed to create " + workDir.toString());
        }
      }

      String jar = conf.getJar();
      if (jar != null) {
        // if jar exists, it into workDir
        File[] libs = new File(jobCacheDir, "lib").listFiles();
        if (libs != null) {
          for (int i = 0; i < libs.length; i++) {
            classPath.append(sep); // add libs from jar to classpath
            classPath.append(libs[i]);
          }
        }
        classPath.append(sep);
        classPath.append(new File(jobCacheDir, "classes"));
        classPath.append(sep);
        classPath.append(jobCacheDir);
      }

      // include the user specified classpath

      // archive paths
      Path[] archiveClasspaths = DistributedCache.getArchiveClassPaths(conf);
      if (archiveClasspaths != null && archives != null) {
        Path[] localArchives = DistributedCache.getLocalCacheArchives(conf);
        if (localArchives != null) {
          for (int i = 0; i < archives.length; i++) {
            for (int j = 0; j < archiveClasspaths.length; j++) {
              if (archives[i].getPath().equals(archiveClasspaths[j].toString())) {
                classPath.append(sep);
                classPath.append(localArchives[i].toString());
              }
            }
          }
        }
      }
      // file paths
      Path[] fileClasspaths = DistributedCache.getFileClassPaths(conf);
      if (fileClasspaths != null && files != null) {
        Path[] localFiles = DistributedCache.getLocalCacheFiles(conf);
        if (localFiles != null) {
          for (int i = 0; i < files.length; i++) {
            for (int j = 0; j < fileClasspaths.length; j++) {
              if (files[i].getPath().equals(fileClasspaths[j].toString())) {
                classPath.append(sep);
                classPath.append(localFiles[i].toString());
              }
            }
          }
        }
      }

      classPath.append(sep);
      classPath.append(workDir);
      //  Build exec child jmv args.
      Vector<String> vargs = new Vector<String>(8);
      File jvm = // use same jvm as parent
          new File(new File(System.getProperty("java.home"), "bin"), "java");

      vargs.add(jvm.toString());

      // Add child (task) java-vm options.
      //
      // The following symbols if present in mapred.child.java.opts value are
      // replaced:
      // + @taskid@ is interpolated with value of TaskID.
      // Other occurrences of @ will not be altered.
      //
      // Example with multiple arguments and substitutions, showing
      // jvm GC logging, and start of a passwordless JVM JMX agent so can
      // connect with jconsole and the likes to watch child memory, threads
      // and get thread dumps.
      //
      //  <property>
      //    <name>mapred.child.java.opts</name>
      //    <value>-verbose:gc -Xloggc:/tmp/@[email protected] \
      //           -Dcom.sun.management.jmxremote.authenticate=false \
      //           -Dcom.sun.management.jmxremote.ssl=false \
      //    </value>
      //  </property>
      //
      String javaOpts = conf.get("mapred.child.java.opts", "-Xmx200m");
      javaOpts = javaOpts.replace("@taskid@", taskid.toString());
      String[] javaOptsSplit = javaOpts.split(" ");

      // Add java.library.path; necessary for loading native libraries.
      //
      // 1. To support native-hadoop library i.e. libhadoop.so, we add the
      //    parent processes' java.library.path to the child.
      // 2. We also add the 'cwd' of the task to it's java.library.path to help
      //    users distribute native libraries via the DistributedCache.
      // 3. The user can also specify extra paths to be added to the
      //    java.library.path via mapred.child.java.opts.
      //
      String libraryPath = System.getProperty("java.library.path");
      if (libraryPath == null) {
        libraryPath = workDir.getAbsolutePath();
      } else {
        libraryPath += sep + workDir;
      }
      boolean hasUserLDPath = false;
      for (int i = 0; i < javaOptsSplit.length; i++) {
        if (javaOptsSplit[i].startsWith("-Djava.library.path=")) {
          javaOptsSplit[i] += sep + libraryPath;
          hasUserLDPath = true;
          break;
        }
      }
      if (!hasUserLDPath) {
        vargs.add("-Djava.library.path=" + libraryPath);
      }
      for (int i = 0; i < javaOptsSplit.length; i++) {
        vargs.add(javaOptsSplit[i]);
      }

      // add java.io.tmpdir given by mapred.child.tmp
      String tmp = conf.get("mapred.child.tmp", "./tmp");
      Path tmpDir = new Path(tmp);

      // if temp directory path is not absolute
      // prepend it with workDir.
      if (!tmpDir.isAbsolute()) {
        tmpDir = new Path(workDir.toString(), tmp);
      }
      FileSystem localFs = FileSystem.getLocal(conf);
      if (!localFs.mkdirs(tmpDir) && !localFs.getFileStatus(tmpDir).isDir()) {
        throw new IOException("Mkdirs failed to create " + tmpDir.toString());
      }
      vargs.add("-Djava.io.tmpdir=" + tmpDir.toString());

      // Add classpath.
      vargs.add("-classpath");
      vargs.add(classPath.toString());

      // Setup the log4j prop
      long logSize = TaskLog.getTaskLogLength(conf);
      vargs.add(
          "-Dhadoop.log.dir=" + new File(System.getProperty("hadoop.log.dir")).getAbsolutePath());
      vargs.add("-Dhadoop.root.logger=INFO,TLA");
      vargs.add("-Dhadoop.tasklog.taskid=" + taskid);
      vargs.add("-Dhadoop.tasklog.totalLogFileSize=" + logSize);

      if (conf.getProfileEnabled()) {
        if (conf.getProfileTaskRange(t.isMapTask()).isIncluded(t.getPartition())) {
          File prof = TaskLog.getTaskLogFile(taskid, TaskLog.LogName.PROFILE);
          vargs.add(String.format(conf.getProfileParams(), prof.toString()));
        }
      }

      // Add main class and its arguments
      vargs.add(Child.class.getName()); // main of Child
      // pass umbilical address
      InetSocketAddress address = tracker.getTaskTrackerReportAddress();
      vargs.add(address.getAddress().getHostAddress());
      vargs.add(Integer.toString(address.getPort()));
      vargs.add(taskid.toString()); // pass task identifier

      String pidFile = null;
      if (tracker.isTaskMemoryManagerEnabled()) {
        pidFile =
            lDirAlloc
                .getLocalPathForWrite(
                    (TaskTracker.getPidFilesSubdir() + Path.SEPARATOR + taskid), this.conf)
                .toString();
      }

      // set memory limit using ulimit if feasible and necessary ...
      String[] ulimitCmd = Shell.getUlimitMemoryCommand(conf);
      List<String> setup = null;
      if (ulimitCmd != null) {
        setup = new ArrayList<String>();
        for (String arg : ulimitCmd) {
          setup.add(arg);
        }
      }

      // Set up the redirection of the task's stdout and stderr streams
      File stdout = TaskLog.getTaskLogFile(taskid, TaskLog.LogName.STDOUT);
      File stderr = TaskLog.getTaskLogFile(taskid, TaskLog.LogName.STDERR);
      stdout.getParentFile().mkdirs();
      tracker.getTaskTrackerInstrumentation().reportTaskLaunch(taskid, stdout, stderr);

      Map<String, String> env = new HashMap<String, String>();
      StringBuffer ldLibraryPath = new StringBuffer();
      ldLibraryPath.append(workDir.toString());
      String oldLdLibraryPath = null;
      oldLdLibraryPath = System.getenv("LD_LIBRARY_PATH");
      if (oldLdLibraryPath != null) {
        ldLibraryPath.append(sep);
        ldLibraryPath.append(oldLdLibraryPath);
      }
      env.put("LD_LIBRARY_PATH", ldLibraryPath.toString());
      jvmManager.launchJvm(
          this,
          jvmManager.constructJvmEnv(
              setup, vargs, stdout, stderr, logSize, workDir, env, pidFile, conf));
      synchronized (lock) {
        while (!done) {
          lock.wait();
        }
      }
      tracker.getTaskTrackerInstrumentation().reportTaskEnd(t.getTaskID());
      if (exitCodeSet) {
        if (!killed && exitCode != 0) {
          if (exitCode == 65) {
            tracker.getTaskTrackerInstrumentation().taskFailedPing(t.getTaskID());
          }
          throw new IOException("Task process exit with nonzero status of " + exitCode + ".");
        }
      }
    } catch (FSError e) {
      LOG.fatal("FSError", e);
      try {
        tracker.fsError(t.getTaskID(), e.getMessage());
      } catch (IOException ie) {
        LOG.fatal(t.getTaskID() + " reporting FSError", ie);
      }
    } catch (Throwable throwable) {
      LOG.warn(t.getTaskID() + " Child Error", throwable);
      ByteArrayOutputStream baos = new ByteArrayOutputStream();
      throwable.printStackTrace(new PrintStream(baos));
      try {
        tracker.reportDiagnosticInfo(t.getTaskID(), baos.toString());
      } catch (IOException e) {
        LOG.warn(t.getTaskID() + " Reporting Diagnostics", e);
      }
    } finally {
      try {
        URI[] archives = DistributedCache.getCacheArchives(conf);
        URI[] files = DistributedCache.getCacheFiles(conf);
        if (archives != null) {
          for (int i = 0; i < archives.length; i++) {
            DistributedCache.releaseCache(archives[i], conf);
          }
        }
        if (files != null) {
          for (int i = 0; i < files.length; i++) {
            DistributedCache.releaseCache(files[i], conf);
          }
        }
      } catch (IOException ie) {
        LOG.warn("Error releasing caches : Cache files might not have been cleaned up");
      }
      tracker.reportTaskFinished(t.getTaskID(), false);
      if (t.isMapTask()) {
        tracker.addFreeMapSlot();
      } else {
        tracker.addFreeReduceSlot();
      }
    }
  }