Exemplo n.º 1
0
  /**
   * Start the child process to handle the task for us.
   *
   * <p>The constructor opens a listening socket on an automatically assigned port, publishes that
   * port to the child through the {@code hadoop.pipes.command.port} environment variable, launches
   * the first file of the distributed cache as the executable (after making it executable for all
   * users), waits for the child to connect back, and then starts the binary protocol over the
   * accepted connection and sends it the job configuration.
   *
   * <p>If any step fails, everything acquired so far (listening socket, accepted socket, child
   * process) is released before the exception propagates, because the caller never receives a
   * reference to the partially constructed object and therefore cannot clean it up itself.
   *
   * @param conf the task's configuration
   * @param recordReader the fake record reader to update progress with
   * @param output the collector to send output to
   * @param reporter the reporter for the task
   * @param outputKeyClass the class of the output keys
   * @param outputValueClass the class of the output values
   * @throws IOException if the distributed cache holds no executable, or if an I/O error is raised
   *     by the socket, process or protocol setup performed here
   * @throws InterruptedException propagated from the setup helpers called here (the visible code
   *     does not show which of them raises it)
   */
  Application(
      JobConf conf,
      RecordReader<FloatWritable, NullWritable> recordReader,
      OutputCollector<K2, V2> output,
      Reporter reporter,
      Class<? extends K2> outputKeyClass,
      Class<? extends V2> outputValueClass)
      throws IOException, InterruptedException {
    // Port 0 asks the OS for any free port; the child learns it via the environment below.
    serverSocket = new ServerSocket(0);

    // Locals mirror the fields so the failure path can release them without reading fields
    // that may not have been assigned yet.
    Process child = null;
    Socket client = null;
    boolean ready = false;
    try {
      Map<String, String> env = new HashMap<String, String>();
      // add TMPDIR environment variable with the value of java.io.tmpdir
      env.put("TMPDIR", System.getProperty("java.io.tmpdir"));
      env.put("hadoop.pipes.command.port", Integer.toString(serverSocket.getLocalPort()));

      // The executable is taken from the first localized cache file. Fail with a clear message
      // instead of a NullPointerException / ArrayIndexOutOfBoundsException when there is none.
      // (Held as Object[] so this block needs no additional import.)
      Object[] localFiles = DistributedCache.getLocalCacheFiles(conf);
      if (localFiles == null || localFiles.length == 0) {
        throw new IOException(
            "No executable found in the distributed cache; cannot start the pipes child process");
      }
      String executable = localFiles[0].toString();
      FileUtil.chmod(executable, "a+x");

      List<String> cmd = new ArrayList<String>();
      cmd.add(executable);
      // wrap the command in a stdout/stderr capture
      TaskAttemptID taskid = TaskAttemptID.forName(conf.get("mapred.task.id"));
      File stdout = TaskLog.getTaskLogFile(taskid, TaskLog.LogName.STDOUT);
      File stderr = TaskLog.getTaskLogFile(taskid, TaskLog.LogName.STDERR);
      long logLength = TaskLog.getTaskLogLength(conf);
      cmd = TaskLog.captureOutAndError(cmd, stdout, stderr, logLength);

      child = runClient(cmd, env);
      process = child;

      // Blocks until the child connects back on the advertised port.
      client = serverSocket.accept();
      clientSocket = client;

      handler = new OutputHandler<K2, V2>(output, reporter, recordReader);
      K2 outputKey = (K2) ReflectionUtils.newInstance(outputKeyClass, conf);
      V2 outputValue = (V2) ReflectionUtils.newInstance(outputValueClass, conf);
      downlink =
          new BinaryProtocol<K1, V1, K2, V2>(clientSocket, handler, outputKey, outputValue, conf);
      downlink.start();
      downlink.setJobConf(conf);
      ready = true;
    } finally {
      if (!ready) {
        // Construction failed part-way: release what was acquired. Errors raised while closing
        // are deliberately ignored so that the original failure is the one that propagates.
        if (client != null) {
          try {
            client.close();
          } catch (IOException ignored) {
            // best-effort cleanup
          }
        }
        if (child != null) {
          child.destroy();
        }
        try {
          serverSocket.close();
        } catch (IOException ignored) {
          // best-effort cleanup
        }
      }
    }
  }