/**
 * Driver to copy srcPath to destPath depending on required protocol.
 *
 * @param args arguments
 */
static void copy(final Configuration conf, final Arguments args) throws IOException {
  LOG.info("srcPaths=" + args.srcs);
  LOG.info("destPath=" + args.dst);
  checkSrcPath(conf, args.srcs);

  JobConf job = createJobConf(conf);
  if (args.preservedAttributes != null) {
    job.set(PRESERVE_STATUS_LABEL, args.preservedAttributes);
  }
  if (args.mapredSslConf != null) {
    job.set("dfs.https.client.keystore.resource", args.mapredSslConf);
  }

  // Initialize the mapper
  try {
    setup(conf, job, args);
    JobClient.runJob(job);
    finalize(conf, job, args.dst, args.preservedAttributes);
  } finally {
    // delete tmp
    fullyDelete(job.get(TMP_DIR_LABEL), job);
    // delete jobDirectory
    fullyDelete(job.get(JOB_DIR_LABEL), job);
  }
}
public void configure(JobConf conf) {
  /*
   * Reads the configuration and the distributed cache set up by the driver.
   */
  // Read the number of nodes in the input layer and output layer from the configuration
  inputNumdims = conf.get("numdims");
  inputNumhid = conf.get("numhid");

  // Read the weights from the distributed cache
  Path[] pathwaysFiles = new Path[0];
  try {
    pathwaysFiles = DistributedCache.getLocalCacheFiles(conf);
    for (Path path : pathwaysFiles) {
      /*
       * This loop reads all the distributed cache files;
       * in fact, the driver program ensures that there is only one.
       */
      BufferedReader fis = new BufferedReader(new FileReader(path.toString()));
      try {
        weightline = fis.readLine();
      } finally {
        fis.close(); // avoid leaking the file handle
      }
    }
  } catch (Exception e) {
    e.printStackTrace();
  }
}
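// A hedged driver-side sketch of the setup the configure() above expects: the layer sizes
// go into the JobConf and a single weights file is placed on the distributed cache. The
// property values and the HDFS path below are illustrative assumptions, not from the source.
JobConf conf = new JobConf();
conf.set("numdims", "784"); // assumed layer size
conf.set("numhid", "500");  // assumed layer size
// Add exactly one cache file, as the comment above says the driver guarantees.
DistributedCache.addCacheFile(URI.create("/user/hduser/weights/weights.txt"), conf);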
public void configure(JobConf conf) {
  numberOfCenters = Integer.valueOf(conf.get("numberOfCenters"));
  centersDirectory = conf.get("centersReadDirectory");

  try {
    Configuration c = new Configuration();
    FileSystem fs = FileSystem.get(c);

    for (int index = 0; index < numberOfCenters; ++index) {
      SequenceFile.Reader reader =
          new SequenceFile.Reader(fs, new Path(centersDirectory + "/centers/" + index), c);

      LongWritable key = new LongWritable();
      Point value = new Point();
      reader.next(key, value);
      Point center = (Point) value;
      centers.add(center);

      reader.close();
    }
  } catch (IOException e) {
    // Not expected to happen; log and continue.
    System.out.println("well, damn.");
    e.printStackTrace();
  }
}
// Mostly for setting up the symlinks. Note that when we setup the distributed
// cache, we didn't create the symlinks. This is done on a per task basis
// by the currently executing task.
public static void setupWorkDir(JobConf conf) throws IOException {
  File workDir = new File(".").getAbsoluteFile();
  FileUtil.fullyDelete(workDir);

  if (DistributedCache.getSymlink(conf)) {
    URI[] archives = DistributedCache.getCacheArchives(conf);
    URI[] files = DistributedCache.getCacheFiles(conf);
    Path[] localArchives = DistributedCache.getLocalCacheArchives(conf);
    Path[] localFiles = DistributedCache.getLocalCacheFiles(conf);

    if (archives != null) {
      for (int i = 0; i < archives.length; i++) {
        String link = archives[i].getFragment();
        if (link != null) {
          link = workDir.toString() + Path.SEPARATOR + link;
          File flink = new File(link);
          if (!flink.exists()) {
            FileUtil.symLink(localArchives[i].toString(), link);
          }
        }
      }
    }
    if (files != null) {
      for (int i = 0; i < files.length; i++) {
        String link = files[i].getFragment();
        if (link != null) {
          link = workDir.toString() + Path.SEPARATOR + link;
          File flink = new File(link);
          if (!flink.exists()) {
            FileUtil.symLink(localFiles[i].toString(), link);
          }
        }
      }
    }
  }

  File jobCacheDir = null;
  if (conf.getJar() != null) {
    jobCacheDir = new File(new Path(conf.getJar()).getParent().toString());
  }

  // create symlinks for all the files in job cache dir in current
  // working dir for streaming
  try {
    DistributedCache.createAllSymlink(conf, jobCacheDir, workDir);
  } catch (IOException ie) {
    // Do not exit even if symlinks have not been created.
    LOG.warn(StringUtils.stringifyException(ie));
  }

  // add java.io.tmpdir given by mapred.child.tmp
  String tmp = conf.get("mapred.child.tmp", "./tmp");
  Path tmpDir = new Path(tmp);

  // if temp directory path is not absolute
  // prepend it with workDir.
  if (!tmpDir.isAbsolute()) {
    tmpDir = new Path(workDir.toString(), tmp);
    FileSystem localFs = FileSystem.getLocal(conf);
    if (!localFs.mkdirs(tmpDir) && !localFs.getFileStatus(tmpDir).isDir()) {
      throw new IOException("Mkdirs failed to create " + tmpDir.toString());
    }
  }
}
private static void finalize(
    Configuration conf, JobConf jobconf, final Path destPath, String preservedAttributes)
    throws IOException {
  if (preservedAttributes == null) {
    return;
  }
  EnumSet<FileAttribute> preserved = FileAttribute.parse(preservedAttributes);
  if (!preserved.contains(FileAttribute.USER)
      && !preserved.contains(FileAttribute.GROUP)
      && !preserved.contains(FileAttribute.PERMISSION)) {
    return;
  }

  FileSystem dstfs = destPath.getFileSystem(conf);
  Path dstdirlist = new Path(jobconf.get(DST_DIR_LIST_LABEL));
  SequenceFile.Reader in = null;
  try {
    in = new SequenceFile.Reader(dstdirlist.getFileSystem(jobconf), dstdirlist, jobconf);
    Text dsttext = new Text();
    FilePair pair = new FilePair();
    for (; in.next(dsttext, pair); ) {
      Path absdst = new Path(destPath, pair.output);
      updatePermissions(pair.input, dstfs.getFileStatus(absdst), preserved, dstfs);
    }
  } finally {
    checkAndClose(in);
  }
}
private static IntWritable deduceInputFile(JobConf job) {
  Path[] inputPaths = FileInputFormat.getInputPaths(job);
  Path inputFile = new Path(job.get("map.input.file"));

  // value == one for sort-input; value == two for sort-output
  return (inputFile.getParent().equals(inputPaths[0])) ? sortInput : sortOutput;
}
/**
 * Mapper configuration. Extracts source and destination file system, as well as top-level paths
 * on source and destination directories. Gets the named file systems, to be used later in map.
 */
public void configure(JobConf job) {
  destPath = new Path(job.get(DST_DIR_LABEL, "/"));
  try {
    destFileSys = destPath.getFileSystem(job);
  } catch (IOException ex) {
    throw new RuntimeException("Unable to get the named file system.", ex);
  }
  sizeBuf = job.getInt("copy.buf.size", 128 * 1024);
  buffer = new byte[sizeBuf];
  ignoreReadFailures = job.getBoolean(Options.IGNORE_READ_FAILURES.propertyname, false);
  preserve_status = job.getBoolean(Options.PRESERVE_STATUS.propertyname, false);
  if (preserve_status) {
    preseved = FileAttribute.parse(job.get(PRESERVE_STATUS_LABEL));
  }
  update = job.getBoolean(Options.UPDATE.propertyname, false);
  overwrite = !update && job.getBoolean(Options.OVERWRITE.propertyname, false);
  this.job = job;
}
/**
 * @param split
 * @param jobConf
 * @throws IOException
 */
public XMLRecordReader(FileSplit split, JobConf jobConf) throws IOException {
  if (jobConf.get(START_TAG_KEY) == null || jobConf.get(END_TAG_KEY) == null) {
    throw new RuntimeException("Error! XML start and end tags unspecified!");
  }

  startTag = jobConf.get(START_TAG_KEY).getBytes("utf-8");
  endTag = jobConf.get(END_TAG_KEY).getBytes("utf-8");

  start = split.getStart();
  Path file = split.getPath();

  CompressionCodecFactory compressionCodecs = new CompressionCodecFactory(jobConf);
  CompressionCodec codec = compressionCodecs.getCodec(file);

  FileSystem fs = file.getFileSystem(jobConf);
  if (codec != null) {
    LOG.info("Reading compressed file...");
    fsin = new DataInputStream(codec.createInputStream(fs.open(file)));
    end = Long.MAX_VALUE;
  } else {
    LOG.info("Reading uncompressed file...");
    FSDataInputStream fileIn = fs.open(file);
    fileIn.seek(start);
    fsin = fileIn;
    end = start + split.getLength();
  }

  recordStartPos = start;

  // Because input streams of gzipped files are not seekable (specifically,
  // do not support getPos), we need to keep track of bytes consumed ourselves.
  pos = start;
}
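// A minimal driver-side sketch of how this reader might be configured. The literal key
// names "xmlinput.start" and "xmlinput.end" are assumptions standing in for the
// START_TAG_KEY / END_TAG_KEY constants, which are not shown in the snippet above.
JobConf conf = new JobConf();
conf.set("xmlinput.start", "<page>"); // assumed value of START_TAG_KEY
conf.set("xmlinput.end", "</page>");  // assumed value of END_TAG_KEY
// With this configuration the reader emits everything between each <page>...</page> pair
// as a single record.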
/** @see org.apache.hadoop.mapred.JobSubmissionProtocol#getStagingAreaDir() */
public String getStagingAreaDir() throws IOException {
  Path stagingRootDir =
      new Path(conf.get("mapreduce.jobtracker.staging.root.dir", "/tmp/hadoop/mapred/staging"));
  UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
  String user;
  randid = rand.nextInt(Integer.MAX_VALUE);
  if (ugi != null) {
    user = ugi.getShortUserName() + randid;
  } else {
    user = "******" + randid;
  }
  return fs.makeQualified(new Path(stagingRootDir, user + "/.staging")).toString();
}
/**
 * Produce splits such that each is no greater than the quotient of the total size and the
 * number of splits requested.
 *
 * @param job The handle to the JobConf object
 * @param numSplits Number of splits requested
 */
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
  int cnfiles = job.getInt(SRC_COUNT_LABEL, -1);
  long cbsize = job.getLong(TOTAL_SIZE_LABEL, -1);
  String srcfilelist = job.get(SRC_LIST_LABEL, "");
  if (cnfiles < 0 || cbsize < 0 || "".equals(srcfilelist)) {
    throw new RuntimeException(
        "Invalid metadata: #files("
            + cnfiles
            + ") total_size("
            + cbsize
            + ") listuri("
            + srcfilelist
            + ")");
  }
  Path src = new Path(srcfilelist);
  FileSystem fs = src.getFileSystem(job);
  FileStatus srcst = fs.getFileStatus(src);

  ArrayList<FileSplit> splits = new ArrayList<FileSplit>(numSplits);
  LongWritable key = new LongWritable();
  FilePair value = new FilePair();
  final long targetsize = cbsize / numSplits;
  long pos = 0L;
  long last = 0L;
  long acc = 0L;
  long cbrem = srcst.getLen();
  SequenceFile.Reader sl = null;
  try {
    sl = new SequenceFile.Reader(fs, src, job);
    for (; sl.next(key, value); last = sl.getPosition()) {
      // If adding this file would push the current split past the target size,
      // cut the split here and put this file into the next split.
      if (acc + key.get() > targetsize && acc != 0) {
        long splitsize = last - pos;
        splits.add(new FileSplit(src, pos, splitsize, (String[]) null));
        cbrem -= splitsize;
        pos = last;
        acc = 0L;
      }
      acc += key.get();
    }
  } finally {
    checkAndClose(sl);
  }
  if (cbrem != 0) {
    splits.add(new FileSplit(src, pos, cbrem, (String[]) null));
  }

  return splits.toArray(new FileSplit[splits.size()]);
}
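// An illustrative sketch of the sizing rule above; the numbers are made up, not from the
// source. With a 10 GB total and 20 requested splits, the target split size works out to
// 512 MB, so files are accumulated until adding the next one would cross that threshold.
long cbsize = 10L * 1024 * 1024 * 1024; // total bytes recorded under TOTAL_SIZE_LABEL (assumed)
int numSplits = 20;                      // splits requested (assumed)
long targetsize = cbsize / numSplits;    // 536870912 bytes, i.e. 512 MB per split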
@Override
public void configure(JobConf job) {
  super.configure(job);
  this.localDir = job.get("mapred.child.tmp");
  try {
    FileAndWriter faw = openFileWriter(localDir, 1);
    s1File = faw.file;
    s1FileWriter = faw.writer;
    faw = openFileWriter(localDir, 2);
    s2File = faw.file;
    s2FileWriter = faw.writer;
  } catch (IOException e) {
    e.printStackTrace();
  }
}
TaskTrackerRunner(int trackerId, int numDir, String hostname, JobConf cfg) throws IOException {
  this.trackerId = trackerId;
  this.numDir = numDir;
  localDirs = new String[numDir];
  final JobConf conf;
  if (cfg == null) {
    conf = createJobConf();
  } else {
    conf = createJobConf(cfg);
  }
  if (hostname != null) {
    conf.set("slave.host.name", hostname);
  }
  conf.set("mapred.task.tracker.http.address", "0.0.0.0:0");
  conf.set("mapred.task.tracker.report.address", "127.0.0.1:" + taskTrackerPort);
  File localDirBase = new File(conf.get("mapred.local.dir")).getAbsoluteFile();
  localDirBase.mkdirs();
  StringBuffer localPath = new StringBuffer();
  for (int i = 0; i < numDir; ++i) {
    File ttDir = new File(localDirBase, Integer.toString(trackerId) + "_" + i);
    if (!ttDir.mkdirs()) {
      if (!ttDir.isDirectory()) {
        throw new IOException("Mkdirs failed to create " + ttDir);
      }
    }
    localDirs[i] = ttDir.toString();
    if (i != 0) {
      localPath.append(",");
    }
    localPath.append(localDirs[i]);
  }
  conf.set("mapred.local.dir", localPath.toString());
  LOG.info("mapred.local.dir is " + localPath);
  try {
    tt =
        ugi.doAs(
            new PrivilegedExceptionAction<TaskTracker>() {
              public TaskTracker run() throws InterruptedException, IOException {
                return createTaskTracker(conf);
              }
            });
    isInitialized = true;
  } catch (Throwable e) {
    isDead = true;
    tt = null;
    LOG.error("task tracker " + trackerId + " crashed", e);
  }
}
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
  Path src =
      new Path(
          job.get(
              org.apache.hadoop.mapreduce.GenericMRLoadGenerator.INDIRECT_INPUT_FILE, null));
  FileSystem fs = src.getFileSystem(job);

  ArrayList<IndirectSplit> splits = new ArrayList<IndirectSplit>(numSplits);
  LongWritable key = new LongWritable();
  Text value = new Text();
  for (SequenceFile.Reader sl = new SequenceFile.Reader(fs, src, job); sl.next(key, value); ) {
    splits.add(new IndirectSplit(new Path(value.toString()), key.get()));
  }

  return splits.toArray(new IndirectSplit[splits.size()]);
}
/**
 * @param workDir the task working directory
 * @param conf the job configuration
 * @return the temporary directory for the child task
 * @throws IOException
 */
static Path createChildTmpDir(File workDir, JobConf conf) throws IOException {
  // add java.io.tmpdir given by mapred.child.tmp
  String tmp = conf.get("mapred.child.tmp", "./tmp");
  Path tmpDir = new Path(tmp);

  // if temp directory path is not absolute, prepend it with workDir.
  if (!tmpDir.isAbsolute()) {
    tmpDir = new Path(workDir.toString(), tmp);
    FileSystem localFs = FileSystem.getLocal(conf);
    if (!localFs.mkdirs(tmpDir) && !localFs.getFileStatus(tmpDir).isDir()) {
      throw new IOException("Mkdirs failed to create " + tmpDir.toString());
    }
  }
  return tmpDir;
}
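// A minimal sketch of how the helper above might be used when assembling child-JVM
// arguments; the workDir path and the vargs list are hypothetical, for illustration only.
JobConf conf = new JobConf();
File workDir = new File("/tmp/mapred/local/jobcache/job_x/attempt_x/work"); // hypothetical
Path tmpDir = createChildTmpDir(workDir, conf);
List<String> vargs = new ArrayList<String>();
vargs.add("-Djava.io.tmpdir=" + tmpDir.toString()); // pass the resolved tmp dir to the child JVM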
/**
 * Creates the configuration for the mumak simulation. This is kept modular, mostly for testing
 * purposes, so that the standard configuration can be modified before passing it to the init()
 * function.
 *
 * @return JobConf: the configuration for the SimulatorJobTracker
 */
JobConf createMumakConf() {
  JobConf jobConf = new JobConf(getConf());
  jobConf.setClass(
      "topology.node.switch.mapping.impl", StaticMapping.class, DNSToSwitchMapping.class);
  jobConf.set("fs.default.name", "file:///");
  jobConf.set("mapred.job.tracker", "localhost:8012");
  jobConf.setInt("mapred.jobtracker.job.history.block.size", 512);
  jobConf.setInt("mapred.jobtracker.job.history.buffer.size", 512);
  jobConf.setLong("mapred.tasktracker.expiry.interval", 5000);
  jobConf.setInt("mapred.reduce.copy.backoff", 4);
  jobConf.setLong("mapred.job.reuse.jvm.num.tasks", -1);
  jobConf.setUser("mumak");
  jobConf.set(
      "mapred.system.dir",
      jobConf.get("hadoop.log.dir", "/tmp/hadoop-" + jobConf.getUser()) + "/mapred/system");
  return jobConf;
}
public void configure(JobConf job) {
  // 'key' == sortInput for sort-input; key == sortOutput for sort-output
  key = deduceInputFile(job);

  if (key == sortOutput) {
    partitioner = new HashPartitioner<WritableComparable, Writable>();

    // Figure the 'current' partition and no. of reduces of the 'sort'
    try {
      URI inputURI = new URI(job.get("map.input.file"));
      String inputFile = inputURI.getPath();
      partition =
          Integer.valueOf(inputFile.substring(inputFile.lastIndexOf("part") + 5)).intValue();
      noSortReducers = job.getInt("sortvalidate.sort.reduce.tasks", -1);
    } catch (Exception e) {
      System.err.println("Caught: " + e);
      System.exit(-1);
    }
  }
}
/**
 * Validates map phase progress after each record is processed by map task using custom task
 * reporter.
 */
public void testMapProgress() throws Exception {
  JobConf job = new JobConf();
  fs = FileSystem.getLocal(job);
  Path rootDir = new Path(TEST_ROOT_DIR);
  createInputFile(rootDir);

  job.setNumReduceTasks(0);
  TaskAttemptID taskId = TaskAttemptID.forName("attempt_200907082313_0424_m_000000_0");
  job.setClass("mapreduce.job.outputformat.class", NullOutputFormat.class, OutputFormat.class);
  job.set(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.INPUT_DIR, TEST_ROOT_DIR);
  jobId = taskId.getJobID();

  JobContext jContext = new JobContextImpl(job, jobId);
  InputFormat<?, ?> input = ReflectionUtils.newInstance(jContext.getInputFormatClass(), job);

  List<InputSplit> splits = input.getSplits(jContext);
  JobSplitWriter.createSplitFiles(
      new Path(TEST_ROOT_DIR), job, new Path(TEST_ROOT_DIR).getFileSystem(job), splits);
  TaskSplitMetaInfo[] splitMetaInfo =
      SplitMetaInfoReader.readSplitMetaInfo(jobId, fs, job, new Path(TEST_ROOT_DIR));
  job.setUseNewMapper(true); // use new api

  for (int i = 0; i < splitMetaInfo.length; i++) { // rawSplits.length is 1
    map =
        new TestMapTask(
            job.get(JTConfig.JT_SYSTEM_DIR, "/tmp/hadoop/mapred/system") + jobId + "job.xml",
            taskId,
            i,
            splitMetaInfo[i].getSplitIndex(),
            1);

    JobConf localConf = new JobConf(job);
    map.localizeConfiguration(localConf);
    map.setConf(localConf);
    map.run(localConf, fakeUmbilical);
  }

  // clean up
  fs.delete(rootDir, true);
}
/**
 * Map method. Copies one file from source file system to destination.
 *
 * @param key src len
 * @param value FilePair (FileStatus src, Path dst)
 * @param out Log of failed copies
 * @param reporter
 */
public void map(
    LongWritable key,
    FilePair value,
    OutputCollector<WritableComparable<?>, Text> out,
    Reporter reporter)
    throws IOException {
  final FileStatus srcstat = value.input;
  final Path relativedst = new Path(value.output);
  try {
    copy(srcstat, relativedst, out, reporter);
  } catch (IOException e) {
    ++failcount;
    reporter.incrCounter(Counter.FAIL, 1);
    updateStatus(reporter);
    final String sfailure = "FAIL " + relativedst + " : " + StringUtils.stringifyException(e);
    out.collect(null, new Text(sfailure));
    LOG.info(sfailure);
    try {
      for (int i = 0; i < 3; ++i) {
        try {
          final Path tmp = new Path(job.get(TMP_DIR_LABEL), relativedst);
          if (destFileSys.delete(tmp, true)) break;
        } catch (Throwable ex) {
          // ignore, we are just cleaning up
          LOG.debug("Ignoring cleanup exception", ex);
        }
        // update status, so we don't get timed out
        updateStatus(reporter);
        Thread.sleep(3 * 1000);
      }
    } catch (InterruptedException inte) {
      throw (IOException) new IOException().initCause(inte);
    }
  } finally {
    updateStatus(reporter);
  }
}
@Override
public final void run() {
  try {
    // before preparing the job, localize
    // all the archives
    TaskAttemptID taskid = t.getTaskID();
    LocalDirAllocator lDirAlloc = new LocalDirAllocator("mapred.local.dir");
    File jobCacheDir = null;
    if (conf.getJar() != null) {
      jobCacheDir = new File(new Path(conf.getJar()).getParent().toString());
    }
    File workDir =
        new File(
            lDirAlloc
                .getLocalPathToRead(
                    TaskTracker.getJobCacheSubdir()
                        + Path.SEPARATOR
                        + t.getJobID()
                        + Path.SEPARATOR
                        + t.getTaskID()
                        + Path.SEPARATOR
                        + MRConstants.WORKDIR,
                    conf)
                .toString());

    URI[] archives = DistributedCache.getCacheArchives(conf);
    URI[] files = DistributedCache.getCacheFiles(conf);
    FileStatus fileStatus;
    FileSystem fileSystem;
    Path localPath;
    String baseDir;

    if ((archives != null) || (files != null)) {
      if (archives != null) {
        String[] archivesTimestamps = DistributedCache.getArchiveTimestamps(conf);
        Path[] p = new Path[archives.length];
        for (int i = 0; i < archives.length; i++) {
          fileSystem = FileSystem.get(archives[i], conf);
          fileStatus = fileSystem.getFileStatus(new Path(archives[i].getPath()));
          String cacheId = DistributedCache.makeRelative(archives[i], conf);
          String cachePath = TaskTracker.getCacheSubdir() + Path.SEPARATOR + cacheId;
          if (lDirAlloc.ifExists(cachePath, conf)) {
            localPath = lDirAlloc.getLocalPathToRead(cachePath, conf);
          } else {
            localPath = lDirAlloc.getLocalPathForWrite(cachePath, fileStatus.getLen(), conf);
          }
          baseDir = localPath.toString().replace(cacheId, "");
          p[i] =
              DistributedCache.getLocalCache(
                  archives[i],
                  conf,
                  new Path(baseDir),
                  fileStatus,
                  true,
                  Long.parseLong(archivesTimestamps[i]),
                  new Path(workDir.getAbsolutePath()),
                  false);
        }
        DistributedCache.setLocalArchives(conf, stringifyPathArray(p));
      }
      if ((files != null)) {
        String[] fileTimestamps = DistributedCache.getFileTimestamps(conf);
        Path[] p = new Path[files.length];
        for (int i = 0; i < files.length; i++) {
          fileSystem = FileSystem.get(files[i], conf);
          fileStatus = fileSystem.getFileStatus(new Path(files[i].getPath()));
          String cacheId = DistributedCache.makeRelative(files[i], conf);
          String cachePath = TaskTracker.getCacheSubdir() + Path.SEPARATOR + cacheId;
          if (lDirAlloc.ifExists(cachePath, conf)) {
            localPath = lDirAlloc.getLocalPathToRead(cachePath, conf);
          } else {
            localPath = lDirAlloc.getLocalPathForWrite(cachePath, fileStatus.getLen(), conf);
          }
          baseDir = localPath.toString().replace(cacheId, "");
          p[i] =
              DistributedCache.getLocalCache(
                  files[i],
                  conf,
                  new Path(baseDir),
                  fileStatus,
                  false,
                  Long.parseLong(fileTimestamps[i]),
                  new Path(workDir.getAbsolutePath()),
                  false);
        }
        DistributedCache.setLocalFiles(conf, stringifyPathArray(p));
      }
      Path localTaskFile = new Path(t.getJobFile());
      FileSystem localFs = FileSystem.getLocal(conf);
      localFs.delete(localTaskFile, true);
      OutputStream out = localFs.create(localTaskFile);
      try {
        conf.writeXml(out);
      } finally {
        out.close();
      }
    }

    if (!prepare()) {
      return;
    }

    String sep = System.getProperty("path.separator");
    StringBuffer classPath = new StringBuffer();
    // start with same classpath as parent process
    classPath.append(System.getProperty("java.class.path"));
    classPath.append(sep);
    if (!workDir.mkdirs()) {
      if (!workDir.isDirectory()) {
        LOG.fatal("Mkdirs failed to create " + workDir.toString());
      }
    }

    String jar = conf.getJar();
    if (jar != null) {
      // if the job jar exists, add its lib/, classes/ and root dir to the classpath
      File[] libs = new File(jobCacheDir, "lib").listFiles();
      if (libs != null) {
        for (int i = 0; i < libs.length; i++) {
          classPath.append(sep); // add libs from jar to classpath
          classPath.append(libs[i]);
        }
      }
      classPath.append(sep);
      classPath.append(new File(jobCacheDir, "classes"));
      classPath.append(sep);
      classPath.append(jobCacheDir);
    }

    // include the user specified classpath

    // archive paths
    Path[] archiveClasspaths = DistributedCache.getArchiveClassPaths(conf);
    if (archiveClasspaths != null && archives != null) {
      Path[] localArchives = DistributedCache.getLocalCacheArchives(conf);
      if (localArchives != null) {
        for (int i = 0; i < archives.length; i++) {
          for (int j = 0; j < archiveClasspaths.length; j++) {
            if (archives[i].getPath().equals(archiveClasspaths[j].toString())) {
              classPath.append(sep);
              classPath.append(localArchives[i].toString());
            }
          }
        }
      }
    }
    // file paths
    Path[] fileClasspaths = DistributedCache.getFileClassPaths(conf);
    if (fileClasspaths != null && files != null) {
      Path[] localFiles = DistributedCache.getLocalCacheFiles(conf);
      if (localFiles != null) {
        for (int i = 0; i < files.length; i++) {
          for (int j = 0; j < fileClasspaths.length; j++) {
            if (files[i].getPath().equals(fileClasspaths[j].toString())) {
              classPath.append(sep);
              classPath.append(localFiles[i].toString());
            }
          }
        }
      }
    }

    classPath.append(sep);
    classPath.append(workDir);

    // Build exec child JVM args.
    Vector<String> vargs = new Vector<String>(8);
    File jvm = // use same jvm as parent
        new File(new File(System.getProperty("java.home"), "bin"), "java");
    vargs.add(jvm.toString());

    // Add child (task) java-vm options.
    //
    // The following symbols, if present in the mapred.child.java.opts value, are
    // replaced:
    // + @taskid@ is interpolated with the value of the TaskID.
    // Other occurrences of @ will not be altered.
    //
    // Example with multiple arguments and substitutions, showing
    // jvm GC logging, and start of a passwordless JVM JMX agent so that one can
    // connect with jconsole and the like to watch child memory, threads
    // and get thread dumps.
    //
    // <property>
    //   <name>mapred.child.java.opts</name>
    //   <value>-verbose:gc -Xloggc:/tmp/@taskid@.gc \
    //          -Dcom.sun.management.jmxremote.authenticate=false \
    //          -Dcom.sun.management.jmxremote.ssl=false \
    //   </value>
    // </property>
    //
    String javaOpts = conf.get("mapred.child.java.opts", "-Xmx200m");
    javaOpts = javaOpts.replace("@taskid@", taskid.toString());
    String[] javaOptsSplit = javaOpts.split(" ");

    // Add java.library.path; necessary for loading native libraries.
    //
    // 1. To support the native-hadoop library, i.e. libhadoop.so, we add the
    //    parent process's java.library.path to the child.
    // 2. We also add the 'cwd' of the task to its java.library.path to help
    //    users distribute native libraries via the DistributedCache.
    // 3. The user can also specify extra paths to be added to the
    //    java.library.path via mapred.child.java.opts.
    //
    String libraryPath = System.getProperty("java.library.path");
    if (libraryPath == null) {
      libraryPath = workDir.getAbsolutePath();
    } else {
      libraryPath += sep + workDir;
    }
    boolean hasUserLDPath = false;
    for (int i = 0; i < javaOptsSplit.length; i++) {
      if (javaOptsSplit[i].startsWith("-Djava.library.path=")) {
        javaOptsSplit[i] += sep + libraryPath;
        hasUserLDPath = true;
        break;
      }
    }
    if (!hasUserLDPath) {
      vargs.add("-Djava.library.path=" + libraryPath);
    }
    for (int i = 0; i < javaOptsSplit.length; i++) {
      vargs.add(javaOptsSplit[i]);
    }

    // add java.io.tmpdir given by mapred.child.tmp
    String tmp = conf.get("mapred.child.tmp", "./tmp");
    Path tmpDir = new Path(tmp);

    // if temp directory path is not absolute
    // prepend it with workDir.
    if (!tmpDir.isAbsolute()) {
      tmpDir = new Path(workDir.toString(), tmp);
    }
    FileSystem localFs = FileSystem.getLocal(conf);
    if (!localFs.mkdirs(tmpDir) && !localFs.getFileStatus(tmpDir).isDir()) {
      throw new IOException("Mkdirs failed to create " + tmpDir.toString());
    }
    vargs.add("-Djava.io.tmpdir=" + tmpDir.toString());

    // Add classpath.
    vargs.add("-classpath");
    vargs.add(classPath.toString());

    // Setup the log4j prop
    long logSize = TaskLog.getTaskLogLength(conf);
    vargs.add(
        "-Dhadoop.log.dir=" + new File(System.getProperty("hadoop.log.dir")).getAbsolutePath());
    vargs.add("-Dhadoop.root.logger=INFO,TLA");
    vargs.add("-Dhadoop.tasklog.taskid=" + taskid);
    vargs.add("-Dhadoop.tasklog.totalLogFileSize=" + logSize);

    if (conf.getProfileEnabled()) {
      if (conf.getProfileTaskRange(t.isMapTask()).isIncluded(t.getPartition())) {
        File prof = TaskLog.getTaskLogFile(taskid, TaskLog.LogName.PROFILE);
        vargs.add(String.format(conf.getProfileParams(), prof.toString()));
      }
    }

    // Add main class and its arguments
    vargs.add(Child.class.getName()); // main of Child
    // pass umbilical address
    InetSocketAddress address = tracker.getTaskTrackerReportAddress();
    vargs.add(address.getAddress().getHostAddress());
    vargs.add(Integer.toString(address.getPort()));
    vargs.add(taskid.toString()); // pass task identifier

    String pidFile = null;
    if (tracker.isTaskMemoryManagerEnabled()) {
      pidFile =
          lDirAlloc
              .getLocalPathForWrite(
                  (TaskTracker.getPidFilesSubdir() + Path.SEPARATOR + taskid), this.conf)
              .toString();
    }

    // set memory limit using ulimit if feasible and necessary ...
    String[] ulimitCmd = Shell.getUlimitMemoryCommand(conf);
    List<String> setup = null;
    if (ulimitCmd != null) {
      setup = new ArrayList<String>();
      for (String arg : ulimitCmd) {
        setup.add(arg);
      }
    }

    // Set up the redirection of the task's stdout and stderr streams
    File stdout = TaskLog.getTaskLogFile(taskid, TaskLog.LogName.STDOUT);
    File stderr = TaskLog.getTaskLogFile(taskid, TaskLog.LogName.STDERR);
    stdout.getParentFile().mkdirs();
    tracker.getTaskTrackerInstrumentation().reportTaskLaunch(taskid, stdout, stderr);

    Map<String, String> env = new HashMap<String, String>();
    StringBuffer ldLibraryPath = new StringBuffer();
    ldLibraryPath.append(workDir.toString());
    String oldLdLibraryPath = null;
    oldLdLibraryPath = System.getenv("LD_LIBRARY_PATH");
    if (oldLdLibraryPath != null) {
      ldLibraryPath.append(sep);
      ldLibraryPath.append(oldLdLibraryPath);
    }
    env.put("LD_LIBRARY_PATH", ldLibraryPath.toString());
    jvmManager.launchJvm(
        this,
        jvmManager.constructJvmEnv(
            setup, vargs, stdout, stderr, logSize, workDir, env, pidFile, conf));
    synchronized (lock) {
      while (!done) {
        lock.wait();
      }
    }
    tracker.getTaskTrackerInstrumentation().reportTaskEnd(t.getTaskID());
    if (exitCodeSet) {
      if (!killed && exitCode != 0) {
        if (exitCode == 65) {
          tracker.getTaskTrackerInstrumentation().taskFailedPing(t.getTaskID());
        }
        throw new IOException("Task process exit with nonzero status of " + exitCode + ".");
      }
    }
  } catch (FSError e) {
    LOG.fatal("FSError", e);
    try {
      tracker.fsError(t.getTaskID(), e.getMessage());
    } catch (IOException ie) {
      LOG.fatal(t.getTaskID() + " reporting FSError", ie);
    }
  } catch (Throwable throwable) {
    LOG.warn(t.getTaskID() + " Child Error", throwable);
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    throwable.printStackTrace(new PrintStream(baos));
    try {
      tracker.reportDiagnosticInfo(t.getTaskID(), baos.toString());
    } catch (IOException e) {
      LOG.warn(t.getTaskID() + " Reporting Diagnostics", e);
    }
  } finally {
    try {
      URI[] archives = DistributedCache.getCacheArchives(conf);
      URI[] files = DistributedCache.getCacheFiles(conf);
      if (archives != null) {
        for (int i = 0; i < archives.length; i++) {
          DistributedCache.releaseCache(archives[i], conf);
        }
      }
      if (files != null) {
        for (int i = 0; i < files.length; i++) {
          DistributedCache.releaseCache(files[i], conf);
        }
      }
    } catch (IOException ie) {
      LOG.warn("Error releasing caches : Cache files might not have been cleaned up");
    }
    tracker.reportTaskFinished(t.getTaskID(), false);
    if (t.isMapTask()) {
      tracker.addFreeMapSlot();
    } else {
      tracker.addFreeReduceSlot();
    }
  }
}
private String getCJTJavaOpts(JobConf conf) {
  return conf.get(
      "mapred.corona.standalonecjt.java.opts", JobConf.DEFAULT_MAPRED_TASK_JAVA_OPTS);
}
@Override
public void configure(JobConf conf) {
  TaskAttemptID id = TaskAttemptID.forName(conf.get("mapred.task.id"));
  shouldFail = id.getId() == 0 && id.getTaskID().getId() == 0;
}
/**
 * Get the environment variables for the child map/reduce tasks.
 *
 * @param jobConf job configuration
 * @return the environment variables for the child map/reduce tasks or <code>null</code> if
 *     unspecified
 * @deprecated Use environment variables specific to the map or reduce tasks set via {@link
 *     JobConf#MAPRED_MAP_TASK_ENV} or {@link JobConf#MAPRED_REDUCE_TASK_ENV}
 */
@Deprecated
public String getChildEnv(JobConf jobConf) {
  return jobConf.get(JobConf.MAPRED_TASK_ENV);
}
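// A brief sketch of the non-deprecated alternative referenced in the javadoc above:
// setting the environment separately for map and reduce tasks. The variable names and
// values below are illustrative assumptions, not taken from the source.
JobConf jobConf = new JobConf();
jobConf.set(JobConf.MAPRED_MAP_TASK_ENV, "LD_LIBRARY_PATH=/opt/native/lib");
jobConf.set(JobConf.MAPRED_REDUCE_TASK_ENV, "TMPDIR=/data/tmp");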
private String getVMEnvironment(
    String errorInfo,
    File workDir,
    JobConf conf,
    Map<String, String> env,
    TaskAttemptID taskid,
    long logSize)
    throws Throwable {
  StringBuffer ldLibraryPath = new StringBuffer();
  ldLibraryPath.append(workDir.toString());
  String oldLdLibraryPath = null;
  oldLdLibraryPath = System.getenv("LD_LIBRARY_PATH");
  if (oldLdLibraryPath != null) {
    ldLibraryPath.append(SYSTEM_PATH_SEPARATOR);
    ldLibraryPath.append(oldLdLibraryPath);
  }
  env.put("LD_LIBRARY_PATH", ldLibraryPath.toString());
  env.put(HADOOP_WORK_DIR, workDir.toString());

  // put jobTokenFile name into env
  String jobTokenFile = conf.get(TokenCache.JOB_TOKENS_FILENAME);
  LOG.debug("putting jobToken file name into environment " + jobTokenFile);
  env.put(UserGroupInformation.HADOOP_TOKEN_FILE_LOCATION, jobTokenFile);

  // for the child of the task jvm, set hadoop.root.logger
  env.put("HADOOP_ROOT_LOGGER", "INFO,TLA");
  String hadoopClientOpts = System.getenv("HADOOP_CLIENT_OPTS");
  if (hadoopClientOpts == null) {
    hadoopClientOpts = "";
  } else {
    hadoopClientOpts = hadoopClientOpts + " ";
  }
  hadoopClientOpts =
      hadoopClientOpts
          + "-Dhadoop.tasklog.taskid="
          + taskid
          + " -Dhadoop.tasklog.iscleanup="
          + t.isTaskCleanupTask()
          + " -Dhadoop.tasklog.totalLogFileSize="
          + logSize;
  // following line is a backport from jira MAPREDUCE-1286
  env.put("HADOOP_CLIENT_OPTS", hadoopClientOpts);

  // add the env variables passed by the user
  String mapredChildEnv = getChildEnv(conf);
  if (mapredChildEnv != null && mapredChildEnv.length() > 0) {
    String childEnvs[] = mapredChildEnv.split(",");
    for (String cEnv : childEnvs) {
      try {
        String[] parts = cEnv.split("="); // split on '='
        String value = env.get(parts[0]);
        if (value != null) {
          // replace $env with the child's env constructed by the tasktracker
          // example LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/tmp
          value = parts[1].replace("$" + parts[0], value);
        } else {
          // this key is not configured by the tasktracker for the child;
          // get it from the tasktracker's env
          // example PATH=$PATH:/tmp
          value = System.getenv(parts[0]);
          if (value != null) {
            // the env key is present in the tasktracker's env
            value = parts[1].replace("$" + parts[0], value);
          } else {
            // the env key is not present anywhere; simply set it
            // example X=$X:/tmp or X=/tmp
            value = parts[1].replace("$" + parts[0], "");
          }
        }
        env.put(parts[0], value);
      } catch (Throwable t) {
        // set the error msg
        errorInfo =
            "Invalid User environment settings : "
                + mapredChildEnv
                + ". Failed to parse user-passed environment param."
                + " Expecting : env1=value1,env2=value2...";
        LOG.warn(errorInfo);
        throw t;
      }
    }
  }
  return errorInfo;
}
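// A standalone sketch of the "$VAR" expansion rule implemented above, run against an
// illustrative mapred.child.env value; the names and values are assumptions for this
// example, and the System.getenv() fallback branch is omitted for brevity.
Map<String, String> env = new HashMap<String, String>();
env.put("LD_LIBRARY_PATH", "/work/dir"); // pretend the tasktracker already set this
String mapredChildEnv = "LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/tmp,MY_FLAG=1";
for (String cEnv : mapredChildEnv.split(",")) {
  String[] parts = cEnv.split("=");
  String existing = env.get(parts[0]);
  String value =
      (existing != null)
          ? parts[1].replace("$" + parts[0], existing) // expand $VAR with the known value
          : parts[1].replace("$" + parts[0], "");      // unknown $VAR collapses to empty
  env.put(parts[0], value);
}
// env now maps LD_LIBRARY_PATH -> "/work/dir:/tmp" and MY_FLAG -> "1"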
/**
 * Copy a file to a destination.
 *
 * @param srcstat src path and metadata
 * @param relativedst dst path
 * @param reporter
 */
private void copy(
    FileStatus srcstat,
    Path relativedst,
    OutputCollector<WritableComparable<?>, Text> outc,
    Reporter reporter)
    throws IOException {
  Path absdst = new Path(destPath, relativedst);
  int totfiles = job.getInt(SRC_COUNT_LABEL, -1);
  assert totfiles >= 0 : "Invalid file count " + totfiles;

  // if a directory, ensure created even if empty
  if (srcstat.isDir()) {
    if (destFileSys.exists(absdst)) {
      if (!destFileSys.getFileStatus(absdst).isDir()) {
        throw new IOException("Failed to mkdirs: " + absdst + " is a file.");
      }
    } else if (!destFileSys.mkdirs(absdst)) {
      throw new IOException("Failed to mkdirs " + absdst);
    }
    // TODO: when modification times can be set, directories should be
    // emitted to reducers so they might be preserved. Also, mkdirs does
    // not currently return an error when the directory already exists;
    // if this changes, all directory work might as well be done in reduce
    return;
  }

  if (destFileSys.exists(absdst) && !overwrite && !needsUpdate(srcstat, destFileSys, absdst)) {
    outc.collect(null, new Text("SKIP: " + srcstat.getPath()));
    ++skipcount;
    reporter.incrCounter(Counter.SKIP, 1);
    updateStatus(reporter);
    return;
  }

  Path tmpfile = new Path(job.get(TMP_DIR_LABEL), relativedst);
  long cbcopied = 0L;
  FSDataInputStream in = null;
  FSDataOutputStream out = null;
  try {
    // open src file
    try {
      in = srcstat.getPath().getFileSystem(job).open(srcstat.getPath());
    } catch (IOException e) {
      LOG.error("Failed to open src file " + srcstat.getPath() + ", ignore and return");
      in = null;
      return;
    }
    reporter.incrCounter(Counter.BYTESEXPECTED, srcstat.getLen());
    // open tmp file
    out = create(tmpfile, reporter, srcstat);
    // copy file
    for (int cbread; (cbread = in.read(buffer)) >= 0; ) {
      out.write(buffer, 0, cbread);
      cbcopied += cbread;
      reporter.setStatus(
          String.format("%.2f ", cbcopied * 100.0 / srcstat.getLen())
              + absdst
              + " [ "
              + StringUtils.humanReadableInt(cbcopied)
              + " / "
              + StringUtils.humanReadableInt(srcstat.getLen())
              + " ]");
    }
  } finally {
    checkAndClose(in);
    checkAndClose(out);
  }

  if (cbcopied != srcstat.getLen()) {
    if (srcstat.getLen() == 0 && cbcopied > 0) {
      LOG.info("most likely see a WAL file corruption: " + srcstat.getPath());
    } else {
      throw new IOException(
          "File size not matched: copied "
              + bytesString(cbcopied)
              + " to tmpfile (="
              + tmpfile
              + ") but expected "
              + bytesString(srcstat.getLen())
              + " from "
              + srcstat.getPath());
    }
  } else {
    if (totfiles == 1) {
      // Copying a single file; use dst path provided by user as destination
      // rather than destination directory, if a file
      Path dstparent = absdst.getParent();
      if (!(destFileSys.exists(dstparent) && destFileSys.getFileStatus(dstparent).isDir())) {
        absdst = dstparent;
      }
    }
    if (destFileSys.exists(absdst) && destFileSys.getFileStatus(absdst).isDir()) {
      throw new IOException(absdst + " is a directory");
    }
    if (!destFileSys.mkdirs(absdst.getParent())) {
      throw new IOException("Failed to create parent dir: " + absdst.getParent());
    }
    rename(tmpfile, absdst);

    FileStatus dststat = destFileSys.getFileStatus(absdst);
    if (dststat.getLen() != srcstat.getLen()) {
      destFileSys.delete(absdst, false);
      throw new IOException(
          "File size not matched: copied "
              + bytesString(dststat.getLen())
              + " to dst (="
              + absdst
              + ") but expected "
              + bytesString(srcstat.getLen())
              + " from "
              + srcstat.getPath());
    }
    updatePermissions(srcstat, dststat);
  }

  // report at least once for each file
  ++copycount;
  reporter.incrCounter(Counter.BYTESCOPIED, cbcopied);
  reporter.incrCounter(Counter.COPY, 1);
  updateStatus(reporter);
}
/**
 * Initiate components in the simulation. The JobConf is created separately and passed to
 * init().
 *
 * @param jobConf the configuration for the jobtracker.
 * @throws InterruptedException
 * @throws IOException if trace or topology files cannot be opened.
 */
@SuppressWarnings("deprecation")
void init(JobConf jobConf) throws InterruptedException, IOException {
  FileSystem lfs = FileSystem.getLocal(getConf());
  Path logPath = new Path(System.getProperty("hadoop.log.dir")).makeQualified(lfs);
  jobConf.set("mapred.system.dir", logPath.toString());
  jobConf.set("hadoop.job.history.location", (new Path(logPath, "history").toString()));

  // start time for virtual clock
  // possible improvement: set default value to something more meaningful based on
  // the 1st job
  long now = getTimeProperty(jobConf, "mumak.start.time", System.currentTimeMillis());

  jt = SimulatorJobTracker.startTracker(jobConf, now, this);
  jt.offerService();

  masterRandomSeed = jobConf.getLong("mumak.random.seed", System.nanoTime());

  // max Map/Reduce tasks per node
  int maxMaps =
      getConf()
          .getInt(
              "mapred.tasktracker.map.tasks.maximum", SimulatorTaskTracker.DEFAULT_MAP_SLOTS);
  int maxReduces =
      getConf()
          .getInt(
              "mapred.tasktracker.reduce.tasks.maximum",
              SimulatorTaskTracker.DEFAULT_REDUCE_SLOTS);

  MachineNode defaultNode =
      new MachineNode.Builder("default", 2)
          .setMapSlots(maxMaps)
          .setReduceSlots(maxReduces)
          .build();

  LoggedNetworkTopology topology =
      new ClusterTopologyReader(new Path(topologyFile), jobConf).get();
  // Setting the static mapping before removing numeric IP hosts.
  setStaticMapping(topology);
  if (getConf().getBoolean("mumak.topology.filter-numeric-ips", true)) {
    removeIpHosts(topology);
  }
  ZombieCluster cluster = new ZombieCluster(topology, defaultNode);

  // create TTs based on topology.json
  long firstJobStartTime = startTaskTrackers(cluster, jobConf, now);

  long subRandomSeed =
      RandomSeedGenerator.getSeed("forSimulatorJobStoryProducer", masterRandomSeed);
  JobStoryProducer jobStoryProducer =
      new SimulatorJobStoryProducer(
          new Path(traceFile), cluster, firstJobStartTime, jobConf, subRandomSeed);

  final SimulatorJobSubmissionPolicy submissionPolicy =
      SimulatorJobSubmissionPolicy.getPolicy(jobConf);

  jc = new SimulatorJobClient(jt, jobStoryProducer, submissionPolicy);
  queue.addAll(jc.init(firstJobStartTime));

  // if the taskScheduler is CapacityTaskScheduler, start the JobInitialization
  // threads too
  if (jobConf
      .get("mapred.jobtracker.taskScheduler")
      .equals(CapacityTaskScheduler.class.getName())) {
    LOG.info("CapacityScheduler used: starting simulatorThreads");
    startSimulatorThreadsCapSched(now);
  }
  terminateTime = getTimeProperty(jobConf, "mumak.terminate.time", Long.MAX_VALUE);
}
/**
 * Get the java command line options for the child map/reduce tasks.
 *
 * @param jobConf job configuration
 * @param defaultValue default value
 * @return the java command line options for child map/reduce tasks
 * @deprecated Use command line options specific to map or reduce tasks set via {@link
 *     JobConf#MAPRED_MAP_TASK_JAVA_OPTS} or {@link JobConf#MAPRED_REDUCE_TASK_JAVA_OPTS}
 */
@Deprecated
public String getChildJavaOpts(JobConf jobConf, String defaultValue) {
  return jobConf.get(JobConf.MAPRED_TASK_JAVA_OPTS, defaultValue);
}
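// A brief sketch of the non-deprecated alternative referenced in the javadoc above:
// setting JVM options separately for map and reduce tasks. The heap sizes are
// illustrative assumptions, not values from the source.
JobConf jobConf = new JobConf();
jobConf.set(JobConf.MAPRED_MAP_TASK_JAVA_OPTS, "-Xmx512m");
jobConf.set(JobConf.MAPRED_REDUCE_TASK_JAVA_OPTS, "-Xmx1024m -verbose:gc");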
static void checkRecords(
    Configuration defaults, int noMaps, int noReduces, Path sortInput, Path sortOutput)
    throws IOException {
  JobConf jobConf = new JobConf(defaults, RecordChecker.class);
  jobConf.setJobName("sortvalidate-record-checker");

  jobConf.setInputFormat(SequenceFileInputFormat.class);
  jobConf.setOutputFormat(SequenceFileOutputFormat.class);

  jobConf.setOutputKeyClass(BytesWritable.class);
  jobConf.setOutputValueClass(IntWritable.class);

  jobConf.setMapperClass(Map.class);
  jobConf.setReducerClass(Reduce.class);

  JobClient client = new JobClient(jobConf);
  ClusterStatus cluster = client.getClusterStatus();
  if (noMaps == -1) {
    noMaps = cluster.getTaskTrackers() * jobConf.getInt("test.sortvalidate.maps_per_host", 10);
  }
  if (noReduces == -1) {
    noReduces = (int) (cluster.getMaxReduceTasks() * 0.9);
    String sortReduces = jobConf.get("test.sortvalidate.reduces_per_host");
    if (sortReduces != null) {
      noReduces = cluster.getTaskTrackers() * Integer.parseInt(sortReduces);
    }
  }
  jobConf.setNumMapTasks(noMaps);
  jobConf.setNumReduceTasks(noReduces);

  FileInputFormat.setInputPaths(jobConf, sortInput);
  FileInputFormat.addInputPath(jobConf, sortOutput);
  Path outputPath = new Path("/tmp/sortvalidate/recordchecker");
  FileSystem fs = FileSystem.get(defaults);
  if (fs.exists(outputPath)) {
    fs.delete(outputPath, true);
  }
  FileOutputFormat.setOutputPath(jobConf, outputPath);

  // Uncomment to run locally in a single process
  // jobConf.set("mapred.job.tracker", "local");
  Path[] inputPaths = FileInputFormat.getInputPaths(jobConf);
  System.out.println(
      "\nSortValidator.RecordChecker: Running on "
          + cluster.getTaskTrackers()
          + " nodes to validate sort from "
          + inputPaths[0]
          + ", "
          + inputPaths[1]
          + " into "
          + FileOutputFormat.getOutputPath(jobConf)
          + " with "
          + noReduces
          + " reduces.");
  Date startTime = new Date();
  System.out.println("Job started: " + startTime);
  JobClient.runJob(jobConf);
  Date endTime = new Date();
  System.out.println("Job ended: " + endTime);
  System.out.println(
      "The job took " + (endTime.getTime() - startTime.getTime()) / 1000 + " seconds.");
}
public void configure(JobConf job) {
  this.inputArgs = Utils.asArray(job.get("user.args"));
}
/** @see org.apache.hadoop.mapred.JobSubmissionProtocol#getSystemDir() */
public String getSystemDir() {
  Path sysDir = new Path(conf.get("mapred.system.dir", "/tmp/hadoop/mapred/system"));
  return fs.makeQualified(sysDir).toString();
}
@Override
public void configure(JobConf config) {
  splitChar = (char) NumberUtils.toInt(config.get("user.split"));
}
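// A minimal driver-side sketch of how "user.split" might be set for the snippet above:
// the delimiter is passed as its numeric character code, which NumberUtils.toInt parses
// back into a char in configure(). The tab delimiter here is an illustrative assumption.
JobConf config = new JobConf();
config.set("user.split", String.valueOf((int) '\t')); // "9" -> tab character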