/**
 * Loads this object's settings from the job configuration: the number of bytes to write
 * ({@code RandomTextWriter.BYTES_PER_MAP}) and the minimum/maximum key and value settings
 * ({@code MIN_KEY}, {@code MAX_KEY}, {@code MIN_VALUE}, {@code MAX_VALUE}).
 *
 * <p>NOTE(review): the method is cut off in this excerpt (no closing brace is visible), so
 * further statements may follow in the full source.
 *
 * @param job the job configuration to read from
 */
public void configure(JobConf job) {
   // Default is 1 * 1024 * 1024 * 1024 = 1 GiB; the int product still fits in an int.
   bytesToWrite = job.getLong(RandomTextWriter.BYTES_PER_MAP, 1 * 1024 * 1024 * 1024);
   // Key bounds default to 5 and 10. NOTE(review): the unit (words? bytes?) is not visible
   // here -- confirm against the code that consumes keymin/keymax.
   keymin = job.getInt(RandomTextWriter.MIN_KEY, 5);
   keymax = job.getInt(RandomTextWriter.MAX_KEY, 10);
   // Value bounds use the same defaults as the key bounds.
   valmin = job.getInt(RandomTextWriter.MIN_VALUE, 5);
   valmax = job.getInt(RandomTextWriter.MAX_VALUE, 10);
Exemplo n.º 2
  private MiniMRCluster startCluster(JobConf conf, int numTrackers) throws IOException {
    conf.setLong("mapred.job.tracker.retiredjobs.cache.size", 1);
    conf.setLong("mapred.jobtracker.retirejob.interval", 0);
    conf.setLong("mapred.jobtracker.retirejob.check", 0);
    conf.getLong("mapred.jobtracker.completeuserjobs.maximum", 0);

    return new MiniMRCluster(0, 0, numTrackers, "file:///", 1, null, null, null, conf, 0);
Exemplo n.º 3
 /**
  * Keeps a reference to the job configuration and initialises the fields derived from it: URL
  * normalizers (inject scope), URL filters, scoring filters, the default fetch interval, the
  * score given to injected entries, and the current time.
  *
  * <p>NOTE(review): the method is cut off in this excerpt (no closing brace is visible).
  *
  * @param job the job configuration to read from
  */
 public void configure(JobConf job) {
   // jobConf and job refer to the same object from here on; both names are used below.
   this.jobConf = job;
   urlNormalizers = new URLNormalizers(job, URLNormalizers.SCOPE_INJECT);
   // Default 2592000 = 30 * 24 * 60 * 60. NOTE(review): presumably seconds (30 days) -- confirm.
   interval = jobConf.getInt("db.fetch.interval.default", 2592000);
   filters = new URLFilters(jobConf);
   scfilters = new ScoringFilters(jobConf);
   scoreInjected = jobConf.getFloat("db.score.injected", 1.0f);
   // Falls back to the wall clock when "injector.current.time" is not set in the job.
   curTime = job.getLong("injector.current.time", System.currentTimeMillis());
Exemplo n.º 4
 /**
  * Calculate how many maps to run. Number of maps is bounded by a minimum of the cumulative size
  * of the copy / (the BYTES_PER_MAP_LABEL setting, default BYTES_PER_MAP or -m on the command
  * line) and at most (a configured maximum, default MAX_MAPS_PER_NODE * nodes in the cluster).
  * @param totalBytes Count of total bytes for job
  * @param job The job to configure
  * @return Count of maps to run.
  */
 private static void setMapCount(long totalBytes, JobConf job) throws IOException {
   // One map per BYTES_PER_MAP_LABEL bytes (default BYTES_PER_MAP); the long quotient is
   // narrowed to int.
   int numMaps = (int) (totalBytes / job.getLong(BYTES_PER_MAP_LABEL, BYTES_PER_MAP));
   // NOTE(review): the right-hand side of the next assignment is damaged in this excerpt (its
   // first line is missing and the parentheses are unbalanced). What remains multiplies
   // MAX_MAPS_PER_NODE by the cluster's task-tracker count; recover the rest from the full source.
   numMaps =
               MAX_MAPS_PER_NODE * new JobClient(job).getClusterStatus().getTaskTrackers()));
   // Never configure fewer than one map task.
   // NOTE(review): the method is void although the Javadoc above it carries an @return tag; the
   // result is stored on the job instead of being returned.
   job.setNumMapTasks(Math.max(numMaps, 1));
Exemplo n.º 5
    /**
     * Produce splits such that each is no greater than the quotient of the total size and the
     * number of splits requested.
     * @param job The handle to the JobConf object
     * @param numSplits Number of splits requested
     */
    public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
      // Job metadata: file count, total size and the URI of the source list (see the labels in
      // the error message below). Each falls back to a sentinel (-1 or "") when unset.
      int cnfiles = job.getInt(SRC_COUNT_LABEL, -1);
      long cbsize = job.getLong(TOTAL_SIZE_LABEL, -1);
      String srcfilelist = job.get(SRC_LIST_LABEL, "");
      // Any sentinel still present means the metadata was not provided; fail fast.
      if (cnfiles < 0 || cbsize < 0 || "".equals(srcfilelist)) {
        throw new RuntimeException(
            "Invalid metadata: #files("
                + cnfiles
                + ") total_size("
                + cbsize
                + ") listuri("
                + srcfilelist
                + ")");
      // NOTE(review): the closing brace of the check above is missing in this excerpt.
      Path src = new Path(srcfilelist);
      FileSystem fs = src.getFileSystem(job);
      FileStatus srcst = fs.getFileStatus(src);

      // numSplits only pre-sizes the list; the number of splits added below is driven by the data.
      ArrayList<FileSplit> splits = new ArrayList<FileSplit>(numSplits);
      LongWritable key = new LongWritable();
      FilePair value = new FilePair();
      // Size budget per split (integer division).
      // NOTE(review): numSplits == 0 would throw ArithmeticException here -- confirm callers
      // never pass 0.
      final long targetsize = cbsize / numSplits;
      // pos:  start offset (in the list file) of the split currently being built.
      // last: reader position captured by the loop's update expression.
      // acc:  sum of key values accumulated into the current split.
      long pos = 0L;
      long last = 0L;
      long acc = 0L;
      // Bytes of the list file not yet assigned to a split; starts at the file's full length.
      long cbrem = srcst.getLen();
      SequenceFile.Reader sl = null;
      try {
        sl = new SequenceFile.Reader(fs, src, job);
        // NOTE(review): the loop header below is damaged in this excerpt (initialiser and
        // condition are missing). It presumably reads successive (key, value) records from sl --
        // confirm against the full source.
        for (;, value); last = sl.getPosition()) {
          // if adding this split would put this split past the target size,
          // cut the last split and put this next file in the next split.
          if (acc + key.get() > targetsize && acc != 0) {
            long splitsize = last - pos;
            splits.add(new FileSplit(src, pos, splitsize, (String[]) null));
            cbrem -= splitsize;
            pos = last;
            acc = 0L;
          acc += key.get();
      // NOTE(review): the body of the finally block is missing in this excerpt.
      } finally {
      // Whatever is left of the list file after the last cut becomes the final split.
      if (cbrem != 0) {
        splits.add(new FileSplit(src, pos, cbrem, (String[]) null));

      return splits.toArray(new FileSplit[splits.size()]);
  /**
   * When no input dir is specified, generate random data.
   *
   * <p>Reads the maps-per-host and bytes-per-map settings from the job, derives a total byte
   * count and from it the number of maps. If the total is positive but smaller than one map's
   * share, a single map is used and its byte budget is set to the total.
   *
   * <p>NOTE(review): the method is cut off in this excerpt; braces and part of one statement
   * are missing.
   *
   * @param job the job configuration to read and update
   * @throws IOException if the bytes-per-map setting is 0
   */
  protected static void confRandom(JobConf job) throws IOException {
    // from RandomWriter

    final ClusterStatus cluster = new JobClient(job).getClusterStatus();
    int numMapsPerHost = job.getInt(RandomTextWriter.MAPS_PER_HOST, 10);
    // Default is 1 * 1024 * 1024 * 1024 = 1 GiB per map.
    long numBytesToWritePerMap =
        job.getLong(RandomTextWriter.BYTES_PER_MAP, 1 * 1024 * 1024 * 1024);
    // Reject 0 up front: the value is used as a divisor below.
    if (numBytesToWritePerMap == 0) {
      throw new IOException("Cannot have " + RandomTextWriter.BYTES_PER_MAP + " set to 0");
    // NOTE(review): the right-hand side of the next assignment is damaged in this excerpt (its
    // first line is missing). What remains is maps-per-host * bytes-per-map * task-tracker count.
    long totalBytesToWrite =
            numMapsPerHost * numBytesToWritePerMap * cluster.getTaskTrackers());
    int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
    // Less than one map's worth of data: run one map and shrink its budget to the total.
    if (numMaps == 0 && totalBytesToWrite > 0) {
      numMaps = 1;
      job.setLong(RandomTextWriter.BYTES_PER_MAP, totalBytesToWrite);
Exemplo n.º 7
 /**
  * Get the desired maximum length of task's logs.
  * @param conf the job to look in
  * @return the number of bytes to cap the log files at
  */
 public static long getTaskLogLength(JobConf conf) {
   // "mapred.userlog.limit.kb" is read with a default of 100 and multiplied by 1024; going by
   // the ".kb" suffix this converts kilobytes to bytes.
   // NOTE(review): the closing brace of this method is missing in this excerpt.
   return conf.getLong("mapred.userlog.limit.kb", 100) * 1024;
Exemplo n.º 8
  /**
   * Prepares job-history logging for one job: resolves the history directory, creates it and
   * its "done" sub-directory if they do not exist, records the location in the configuration,
   * and sets up the history file manager.
   *
   * <p>An {@link IOException} raised during setup is logged and history is disabled instead of
   * the exception being propagated.
   *
   * <p>NOTE(review): several statements and closing braces of this constructor are damaged in
   * this excerpt; the affected lines are flagged below.
   *
   * @param conf job configuration; it is also modified here (history location, history threads)
   * @param jobId id of the job whose history is recorded
   * @param logPath history directory, or {@code null} to derive one from "hadoop.log.dir"
   */
  public CoronaJobHistory(JobConf conf, JobID jobId, String logPath) {
    try {
      this.conf = conf;
      this.jobId = jobId;
      if (logPath == null) {
        // NOTE(review): the first operand of this concatenation is missing in this excerpt.
        // What remains appends <hadoop.log.dir, default "/tmp">/history.
        logPath =
                + new File(System.getProperty("hadoop.log.dir", "/tmp")).getAbsolutePath()
                + File.separator
                + "history";
      logDir = new Path(logPath);
      logDirFs = logDir.getFileSystem(conf);

      // Create the history directory on first use.
      // NOTE(review): the call that consumed the message string on the next line (presumably a
      // log statement) was lost in extraction.
      if (!logDirFs.exists(logDir)) {"Creating history folder at " + logDir);
        if (!logDirFs.mkdirs(logDir, new FsPermission(HISTORY_DIR_PERMISSION))) {
          throw new IOException("Mkdirs failed to create " + logDir.toString());
      conf.set("hadoop.job.history.location", logDir.toString());
      disableHistory = false;

      // set the job history block size (default is 3MB)
      jobHistoryBlockSize =
          conf.getLong("mapred.jobtracker.job.history.block.size", 3 * 1024 * 1024);

      // The "done" directory lives under the history directory and shares its file system.
      doneDir = new Path(logDir, "done");
      doneDirFs = logDirFs;

      // Same create-if-missing handling as for logDir above; the same extraction damage applies.
      if (!doneDirFs.exists(doneDir)) {"Creating DONE folder at " + doneDir);
        if (!doneDirFs.mkdirs(doneDir, new FsPermission(HISTORY_DIR_PERMISSION))) {
          throw new IOException("Mkdirs failed to create " + doneDir);

      // NOTE(review): the expression that computes logFileName is missing in this excerpt.
      String logFileName =
      logFile = new Path(logDir, logFileName);
      doneFile = new Path(doneDir, logFileName);

      // initialize the file manager
      conf.setInt("mapred.jobtracker.historythreads.maximum", 1);
      // NOTE(review): the remaining constructor arguments and the closing of the anonymous
      // observer are missing in this excerpt. The visible observer callback is a no-op.
      fileManager =
          new CoronaJobHistoryFilesManager(
              new JobHistoryObserver() {
                public void historyFileCopied(JobID jobid, String historyFile) {}


      // sleeping with the past means tolerating two start methods instead of one

    } catch (IOException e) {
      // Setup failures are not fatal: log the error and switch history off.
      LOG.error("Failed to initialize JobHistory log file", e);
      disableHistory = true;
Exemplo n.º 9
  /**
   * Initiate components in the simulation. The JobConf is created separately and passed to
   * init().
   * @param jobConf The configuration for the jobtracker.
   * @throws InterruptedException
   * @throws IOException if trace or topology files cannot be opened.
   */
  void init(JobConf jobConf) throws InterruptedException, IOException {

    // Point the system dir and the job-history location at the local "hadoop.log.dir".
    // NOTE(review): System.getProperty returns null when "hadoop.log.dir" is unset and no
    // default is supplied here -- confirm the property is always defined before init() runs.
    FileSystem lfs = FileSystem.getLocal(getConf());
    Path logPath = new Path(System.getProperty("hadoop.log.dir")).makeQualified(lfs);
    jobConf.set("mapred.system.dir", logPath.toString());
    jobConf.set("hadoop.job.history.location", (new Path(logPath, "history").toString()));

    // start time for virtual clock
    // possible improvement: set default value to something more meaningful based on
    // the 1st job
    long now = getTimeProperty(jobConf, "mumak.start.time", System.currentTimeMillis());

    jt = SimulatorJobTracker.startTracker(jobConf, now, this);

    // Falls back to System.nanoTime() when "mumak.random.seed" is not configured.
    masterRandomSeed = jobConf.getLong("mumak.random.seed", System.nanoTime());

    // max Map/Reduce tasks per node
    // NOTE(review): the next three statements (maxMaps, maxReduces, defaultNode) are damaged in
    // this excerpt: receivers, property names and the builder chain are missing.
    int maxMaps =
            .getInt("", SimulatorTaskTracker.DEFAULT_MAP_SLOTS);
    int maxReduces =

    MachineNode defaultNode =
        new MachineNode.Builder("default", 2)

    LoggedNetworkTopology topology =
        new ClusterTopologyReader(new Path(topologyFile), jobConf).get();
    // Setting the static mapping before removing numeric IP hosts.
    // NOTE(review): the body of this if statement is missing in this excerpt.
    if (getConf().getBoolean("mumak.topology.filter-numeric-ips", true)) {
    ZombieCluster cluster = new ZombieCluster(topology, defaultNode);

    // create TTs based on topology.json
    long firstJobStartTime = startTaskTrackers(cluster, jobConf, now);

    // The job-story producer gets its own seed, derived from the master seed and a fixed label.
    long subRandomSeed =
        RandomSeedGenerator.getSeed("forSimulatorJobStoryProducer", masterRandomSeed);
    JobStoryProducer jobStoryProducer =
        new SimulatorJobStoryProducer(
            new Path(traceFile), cluster, firstJobStartTime, jobConf, subRandomSeed);

    // NOTE(review): the initialiser of submissionPolicy is missing in this excerpt.
    final SimulatorJobSubmissionPolicy submissionPolicy =

    jc = new SimulatorJobClient(jt, jobStoryProducer, submissionPolicy);

    // if the taskScheduler is CapacityTaskScheduler start off the JobInitialization
    // threads too
    // NOTE(review): the condition and body below are damaged in this excerpt (the lookup on
    // jobConf and the statement that consumed the message string are missing).
    if (jobConf
        .equals(CapacityTaskScheduler.class.getName())) {"CapacityScheduler used: starting simulatorThreads");
    // Long.MAX_VALUE is the fallback when "mumak.terminate.time" is not set.
    terminateTime = getTimeProperty(jobConf, "mumak.terminate.time", Long.MAX_VALUE);