public void configure(JobConf job) {
  // Lower/upper bounds for keys and values; each falls back to 5 (min) or 10 (max).
  final int defaultMin = 5;
  final int defaultMax = 10;
  keymin = job.getInt(RandomTextWriter.MIN_KEY, defaultMin);
  keymax = job.getInt(RandomTextWriter.MAX_KEY, defaultMax);
  valmin = job.getInt(RandomTextWriter.MIN_VALUE, defaultMin);
  valmax = job.getInt(RandomTextWriter.MAX_VALUE, defaultMax);

  // Per-map byte budget; 2^30 bytes (1 GiB) unless configured otherwise.
  final long defaultBytesPerMap = 1L << 30;
  bytesToWrite = job.getLong(RandomTextWriter.BYTES_PER_MAP, defaultBytesPerMap);
}
Beispiel #2
0
  /**
   * Start a {@link MiniMRCluster} after overriding the job-retirement related settings on the
   * supplied configuration.
   *
   * <p>The configuration is modified in place: the retired-jobs cache size is set to 1 and the
   * retire interval, retire check and per-user completed-jobs maximum are all set to 0.
   *
   * @param conf configuration to adjust and hand to the cluster
   * @param numTrackers passed through to the {@code MiniMRCluster} constructor
   * @return the newly constructed cluster
   * @throws IOException if the cluster cannot be created
   */
  private MiniMRCluster startCluster(JobConf conf, int numTrackers) throws IOException {
    conf.setLong("mapred.job.tracker.retiredjobs.cache.size", 1);
    conf.setLong("mapred.jobtracker.retirejob.interval", 0);
    conf.setLong("mapred.jobtracker.retirejob.check", 0);
    // This used to be a getLong whose result was thrown away, so the limit was never applied.
    conf.setLong("mapred.jobtracker.completeuserjobs.maximum", 0);

    return new MiniMRCluster(0, 0, numTrackers, "file:///", 1, null, null, null, conf, 0);
  }
Beispiel #3
0
 public void configure(JobConf job) {
   // Keep a handle on the configuration; every lookup below reads from this same object.
   this.jobConf = job;

   // URL normalization for the inject scope.
   urlNormalizers = new URLNormalizers(job, URLNormalizers.SCOPE_INJECT);

   // Default fetch interval; 2592000 when not configured.
   final int defaultFetchInterval = 2592000;
   interval = job.getInt("db.fetch.interval.default", defaultFetchInterval);

   filters = new URLFilters(job);
   scfilters = new ScoringFilters(job);

   // Score given to injected entries; 1.0 when not configured.
   scoreInjected = job.getFloat("db.score.injected", 1.0f);

   // Reference time for this run; the wall clock is used when none is supplied.
   final long wallClock = System.currentTimeMillis();
   curTime = job.getLong("injector.current.time", wallClock);
 }
Beispiel #4
0
 /**
  * Calculate how many maps to run and record that count on the job. The count is the cumulative
  * size of the copy divided by the bytes-per-map setting (distcp.bytes.per.map, default
  * BYTES_PER_MAP or -m on the command line), capped at the maximum map count
  * (distcp.max.map.tasks, default MAX_MAPS_PER_NODE * task trackers in the cluster), and never
  * less than 1.
  *
  * @param totalBytes Count of total bytes for job
  * @param job The job to configure; its number of map tasks is set by this call
  * @throws IOException if the cluster status cannot be obtained
  */
 private static void setMapCount(long totalBytes, JobConf job) throws IOException {
   // Keep the quotient as a long: casting to int before clamping could wrap around for very
   // large copies and silently collapse the job to a single map.
   final long uncappedMaps = totalBytes / job.getLong(BYTES_PER_MAP_LABEL, BYTES_PER_MAP);
   final int maxMaps =
       job.getInt(
           MAX_MAPS_LABEL,
           MAX_MAPS_PER_NODE * new JobClient(job).getClusterStatus().getTaskTrackers());
   // After the min() the value is no larger than an int, so the narrowing cast below is safe.
   final long cappedMaps = Math.min(uncappedMaps, (long) maxMaps);
   job.setNumMapTasks((int) Math.max(cappedMaps, 1L));
 }
Beispiel #5
0
    /**
     * Carve the source-list file into byte ranges. Each record's key is added to a running total;
     * a range is closed just before the record that would push the total past (total size /
     * requested splits), unless the range is still empty.
     *
     * @param job The handle to the JobConf object
     * @param numSplits Number of splits requested
     * @return the byte ranges of the source-list file, one {@code FileSplit} per range
     * @throws IOException if the source list cannot be stat'ed or read
     */
    public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
      final int fileCount = job.getInt(SRC_COUNT_LABEL, -1);
      final long totalSize = job.getLong(TOTAL_SIZE_LABEL, -1);
      final String listUri = job.get(SRC_LIST_LABEL, "");

      // All three pieces of metadata must have been recorded in the job.
      final boolean metadataPresent = fileCount >= 0 && totalSize >= 0 && !"".equals(listUri);
      if (!metadataPresent) {
        StringBuilder msg = new StringBuilder("Invalid metadata: #files(");
        msg.append(fileCount).append(") total_size(").append(totalSize);
        msg.append(") listuri(").append(listUri).append(")");
        throw new RuntimeException(msg.toString());
      }

      final Path listPath = new Path(listUri);
      final FileSystem listFs = listPath.getFileSystem(job);
      final FileStatus listStatus = listFs.getFileStatus(listPath);

      final ArrayList<FileSplit> ranges = new ArrayList<FileSplit>(numSplits);
      final LongWritable recordKey = new LongWritable();
      final FilePair recordValue = new FilePair();
      final long targetSize = totalSize / numSplits;

      long rangeStart = 0L; // offset in the list file where the open range begins
      long recordBoundary = 0L; // reader position after the previously read record
      long accumulated = 0L; // sum of keys placed in the open range so far
      long unassigned = listStatus.getLen(); // bytes of the list file not yet in any range

      SequenceFile.Reader reader = null;
      try {
        reader = new SequenceFile.Reader(listFs, listPath, job);
        while (reader.next(recordKey, recordValue)) {
          final long weight = recordKey.get();
          // Close the open range before this record if the record would overshoot the target;
          // an empty range always accepts the record.
          if (accumulated != 0 && accumulated + weight > targetSize) {
            final long rangeLength = recordBoundary - rangeStart;
            ranges.add(new FileSplit(listPath, rangeStart, rangeLength, (String[]) null));
            unassigned -= rangeLength;
            rangeStart = recordBoundary;
            accumulated = 0L;
          }
          accumulated += weight;
          recordBoundary = reader.getPosition();
        }
      } finally {
        checkAndClose(reader);
      }

      // Whatever is left of the list file becomes the final range.
      if (unassigned != 0) {
        ranges.add(new FileSplit(listPath, rangeStart, unassigned, (String[]) null));
      }

      final FileSplit[] result = new FileSplit[ranges.size()];
      return ranges.toArray(result);
    }
  /**
   * Configure the job to generate random data; used when no input dir is specified.
   *
   * @param job the job to configure; input format, mapper class and map count are set on it
   * @throws IOException if the cluster status cannot be fetched or the bytes-per-map setting is 0
   */
  protected static void confRandom(JobConf job) throws IOException {
    // from RandomWriter
    job.setInputFormat(RandomInputFormat.class);
    job.setMapperClass(RandomMapOutput.class);

    final ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    final int mapsPerHost = job.getInt(RandomTextWriter.MAPS_PER_HOST, 10);

    final long defaultBytesPerMap = 1L << 30;
    final long bytesPerMap = job.getLong(RandomTextWriter.BYTES_PER_MAP, defaultBytesPerMap);
    if (bytesPerMap == 0) {
      throw new IOException("Cannot have " + RandomTextWriter.BYTES_PER_MAP + " set to 0");
    }

    // Unless a total is configured, write mapsPerHost maps' worth of data per task tracker.
    final long defaultTotalBytes = mapsPerHost * bytesPerMap * clusterStatus.getTaskTrackers();
    final long totalBytes = job.getLong(RandomTextWriter.TOTAL_BYTES, defaultTotalBytes);

    int mapCount = (int) (totalBytes / bytesPerMap);
    final boolean lessThanOneMapOfData = mapCount == 0 && totalBytes > 0;
    if (lessThanOneMapOfData) {
      // A single map writes everything, so shrink its budget to the requested total.
      job.setLong(RandomTextWriter.BYTES_PER_MAP, totalBytes);
      mapCount = 1;
    }
    job.setNumMapTasks(mapCount);
  }
 /**
  * Look up the configured cap on the length of a task's logs.
  *
  * @param conf the job to look in
  * @return the value of {@code mapred.userlog.limit.kb} (100 when unset) multiplied by 1024
  */
 public static long getTaskLogLength(JobConf conf) {
   final long limitInKb = conf.getLong("mapred.userlog.limit.kb", 100);
   return limitInKb * 1024;
 }
  /**
   * Set up history logging for one job: resolve the history and "done" directories (creating
   * them when absent), derive the job's history file paths and start the files manager.
   *
   * <p>An {@link IOException} during setup is logged and leaves history disabled rather than
   * propagating to the caller.
   *
   * @param conf job configuration; the history location and the history thread maximum are
   *     written into it
   * @param jobId id of the job whose history is recorded
   * @param logPath history directory, or {@code null} to use {@code <hadoop.log.dir>/history}
   *     (with {@code hadoop.log.dir} defaulting to {@code /tmp})
   */
  public CoronaJobHistory(JobConf conf, JobID jobId, String logPath) {
    try {
      this.conf = conf;
      this.jobId = jobId;

      // No explicit location: fall back to a "history" folder under the log directory.
      if (logPath == null) {
        final File localLogRoot = new File(System.getProperty("hadoop.log.dir", "/tmp"));
        logPath = "file:///" + localLogRoot.getAbsolutePath() + File.separator + "history";
      }
      logDir = new Path(logPath);
      logDirFs = logDir.getFileSystem(conf);

      final boolean historyDirMissing = !logDirFs.exists(logDir);
      if (historyDirMissing) {
        LOG.info("Creating history folder at " + logDir);
        final boolean created = logDirFs.mkdirs(logDir, new FsPermission(HISTORY_DIR_PERMISSION));
        if (!created) {
          throw new IOException("Mkdirs failed to create " + logDir.toString());
        }
      }
      conf.set("hadoop.job.history.location", logDir.toString());
      disableHistory = false;

      // set the job history block size (default is 3MB)
      final long defaultHistoryBlockSize = 3 * 1024 * 1024;
      jobHistoryBlockSize =
          conf.getLong("mapred.jobtracker.job.history.block.size", defaultHistoryBlockSize);

      // The "done" folder lives inside the history folder, on the same file system.
      doneDir = new Path(logDir, "done");
      doneDirFs = logDirFs;

      final boolean doneDirMissing = !doneDirFs.exists(doneDir);
      if (doneDirMissing) {
        LOG.info("Creating DONE folder at " + doneDir);
        final boolean created = doneDirFs.mkdirs(doneDir, new FsPermission(HISTORY_DIR_PERMISSION));
        if (!created) {
          throw new IOException("Mkdirs failed to create " + doneDir);
        }
      }

      // The same encoded file name is used in both the history and the done folder.
      final String rawFileName = CoronaJobHistoryFilesManager.getHistoryFilename(jobId);
      final String logFileName = encodeJobHistoryFileName(rawFileName);
      logFile = new Path(logDir, logFileName);
      doneFile = new Path(doneDir, logFileName);

      // initialize the file manager
      conf.setInt("mapred.jobtracker.historythreads.maximum", 1);
      final JobHistoryObserver noOpObserver =
          new JobHistoryObserver() {
            public void historyFileCopied(JobID jobid, String historyFile) {}
          };
      fileManager = new CoronaJobHistoryFilesManager(conf, noOpObserver, logDir);
      fileManager.setDoneDir(doneDir);

      // The files manager exposes two start methods; both are invoked, in this order.
      fileManager.start();
      fileManager.startIOExecutor();

    } catch (IOException e) {
      LOG.error("Failed to initialize JobHistory log file", e);
      disableHistory = true;
    }
  }
Beispiel #9
0
  /**
   * Initiate components in the simulation. The JobConf is created separately and passed to
   * init().
   *
   * <p>In order, this points the system dir and history location at the local log directory,
   * starts the simulated job tracker, builds the cluster from the topology file, starts the task
   * trackers, creates the job client from the trace file and queues its initial events.
   *
   * @param jobConf The configuration for the jobtracker.
   * @throws InterruptedException
   * @throws IOException if trace or topology files cannot be opened.
   */
  @SuppressWarnings("deprecation")
  void init(JobConf jobConf) throws InterruptedException, IOException {

    // NOTE(review): hadoop.log.dir is read without a default here; confirm it is always set.
    FileSystem lfs = FileSystem.getLocal(getConf());
    Path logPath = new Path(System.getProperty("hadoop.log.dir")).makeQualified(lfs);
    jobConf.set("mapred.system.dir", logPath.toString());
    jobConf.set("hadoop.job.history.location", (new Path(logPath, "history").toString()));

    // start time for virtual clock
    // possible improvement: set default value to sth more meaningful based on
    // the 1st job
    long now = getTimeProperty(jobConf, "mumak.start.time", System.currentTimeMillis());

    jt = SimulatorJobTracker.startTracker(jobConf, now, this);
    jt.offerService();

    masterRandomSeed = jobConf.getLong("mumak.random.seed", System.nanoTime());

    // max Map/Reduce tasks per node
    int maxMaps =
        getConf()
            .getInt("mapred.tasktracker.map.tasks.maximum", SimulatorTaskTracker.DEFAULT_MAP_SLOTS);
    int maxReduces =
        getConf()
            .getInt(
                "mapred.tasktracker.reduce.tasks.maximum",
                SimulatorTaskTracker.DEFAULT_REDUCE_SLOTS);

    MachineNode defaultNode =
        new MachineNode.Builder("default", 2)
            .setMapSlots(maxMaps)
            .setReduceSlots(maxReduces)
            .build();

    LoggedNetworkTopology topology =
        new ClusterTopologyReader(new Path(topologyFile), jobConf).get();
    // Setting the static mapping before removing numeric IP hosts.
    setStaticMapping(topology);
    if (getConf().getBoolean("mumak.topology.filter-numeric-ips", true)) {
      removeIpHosts(topology);
    }
    ZombieCluster cluster = new ZombieCluster(topology, defaultNode);

    // create TTs based on topology.json
    long firstJobStartTime = startTaskTrackers(cluster, jobConf, now);

    long subRandomSeed =
        RandomSeedGenerator.getSeed("forSimulatorJobStoryProducer", masterRandomSeed);
    JobStoryProducer jobStoryProducer =
        new SimulatorJobStoryProducer(
            new Path(traceFile), cluster, firstJobStartTime, jobConf, subRandomSeed);

    final SimulatorJobSubmissionPolicy submissionPolicy =
        SimulatorJobSubmissionPolicy.getPolicy(jobConf);

    jc = new SimulatorJobClient(jt, jobStoryProducer, submissionPolicy);
    queue.addAll(jc.init(firstJobStartTime));

    // if the taskScheduler is CapacityTaskScheduler start off the JobInitialization
    // threads too. The constant is the receiver of equals() so that an unset scheduler
    // property (null) simply fails the comparison instead of throwing a NullPointerException.
    final String configuredScheduler = jobConf.get("mapred.jobtracker.taskScheduler");
    if (CapacityTaskScheduler.class.getName().equals(configuredScheduler)) {
      LOG.info("CapacityScheduler used: starting simulatorThreads");
      startSimulatorThreadsCapSched(now);
    }
    terminateTime = getTimeProperty(jobConf, "mumak.terminate.time", Long.MAX_VALUE);
  }