Example #1
  /** Read a source file and compile it up to the current job's last barrier. */
  public boolean readSource(FileSource source) {
    // Add a new SourceJob for the given source. If a Job for the source
    // already exists, then we will be given the existing job.
    SourceJob job = addJob(source);

    if (job == null) {
      // addJob returns null if the job has already been completed, in
      // which case we can just ignore the request to read in the source.
      return true;
    }

    // Run the new job up to the current job's SourceJob's last barrier to
    // make sure that dependencies are satisfied.
    Pass.ID barrier;

    if (currentJob != null) {
      if (currentJob.sourceJob().lastBarrier() == null) {
        throw new InternalCompilerError(
            "A Source Job which has "
                + "not reached a barrier cannot read another "
                + "source file.");
      }

      barrier = currentJob.sourceJob().lastBarrier().id();
    } else {
      barrier = Pass.FIRST_BARRIER;
    }

    // Make sure we reach at least the first barrier defined
    // in the base compiler.  This forces types to be constructed.
    // If FIRST_BARRIER is before "barrier",
    // then the second runToPass will just return true.
    return runToPass(job, barrier) && runToPass(job, Pass.FIRST_BARRIER);
  }
Example #2
  /** Run a job up to the <code>goal</code> pass. */
  public boolean runToPass(Job job, Pass goal) {
    if (Report.should_report(Report.frontend, 1))
      Report.report(1, "Running " + job + " to pass " + goal);

    while (!job.pendingPasses().isEmpty()) {
      Pass pass = (Pass) job.pendingPasses().get(0);

      try {
        runPass(job, pass);
      } catch (CyclicDependencyException e) {
        // cause the pass to fail.
        job.finishPass(pass, false);
      }

      if (pass == goal) {
        break;
      }
    }

    if (job.completed()) {
      if (Report.should_report(Report.frontend, 1)) Report.report(1, "Job " + job + " completed");
    }

    return job.status();
  }
Example #3
  public static void main(String[] args) throws Exception {
    String inputDirectory = "/home/cs246/Desktop/HW2/input";
    String outputDirectory = "/home/cs246/Desktop/HW2/output";
    String centroidDirectory = "/home/cs246/Desktop/HW2/config";

    int iterations = 20;

    for (int i = 1; i <= iterations; i++) {
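      // Iteration i reads centroids from c<i>.txt (CFILE) and writes the
      // updated centroids for the next iteration to c<i+1>.txt (NEXTCFILE).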
      Configuration conf = new Configuration();

      String cFile = centroidDirectory + "/c" + i + ".txt";
      String nextCFile = centroidDirectory + "/c" + (i + 1) + ".txt";
      conf.set("CFILE", cFile);
      conf.set("NEXTCFILE", nextCFile);

      Job job = new Job(conf, "HW2_Q4." + i);
      job.setJarByClass(HW2_Q4.class);
      job.setOutputKeyClass(IntWritable.class);
      job.setOutputValueClass(Text.class);
      job.setMapperClass(Map1.class);
      job.setReducerClass(Reduce1.class);
      job.setInputFormatClass(TextInputFormat.class);
      job.setOutputFormatClass(TextOutputFormat.class);

      FileInputFormat.addInputPath(job, new Path(inputDirectory));
      FileOutputFormat.setOutputPath(job, new Path(outputDirectory + "/output" + i));

      job.waitForCompletion(true);
    }
  }
Example #4
  /** Run a job until the <code>goal</code> pass completes. */
  public boolean runToPass(Job job, Pass.ID goal) {
    if (Report.should_report(Report.frontend, 1))
      Report.report(1, "Running " + job + " to pass named " + goal);

    if (job.completed(goal)) {
      return true;
    }

    Pass pass = job.passByID(goal);

    return runToPass(job, pass);
  }
Example #5
  /** Adds a dependency from the current job to the given Source. */
  public void addDependencyToCurrentJob(Source s) {
    if (s == null) return;
    if (currentJob != null) {
      Object o = jobs.get(s);
      if (o != COMPLETED_JOB) {
        if (Report.should_report(Report.frontend, 2)) {
          Report.report(2, "Adding dependency from " + currentJob.source() + " to " + s);
        }
        currentJob.sourceJob().addDependency(s);
      }
    } else {
      throw new InternalCompilerError("No current job!");
    }
  }
Example #6
  void checkFormat(Job job) throws Exception {
    TaskAttemptContext attemptContext =
        new TaskAttemptContext(job.getConfiguration(), new TaskAttemptID("123", 0, false, 1, 2));

    MyClassMessagePackBase64LineInputFormat format = new MyClassMessagePackBase64LineInputFormat();
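    // workDir is assumed to be a field of the enclosing test class that
    // points at the directory of Base64-encoded MessagePack input files.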
    FileInputFormat.setInputPaths(job, workDir);

    List<InputSplit> splits = format.getSplits(job);
    for (int j = 0; j < splits.size(); j++) {
      RecordReader<LongWritable, MyClassWritable> reader =
          format.createRecordReader(splits.get(j), attemptContext);
      reader.initialize(splits.get(j), attemptContext);

      int count = 0;
      try {
        while (reader.nextKeyValue()) {
          LongWritable key = reader.getCurrentKey();
          MyClassWritable val = reader.getCurrentValue();
          MyClass mc = val.get();
          assertEquals(mc.v, count);
          assertEquals(mc.s, Integer.toString(count));
          count++;
        }
      } finally {
        reader.close();
      }
    }
  }
Example #7
  /** Run all pending passes on <code>job</code>. */
  public boolean runAllPasses(Job job) {
    List pending = job.pendingPasses();

    // Run until there are no more passes.
    if (!pending.isEmpty()) {
      Pass lastPass = (Pass) pending.get(pending.size() - 1);
      return runToPass(job, lastPass);
    }

    return true;
  }
Example #8
  @Override
  public int run(String[] args) throws Exception {

    String locatorHost = args[0];
    int locatorPort = Integer.parseInt(args[1]);
    String hdfsHomeDir = args[2];

    System.out.println(
        "KnownKeysMRv2 invoked with args (locatorHost = "
            + locatorHost
            + " locatorPort = "
            + locatorPort
            + " hdfsHomeDir = "
            + hdfsHomeDir
            + ")");

    Configuration conf = getConf();
    conf.set(GFInputFormat.INPUT_REGION, "partitionedRegion");
    conf.set(GFInputFormat.HOME_DIR, hdfsHomeDir);
    conf.setBoolean(GFInputFormat.CHECKPOINT, false);
    conf.set(GFOutputFormat.REGION, "validationRegion");
    conf.set(GFOutputFormat.LOCATOR_HOST, locatorHost);
    conf.setInt(GFOutputFormat.LOCATOR_PORT, locatorPort);

    Job job = Job.getInstance(conf, "knownKeysMRv2");
    job.setInputFormatClass(GFInputFormat.class);
    job.setOutputFormatClass(GFOutputFormat.class);

    job.setMapperClass(KnownKeysMRv2Mapper.class);
    job.setMapOutputKeyClass(GFKey.class);
    job.setMapOutputValueClass(PEIWritable.class);

    job.setReducerClass(KnownKeysMRv2Reducer.class);
    // job.setOutputKeyClass(String.class);
    // job.setOutputValueClass(ValueHolder.class);

    return job.waitForCompletion(false) ? 0 : 1;
  }
Example #9
  static void realMain(String[] args) throws Throwable {
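    // (java, jps, failed, drainers and the match/commandOutputOf/
    // objectsInUse/equal/check helpers are defined elsewhere in the
    // enclosing test class.)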
    // jmap doesn't work on Windows
    if (System.getProperty("os.name").startsWith("Windows")) return;

    final String childClassName = Job.class.getName();
    final String classToCheckForLeaks = Job.classToCheckForLeaks();
    final String uniqueID = String.valueOf(new Random().nextInt(Integer.MAX_VALUE));

    final String[] jobCmd = {
      java,
      "-Xmx8m",
      "-classpath",
      System.getProperty("test.classes", "."),
      childClassName,
      uniqueID
    };
    final Process p = new ProcessBuilder(jobCmd).start();

    final String childPid =
        match(
            commandOutputOf(jps, "-m"),
            "(?m)^ *([0-9]+) +\\Q" + childClassName + "\\E *" + uniqueID + "$",
            1);

    final int n0 = objectsInUse(p, childPid, classToCheckForLeaks);
    final int n1 = objectsInUse(p, childPid, classToCheckForLeaks);
    equal(p.waitFor(), 0);
    equal(p.exitValue(), 0);
    failed += p.exitValue();

    // Check that no objects were leaked.
    System.out.printf("%d -> %d%n", n0, n1);
    check(Math.abs(n1 - n0) < 2); // Almost always n0 == n1
    check(n1 < 20);
    drainers.shutdown();
  }
Example #10
  /**
   * Before running <code>Pass pass</code> on <code>SourceJob job</code> make sure that all
   * appropriate scheduling invariants are satisfied, to ensure that all passes of other jobs that
   * <code>job</code> depends on will have already been done.
   */
  protected void enforceInvariants(Job job, Pass pass) throws CyclicDependencyException {
    SourceJob srcJob = job.sourceJob();
    if (srcJob == null) {
      return;
    }

    BarrierPass lastBarrier = srcJob.lastBarrier();
    if (lastBarrier != null) {
      // make sure that _all_ dependent jobs have completed at least up to
      // the last barrier (not just children).
      //
      // Ideally the invariant should be that only the source jobs that
      // job _depends on_ should be brought up to the last barrier.
      // This is work to be done in the future...
      List allDependentSrcs = new ArrayList(srcJob.dependencies());
      Iterator i = allDependentSrcs.iterator();
      while (i.hasNext()) {
        Source s = (Source) i.next();
        Object o = jobs.get(s);
        if (o == COMPLETED_JOB) continue;
        if (o == null) {
          throw new InternalCompilerError("Unknown source " + s);
        }
        SourceJob sj = (SourceJob) o;
        if (sj.pending(lastBarrier.id())) {
          // Make the job run up to the last barrier.
          // We ignore the return result, since even if the job
          // fails, we will keep on going and see
          // how far we get...
          if (Report.should_report(Report.frontend, 3)) {
            Report.report(3, "Running " + sj + " to " + lastBarrier.id() + " for " + srcJob);
          }
          runToPass(sj, lastBarrier.id());
        }
      }
    }

    if (pass instanceof GlobalBarrierPass) {
      // need to make sure that _all_ jobs have completed just up to
      // this global barrier.

      // If we hit a cyclic dependency, ignore it and run the other
      // jobs up to that pass.  Then try again to run the cyclic
      // pass.  If we hit the cycle again for the same job, stop.
      LinkedList barrierWorklist = new LinkedList(jobs.values());

      while (!barrierWorklist.isEmpty()) {
        Object o = barrierWorklist.removeFirst();
        if (o == COMPLETED_JOB) continue;
        SourceJob sj = (SourceJob) o;
        if (sj.completed(pass.id()) || sj.nextPass() == sj.passByID(pass.id())) {
          // the source job has either done this global pass
          // (which is possible if the job was loaded late in the
          // game), or is right up to the global barrier.
          continue;
        }

        // Make the job run up to just before the global barrier.
        // We ignore the return result, since even if the job
        // fails, we will keep on going and see
        // how far we get...
        Pass beforeGlobal = sj.getPreviousTo(pass.id());
        if (Report.should_report(Report.frontend, 3)) {
          Report.report(3, "Running " + sj + " to " + beforeGlobal.id() + " for " + srcJob);
        }

        // Don't use runToPass, since that catches the
        // CyclicDependencyException that we should report
        // back to the caller.
        while (!sj.pendingPasses().isEmpty()) {
          Pass p = (Pass) sj.pendingPasses().get(0);

          runPass(sj, p);

          if (p == beforeGlobal) {
            break;
          }
        }
      }
    }
  }
Example #11
  /**
   * Run the pass <code>pass</code> on the job. Before running the pass on the job, if the job is a
   * <code>SourceJob</code>, then this method will ensure that the scheduling invariants are
   * enforced by calling <code>enforceInvariants</code>.
   */
  protected void runPass(Job job, Pass pass) throws CyclicDependencyException {
    // Make sure that all scheduling invariants are satisfied before running
    // the next pass. This may involve first running passes on other jobs
    // before running the given pass on this job.
    try {
      enforceInvariants(job, pass);
    } catch (CyclicDependencyException e) {
      // A job that depends on this job is still running
      // an earlier pass.  We cannot continue this pass,
      // but we can fail silently, since the job that
      // depends on this one will eventually try to run
      // this pass again when it reaches a barrier.
      return;
    }

    if (getOptions().disable_passes.contains(pass.name())) {
      if (Report.should_report(Report.frontend, 1)) Report.report(1, "Skipping pass " + pass);
      job.finishPass(pass, true);
      return;
    }

    if (Report.should_report(Report.frontend, 1))
      Report.report(1, "Trying to run pass " + pass + " in " + job);

    if (job.isRunning()) {
      // We're currently running.  We can't reach the goal.
      throw new CyclicDependencyException(job + " cannot reach pass " + pass);
    }

    pass.resetTimers();

    boolean result = false;
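    // Run the pass only if the job has not already failed.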
    if (job.status()) {
      Job oldCurrentJob = this.currentJob;
      this.currentJob = job;
      Report.should_report.push(pass.name());

      // Stop the timer on the old pass.
      Pass oldPass = oldCurrentJob != null ? oldCurrentJob.runningPass() : null;

      if (oldPass != null) {
        oldPass.toggleTimers(true);
      }

      job.setRunningPass(pass);
      pass.toggleTimers(false);

      result = pass.run();

      pass.toggleTimers(false);
      job.setRunningPass(null);

      Report.should_report.pop();
      this.currentJob = oldCurrentJob;

      // Restart the timer on the old pass.
      if (oldPass != null) {
        oldPass.toggleTimers(true);
      }

      // pretty-print this pass if we need to.
      if (getOptions().print_ast.contains(pass.name())) {
        System.err.println("--------------------------------" + "--------------------------------");
        System.err.println("Pretty-printing AST for " + job + " after " + pass.name());

        PrettyPrinter pp = new PrettyPrinter();
        pp.printAst(job.ast(), new CodeWriter(System.err, 78));
      }

      // dump this pass if we need to.
      if (getOptions().dump_ast.contains(pass.name())) {
        System.err.println("--------------------------------" + "--------------------------------");
        System.err.println("Dumping AST for " + job + " after " + pass.name());

        NodeVisitor dumper = new DumpAst(new CodeWriter(System.err, 78));
        dumper = dumper.begin();
        job.ast().visit(dumper);
        dumper.finish();
      }

      // This seems to work around a VM bug on linux with JDK
      // 1.4.0.  The mark-sweep collector will sometimes crash.
      // Running the GC explicitly here makes the bug go away.
      // If this fails, maybe run with bigger heap.

      // System.gc();
    }

    Stats stats = getStats();
    stats.accumPassTimes(pass.id(), pass.inclusiveTime(), pass.exclusiveTime());

    if (Report.should_report(Report.time, 2)) {
      Report.report(
          2,
          "Finished "
              + pass
              + " status="
              + str(result)
              + " inclusive_time="
              + pass.inclusiveTime()
              + " exclusive_time="
              + pass.exclusiveTime());
    } else if (Report.should_report(Report.frontend, 1)) {
      Report.report(1, "Finished " + pass + " status=" + str(result));
    }

    job.finishPass(pass, result);
  }
Example #12
  @Override
  void solve(Set<? extends Job> jobs) {
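    // Non-preemptive Shortest-Job-First: jobs are scanned in arrival order,
    // and a min-heap keyed on burst time decides which queued job runs next.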
    Job[] sortedJobs = jobs.toArray(new Job[jobs.size()]);
    Arrays.sort(sortedJobs, Job::compareArrivalTime);

    processTime = totWT = 0;
    long usefulTime = 0;
    int jobCount = jobs.size();
    PriorityQueue<Job> queue = new PriorityQueue<>(Job::compareBurstTime);
    for (Job job : sortedJobs) {
      if (job == null) {
        jobCount--;
        continue;
      }

      while (!queue.isEmpty() && processTime < job.getArrivalTime()) {
        Job nextJob = queue.poll();
        long arrivalTime = nextJob.getArrivalTime();
        long burstTime = nextJob.getBurstTime();

        if (processTime < nextJob.getArrivalTime()) {
          processList.add(new RunningProcess("Idle", arrivalTime - processTime));
          processTime = arrivalTime;
        }

        processList.add(new RunningProcess("P" + nextJob.getId(), burstTime));
        usefulTime += burstTime;
        totWT += processTime - arrivalTime;
        processTime += burstTime;
      }

      queue.add(job);
    }

    // Drain any jobs still queued after the last arrival has been handled.
    while (!queue.isEmpty()) {
      Job nextJob = queue.poll();
      long arrivalTime = nextJob.getArrivalTime();
      long burstTime = nextJob.getBurstTime();

      if (processTime < nextJob.getArrivalTime()) {
        processList.add(new RunningProcess("Idle", arrivalTime - processTime));
        processTime = arrivalTime;
      }

      processList.add(new RunningProcess("P" + nextJob.getId(), burstTime));
      usefulTime += burstTime;
      totWT += processTime - arrivalTime;
      processTime += burstTime;
    }

    totRT = totWT;
    totTAT = totWT + usefulTime;

    avgRT = avgWT = (double) totWT / (double) jobCount;
    avgTAT = (double) totTAT / (double) jobCount;

    utilization = usefulTime * 100.0 / processTime;
  }
Example #13
  @Override
  public void addJob(
      String jobName,
      byte type,
      String pathToJar,
      String className,
      String pathToData,
      String[] peers,
      int parNumber,
      int numberOfReducers,
      HashMap<String, Integer> finishedMappers)
      throws RemoteException {
    SysLogger.getInstance().info("Job " + jobName + " started");

    Logger logger = null;
    try {
      logger = new Logger(0, "..\\log\\WorkerNode.log");
    } catch (IncorrectLogFileException e) {
      e.printStackTrace();
    }

    DFSClient dfs = DFSClient.getInstance();
    try {
      dfs.init("localhost", 20000, logger);
    } catch (Exception e) {
      e.printStackTrace();
    }

    String localPathToJar = "..\\tasks\\" + jobName + ".jar";
    File jarFile = new File(localPathToJar);
    if (jarFile.exists()) {
      jarFile.delete();
    }
    String localPathToData = "..\\tasks\\" + jobName + ".dat";
    File dataFile = new File(localPathToData);
    if (dataFile.exists()) {
      dataFile.delete();
    }
    try {
      dfs.downloadFile(pathToJar, localPathToJar);
      if (type == MapReduce.TYPE_MAPPER) {
        dfs.downloadFile(pathToData, localPathToData);
      }
    } catch (Exception e) {
      e.printStackTrace();
    }

    try {
      URL url = new URL("file:///" + jarFile.getAbsolutePath());
      URLClassLoader classLoader = new URLClassLoader(new URL[] {url});
      // Load the job's implementation class from the downloaded jar.
      Class<?> c = classLoader.loadClass(className);

      MapReduce jobObject = (MapReduce) c.newInstance();
      Job job = null;
      if (type == MapReduce.TYPE_MAPPER) {
        job =
            new Job(
                jobName,
                type,
                jobObject,
                dataFile.getAbsolutePath(),
                peers,
                parNumber,
                numberOfReducers,
                finishedMappers);
      } else if (type == MapReduce.TYPE_REDUCER) {
        job =
            new Job(
                jobName,
                type,
                jobObject,
                pathToData,
                peers,
                parNumber,
                numberOfReducers,
                finishedMappers);
      }
      jobList.put(jobName, job);

      job.start(); // start new thread
      // return control
    } catch (MalformedURLException e) {
      e.printStackTrace();
    } catch (ClassNotFoundException e) {
      e.printStackTrace();
    } catch (InstantiationException e) {
      e.printStackTrace();
    } catch (IllegalAccessException e) {
      e.printStackTrace();
    }
  }
Example #14
  public int run(String[] args) throws Exception {
    Configuration conf = getConf();

    /* creates a new job with given job name */
    Job job = Job.getInstance(conf, "Mat_vect_mul");
    job.setJarByClass(Mat_vect_mul.class);

    /* adds input-vector file to the cache */
    DistributedCache.addCacheFile(new Path(args[1]).toUri(), job.getConfiguration());

    /* set the key class and value class for the job output */
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    /* set mapper and reducer for the job */
    job.setMapperClass(Map1.class);
    job.setReducerClass(Reduce1.class);

    /* set the input format and output format for the job */
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    /* add path to the list of inputs for the map-reduce job */
    FileInputFormat.addInputPath(job, new Path(args[0]));

    /* set path of the output directory for the map-reduce job */
    FileOutputFormat.setOutputPath(job, new Path(args[2]));

    /* Submit the job, then poll for progress until the job is complete */
    boolean succ = job.waitForCompletion(true);
    if (!succ) {
      System.out.println("Job failed, exiting");
      return -1;
    }

    return 0;
  }
Example #15
  public static void main(String[] args) throws Exception {

    final String NAME_NODE = "hdfs://sandbox.hortonworks.com:8020";
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf);
    job.setJarByClass(WordCount.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(NullWritable.class);
    if (args.length > 2) {
      job.setNumReduceTasks(Integer.parseInt(args[2]));
    } else {
      job.setNumReduceTasks(1);
    }

    job.setMapperClass(CountMapper.class);
    job.setReducerClass(CountReducer.class);

    FileInputFormat.addInputPath(job, new Path(args[0] + "data/plot_summaries.txt"));
    FileSystem fs = FileSystem.get(conf);
    // handle (e.g. delete) existing output path
    Path outputDestination = new Path(args[0] + args[1]);
    if (fs.exists(outputDestination)) {
      fs.delete(outputDestination, true);
    }

    // set output path & start the job
    FileOutputFormat.setOutputPath(job, outputDestination);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
Example #16
  public static void main(String[] args)
      throws IOException, ClassNotFoundException, InterruptedException {

    Configuration conf = new Configuration();
    Job job = new Job(conf, "job");

    job.setJarByClass(PVidConvert.class);
    job.setMapperClass(Map1.class);
    job.setReducerClass(Reduce1.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path("/tmp/temporary_execution/"));
    job.waitForCompletion(true);

    Configuration conf1 = new Configuration();
    Job job1 = new Job(conf1, "job1");

    job1.setJarByClass(PVidConvert.class);
    job1.setMapperClass(Map2.class);
    job1.setReducerClass(Reduce2.class);
    job1.setOutputKeyClass(Text.class);
    job1.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job1, new Path("/tmp/temporary_execution/"));
    FileOutputFormat.setOutputPath(job1, new Path(args[1]));
    job1.waitForCompletion(true);
  }
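A minimal sketch of the same two-stage chain with two fixes: the intermediate directory is deleted up front (FileOutputFormat aborts when the output path already exists), and the second job runs only if the first succeeds. It reuses the PVidConvert, Map1/Reduce1, and Map2/Reduce2 classes assumed by Example #16.

  public static void main(String[] args)
      throws IOException, ClassNotFoundException, InterruptedException {

    Configuration conf = new Configuration();
    Path intermediate = new Path("/tmp/temporary_execution/");

    // Remove stale intermediate output left over from an earlier run.
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(intermediate)) {
      fs.delete(intermediate, true);
    }

    Job job = Job.getInstance(conf, "job");
    job.setJarByClass(PVidConvert.class);
    job.setMapperClass(Map1.class);
    job.setReducerClass(Reduce1.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, intermediate);

    // Stop early rather than run stage 2 on missing or partial input.
    if (!job.waitForCompletion(true)) {
      System.exit(1);
    }

    Job job1 = Job.getInstance(conf, "job1");
    job1.setJarByClass(PVidConvert.class);
    job1.setMapperClass(Map2.class);
    job1.setReducerClass(Reduce2.class);
    job1.setOutputKeyClass(Text.class);
    job1.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job1, intermediate);
    FileOutputFormat.setOutputPath(job1, new Path(args[1]));
    System.exit(job1.waitForCompletion(true) ? 0 : 1);
  }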