Example #1
  public static JobControl createValueAggregatorJobs(
      String args[], Class<? extends ValueAggregatorDescriptor>[] descriptors) throws IOException {

    JobControl theControl = new JobControl("ValueAggregatorJobs");
    ArrayList<Job> dependingJobs = new ArrayList<Job>();
    JobConf aJobConf = createValueAggregatorJob(args);
    if (descriptors != null) setAggregatorDescriptors(aJobConf, descriptors);
    Job aJob = new Job(aJobConf, dependingJobs);
    theControl.addJob(aJob);
    return theControl;
  }
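The descriptors parameter is what customizes the aggregation. As a rough illustration, a descriptor might look like the sketch below, which assumes the stock aggregate framework from the old mapred API (ValueAggregatorBaseDescriptor and its LONG_VALUE_SUM and ONE members); WordCountDescriptor is a hypothetical name, not part of the example above.

  import java.util.ArrayList;
  import java.util.Map.Entry;
  import org.apache.hadoop.io.Text;
  import org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorBaseDescriptor;

  public class WordCountDescriptor extends ValueAggregatorBaseDescriptor {
    // Emit one ("LongValueSum:word", "1") pair per token; the aggregate
    // framework then sums the values for each distinct word.
    @Override
    public ArrayList<Entry<Text, Text>> generateKeyValPairs(Object key, Object val) {
      ArrayList<Entry<Text, Text>> pairs = new ArrayList<Entry<Text, Text>>();
      for (String word : val.toString().split("\\s+")) {
        pairs.add(generateEntry(LONG_VALUE_SUM, word, ONE));
      }
      return pairs;
    }
  }

Passing something like new Class[] {WordCountDescriptor.class} (an unchecked but legal generic array) as descriptors would then make the generated job count word occurrences.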
Example #2
  public static void main(String[] args) throws IOException {

    if (args.length != 3) {
      System.out.println("Parameters: inputDir outputDir parallel");
      System.exit(1);
    }
    String inputDir = args[0];
    String outputDir = args[1];
    String parallel = args[2]; // accepted but unused: reducers are hardcoded to 40 below
    JobConf lp = new JobConf(L10.class);
    lp.setJobName("L10 Load Page Views");
    lp.setInputFormat(TextInputFormat.class);
    lp.setOutputKeyClass(MyType.class);
    lp.setOutputValueClass(Text.class);
    lp.setMapperClass(ReadPageViews.class);
    lp.setReducerClass(Group.class);
    lp.setPartitionerClass(MyPartitioner.class);
    Properties props = System.getProperties();
    for (Map.Entry<Object, Object> entry : props.entrySet()) {
      lp.set((String) entry.getKey(), (String) entry.getValue());
    }
    FileInputFormat.addInputPath(lp, new Path(inputDir + "/page_views"));
    FileOutputFormat.setOutputPath(lp, new Path(outputDir + "/L10out"));
    // Hardcode the parallel to 40 since MyPartitioner assumes it
    lp.setNumReduceTasks(40);
    Job group = new Job(lp);

    JobControl jc = new JobControl("L10 join");
    jc.addJob(group);

    new Thread(jc).start();

    int i = 0;
    while (!jc.allFinished()) {
      ArrayList<Job> failures = jc.getFailedJobs();
      if (failures != null && failures.size() > 0) {
        for (Job failure : failures) {
          System.err.println(failure.getMessage());
        }
        break;
      }

      try {
        Thread.sleep(5000);
      } catch (InterruptedException e) {
        // Ignore and re-check job status.
      }

      if (i % 10000 == 0) {
        System.out.println("Running jobs");
        ArrayList<Job> running = jc.getRunningJobs();
        if (running != null && running.size() > 0) {
          for (Job r : running) {
            System.out.println(r.getJobName());
          }
        }
        System.out.println("Ready jobs");
        ArrayList<Job> ready = jc.getReadyJobs();
        if (ready != null && ready.size() > 0) {
          for (Job r : ready) {
            System.out.println(r.getJobName());
          }
        }
        System.out.println("Waiting jobs");
        ArrayList<Job> waiting = jc.getWaitingJobs();
        if (waiting != null && waiting.size() > 0) {
          for (Job r : waiting) {
            System.out.println(r.getJobName());
          }
        }
        System.out.println("Successful jobs");
        ArrayList<Job> success = jc.getSuccessfulJobs();
        if (success != null && success.size() > 0) {
          for (Job r : success) {
            System.out.println(r.getJobName());
          }
        }
      }
      i++;
    }
    ArrayList<Job> failures = jc.getFailedJobs();
    if (failures != null && failures.size() > 0) {
      for (Job failure : failures) {
        System.err.println(failure.getMessage());
      }
    }
    jc.stop();
  }
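The wait-and-report loop above recurs in several of these examples. A compact version, factored into a helper (waitAndReport is a hypothetical name; it assumes the same org.apache.hadoop.mapred.jobcontrol.Job and JobControl classes used above), might look like:

  static boolean waitAndReport(JobControl jc) {
    new Thread(jc).start();
    while (!jc.allFinished()) {
      if (!jc.getFailedJobs().isEmpty()) {
        break; // stop polling as soon as any job fails
      }
      try {
        Thread.sleep(5000);
      } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        break;
      }
    }
    // Report failures (if any) before shutting the controller down.
    for (Job failure : jc.getFailedJobs()) {
      System.err.println(failure.getMessage());
    }
    boolean allSucceeded = jc.getFailedJobs().isEmpty();
    jc.stop();
    return allSucceeded;
  }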
Example #3
File: L4.java Project: kaituo/sedge
  public static void main(String[] args) throws IOException {

    JobConf lp = new JobConf(L4.class);
    lp.setJobName("Load Page Views");
    lp.setInputFormat(TextInputFormat.class);
    lp.setOutputKeyClass(Text.class);
    lp.setOutputValueClass(Text.class);
    lp.setMapperClass(ReadPageViews.class);
    lp.setCombinerClass(Combiner.class);
    lp.setReducerClass(Group.class);
    Properties props = System.getProperties();
    String dataDir = props.getProperty("PIGMIX_DIR", "/user/pig/tests/data/pigmix");
    for (Map.Entry<Object, Object> entry : props.entrySet()) {
      lp.set((String) entry.getKey(), (String) entry.getValue());
    }
    FileInputFormat.addInputPath(lp, new Path(dataDir, "page_views"));
    FileOutputFormat.setOutputPath(
        lp, new Path("/user/" + System.getProperty("user.name") + "/L4out"));
    lp.setNumReduceTasks(40);
    Job group = new Job(lp);

    JobControl jc = new JobControl("L4 join");
    jc.addJob(group);

    new Thread(jc).start();

    int i = 0;
    while (!jc.allFinished()) {
      ArrayList<Job> failures = jc.getFailedJobs();
      if (failures != null && failures.size() > 0) {
        for (Job failure : failures) {
          System.err.println(failure.getMessage());
        }
        break;
      }

      try {
        Thread.sleep(5000);
      } catch (InterruptedException e) {
        // Ignore and re-check job status.
      }

      if (i % 10000 == 0) {
        System.out.println("Running jobs");
        ArrayList<Job> running = jc.getRunningJobs();
        if (running != null && running.size() > 0) {
          for (Job r : running) {
            System.out.println(r.getJobName());
          }
        }
        System.out.println("Ready jobs");
        ArrayList<Job> ready = jc.getReadyJobs();
        if (ready != null && ready.size() > 0) {
          for (Job r : ready) {
            System.out.println(r.getJobName());
          }
        }
        System.out.println("Waiting jobs");
        ArrayList<Job> waiting = jc.getWaitingJobs();
        if (waiting != null && waiting.size() > 0) {
          for (Job r : waiting) {
            System.out.println(r.getJobName());
          }
        }
        System.out.println("Successful jobs");
        ArrayList<Job> success = jc.getSuccessfulJobs();
        if (success != null && success.size() > 0) {
          for (Job r : success) {
            System.out.println(r.getJobName());
          }
        }
      }
      i++;
    }
    ArrayList<Job> failures = jc.getFailedJobs();
    if (failures != null && failures.size() > 0) {
      for (Job failure : failures) {
        System.err.println(failure.getMessage());
      }
    }
    jc.stop();
  }
Example #4
  /** @param args inputFile frameFile tempDir dbPath */
  public static void main(String[] args) {

    File inputFile = new File(args[0]);
    File frameFile = new File(args[1]);
    File tempDir = new File(args[2]);
    String dbPath = args[3];

    try {
      JobControl jobControl = new JobControl("jsonld-entities");

      JobConf defaultConf = new JobConf();

      // Map the triples into JSON-LD fragments
      JobConf initialLoadConf = new JobConf(defaultConf);
      initialLoadConf.setInt("rank", 0);
      initialLoadConf.setStrings("frame-file", frameFile.toString());
      initialLoadConf.setMapperClass(TripleMapper.class);
      initialLoadConf.setReducerClass(EntityReducer.class);
      initialLoadConf.setInputFormat(TextInputFormat.class);
      initialLoadConf.setOutputFormat(TextOutputFormat.class);
      initialLoadConf.setMapOutputKeyClass(Text.class);
      initialLoadConf.setMapOutputValueClass(Text.class);
      initialLoadConf.setOutputKeyClass(Text.class);
      initialLoadConf.setOutputValueClass(Text.class);
      FileInputFormat.setInputPaths(initialLoadConf, new Path(inputFile.toString()));
      Path outputPath = new Path(tempDir.toString() + "/stage0");
      FileOutputFormat.setOutputPath(initialLoadConf, outputPath);
      Path prevOutput = outputPath;
      Job initialLoad = new Job(initialLoadConf);
      jobControl.addJob(initialLoad);

      // Aggregate JSON-LD fragments into nested structure
      EntityFrame entityFrame = new EntityFrame();
      entityFrame.parse(frameFile);
      Job prevJob = initialLoad;
      for (int rank = 1; rank <= entityFrame.getMaxRank(); rank++) {
        JobConf conf = new JobConf(defaultConf);
        conf.setInt("rank", rank);
        conf.setStrings("frame-file", frameFile.toString());
        conf.setMapperClass(IdentityMapper.class);
        conf.setReducerClass(EntityReducer.class);
        conf.setInputFormat(KeyValueTextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);
        conf.setMapOutputKeyClass(Text.class);
        conf.setMapOutputValueClass(Text.class);
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(Text.class);
        FileInputFormat.setInputPaths(conf, prevOutput);
        outputPath = new Path(tempDir.toString() + "/stage" + rank);
        FileOutputFormat.setOutputPath(conf, outputPath);
        prevOutput = outputPath;
        Job buildEntityJob = new Job(conf);
        jobControl.addJob(buildEntityJob);
        buildEntityJob.addDependingJob(prevJob);
        prevJob = buildEntityJob;
      }

      // Frame nested data
      JobConf frameConf = new JobConf(defaultConf);
      frameConf.setStrings("frame-file", frameFile.toString());
      frameConf.setMapperClass(IdentityMapper.class);
      frameConf.setReducerClass(EntityFrameReducer.class);
      frameConf.setInputFormat(KeyValueTextInputFormat.class);
      frameConf.setOutputFormat(MongoOutputFormat.class);
      frameConf.set("mongo.output.uri", dbPath);
      frameConf.set(
          "stream.io.identifier.resolver.class", "com.mongodb.hadoop.mapred.MongoOutputFormat");
      frameConf.setMapOutputKeyClass(Text.class);
      frameConf.setMapOutputValueClass(Text.class);
      frameConf.setOutputKeyClass(NullWritable.class);
      frameConf.setOutputValueClass(MongoUpdateWritable.class);
      FileInputFormat.setInputPaths(frameConf, prevOutput);
      Job frameEntitiesJob = new Job(frameConf);
      jobControl.addJob(frameEntitiesJob);
      frameEntitiesJob.addDependingJob(prevJob);

      FileSystem fs = FileSystem.get(defaultConf);
      fs.delete(new Path(tempDir.toString()), true);

      // Run pipeline. JobControl.run() loops until stop() is called, so drive
      // it from a separate thread and poll allFinished(), as in the examples above.
      Thread runner = new Thread(jobControl);
      runner.start();
      while (!jobControl.allFinished()) {
        try {
          Thread.sleep(500);
        } catch (InterruptedException e) {
          Thread.currentThread().interrupt();
          break;
        }
      }
      jobControl.stop();

    } catch (IOException e) {
      e.printStackTrace();
    }
  }
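The core pattern in this last example is the dependency chain: each stage's Job registers the previous stage via addDependingJob, and JobControl promotes a job from waiting to ready only after all of its dependencies succeed. A stripped-down sketch of that wiring (buildChain is a hypothetical helper, and the per-stage conf details such as input and output paths are elided, so a real caller must fill them in):

  static JobControl buildChain(JobConf template, int numStages) throws IOException {
    JobControl chain = new JobControl("stage-chain");
    Job prev = new Job(new JobConf(template)); // stage 0
    chain.addJob(prev);
    for (int stage = 1; stage < numStages; stage++) {
      Job next = new Job(new JobConf(template));
      next.addDependingJob(prev); // 'next' stays waiting until 'prev' succeeds
      chain.addJob(next);
      prev = next;
    }
    // The caller drives the chain, e.g. with new Thread(chain).start()
    // followed by polling allFinished(), as shown in the examples above.
    return chain;
  }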