Example #1
  public int run(String[] args) throws Exception {
    Path inputPath = new Path("weblog_entries.txt");
    Path outputPath = new Path("output");
    Schema schema = ReflectData.get().getSchema(WeblogRecord.class);
    Configuration conf = getConf();

    Job weblogJob = Job.getInstance(conf);
    weblogJob.setJobName("Avro Writer");
    weblogJob.setJarByClass(getClass());

    weblogJob.setNumReduceTasks(0);
    weblogJob.setMapperClass(WeblogMapper_Ex_5.class);
    weblogJob.setMapOutputKeyClass(AvroWrapper.class);
    weblogJob.setMapOutputValueClass(NullWritable.class);

    weblogJob.setInputFormatClass(TextInputFormat.class);

    AvroJob.setOutputKeySchema(weblogJob, schema);

    FileInputFormat.setInputPaths(weblogJob, inputPath);
    FileOutputFormat.setOutputPath(weblogJob, outputPath);

    if (weblogJob.waitForCompletion(true)) {
      return 0;
    }
    return 1;
  }
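The mapper wired in above (WeblogMapper_Ex_5) is not shown. As a rough sketch of what a compatible mapper could look like, with the tab-separated field layout and the WeblogRecord setter being assumptions rather than the original code:

import java.io.IOException;
import org.apache.avro.mapred.AvroWrapper;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

class WeblogMapperSketch extends Mapper<LongWritable, Text, AvroWrapper<WeblogRecord>, NullWritable> {
  @Override
  protected void map(LongWritable key, Text value, Context context)
      throws IOException, InterruptedException {
    // Hypothetical parsing: treat each log line as tab-separated fields.
    String[] fields = value.toString().split("\t");
    WeblogRecord record = new WeblogRecord();
    record.setIp(fields[0]); // setter name is an assumption about WeblogRecord
    context.write(new AvroWrapper<WeblogRecord>(record), NullWritable.get());
  }
}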
Example #2
  private boolean runJob(Configuration conf)
      throws IOException, InterruptedException, ClassNotFoundException {
    Job job = Job.getInstance(conf, "WordCount");
    job.setJarByClass(WordCount.class);

    // Configure input format and files
    job.setInputFormatClass(TextInputFormat.class);
    FileInputFormat.addInputPath(job, new Path(inputDir));

    // Configure output format and files
    job.setOutputFormatClass(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(job, new Path(outputDir));

    // set up mapper, combiner and reducer
    job.setMapperClass(WordCountMapper.class);
    job.setReducerClass(WordCountReducer.class);
    job.setCombinerClass(WordCountReducer.class);

    // set sorting, grouping and partitioning
    // set key and value types
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    return job.waitForCompletion(true);
  }
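WordCountMapper and WordCountReducer are referenced but not included in the snippet. A minimal sketch consistent with the declared key/value types; note the reducer can double as the combiner here because its input and output types match:

import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;

class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
  private static final IntWritable ONE = new IntWritable(1);
  private final Text word = new Text();

  @Override
  protected void map(LongWritable key, Text value, Context context)
      throws IOException, InterruptedException {
    // Emit (token, 1) for every whitespace-separated token in the line.
    for (String token : value.toString().split("\\s+")) {
      if (!token.isEmpty()) {
        word.set(token);
        context.write(word, ONE);
      }
    }
  }
}

class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
  @Override
  protected void reduce(Text key, Iterable<IntWritable> values, Context context)
      throws IOException, InterruptedException {
    // Sum the partial counts produced by the mappers (and the combiner).
    int sum = 0;
    for (IntWritable value : values) {
      sum += value.get();
    }
    context.write(key, new IntWritable(sum));
  }
}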
Example #3
 /**
  * Get the job corresponding to a job id.
  *
  * @param jobId the id of the job
  * @return object of {@link Job}
  * @throws IOException
  * @throws InterruptedException
  */
 public Job getJob(JobID jobId) throws IOException, InterruptedException {
   JobStatus status = client.getJobStatus(jobId);
   if (status != null) {
     return Job.getInstance(this, status, new JobConf(status.getJobFile()));
   }
   return null;
 }
Example #4
  public int run(String[] args) throws Exception {

    Job job =
        Job.getInstance(
            getConf(),
            "Import vessel locations from files in "
                + args[0]
                + " into table cdb_vessel:vessel_location");

    FileInputFormat.addInputPath(job, new Path(args[0]));

    job.setJarByClass(ImportVTLocationFromFileWithReducer.class);
    job.setJobName("Vessel_location_injection");
    job.setInputFormatClass(VTVesselLocationFileInputFormat.class);
    job.setMapOutputKeyClass(Key_IMOAndRecordTime.class);
    job.setMapOutputValueClass(TextArrayWritable.class);

    job.setPartitionerClass(Partitioner_IMO.class);
    job.setGroupingComparatorClass(GroupComparator_IMO.class);

    job.setReducerClass(ImportReducer.class);
    job.setNumReduceTasks(Integer.parseInt(args[1]));

    job.setOutputFormatClass(NullOutputFormat.class);

    return job.waitForCompletion(true) ? 0 : 1;
  }
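Since this run() method calls getConf(), the class presumably implements Tool; such drivers are normally launched through ToolRunner so generic options (-D key=value, -files, and so on) land in the configuration before run() executes. A minimal launcher sketch under that assumption:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ToolRunner;

class ImportVTLocationDriver {
  public static void main(String[] args) throws Exception {
    // ToolRunner parses generic options into the Configuration, then calls run().
    int exitCode =
        ToolRunner.run(new Configuration(), new ImportVTLocationFromFileWithReducer(), args);
    System.exit(exitCode);
  }
}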
Example #5
  private void doMapReduce() {
    try {
      Job job = Job.getInstance();

      job.getConfiguration().set(OutputFormat.NAMESPACE, "/");
      job.getConfiguration().set(OutputFormat.TABLE, "LoadTest");
      job.getConfiguration().setInt(OutputFormat.MUTATOR_FLAGS, MutatorFlag.NO_LOG_SYNC.getValue());
      job.getConfiguration().setInt(OutputFormat.MUTATOR_FLUSH_INTERVAL, 0);
      job.getConfiguration().setInt("LoadSplit.TOTAL_ROWS", this.totalRows);
      job.getConfiguration().setInt("LoadSplit.CLIENTS", this.clients);
      job.setJarByClass(LoadTest.class);
      job.setJobName("Hypertable MapReduce connector LoadTest");
      job.setInputFormatClass(LoadInputFormat.class);
      job.setOutputFormatClass(OutputFormat.class);
      job.setMapOutputKeyClass(KeyWritable.class);
      job.setMapOutputValueClass(BytesWritable.class);
      job.setMapperClass(LoadMapper.class);
      job.setReducerClass(LoadReducer.class);
      job.setNumReduceTasks(this.clients);

      job.waitForCompletion(true);
    } catch (Exception e) {
      e.printStackTrace();
    }
  }
Example #6
 @Override
 public int run(String[] args) throws Exception {
   if (args.length < 7) {
     System.err.println("Usage: OSMGrid <in> <out> <minLat> <minLon> <maxLat> <maxLon> <grid>");
     System.exit(-1);
   }
   Configuration conf = new Configuration();
   Job job = Job.getInstance(conf, "OSM-Gridding");
   job.setJarByClass(OSMGrid.class);
   job.setOutputKeyClass(WritablePoint.class);
   job.setOutputValueClass(LongWritable.class);
   job.setMapperClass(OSMMapper.class);
   job.setPartitionerClass(GridPartitioner.class);
   job.setReducerClass(OSMReducer.class);
   job.setInputFormatClass(TextInputFormat.class);
   job.setOutputFormatClass(TextOutputFormat.class);
   FileInputFormat.addInputPath(job, new Path(args[0]));
   FileOutputFormat.setOutputPath(job, new Path(args[1]));
   job.getConfiguration().set(OSMMapper.MINLAT, args[2]);
   job.getConfiguration().set(OSMMapper.MINLON, args[3]);
   job.getConfiguration().set(OSMMapper.MAXLAT, args[4]);
   job.getConfiguration().set(OSMMapper.MAXLON, args[5]);
   job.getConfiguration().set(OSMReducer.GRID, args[6]);
   job.setNumReduceTasks(Integer.parseInt(args[6]) * Integer.parseInt(args[6]));
   boolean succ = job.waitForCompletion(true);
   return succ ? 0 : 1;
 }
Example #7
  /** Job configuration. */
  public static Job configureJob(Configuration conf, String[] args) throws IOException {
    String tableName = args[0];
    String columnFamily = args[1];
    String outputPath = args[2];
    String rowKeyType = args[3];
    conf.set("row.key.type", rowKeyType);
    conf.set("table.name", tableName);
    Scan scan = new Scan();
    scan.addFamily(Bytes.toBytes(columnFamily));
    scan.setBatch(ConstantsTruthy.TRUTHY_TABLE_SCAN_BATCH);

    conf.set("mapred.map.tasks.speculative.execution", "false");
    conf.set("mapred.reduce.tasks.speculative.execution", "false");
    Job job =
        Job.getInstance(
            conf, "Count the column count and indexRecordSize for each row in " + tableName);
    job.setJarByClass(TruthyIndexFeatureCounter.class);
    TableMapReduceUtil.initTableMapperJob(
        tableName, scan, TfcMapper.class, Text.class, Text.class, job, true);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    TableMapReduceUtil.addDependencyJars(job);
    return job;
  }
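configureJob() only builds the Job; a caller still has to submit it. A hypothetical driver might look like this, with HBaseConfiguration.create() supplying the connection settings the table mapper needs:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.mapreduce.Job;

class TruthyIndexFeatureCounterDriver {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    Job job = TruthyIndexFeatureCounter.configureJob(conf, args);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}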
Example #8
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    if (otherArgs.length != 2) {
      System.err.println("Usage: MapRedAWithIP <in> <out>");
      System.exit(2);
    }

    Job job = Job.getInstance(conf, "A RRs with IP Addresses");

    job.setInputFormatClass(DNSFileInputFormat.class);

    job.setJarByClass(MapRedARRs.class);

    job.setMapperClass(MapRecords.class);
    job.setReducerClass(ReduceRecords.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DNSRecordIO.class);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    MapRedFileUtils.deleteDir(otherArgs[1]);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
Example #9
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // This must be set before the Job is created; otherwise the job fails with
    // a class-not-found error at runtime.
    conf.set("mapreduce.job.jar", "wcwin.jar");
    Job job = Job.getInstance(conf, "winjob");
    getConf(conf);
    job.setJarByClass(WordCount.class);
    job.setMapperClass(WMapper.class);
    job.setReducerClass(WReducer.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    String path = "/wc/output";
    Path p = new Path(path);
    // Optionally delete an existing output path before submitting:
    // FileSystem fs = FileSystem.get(conf);
    // if (fs.exists(p)) {
    //   fs.delete(p, true);
    //   System.out.println("Output path existed and was deleted.");
    // }

    FileInputFormat.setInputPaths(job, "/wc/srcdata");
    FileOutputFormat.setOutputPath(job, p);
    printEnv(job);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
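WMapper and WReducer are not shown. The driver declares map output values of IntWritable but final output values of Text, so a reducer consistent with that wiring would sum the counts and emit them as Text; a sketch under that assumption:

import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

class WReducerSketch extends Reducer<Text, IntWritable, Text, Text> {
  @Override
  protected void reduce(Text key, Iterable<IntWritable> values, Context context)
      throws IOException, InterruptedException {
    int sum = 0;
    for (IntWritable v : values) {
      sum += v.get();
    }
    // Emit the count as Text to match the declared output value class.
    context.write(key, new Text(String.valueOf(sum)));
  }
}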
Example #10
  public int run(String[] args) throws Exception {
    String input = "hdfs://master:9000/tmp/logs/merge_data/";
    String output = "hdfs://master:9000/tmp/logs/merge_data/result";

    Configuration conf = getConf();
    Job job = Job.getInstance(conf, MergeExample.class.getSimpleName());

    // Set the jar
    job.setJarByClass(MergeExample.class);
    job.setInputFormatClass(TextInputFormat.class);
    // Configure the mapper
    job.setMapperClass(MergeExampleMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);
    FileInputFormat.setInputPaths(job, input);
    // Reducer settings (commented out, so the default identity reducer runs)
    // job.setReducerClass(MergeExampleReducer.class);
    // job.setOutputKeyClass(Text.class);
    // job.setOutputValueClass(LongWritable.class);

    job.setOutputFormatClass(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(job, new Path(output));
    // Submit the job
    job.waitForCompletion(true);
    return job.isSuccessful() ? 0 : 1;
  }
Example #11
  public static void main(String[] args) throws Exception {

    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "Repartition Join (projection by text)");
    job.setJarByClass(ReplicatedJoinBasic.class);

    // Input parameters
    Path donationsPath = new Path(args[0]);
    Path projectsPath = new Path(args[1]);
    Path outputPath = new Path(args[2]);

    // Mappers configuration
    MultipleInputs.addInputPath(
        job, donationsPath, SequenceFileInputFormat.class, DonationsMapper.class);
    MultipleInputs.addInputPath(
        job, projectsPath, SequenceFileInputFormat.class, ProjectsMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    // Reducer configuration
    job.setNumReduceTasks(3);
    job.setReducerClass(JoinReducer.class);

    FileOutputFormat.setOutputPath(job, outputPath);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
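The mappers of a repartition join typically tag each record with its source so the reducer can separate the two sides. DonationsMapper and ProjectsMapper are not shown; this sketch assumes the donations sequence file holds (Text id, Text record) pairs and uses a made-up "D|" prefix as the tag:

import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

class TaggingDonationsMapper extends Mapper<Text, Text, Text, Text> {
  private final Text tagged = new Text();

  @Override
  protected void map(Text joinKey, Text record, Context context)
      throws IOException, InterruptedException {
    // Tag each record with its source so JoinReducer can tell the sides apart.
    tagged.set("D|" + record.toString());
    context.write(joinKey, tagged);
  }
}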
Example #12
  public static void runDailyTrendEstimationJob(String inputPath) throws Exception {

    Configuration conf = new Configuration();
    Job job;
    try {
      conf.set("mongo.output.uri", "mongodb://52.33.93.221:27017/mongo_hadoop.page_trends");
      job = Job.getInstance(conf, "dataclean");

      job.setJarByClass(DataCleanJob.class);
      job.setMapperClass(DataCleanMapper.class);
      job.setReducerClass(DataCleanReducer.class);

      job.setMapOutputKeyClass(Text.class);
      job.setMapOutputValueClass(PageDataValue.class);
      job.setOutputKeyClass(NullWritable.class);
      job.setOutputValueClass(MongoUpdateWritable.class);

      FileInputFormat.setInputDirRecursive(job, true);
      FileInputFormat.addInputPath(job, new Path(inputPath));

      MongoConfig mongoConfig = new MongoConfig(conf);
      mongoConfig.setOutputFormat(MongoOutputFormat.class);

      job.setOutputFormatClass(MongoOutputFormat.class);

      System.exit(job.waitForCompletion(true) ? 0 : 1);

    } catch (IOException | ClassNotFoundException | InterruptedException e) {
      e.printStackTrace();
    }
  }
Example #13
  public static void runTrendCalcJob(String inputPath, String outputPath)
      throws IOException, ClassNotFoundException, InterruptedException {
    Configuration conf = new Configuration();
    Job job;
    try {
      job = Job.getInstance(conf, "Trend Calc");
      job.setInputFormatClass(TextInputFormat.class);

      job.setMapperClass(TrendMapper.class);
      job.setReducerClass(TrendTopCalcReducer.class);

      job.setMapOutputKeyClass(TrendKey.class);
      job.setMapOutputValueClass(Text.class);
      job.setOutputKeyClass(Text.class);
      job.setOutputValueClass(Text.class);

      job.setJarByClass(DataCleanJob.class);

      FileInputFormat.addInputPath(job, new Path(inputPath));
      FileOutputFormat.setOutputPath(job, new Path(outputPath));

      job.waitForCompletion(true);
    } catch (IOException | ClassNotFoundException | InterruptedException e) {
      e.printStackTrace();
    }
  }
Example #14
  /** Job configuration. */
  public static Job configureJob(Configuration conf, String[] args) throws IOException {
    Scan scan = new Scan();
    scan.addFamily(Cw09Constants.CF_FREQUENCIES_BYTES);
    scan.setBatch(Cw09Constants.CW09_INDEX_SCAN_BATCH);

    conf.set("mapred.map.tasks.speculative.execution", "false");
    conf.set("mapred.reduce.tasks.speculative.execution", "false");
    Job job = Job.getInstance(conf, "Count the total frequency of each term in the index table");
    job.setJarByClass(TermHitsCounter.class);
    // TableMapReduceUtil.initTableMapperJob(Constants.CLUEWEB09_INDEX_TABLE_NAME, scan,
    //		ThcMapper.class, Text.class, LongWritable.class, job);
    TableMapReduceUtil.initTableMapperJob(
        Cw09Constants.CLUEWEB09_INDEX_TABLE_NAME,
        scan,
        ThcMapper.class,
        Text.class,
        LongWritable.class,
        job,
        true,
        CustomizedSplitTableInputFormat.class);
    job.setCombinerClass(ThcCombiner.class);
    TableMapReduceUtil.initTableReducerJob(
        Cw09Constants.CLUEWEB09_TERM_COUNT_TABLE_NAME, ThcReducer.class, job);
    job.setNumReduceTasks(40);
    return job;
  }
Example #15
  private static void StartingJob()
      throws IOException, InterruptedException, ClassNotFoundException {

    conf = new Configuration();
    fs = FileSystem.get(conf);
    conf.setLong("my.vertex.num", num);
    job = Job.getInstance(conf, "Levelized Nested Dissection Starting");

    job.setJarByClass(LevNestDissectJob.class);
    job.setMapperClass(StartVertexMapper.class);
    job.setReducerClass(StartVertexReducer.class);

    in = out.suffix("/" + outPath_count);
    FileInputFormat.addInputPath(job, in);

    out_start = out.suffix("/" + outPath_start);
    if (fs.exists(out_start)) {
      fs.delete(out_start, true);
    }
    FileOutputFormat.setOutputPath(job, out_start);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(VertexWritable.class);
    job.setMapOutputValueClass(Text.class);

    job.waitForCompletion(true);

    depth = depth == 0 ? depth + 1 : depth;
    wasStart = true;
  }
Example #16
  @Override
  public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, AnalysisMapReduce.class.getName());
    job.setJarByClass(AnalysisMapReduce.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    job.setInputFormatClass(TextInputFormat.class);

    job.setMapperClass(AnalysicMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);

    //		job.setNumReduceTasks(0);

    job.setNumReduceTasks(1);

    job.setReducerClass(AnalysicReducer.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(NullWritable.class);

    Path out = new Path(args[1]);
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(out)) {
      fs.delete(out, true);
    }
    FileOutputFormat.setOutputPath(job, out);

    return job.waitForCompletion(true) ? 0 : 1;
  }
Example #17
  @Override
  public int run(String[] args) throws Exception {

    String instance = args[0];
    String zookeepers = args[1];
    String user = args[2];
    String tokenFile = args[3];
    String input = args[4];
    String tableName = args[5];

    Job job = Job.getInstance(getConf());
    job.setJobName(TokenFileWordCount.class.getName());
    job.setJarByClass(this.getClass());

    job.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.setInputPaths(job, input);

    job.setMapperClass(MapClass.class);

    job.setNumReduceTasks(0);

    job.setOutputFormatClass(AccumuloOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Mutation.class);

    // AccumuloInputFormat not used here, but it uses the same functions.
    AccumuloOutputFormat.setZooKeeperInstance(
        job, ClientConfiguration.loadDefault().withInstance(instance).withZkHosts(zookeepers));
    AccumuloOutputFormat.setConnectorInfo(job, user, tokenFile);
    AccumuloOutputFormat.setCreateTables(job, true);
    AccumuloOutputFormat.setDefaultTableName(job, tableName);

    return job.waitForCompletion(true) ? 0 : 1;
  }
Example #18
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "simple feature writer");

    job.setJarByClass(FeatureWriterJob.class);
    job.setMapperClass(MyMapper.class);
    job.setInputFormatClass(GeoMesaInputFormat.class);
    job.setOutputFormatClass(GeoMesaOutputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(ScalaSimpleFeature.class);
    job.setNumReduceTasks(0);

    Map<String, String> params = new HashMap<String, String>();
    params.put("instanceId", "myinstance");
    params.put("zookeepers", "zoo1,zoo2,zoo3");
    params.put("user", "myuser");
    params.put("password", "mypassword");
    params.put("tableName", "mycatalog");

    Query query = new Query("myfeature", ECQL.toFilter("BBOX(geom, -165,5,-50,75)"));

    GeoMesaInputFormat.configure(job, params, query);

    Map<String, String> outParams = new HashMap<String, String>();
    outParams.put("instanceId", "myinstance");
    outParams.put("zookeepers", "zoo1,zoo2,zoo3");
    outParams.put("user", "myuser");
    outParams.put("password", "mypassword");
    outParams.put("tableName", "mycatalog_2");

    GeoMesaOutputFormat.configureDataStore(job, outParams);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
Example #19
  /**
   * Create a map-only Hadoop Job out of the passed in parameters.  Does not set the
   * Job name.
   *
   * @see #getCustomJobName(String, org.apache.hadoop.mapreduce.JobContext, Class, Class)
   */
  @SuppressWarnings("rawtypes")
  public static Job prepareJob(Path inputPath,
                               Path outputPath,
                               Class<? extends InputFormat> inputFormat,
                               Class<? extends Mapper> mapper,
                               Class<? extends Writable> mapperKey,
                               Class<? extends Writable> mapperValue,
                               Class<? extends OutputFormat> outputFormat,
                               Configuration conf) throws IOException {

    //Job job = new Job(new Configuration(conf));
    Job job = Job.getInstance(conf);
    Configuration jobConf = job.getConfiguration();

    if (mapper.equals(Mapper.class)) {
      throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer");
    }
    job.setJarByClass(mapper);

    job.setInputFormatClass(inputFormat);
    jobConf.set("mapred.input.dir", inputPath.toString());

    job.setMapperClass(mapper);
    job.setMapOutputKeyClass(mapperKey);
    job.setMapOutputValueClass(mapperValue);
    job.setOutputKeyClass(mapperKey);
    job.setOutputValueClass(mapperValue);
    jobConf.setBoolean("mapred.compress.map.output", true);
    job.setNumReduceTasks(0);

    job.setOutputFormatClass(outputFormat);
    jobConf.set("mapred.output.dir", outputPath.toString());

    return job;
  }
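A hypothetical call site for prepareJob(), building a map-only job that writes compressed sequence files; TokenizingMapper and both paths are illustrative only:

Configuration conf = new Configuration();
Job job = prepareJob(
    new Path("/data/in"),            // hypothetical input
    new Path("/data/out"),           // hypothetical output
    TextInputFormat.class,
    TokenizingMapper.class,          // hypothetical Mapper<LongWritable, Text, Text, IntWritable>
    Text.class,                      // mapperKey
    IntWritable.class,               // mapperValue
    SequenceFileOutputFormat.class,
    conf);
job.waitForCompletion(true);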
Example #20
  @Override
  public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
      System.err.println("Usage: topreviews <in> [<in>...] <out>");
      System.exit(2);
    }

    Job job = Job.getInstance(conf, "Top Five Reviews");
    job.setJarByClass(TopFive.class);
    job.setPartitionerClass(NaturalKeyPartitioner.class);
    job.setGroupingComparatorClass(NaturalKeyGroupingComparator.class);
    job.setSortComparatorClass(CompositeKeyComparator.class);

    job.setMapperClass(TopFiveMapper.class);
    job.setReducerClass(TopFiveReducer.class);

    job.setMapOutputKeyClass(TextPair.class);
    job.setMapOutputValueClass(TextPair.class);

    job.setOutputKeyClass(TextPair.class);
    job.setOutputValueClass(TextPair.class);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
    return 0;
  }
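NaturalKeyPartitioner is not shown. A plausible implementation, assuming the codebase's TextPair exposes getFirst() for the natural key:

import org.apache.hadoop.mapreduce.Partitioner;

class NaturalKeyPartitioner extends Partitioner<TextPair, TextPair> {
  @Override
  public int getPartition(TextPair key, TextPair value, int numPartitions) {
    // Partition on the natural key only, so every composite key sharing that
    // natural key lands in the same reducer; the sort comparator then orders
    // records within the group.
    return (key.getFirst().hashCode() & Integer.MAX_VALUE) % numPartitions;
  }
}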
Example #21
  public int run(String[] args) throws Exception {
    // Check input arguments
    if (args.length != 2) {
      System.out.println("Usage: firstprog <input HIB> <output directory>");
      System.exit(0);
    }

    // Initialize and configure MapReduce job
    Job job = Job.getInstance();
    // Set input format class which parses the input HIB and spawns map tasks
    //        job.setInputFormatClass(ImageBundleInputFormat.class);
    job.setInputFormatClass(HibInputFormat.class);
    // Set the driver, mapper, and reducer classes which express the computation
    job.setJarByClass(SampleProgram.class);
    job.setMapperClass(SampleProgramMapper.class);
    job.setReducerClass(SampleProgramReducer.class);
    // Set the types for the key/value pairs passed to/from map and reduce layers
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(FloatImage.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);

    // Set the input and output paths on the HDFS
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // Execute the MapReduce job and block until it completes
    boolean success = job.waitForCompletion(true);

    // Return success or failure
    return success ? 0 : 1;
  }
Example #22
  public static void main(String[] args) throws Exception {

    Configuration conf = new Configuration();
    conf.set(nameNode, hdfsURL);
    // In the mapreduce API the map count is only a hint (the old JobConf.setNumMapTasks).
    conf.setInt("mapreduce.job.maps", 1);

    Job job = Job.getInstance(conf, "AccessProcessJob");
    job.setJarByClass(AccessProcessJob.class);

    // Clear any previous output before running.
    new Path(outputPath).getFileSystem(conf).delete(new Path(outputPath), true);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(AccessProcessMap.class);
    job.setReducerClass(AccessProcessReduce.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setNumReduceTasks(1);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
Example #23
  @Override
  public int run(String[] args) throws Exception {
    final int ret = parseArgs(args);
    if (ret < 0) {
      return ret;
    }

    Job job = Job.getInstance(getConf());
    job.setJarByClass(GreeDiFirst.class);
    job.setJobName(String.format("Coverage-GreeDiFirst[%s %s]", partitionCount, selectCount));

    job.getConfiguration().setInt(Fields.PARTITION_COUNT.get(), partitionCount);
    job.getConfiguration().setInt(Fields.SELECT_COUNT.get(), selectCount);

    job.setNumReduceTasks(partitionCount);

    SetupHelper.getInstance().setSequenceInput(job, inputPath).setTextOutput(job, outputPath);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(DocumentWithVectorWritable.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(Map.class);
    job.setReducerClass(GreeDiReducer.class);

    // Delete the output directory if it exists already.
    FileSystem.get(getConf()).delete(new Path(outputPath), true);

    return job.waitForCompletion(true) ? 0 : 1;
  }
Example #24
  @Before
  public void setup() throws IOException {
    job = Job.getInstance();
    conf = job.getConfiguration();
    attemptId = new TaskAttemptID();
    Path inputPath = new Path(TABLE_PATH_STR);
    inputSplit = new FileSplit(inputPath, 0, 1, null);
    Descriptor desc =
        new Descriptor(new File(TABLE_PATH_STR), "keyspace", "columnFamily", 1, false);

    doReturn(desc).when(ssTableColumnRecordReader).getDescriptor();
    doReturn(desc).when(ssTableRowRecordReader).getDescriptor();

    doNothing()
        .when(ssTableColumnRecordReader)
        .copyTablesToLocal(any(FileSplit.class), any(TaskAttemptContext.class));
    doNothing()
        .when(ssTableRowRecordReader)
        .copyTablesToLocal(any(FileSplit.class), any(TaskAttemptContext.class));

    doReturn(ssTableReader)
        .when(ssTableColumnRecordReader)
        .openSSTableReader(any(IPartitioner.class), any(CFMetaData.class));
    doReturn(ssTableReader)
        .when(ssTableRowRecordReader)
        .openSSTableReader(any(IPartitioner.class), any(CFMetaData.class));
    when(ssTableReader.getDirectScanner(null)).thenReturn(tableScanner);
  }
Example #25
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
      System.err.println("Usage: RemoveDup <in> [<in>...] <out>");
      System.exit(2);
    }

    // Delete the output directory first (optional; avoids the "output directory already exists" error on repeated runs)
    // HDFSUtil.deleteFile(conf, otherArgs[otherArgs.length - 1]);

    Job job = Job.getInstance(conf, "RemoveDup");
    job.setJarByClass(RemoveDup.class);
    job.setMapperClass(RemoveDupMapper.class);
    job.setCombinerClass(RemoveDupReducer.class);
    job.setReducerClass(RemoveDupReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);

    for (int i = 0; i < otherArgs.length - 1; ++i) {
      FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
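RemoveDupMapper and RemoveDupReducer are not shown, but deduplication needs little more than emitting each line as a key and writing each distinct key once; that also lets the reducer serve as the combiner, as configured above. A minimal sketch:

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;

class RemoveDupMapper extends Mapper<LongWritable, Text, Text, NullWritable> {
  @Override
  protected void map(LongWritable key, Text value, Context context)
      throws IOException, InterruptedException {
    // Emit each line as the key; duplicate lines collapse during the shuffle.
    context.write(value, NullWritable.get());
  }
}

class RemoveDupReducer extends Reducer<Text, NullWritable, Text, NullWritable> {
  @Override
  protected void reduce(Text key, Iterable<NullWritable> values, Context context)
      throws IOException, InterruptedException {
    // Each distinct line reaches reduce() once; write it through.
    context.write(key, NullWritable.get());
  }
}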
Example #26
  @Override
  public int run(String[] arg0) throws Exception {
    Job job = Job.getInstance(getConf(), "PopulationJob");
    Configuration conf = job.getConfiguration();
    job.setJarByClass(Population.class);

    Path out = new Path("totalorder");
    FileInputFormat.setInputPaths(job, "populations");
    FileOutputFormat.setOutputPath(job, out);
    out.getFileSystem(conf).delete(out, true);

    job.setMapperClass(PopulationMapper.class);
    job.setReducerClass(PopulationReducer.class);
    job.setInputFormatClass(KeyValueTextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setNumReduceTasks(5);

    // Configure the TotalOrderPartitioner here...
    job.setPartitionerClass(TotalOrderPartitioner.class);
    InputSampler.Sampler<Text, Text> sampler =
        new InputSampler.RandomSampler<Text, Text>(0.1, 200, 3);
    InputSampler.writePartitionFile(job, sampler);
    String partitionFile = TotalOrderPartitioner.getPartitionFile(conf);
    URI partitionURI = new URI(partitionFile + "#" + TotalOrderPartitioner.DEFAULT_PATH);
    job.addCacheFile(partitionURI);

    return job.waitForCompletion(true) ? 0 : 1;
  }
Example #27
 private Job[] getJobs(JobStatus[] stats) throws IOException {
   List<Job> jobs = new ArrayList<Job>();
   for (JobStatus stat : stats) {
     jobs.add(Job.getInstance(this, stat, new JobConf(stat.getJobFile())));
   }
   return jobs.toArray(new Job[0]);
 }
Example #28
  @Override
  public int run(String[] args) throws Exception {
    Job job = Job.getInstance(getConf());
    job.setJobName("TeraSortCloud");
    job.setJarByClass(this.getClass());
    Opts opts = new Opts();
    opts.parseArgs(TeraSortIngest.class.getName(), args);

    job.setInputFormatClass(RangeInputFormat.class);
    job.setMapperClass(SortGenMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Mutation.class);

    job.setNumReduceTasks(0);

    job.setOutputFormatClass(AccumuloOutputFormat.class);
    opts.setAccumuloConfigs(job);
    BatchWriterConfig bwConfig = new BatchWriterConfig().setMaxMemory(10L * 1000 * 1000);
    AccumuloOutputFormat.setBatchWriterOptions(job, bwConfig);

    Configuration conf = job.getConfiguration();
    conf.setLong(NUMROWS, opts.numRows);
    conf.setInt("cloudgen.minkeylength", opts.minKeyLength);
    conf.setInt("cloudgen.maxkeylength", opts.maxKeyLength);
    conf.setInt("cloudgen.minvaluelength", opts.minValueLength);
    conf.setInt("cloudgen.maxvaluelength", opts.maxValueLength);
    conf.set("cloudgen.tablename", opts.getTableName());

    if (args.length > 10) conf.setInt(NUMSPLITS, opts.splits);

    job.waitForCompletion(true);
    return job.isSuccessful() ? 0 : 1;
  }
Example #29
  public static void main(String[] args)
      throws IOException, InterruptedException, ClassNotFoundException {

    if (args.length != 2) throw new IllegalArgumentException(args.length + " usage: ... ");

    String bitvectorpath = args[0], outputPath = args[1];

    Configuration conf = new Configuration();
    Job job = Job.getInstance(new Cluster(conf), conf);
    job.setJobName("int key replace phase1");
    job.setJarByClass(OutlinkGrowthAnalysis.class);

    job.setMapperClass(BVIdentitiyMapper.class);
    job.setReducerClass(AnaylseOLGrowthReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setInputFormatClass(TabSeperatedTextInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    FileInputFormat.setInputPaths(job, new Path(bitvectorpath));

    job.setNumReduceTasks(1);
    job.waitForCompletion(true);
  }
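BVIdentitiyMapper is not shown; judging by its name it simply forwards its (Text, Text) input, which matches the declared map output types. A sketch under that assumption:

import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

class IdentityTextMapper extends Mapper<Text, Text, Text, Text> {
  @Override
  protected void map(Text key, Text value, Context context)
      throws IOException, InterruptedException {
    // Pass every (key, value) pair through unchanged.
    context.write(key, value);
  }
}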
Example #30
  public static void main(String[] args) throws Exception {

    final String NAME_NODE = "hdfs://sandbox.hortonworks.com:8020";
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf);
    job.setJarByClass(WordCount.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(NullWritable.class);
    job.setMapperClass(CountMapper.class);
    job.setReducerClass(CountReducer.class);

    // Default to a single reducer, but let an optional third argument override it.
    job.setNumReduceTasks(1);
    if (args.length > 2) {
      job.setNumReduceTasks(Integer.parseInt(args[2]));
    }

    FileInputFormat.addInputPath(job, new Path(args[0] + "data/plot_summaries.txt"));
    FileSystem fs = FileSystem.get(conf);
    // handle (e.g. delete) existing output path
    Path outputDestination = new Path(args[0] + args[1]);
    if (fs.exists(outputDestination)) {
      fs.delete(outputDestination, true);
    }

    // set output path & start job1
    FileOutputFormat.setOutputPath(job, outputDestination);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }