Java Job.setMapOutputKeyClass Examples

Programming Language: Java

Namespace/Package Name: org.apache.hadoop.mapreduce

Class/Type: Job

Method/Function: setMapOutputKeyClass

Examples at hotexamples.com: 30

The method setMapOutputKeyClass() is a function provided by the Java org.apache.hadoop.mapreduce.Job class in the Hadoop framework. It is used to specify the class of the keys emitted by the Mapper function in a MapReduce job.

The MapReduce framework organizes data into key-value pairs, where the keys are used to partition the data and the values are the corresponding data records. This method allows developers to define the class of these keys. By setting the appropriate key class, users can control the format and type of the keys produced by the Mapper before they are passed to the Reducer.

This method is helpful in situations where the keys need to be customized based on specific requirements or when the default key class provided by the framework is not suitable for the job at hand. It ensures that the Mapper produces keys of the desired type, enabling efficient data processing and analysis during the MapReduce execution.

Java Job.setMapOutputKeyClass - 30 examples found. These are the top rated real world Java examples of org.apache.hadoop.mapreduce.Job.setMapOutputKeyClass extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

waitForCompletion(30)

getInstance(30)

setInputFormatClass(30)

setJarByClass(30)

setJobName(30)

setMapOutputKeyClass(30)

setMapOutputValueClass(30)

setMapperClass(30)

setNumReduceTasks(30)

setOutputFormatClass(30)

setCombinerClass(30)

setOutputKeyClass(30)

setOutputValueClass(30)

getCounters(30)

getConfiguration(30)

setPartitionerClass(30)

setReducerClass(30)

submit(21)

ensureState(20)

setGroupingComparatorClass(15)

isSuccessful(15)

getJobName(13)

setSortComparatorClass(10)

getJobID(9)

getNumReduceTasks(6)

setWorkingDirectory(6)

isComplete(6)

setSpeculativeExecution(6)

getTrackingURL(5)

getWorkingDirectory(5)

getCredentials(5)

getID(5)

addCacheFile(4)

killJob(3)

getOutputFormatClass(3)

setJar(2)

setJobID(2)

getTaskReports(2)

getJar(2)

getStatus(2)

failTask(1)

toString(1)

ensureNotSet(1)

setUseNewAPI(1)

connect(1)

killTask(1)

setReduceSpeculativeExecution(1)

getClass(1)

getInputFormatClass(1)

getSortComparator(1)

Example #1

Show file

File: JobFactoryBean.java Project: nellaivijay/spring-hadoop

  private void configureMapperTypesIfPossible(Job j, Class<? extends Mapper> mapper) {
    // Find mapper
    Class<?> targetClass = mapper;
    Type targetType = mapper;

    do {
      targetType = targetClass.getGenericSuperclass();
      targetClass = targetClass.getSuperclass();
    } while (targetClass != null
        && targetClass != Object.class
        && !Mapper.class.equals(targetClass));

    if (targetType instanceof ParameterizedType) {
      Type[] params = ((ParameterizedType) targetType).getActualTypeArguments();
      if (params.length == 4) {
        // set each param (if possible);
        if (params[2] instanceof Class) {
          Class<?> clz = (Class<?>) params[2];
          if (!clz.isInterface()) j.setMapOutputKeyClass(clz);
        }

        // set each param (if possible);
        if (params[3] instanceof Class) {
          Class<?> clz = (Class<?>) params[3];
          if (!clz.isInterface()) {
            j.setMapOutputValueClass(clz);
          }
        }
      }
    }
  }

Example #2

Show file

File: DayhslogUserDate.java Project: wisgood/mobile-core

  @Override
  public int run(String[] args) throws Exception {

    // TODO Auto-generated method stub

    Configuration conf = getConf();
    // conf.set("stat_date", dateString);

    Job job = new Job(conf, "DayhslogUserDateNewMac");
    job.setJarByClass(DayhslogUserDate.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileInputFormat.addInputPath(job, new Path(args[1]));
    FileOutputFormat.setOutputPath(job, new Path(args[2] + "tmp"));
    FileSystem.get(conf).delete(new Path(args[2] + "tmp"), true);
    job.setMapperClass(DayhslogUserDateNewMacMapper.class);
    job.setReducerClass(DayhslogUserDateNewMacReducer.class);

    // job.setInputFormatClass(LzoTextInputFormat.class);

    job.setOutputFormatClass(TextOutputFormat.class);

    job.setNumReduceTasks(10);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    int code = job.waitForCompletion(true) ? 0 : 1;

    if (code == 0) {
      Job resultJob = new Job(conf, "DayhslogUserDate");

      resultJob.setJarByClass(DayhslogUserDate.class);

      FileInputFormat.addInputPath(resultJob, new Path(args[0]));
      FileInputFormat.addInputPath(resultJob, new Path(args[2] + "tmp"));
      FileOutputFormat.setOutputPath(resultJob, new Path(args[2]));
      FileSystem.get(conf).delete(new Path(args[2]), true);

      resultJob.setMapperClass(DayhslogUserDateMapper.class);
      resultJob.setReducerClass(DayhslogUserDateReducer.class);

      resultJob.setNumReduceTasks(10);

      resultJob.setMapOutputKeyClass(Text.class);
      resultJob.setMapOutputValueClass(Text.class);

      resultJob.setOutputKeyClass(Text.class);
      resultJob.setOutputValueClass(Text.class);

      code = resultJob.waitForCompletion(true) ? 0 : 1;
    }
    Path tmpPath = new Path(args[2] + "tmp");
    FileSystem.get(conf).delete(tmpPath, true);
    System.exit(code);

    return code;
  }

Example #3

Show file

File: map_reduce.java Project: upadhyaysabyasachi/InvertedWordIndexUsingHadoop

  public static void main(String[] args) throws Exception {

    Configuration conf = new Configuration();
    conf.set("mapred.textoutputformat.separator", ",");
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    //	Logger log  = Logger.getLogger("sds");
    Job job = new Job(conf, "Max ");

    job.setMapOutputKeyClass(CompositeKey.class);

    job.setPartitionerClass(ActualKeyPartitioner.class);
    job.setGroupingComparatorClass(ActualKeyGroupingComparator.class);
    job.setSortComparatorClass(CompositeKeyComparator.class);

    job.setJarByClass(map_reduce.class);
    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);

    job.setNumReduceTasks(27);

    job.setMapOutputKeyClass(CompositeKey.class);
    job.setMapOutputValueClass(Text.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }

Example #4

Show file

File: TopFive.java Project: vikash11/Amazon-Reviews

  @Override
  public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
      System.err.println("Usage: topreviews <in> [<in>...] <out>");
      System.exit(2);
    }

    Job job = Job.getInstance(conf, "Top Five Reviews");
    job.setJarByClass(TopFive.class);
    job.setPartitionerClass(NaturalKeyPartitioner.class);
    job.setGroupingComparatorClass(NaturalKeyGroupingComparator.class);
    job.setSortComparatorClass(CompositeKeyComparator.class);

    job.setMapperClass(TopFiveMapper.class);
    job.setReducerClass(TopFiveReducer.class);

    job.setMapOutputKeyClass(TextPair.class);
    job.setMapOutputValueClass(TextPair.class);

    job.setOutputKeyClass(TextPair.class);
    job.setOutputValueClass(TextPair.class);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
    return 0;
  }

Example #5

Show file

File: HadoopUtil.java Project: nextmining/course-hadoop

  /**
   * Create a map-only Hadoop Job out of the passed in parameters.  Does not set the
   * Job name.
   *
   * @see #getCustomJobName(String, org.apache.hadoop.mapreduce.JobContext, Class, Class)
   */
  @SuppressWarnings("rawtypes")
	public static Job prepareJob(Path inputPath,
                                 Path outputPath,
                                 Class<? extends InputFormat> inputFormat,
                                 Class<? extends Mapper> mapper,
                                 Class<? extends Writable> mapperKey,
                                 Class<? extends Writable> mapperValue,
                                 Class<? extends OutputFormat> outputFormat, Configuration conf) throws IOException {

    //Job job = new Job(new Configuration(conf));
  	Job job = Job.getInstance(conf);
    Configuration jobConf = job.getConfiguration();

    if (mapper.equals(Mapper.class)) {
        throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer");
    }
    job.setJarByClass(mapper);

    job.setInputFormatClass(inputFormat);
    jobConf.set("mapred.input.dir", inputPath.toString());

    job.setMapperClass(mapper);
    job.setMapOutputKeyClass(mapperKey);
    job.setMapOutputValueClass(mapperValue);
    job.setOutputKeyClass(mapperKey);
    job.setOutputValueClass(mapperValue);
    jobConf.setBoolean("mapred.compress.map.output", true);
    job.setNumReduceTasks(0);

    job.setOutputFormatClass(outputFormat);
    jobConf.set("mapred.output.dir", outputPath.toString());

    return job;
  }

Example #6

Show file

File: TCMReasoner.java Project: hualichenxi/bio-tcm-cloud

  public static void main(String[] args)
      throws IOException, InterruptedException, ClassNotFoundException {

    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (args.length < 1) {
      System.out.println("USAGE: RFDSReasoner [pool path] [options]");
      return;
    }

    Job job = new Job(conf, "reasoner");
    job.setJarByClass(TCMReasoner.class);
    System.out.println(args[0]);

    job.setMapperClass(TCMMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Triple.class);

    job.setReducerClass(TCMReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Triple.class);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    job.waitForCompletion(true);
    Counter derivedTriples =
        job.getCounters()
            .findCounter("org.apache.hadoop.mapred.Task$Counter", "REDUCE_OUTPUT_RECORDS");
    System.out.println(derivedTriples.getValue());

    return;
  }

Example #7

Show file

File: OutlinkGrowthAnalysis.java Project: yavcular/WebGraphConstruction

  public static void main(String[] args)
      throws IOException, InterruptedException, ClassNotFoundException {

    if (args.length != 2) throw new IllegalArgumentException(args.length + " usage: ... ");

    String bitvectorpath = args[0], outputPath = args[1];

    Configuration conf = new Configuration();
    Job job = Job.getInstance(new Cluster(conf), conf);
    job.setJobName("int key replace phase1");
    job.setJarByClass(OutlinkGrowthAnalysis.class);

    job.setMapperClass(BVIdentitiyMapper.class);
    job.setReducerClass(AnaylseOLGrowthReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setInputFormatClass(TabSeperatedTextInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    FileInputFormat.setInputPaths(job, new Path(bitvectorpath));

    job.setNumReduceTasks(1);
    job.waitForCompletion(true);
  }

Example #8

Show file

File: SampleProgram.java Project: yangboz/verdant-octo-woof

  public int run(String[] args) throws Exception {
    // Check input arguments
    if (args.length != 2) {
      System.out.println("Usage: firstprog <input HIB> <output directory>");
      System.exit(0);
    }

    // Initialize and configure MapReduce job
    Job job = Job.getInstance();
    // Set input format class which parses the input HIB and spawns map tasks
    //        job.setInputFormatClass(ImageBundleInputFormat.class);
    job.setInputFormatClass(HibInputFormat.class);
    // Set the driver, mapper, and reducer classes which express the computation
    job.setJarByClass(SampleProgram.class);
    job.setMapperClass(SampleProgramMapper.class);
    job.setReducerClass(SampleProgramReducer.class);
    // Set the types for the key/value pairs passed to/from map and reduce layers
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(FloatImage.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);

    // Set the input and output paths on the HDFS
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // Execute the MapReduce job and block until it complets
    boolean success = job.waitForCompletion(true);

    // Return success or failure
    return success ? 0 : 1;
  }

Example #9

Show file

File: GreeDiFirst.java Project: uvictor/wikimining

  @Override
  public int run(String[] args) throws Exception {
    final int ret = parseArgs(args);
    if (ret < 0) {
      return ret;
    }

    Job job = Job.getInstance(getConf());
    job.setJarByClass(GreeDiFirst.class);
    job.setJobName(String.format("Coverage-GreeDiFirst[%s %s]", partitionCount, selectCount));

    job.getConfiguration().setInt(Fields.PARTITION_COUNT.get(), partitionCount);
    job.getConfiguration().setInt(Fields.SELECT_COUNT.get(), selectCount);

    job.setNumReduceTasks(partitionCount);

    SetupHelper.getInstance().setSequenceInput(job, inputPath).setTextOutput(job, outputPath);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(DocumentWithVectorWritable.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(Map.class);
    job.setReducerClass(GreeDiReducer.class);

    // Delete the output directory if it exists already.
    FileSystem.get(getConf()).delete(new Path(outputPath), true);

    job.waitForCompletion(true);

    return 0;
  }

Example #10

Show file

File: HubsAndSpokes.java Project: kidaak/Hadoop-MapReduce-1

  public static void main(String[] args) throws Exception {

    /// Create JOB 1 to convert all the flight data in to NODE : Graph Structure.
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 3) {
      System.err.println("Usage: hubsandspokesload <in> <out> <finalout>");
      System.exit(2);
    }
    Job job = new Job(conf, "hubsandspokesload");
    job.setJarByClass(HubsAndSpokes.class);
    job.setMapperClass(HubSpokeLoadMapper.class);
    job.setReducerClass(HubSpokeLoadReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(NodeWritable.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    boolean b = job.waitForCompletion(true);
    if (!b) {
      System.exit(2);
    }

    /// Call Job 2 where we perform HITS Algorithm to calculate Hub and Spoke
    /// Value at each Node in the graph iteratively.

    dijkstra(otherArgs[1], otherArgs[2]);

    // dijkstra("output1", "finaloutput");

  }

Example #11

Show file

File: ParaphrasePivotingJob.java Project: fone4u/thrax

  public Job getJob(Configuration conf) throws IOException {
    Job job = new Job(conf, "pivoting");

    job.setJarByClass(PivotingReducer.class);

    job.setMapperClass(Mapper.class);
    job.setReducerClass(PivotingReducer.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setMapOutputKeyClass(RuleWritable.class);
    job.setMapOutputValueClass(MapWritable.class);
    job.setOutputKeyClass(RuleWritable.class);
    job.setOutputValueClass(MapWritable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setPartitionerClass(RuleWritable.SourcePartitioner.class);

    FileInputFormat.setInputPaths(job, new Path(conf.get("thrax.work-dir") + "collected"));
    int maxSplitSize = conf.getInt("thrax.max-split-size", 0);
    if (maxSplitSize != 0) FileInputFormat.setMaxInputSplitSize(job, maxSplitSize);

    int numReducers = conf.getInt("thrax.reducers", 4);
    job.setNumReduceTasks(numReducers);

    FileOutputFormat.setOutputPath(job, new Path(conf.get("thrax.work-dir") + "pivoted"));
    FileOutputFormat.setCompressOutput(job, true);

    return job;
  }

Example #12

Show file

File: HCatMapReduceTest.java Project: hadoop-zuiwanyuan/hive

  /**
   * Run a local map reduce job to read records from HCatalog table and verify if the count is as
   * expected
   *
   * @param readCount
   * @param filter
   * @return
   * @throws Exception
   */
  List<HCatRecord> runMRRead(int readCount, String filter) throws Exception {
    MapRead.readCount = 0;
    readRecords.clear();

    Configuration conf = new Configuration();
    conf.set(HiveConf.ConfVars.METASTORE_INTEGER_JDO_PUSHDOWN.varname, "true");
    Job job = new Job(conf, "hcat mapreduce read test");
    job.setJarByClass(this.getClass());
    job.setMapperClass(HCatMapReduceTest.MapRead.class);

    // input/output settings
    job.setInputFormatClass(HCatInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    HCatInputFormat.setInput(job, dbName, tableName, filter);

    job.setMapOutputKeyClass(BytesWritable.class);
    job.setMapOutputValueClass(Text.class);

    job.setNumReduceTasks(0);

    Path path = new Path(fs.getWorkingDirectory(), "mapred/testHCatMapReduceOutput");
    if (fs.exists(path)) {
      fs.delete(path, true);
    }

    TextOutputFormat.setOutputPath(job, path);

    job.waitForCompletion(true);
    Assert.assertEquals(readCount, MapRead.readCount);

    return readRecords;
  }

Example #13

Show file

File: InstallApp.java Project: link-jang/mapreduce

  public int run(String[] args) throws Exception {
    // TODO Auto-generated method stub

    if (args.length < 1) {
      System.out.println("missing input ");
      System.exit(1);
    }

    Configuration conf = new Configuration();
    Job job = new Job(new JobConf(conf));
    job.setJarByClass(InstallApp.class);
    job.setMapperClass(InstallMapper.class);
    job.setReducerClass(InstalleReduce.class);
    job.setJobName("installtaotaosou");
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    FileSystem fs = FileSystem.get(URI.create(args[0]), conf);
    FileStatus fileList[] = fs.listStatus(new Path(args[0]));

    int length = fileList.length;

    for (int i = 0; i < length; i++) {

      FileInputFormat.addInputPath(job, fileList[i].getPath());
    }

    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setNumReduceTasks(1);
    job.waitForCompletion(true);
    return 0;
  }

Example #14

Show file

File: FeatureWriterJob.java Project: ellisbjohns/geomesa

  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "simple feature writer");

    job.setJarByClass(FeatureWriterJob.class);
    job.setMapperClass(MyMapper.class);
    job.setInputFormatClass(GeoMesaInputFormat.class);
    job.setOutputFormatClass(GeoMesaOutputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(ScalaSimpleFeature.class);
    job.setNumReduceTasks(0);

    Map<String, String> params = new HashMap<String, String>();
    params.put("instanceId", "myinstance");
    params.put("zookeepers", "zoo1,zoo2,zoo3");
    params.put("user", "myuser");
    params.put("password", "mypassword");
    params.put("tableName", "mycatalog");

    Query query = new Query("myfeature", ECQL.toFilter("BBOX(geom, -165,5,-50,75)"));

    GeoMesaInputFormat.configure(job, params, query);

    Map<String, String> outParams = new HashMap<String, String>();
    outParams.put("instanceId", "myinstance");
    outParams.put("zookeepers", "zoo1,zoo2,zoo3");
    outParams.put("user", "myuser");
    outParams.put("password", "mypassword");
    outParams.put("tableName", "mycatalog_2");

    GeoMesaOutputFormat.configureDataStore(job, outParams);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }

Example #15

Show file

File: AnalysisMapReduce.java Project: hymanliu/mapred-hyman

  @Override
  public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, AnalysisMapReduce.class.getName());
    job.setJarByClass(AnalysisMapReduce.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    job.setInputFormatClass(TextInputFormat.class);

    job.setMapperClass(AnalysicMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);

    //		job.setNumReduceTasks(0);

    job.setNumReduceTasks(1);

    job.setReducerClass(AnalysicReducer.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(NullWritable.class);

    Path out = new Path(args[1]);
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(out)) {
      fs.delete(out, true);
    }
    FileOutputFormat.setOutputPath(job, out);

    return job.waitForCompletion(true) ? 0 : 1;
  }

Example #16

Show file

File: WhiteHouseVisitorDriver.java Project: pikkapika/cs216

  @Override
  public int run(String[] args) throws Exception {
    if (args.length != 2) {
      System.err.println("Usage: WhiteHouseVisitorDriver <input path> <output path>");
      ToolRunner.printGenericCommandUsage(System.err);
      return -1;
    }

    Configuration conf = new Configuration();
    Job job = new Job(conf);
    job.setJarByClass(WhiteHouseVisitorDriver.class);

    // input file format
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // map/combine/reduce class definition
    job.setMapperClass(WhiteHouseVisitorMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);

    // Key, Value set type definition
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    return job.waitForCompletion(true) ? 0 : 1;
  }

Example #17

Show file

File: WordCount.java Project: paohaijiao/mapreduce

  public static void main(String[] args) throws Exception {
    {
      Configuration conf = new Configuration();
      conf.set("mapreduce.job.jar", "wcwin.jar"); // 此处代码，一定放在Job任务前面，否则会报类找不到的异常
      Job job = Job.getInstance(conf, "winjob");
      getConf(conf);
      job.setJarByClass(WordCount.class);
      job.setMapperClass(WMapper.class);
      job.setReducerClass(WReducer.class);
      job.setInputFormatClass(TextInputFormat.class);
      job.setOutputFormatClass(TextOutputFormat.class);

      job.setMapOutputKeyClass(Text.class);
      job.setMapOutputValueClass(IntWritable.class);
      job.setOutputKeyClass(Text.class);
      job.setOutputValueClass(Text.class);

      String path = "/wc/output";
      //			FileSystem fs = FileSystem.get(conf);
      Path p = new Path(path);
      //			if (fs.exists(p)) {
      //				fs.delete(p, true);
      //				System.out.println("输出路径存在，已删除！");
      //			}

      FileInputFormat.setInputPaths(job, "/wc/srcdata");
      FileOutputFormat.setOutputPath(job, p);
      printEnv(job);
      System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
  }

Example #18

Show file

File: SequenceFileMergeJob.java Project: rahulj/hadoop-training-samples

 private void configureMapTasks(String arg, Job job) throws IOException {
   job.setMapperClass(IdentityMapper.class);
   job.setMapOutputKeyClass(keyClass);
   job.setMapOutputValueClass(valueClass);
   job.setInputFormatClass(SequenceFileInputFormat.class);
   SequenceFileInputFormat.setInputPaths(job, arg);
 }

Example #19

Show file

File: HubsAndSpokes.java Project: kidaak/Hadoop-MapReduce-1

  public static void dijkstra(String input, String output) throws Exception {

    String temp = output;

    ///  Run HITS Algorithm JOB:2 For 32 Times
    /// Setting the Value of k-> 32

    for (int i = 0; i < 32; i++) {
      Configuration conf = new Configuration();
      Job job = new Job(conf, "hubsandspokes");
      job.setJarByClass(HubsAndSpokes.class);
      job.setMapperClass(HubSpokeMapper.class);
      job.setReducerClass(HubSpokeReducer.class);
      job.setMapOutputKeyClass(Text.class);
      job.setMapOutputValueClass(NodeWritable.class);
      job.setOutputKeyClass(NodeWritable.class);
      job.setOutputValueClass(Text.class);
      job.setNumReduceTasks(1);
      FileInputFormat.addInputPath(job, new Path(input));
      FileOutputFormat.setOutputPath(job, new Path(output));

      // Toggle the value of Input and Output variable
      // For Next iteration
      input = output;
      output = temp + Integer.toString(i);

      // Wait for completing the JOB
      boolean b = job.waitForCompletion(true);
      if (!b) System.exit(2);
      // System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
  }

Example #20

Show file

File: L13.java Project: ekaynar/BenchmarkScripts

  /** RUN */
  @Override
  public int run(String[] args) throws Exception {

    if (args.length != 3) {
      System.err.println("Usage: wordcount <input_dir> <output_dir> <reducers>");
      return -1;
    }

    Job job = new Job(getConf(), "PigMix L13");
    job.setJarByClass(L13.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    job.setMapperClass(ReadInput.class);
    job.setReducerClass(Join.class);

    Properties props = System.getProperties();
    Configuration conf = job.getConfiguration();
    for (Map.Entry<Object, Object> entry : props.entrySet()) {
      conf.set((String) entry.getKey(), (String) entry.getValue());
    }

    FileInputFormat.addInputPath(job, new Path(args[0] + "/pigmix_page_views"));
    FileInputFormat.addInputPath(job, new Path(args[0] + "/pigmix_power_users_samples"));
    FileOutputFormat.setOutputPath(job, new Path(args[1] + "/L13out"));
    job.setNumReduceTasks(Integer.parseInt(args[2]));

    return job.waitForCompletion(true) ? 0 : -1;
  }

Example #21

Show file

File: UtilityPredictor.java Project: pranab/sifarish

  @Override
  public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    String jobName = "Rating predictor  MR";
    job.setJobName(jobName);

    job.setJarByClass(UtilityPredictor.class);

    FileInputFormat.addInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(UtilityPredictor.PredictionMapper.class);
    job.setReducerClass(UtilityPredictor.PredictorReducer.class);

    job.setMapOutputKeyClass(TextInt.class);
    job.setMapOutputValueClass(Tuple.class);

    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    job.setGroupingComparatorClass(ItemIdGroupComprator.class);
    job.setPartitionerClass(ItemIdPartitioner.class);

    Utility.setConfiguration(job.getConfiguration());
    int numReducer = job.getConfiguration().getInt("utp.num.reducer", -1);
    numReducer = -1 == numReducer ? job.getConfiguration().getInt("num.reducer", 1) : numReducer;
    job.setNumReduceTasks(numReducer);

    int status = job.waitForCompletion(true) ? 0 : 1;
    return status;
  }

Example #22

Show file

File: ImplicitRatingEstimator.java Project: vbajaria/sifarish

  @Override
  public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    String jobName = "Implicit rating estimator MR";
    job.setJobName(jobName);

    job.setJarByClass(ImplicitRatingEstimator.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(ImplicitRatingEstimator.RatingEstimatorMapper.class);
    job.setReducerClass(ImplicitRatingEstimator.RatingEstimatorReducer.class);

    job.setMapOutputKeyClass(Tuple.class);
    job.setMapOutputValueClass(Tuple.class);

    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    job.setGroupingComparatorClass(SecondarySort.TuplePairGroupComprator.class);
    job.setPartitionerClass(SecondarySort.TuplePairPartitioner.class);

    Utility.setConfiguration(job.getConfiguration());
    job.setNumReduceTasks(job.getConfiguration().getInt("num.reducer", 1));
    int status = job.waitForCompletion(true) ? 0 : 1;
    return status;
  }

Example #23

Show file

File: ImportVTLocationFromFileWithReducer.java Project: sihanwang/vesselmovemnt

  public int run(String[] args) throws Exception {
    // TODO Auto-generated method stub

    Job job =
        Job.getInstance(
            getConf(),
            "Import vessel locations from files in "
                + args[0]
                + " into table cdb_vessel:vessel_location"); // co

    FileInputFormat.addInputPath(job, new Path(args[0]));

    job.setJarByClass(ImportVTLocationFromFileWithReducer.class);
    job.setJobName("Vessel_location_injection");
    job.setInputFormatClass(VTVesselLocationFileInputFormat.class);
    job.setMapOutputKeyClass(Key_IMOAndRecordTime.class);
    job.setMapOutputValueClass(TextArrayWritable.class);

    job.setPartitionerClass(Partitioner_IMO.class);
    job.setGroupingComparatorClass(GroupComparator_IMO.class);

    job.setReducerClass(ImportReducer.class);
    job.setNumReduceTasks(Integer.parseInt(args[1]));

    job.setOutputFormatClass(NullOutputFormat.class);

    return job.waitForCompletion(true) ? 0 : 1;
  }

Example #24

Show file

File: InputSampler.java Project: AndreSchumacher/adam

  /**
   * Driver for InputSampler from the command line. Configures a JobConf instance and calls {@link
   * #writePartitionFile}.
   */
  public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    ArrayList<String> otherArgs = new ArrayList<String>();
    Sampler<K, V> sampler = null;
    for (int i = 0; i < args.length; ++i) {
      try {
        if ("-r".equals(args[i])) {
          job.setNumReduceTasks(Integer.parseInt(args[++i]));
        } else if ("-inFormat".equals(args[i])) {
          job.setInputFormatClass(Class.forName(args[++i]).asSubclass(InputFormat.class));
        } else if ("-keyClass".equals(args[i])) {
          job.setMapOutputKeyClass(Class.forName(args[++i]).asSubclass(WritableComparable.class));
        } else if ("-splitSample".equals(args[i])) {
          int numSamples = Integer.parseInt(args[++i]);
          int maxSplits = Integer.parseInt(args[++i]);
          if (0 >= maxSplits) maxSplits = Integer.MAX_VALUE;
          sampler = new SplitSampler<K, V>(numSamples, maxSplits);
        } else if ("-splitRandom".equals(args[i])) {
          double pcnt = Double.parseDouble(args[++i]);
          int numSamples = Integer.parseInt(args[++i]);
          int maxSplits = Integer.parseInt(args[++i]);
          if (0 >= maxSplits) maxSplits = Integer.MAX_VALUE;
          sampler = new RandomSampler<K, V>(pcnt, numSamples, maxSplits);
        } else if ("-splitInterval".equals(args[i])) {
          double pcnt = Double.parseDouble(args[++i]);
          int maxSplits = Integer.parseInt(args[++i]);
          if (0 >= maxSplits) maxSplits = Integer.MAX_VALUE;
          sampler = new IntervalSampler<K, V>(pcnt, maxSplits);
        } else {
          otherArgs.add(args[i]);
        }
      } catch (NumberFormatException except) {
        System.out.println("ERROR: Integer expected instead of " + args[i]);
        return printUsage();
      } catch (ArrayIndexOutOfBoundsException except) {
        System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
        return printUsage();
      }
    }
    if (job.getNumReduceTasks() <= 1) {
      System.err.println("Sampler requires more than one reducer");
      return printUsage();
    }
    if (otherArgs.size() < 2) {
      System.out.println("ERROR: Wrong number of parameters: ");
      return printUsage();
    }
    if (null == sampler) {
      sampler = new RandomSampler<K, V>(0.1, 10000, 10);
    }

    Path outf = new Path(otherArgs.remove(otherArgs.size() - 1));
    TotalOrderPartitioner.setPartitionFile(getConf(), outf);
    for (String s : otherArgs) {
      FileInputFormat.addInputPath(job, new Path(s));
    }
    InputSampler.<K, V>writePartitionFile(job, sampler);

    return 0;
  }

Example #25

Show file

File: LoadTest.java Project: hypertable/hypertable

  private void doMapReduce() {
    try {
      Job job = Job.getInstance();

      job.getConfiguration().set(OutputFormat.NAMESPACE, "/");
      job.getConfiguration().set(OutputFormat.TABLE, "LoadTest");
      job.getConfiguration().setInt(OutputFormat.MUTATOR_FLAGS, MutatorFlag.NO_LOG_SYNC.getValue());
      job.getConfiguration().setInt(OutputFormat.MUTATOR_FLUSH_INTERVAL, 0);
      job.getConfiguration().setInt("LoadSplit.TOTAL_ROWS", this.totalRows);
      job.getConfiguration().setInt("LoadSplit.CLIENTS", this.clients);
      job.setJarByClass(LoadTest.class);
      job.setJobName("Hypertable MapReduce connector LoadTest");
      job.setInputFormatClass(LoadInputFormat.class);
      job.setOutputFormatClass(OutputFormat.class);
      job.setMapOutputKeyClass(KeyWritable.class);
      job.setMapOutputValueClass(BytesWritable.class);
      job.setMapperClass(LoadMapper.class);
      job.setReducerClass(LoadReducer.class);
      job.setNumReduceTasks(this.clients);

      job.waitForCompletion(true);
    } catch (Exception e) {
      e.printStackTrace();
    }
  }

Example #26

Show file

File: StatisticsGenerator.java Project: pdendek/CoAnSys

  @Override
  public int run(String[] args) throws Exception {
    Job job = new Job(conf);
    job.setJarByClass(StatisticsGenerator.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileInputFormat.addInputPath(job, new Path(args[0]));
    SequenceFileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setMapperClass(StatisticsMap.class);
    job.setReducerClass(StatisticsReduce.class);
    job.setMapOutputKeyClass(SortedMapWritableComparable.class);
    job.setMapOutputValueClass(BytesWritable.class);
    job.setNumReduceTasks(16);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(BytesWritable.class);

    long startTime =
        ManagementFactory.getThreadMXBean().getThreadCpuTime(Thread.currentThread().getId());
    boolean success = job.waitForCompletion(true);
    long endTime =
        ManagementFactory.getThreadMXBean().getThreadCpuTime(Thread.currentThread().getId());
    double duration = (endTime - startTime) / Math.pow(10, 9);
    logger.info(
        "=== Job Finished in " + duration + " seconds " + (success ? "(success)" : "(failure)"));
    return success ? 0 : 1;
  }

Example #27

Show file

File: WordCount.java Project: dimajix/hadoop-training

  private boolean runJob(Configuration conf)
      throws IOException, InterruptedException, ClassNotFoundException {
    Job job = Job.getInstance(conf, "WordCount");
    job.setJarByClass(WordCount.class);

    // Configure input format and files
    job.setInputFormatClass(TextInputFormat.class);
    FileInputFormat.addInputPath(job, new Path(inputDir));

    // Configure output format and files
    job.setOutputFormatClass(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(job, new Path(outputDir));

    // set up mapper, combiner and reducer
    job.setMapperClass(WordCountMapper.class);
    job.setReducerClass(WordCountReducer.class);
    job.setCombinerClass(WordCountReducer.class);

    // set sorting, grouping and partitioning
    // set key and value types
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    return job.waitForCompletion(true);
  }

Example #28

Show file

File: FileCombiner.java Project: hfausta/thesis-file-combiner

  public static void main(String[] args)
      throws IOException, InterruptedException, ClassNotFoundException {
    // TODO Auto-generated method stub
    JobConf conf = new JobConf();
    conf.setNumMapTasks(1);
    conf.setNumReduceTasks(5);

    FileSystem fs = FileSystem.get(conf);
    Path dir = new Path(args[0]);
    FileStatus[] stats = fs.listStatus(dir);
    numFiles = stats.length;

    Job job = new Job(conf);
    job.setJarByClass(FileCombiner.class);
    job.setJobName("File Combiner");

    job.setMapperClass(FileCombinerMapper.class);
    job.setReducerClass(FileCombinerReducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    // LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(Text.class);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.waitForCompletion(true);
  }

Example #29

Show file

File: RunningAggregator.java Project: vbajaria/chombo

  @Override
  public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    String jobName = "Running aggregates  for numerical attributes";
    job.setJobName(jobName);

    job.setJarByClass(RunningAggregator.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    Utility.setConfiguration(job.getConfiguration(), "chombo");
    job.setMapperClass(RunningAggregator.AggrMapper.class);
    job.setReducerClass(RunningAggregator.AggrReducer.class);

    job.setMapOutputKeyClass(Tuple.class);
    job.setMapOutputValueClass(Tuple.class);

    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    job.setNumReduceTasks(job.getConfiguration().getInt("num.reducer", 1));

    int status = job.waitForCompletion(true) ? 0 : 1;
    return status;
  }

Example #30

Show file

File: TeraSortIngest.java Project: ollie314/accumulo

  @Override
  public int run(String[] args) throws Exception {
    Job job = Job.getInstance(getConf());
    job.setJobName("TeraSortCloud");
    job.setJarByClass(this.getClass());
    Opts opts = new Opts();
    opts.parseArgs(TeraSortIngest.class.getName(), args);

    job.setInputFormatClass(RangeInputFormat.class);
    job.setMapperClass(SortGenMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Mutation.class);

    job.setNumReduceTasks(0);

    job.setOutputFormatClass(AccumuloOutputFormat.class);
    opts.setAccumuloConfigs(job);
    BatchWriterConfig bwConfig = new BatchWriterConfig().setMaxMemory(10L * 1000 * 1000);
    AccumuloOutputFormat.setBatchWriterOptions(job, bwConfig);

    Configuration conf = job.getConfiguration();
    conf.setLong(NUMROWS, opts.numRows);
    conf.setInt("cloudgen.minkeylength", opts.minKeyLength);
    conf.setInt("cloudgen.maxkeylength", opts.maxKeyLength);
    conf.setInt("cloudgen.minvaluelength", opts.minValueLength);
    conf.setInt("cloudgen.maxvaluelength", opts.maxValueLength);
    conf.set("cloudgen.tablename", opts.getTableName());

    if (args.length > 10) conf.setInt(NUMSPLITS, opts.splits);

    job.waitForCompletion(true);
    return job.isSuccessful() ? 0 : 1;
  }