Esempi in Java per Job.setSortComparatorClass

Linguaggio di programmazione: Java

Spazio dei nomi/nome del pacchetto: org.apache.hadoop.mapreduce

Classe/tipologia: Job

Metodo/funzione: setSortComparatorClass

Esempi su hotexamples.com: 10

Job.setSortComparatorClass in Java: 10 esempi trovati. Questi sono i migliori esempi reali in Java per org.apache.hadoop.mapreduce.Job.setSortComparatorClass, estratti da progetti open source. Li puoi valutare, per aiutarci a migliorare la qualità dei nostri esempi.

Metodi utilizzati di frequente

Mostra Nascondi

waitForCompletion(30)

getInstance(30)

setInputFormatClass(30)

setJarByClass(30)

setJobName(30)

setMapOutputKeyClass(30)

setMapOutputValueClass(30)

setMapperClass(30)

setNumReduceTasks(30)

setOutputFormatClass(30)

setCombinerClass(30)

setOutputKeyClass(30)

setOutputValueClass(30)

getCounters(30)

getConfiguration(30)

setPartitionerClass(30)

setReducerClass(30)

submit(21)

ensureState(20)

setGroupingComparatorClass(15)

isSuccessful(15)

getJobName(13)

setSortComparatorClass(10)

getJobID(9)

getNumReduceTasks(6)

setWorkingDirectory(6)

isComplete(6)

setSpeculativeExecution(6)

getTrackingURL(5)

getWorkingDirectory(5)

getCredentials(5)

getID(5)

addCacheFile(4)

killJob(3)

getOutputFormatClass(3)

setJar(2)

setJobID(2)

getTaskReports(2)

getJar(2)

getStatus(2)

failTask(1)

toString(1)

ensureNotSet(1)

setUseNewAPI(1)

connect(1)

killTask(1)

setReduceSpeculativeExecution(1)

getClass(1)

getInputFormatClass(1)

getSortComparator(1)

Esempio n. 1

Mostra file

File: HadoopJoin.java Progetto: yaol9/gbkws-hadoop

  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
      System.err.println("Usage: test.icde12.HadoopJoin <in> <out>");
      System.exit(2);
    }
    Job job = new Job(conf, "hadoop join");
    job.setJarByClass(HadoopJoin.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);

    job.setPartitionerClass(ICDEPartitioner.class);

    //    WritableComparator.define(Text.class,new ICDEComparator());

    job.setSortComparatorClass(ICDEComparator.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(8);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }

Esempio n. 2

Mostra file

File: NotInFinder.java Progetto: KGayan/Acacia

  public static void main(String[] args) throws Exception {
    String dir1 = "/user/miyuru/wcout";
    String dir2 = "/user/miyuru/notinverts";
    // We first delete the temporary directories if they exist on the HDFS
    FileSystem fs1 = FileSystem.get(new JobConf());

    if (fs1.exists(new Path(dir2))) {
      fs1.delete(new Path(dir2), true);
    }

    JobConf conf = new JobConf();
    conf.setNumMapTasks(96);
    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(LongWritable.class);
    conf.setMapperClass(TokenizerMapper.class);
    conf.setReducerClass(IntSumReducer.class);
    conf.setCombinerClass(IntSumReducer.class);
    conf.setInputFormat(NLinesInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);
    FileInputFormat.setInputPaths(conf, new Path(dir1));
    FileOutputFormat.setOutputPath(conf, new Path(dir2));
    Job job = new Job(conf, "NotInFinder");
    job.setJarByClass(WordCount.class);
    //   job.setMapperClass(TokenizerMapper.class);
    //   job.setCombinerClass(IntSumReducer.class);
    //   job.setReducerClass(IntSumReducer.class);
    //   job.setOutputKeyClass(LongWritable.class);
    //   job.setOutputValueClass(LongWritable.class);

    job.setSortComparatorClass(SortComparator.class);
    job.waitForCompletion(true);
  }

Esempio n. 3

Mostra file

File: TopFive.java Progetto: vikash11/Amazon-Reviews

  @Override
  public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
      System.err.println("Usage: topreviews <in> [<in>...] <out>");
      System.exit(2);
    }

    Job job = Job.getInstance(conf, "Top Five Reviews");
    job.setJarByClass(TopFive.class);
    job.setPartitionerClass(NaturalKeyPartitioner.class);
    job.setGroupingComparatorClass(NaturalKeyGroupingComparator.class);
    job.setSortComparatorClass(CompositeKeyComparator.class);

    job.setMapperClass(TopFiveMapper.class);
    job.setReducerClass(TopFiveReducer.class);

    job.setMapOutputKeyClass(TextPair.class);
    job.setMapOutputValueClass(TextPair.class);

    job.setOutputKeyClass(TextPair.class);
    job.setOutputValueClass(TextPair.class);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
    return 0;
  }

Esempio n. 4

Mostra file

File: map_reduce.java Progetto: upadhyaysabyasachi/InvertedWordIndexUsingHadoop

  public static void main(String[] args) throws Exception {

    Configuration conf = new Configuration();
    conf.set("mapred.textoutputformat.separator", ",");
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    //	Logger log  = Logger.getLogger("sds");
    Job job = new Job(conf, "Max ");

    job.setMapOutputKeyClass(CompositeKey.class);

    job.setPartitionerClass(ActualKeyPartitioner.class);
    job.setGroupingComparatorClass(ActualKeyGroupingComparator.class);
    job.setSortComparatorClass(CompositeKeyComparator.class);

    job.setJarByClass(map_reduce.class);
    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);

    job.setNumReduceTasks(27);

    job.setMapOutputKeyClass(CompositeKey.class);
    job.setMapOutputValueClass(Text.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }

Esempio n. 5

Mostra file

File: RepartitionJoinQ11.java Progetto: subbu3490/rdf-mapreduce-joins

  public static Job startJob(String[] args) throws IOException {

    // args[0] = hbase table name
    // args[1] = zookeeper

    Configuration hConf = HBaseConfiguration.create(new Configuration());
    hConf.set("hbase.zookeeper.quorum", args[1]);
    hConf.set("scan.table", args[0]);
    hConf.set("hbase.zookeeper.property.clientPort", "2181");

    Scan scan = new Scan();
    // scan.setFilter(rowColBloomFilter());

    Job job = new Job(hConf);
    job.setJobName("BSBM-Q11-RepartitionJoin");
    job.setJarByClass(RepartitionJoinQ11.class);
    // Change caching to speed up the scan
    scan.setCaching(500);
    scan.setMaxVersions(200);
    scan.setCacheBlocks(false);

    // Mapper settings
    TableMapReduceUtil.initTableMapperJob(
        args[0], // input HBase table name
        scan, // Scan instance to control CF and attribute selection
        RepartitionMapper.class, // mapper
        CompositeKeyWritable.class, // mapper output key
        KeyValueArrayWritable.class, // mapper output value
        job);

    // Repartition settings
    job.setPartitionerClass(CompositePartitioner.class);
    job.setSortComparatorClass(CompositeSortComparator.class);
    job.setGroupingComparatorClass(CompositeGroupingComparator.class);

    // Reducer settings
    job.setReducerClass(SharedServices.RepartitionJoin_Reducer.class); // reducer class
    job.setNumReduceTasks(1); // at least one, adjust as required

    FileOutputFormat.setOutputPath(job, new Path("output/BSBMQ11"));

    try {
      System.exit(job.waitForCompletion(true) ? 0 : 1);
    } catch (ClassNotFoundException e) {
      e.printStackTrace();
    } catch (InterruptedException e) {
      e.printStackTrace();
    }

    return job;
  }

Esempio n. 6

Mostra file

File: GridmixJob.java Progetto: JichengSong/hadoop-20

 public Job call() throws IOException, InterruptedException, ClassNotFoundException {
   job.setMapperClass(GridmixMapper.class);
   job.setReducerClass(GridmixReducer.class);
   job.setNumReduceTasks(jobdesc.getNumberReduces());
   job.setMapOutputKeyClass(GridmixKey.class);
   job.setMapOutputValueClass(GridmixRecord.class);
   job.setSortComparatorClass(GridmixKey.Comparator.class);
   job.setGroupingComparatorClass(SpecGroupingComparator.class);
   job.setInputFormatClass(GridmixInputFormat.class);
   job.setOutputFormatClass(RawBytesOutputFormat.class);
   job.setPartitionerClass(DraftPartitioner.class);
   job.setJarByClass(GridmixJob.class);
   job.getConfiguration().setInt("gridmix.job.seq", seq);
   job.getConfiguration()
       .set(ORIGNAME, null == jobdesc.getJobID() ? "<unknown>" : jobdesc.getJobID().toString());
   job.getConfiguration().setBoolean("mapred.used.genericoptionsparser", true);
   FileInputFormat.addInputPath(job, new Path("ignored"));
   FileOutputFormat.setOutputPath(job, outdir);
   job.submit();
   return job;
 }

Esempio n. 7

Mostra file

File: JobFactoryBean.java Progetto: nellaivijay/spring-hadoop

  @SuppressWarnings("rawtypes")
  public void afterPropertiesSet() throws Exception {
    final Configuration cfg = ConfigurationUtils.createFrom(configuration, properties);

    buildGenericOptions(cfg);

    if (StringUtils.hasText(user)) {
      UserGroupInformation ugi =
          UserGroupInformation.createProxyUser(user, UserGroupInformation.getLoginUser());
      ugi.doAs(
          new PrivilegedExceptionAction<Void>() {

            @Override
            public Void run() throws Exception {
              job = new Job(cfg);
              return null;
            }
          });
    } else {
      job = new Job(cfg);
    }

    ClassLoader loader =
        (beanClassLoader != null
            ? beanClassLoader
            : org.springframework.util.ClassUtils.getDefaultClassLoader());

    if (jar != null) {
      JobConf conf = (JobConf) job.getConfiguration();
      conf.setJar(jar.getURI().toString());
      loader = ExecutionUtils.createParentLastClassLoader(jar, beanClassLoader, cfg);
      conf.setClassLoader(loader);
    }

    // set first to enable auto-detection of K/V to skip the key/value types to be specified
    if (mapper != null) {
      Class<? extends Mapper> mapperClass = resolveClass(mapper, loader, Mapper.class);
      job.setMapperClass(mapperClass);
      configureMapperTypesIfPossible(job, mapperClass);
    }

    if (reducer != null) {
      Class<? extends Reducer> reducerClass = resolveClass(reducer, loader, Reducer.class);
      job.setReducerClass(reducerClass);
      configureReducerTypesIfPossible(job, reducerClass);
    }

    if (StringUtils.hasText(name)) {
      job.setJobName(name);
    }
    if (combiner != null) {
      job.setCombinerClass(resolveClass(combiner, loader, Reducer.class));
    }
    if (groupingComparator != null) {
      job.setGroupingComparatorClass(resolveClass(groupingComparator, loader, RawComparator.class));
    }
    if (inputFormat != null) {
      job.setInputFormatClass(resolveClass(inputFormat, loader, InputFormat.class));
    }
    if (mapKey != null) {
      job.setMapOutputKeyClass(resolveClass(mapKey, loader, Object.class));
    }
    if (mapValue != null) {
      job.setMapOutputValueClass(resolveClass(mapValue, loader, Object.class));
    }
    if (numReduceTasks != null) {
      job.setNumReduceTasks(numReduceTasks);
    }
    if (key != null) {
      job.setOutputKeyClass(resolveClass(key, loader, Object.class));
    }
    if (value != null) {
      job.setOutputValueClass(resolveClass(value, loader, Object.class));
    }
    if (outputFormat != null) {
      job.setOutputFormatClass(resolveClass(outputFormat, loader, OutputFormat.class));
    }
    if (partitioner != null) {
      job.setPartitionerClass(resolveClass(partitioner, loader, Partitioner.class));
    }
    if (sortComparator != null) {
      job.setSortComparatorClass(resolveClass(sortComparator, loader, RawComparator.class));
    }
    if (StringUtils.hasText(workingDir)) {
      job.setWorkingDirectory(new Path(workingDir));
    }
    if (jarClass != null) {
      job.setJarByClass(jarClass);
    }

    if (!CollectionUtils.isEmpty(inputPaths)) {
      for (String path : inputPaths) {
        FileInputFormat.addInputPath(job, new Path(path));
      }
    }

    if (StringUtils.hasText(outputPath)) {
      FileOutputFormat.setOutputPath(job, new Path(outputPath));
    }

    if (compressOutput != null) {
      FileOutputFormat.setCompressOutput(job, compressOutput);
    }

    if (codecClass != null) {
      FileOutputFormat.setOutputCompressorClass(
          job, resolveClass(codecClass, loader, CompressionCodec.class));
    }

    processJob(job);
  }

Esempio n. 8

Mostra file

File: CSRConverter.java Progetto: KGayan/Acacia

  public static void main(String[] args) throws Exception {
    if (!validArgs(args)) {
      printUsage();
      return;
    }
    // These are the temp paths that are created on HDFS
    String dir1 = "/user/miyuru/csrconverter-output";
    String dir2 = "/user/miyuru/csrconverter-output-sorted";

    // We first delete the temporary directories if they exist on the HDFS
    FileSystem fs1 = FileSystem.get(new JobConf());

    System.out.println("Deleting the dir : " + dir1);

    if (fs1.exists(new Path(dir1))) {
      fs1.delete(new Path(dir1), true);
    }

    System.out.println("Done deleting the dir : " + dir1);
    System.out.println("Deleting the dir : " + dir2);
    if (fs1.exists(new Path(dir2))) {
      fs1.delete(new Path(dir2), true);
    }

    Path notinPath = new Path("/user/miyuru/notinverts/notinverts");

    if (!fs1.exists(notinPath)) {
      fs1.create(notinPath);
    }

    System.out.println("Done deleting the dir : " + dir2);

    // Note on Aug 23 2014: Sometimes after this the mapReduce job hangs. need to see why.

    VertexCounterClient.setDefaultGraphID(args[3], args[2]);

    // First job creates the inverted index

    JobConf conf = new JobConf(CSRConverter.class);
    conf.set("org.acacia.partitioner.hbase.zookeeper.quorum", args[1]);
    conf.set("org.acacia.partitioner.hbase.table", args[2]);
    conf.set("org.acacia.partitioner.hbase.contacthost", args[3]);
    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Text.class);
    // conf.setMapperClass(InvertedMapper.class);
    conf.setReducerClass(InvertedReducer.class);
    // conf.setInputFormat(TextInputFormat.class);
    conf.setInputFormat(NLinesInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    // FileInputFormat.setInputPaths(conf, new Path(args[0]));
    MultipleInputs.addInputPath(
        conf, new Path(args[0]), NLinesInputFormat.class, InvertedMapper.class);
    MultipleInputs.addInputPath(
        conf,
        new Path("/user/miyuru/notinverts/notinverts"),
        TextInputFormat.class,
        InvertedMapper.class);
    FileOutputFormat.setOutputPath(conf, new Path(dir1));

    // Also for the moment we turn-off the speculative execution
    conf.setBoolean("mapred.map.tasks.speculative.execution", false);
    conf.setBoolean("mapred.reduce.tasks.speculative.execution", false);
    conf.setNumMapTasks(96);
    conf.setNumReduceTasks(96);
    conf.setPartitionerClass(VertexPartitioner.class);
    conf.set("vertex-count", args[4]);
    conf.set("zero-flag", args[5]);
    Job job = new Job(conf, "csr_inverter");
    job.setSortComparatorClass(SortComparator.class);
    job.waitForCompletion(true);
  }

Esempio n. 9

Mostra file

File: FrameworkDriver.java Progetto: jdstapleton/amino

  private int setJobParameters(Job job, AminoJob aj) throws Exception {
    final Configuration conf = job.getConfiguration();
    final Class<? extends DataLoader> dataLoaderClass = aj.getDataLoaderClass();
    AminoInputFormat.setDataLoader(job.getConfiguration(), dataLoaderClass.newInstance());

    if (aj instanceof AminoEnrichmentJob) {
      String output = "";
      int returnType = JOB_TYPE_ENRICHMENT;

      if (aj instanceof AminoReuseEnrichmentJob) {
        System.out.println("Running REUSE Enrichment Join Job");

        AminoReuseEnrichmentJob reuseJob = (AminoReuseEnrichmentJob) aj;
        AminoInputFormat.setDataLoader(
            job.getConfiguration(), reuseJob.getFirstPhaseDataLoaderClass().newInstance());

        String root = conf.get(AminoDriverUtils.ENRICHMENT_ROOT_OUTPUT);
        String front = "";
        if (!root.endsWith("/")) front = "/";
        root += front;
        String dir = reuseJob.getOutputSubDirectory(conf);
        output += root + dir;

        returnType = JOB_TYPE_REUSE_ENRICHMENT;
      } else {
        System.out.println("Running Enrichment Join Job");
      }

      int numReducers =
          conf.getInt(
              AMINO_NUM_REDUCERS_ENRICH_PHASE1,
              conf.getInt(AMINO_NUM_REDUCERS, DEFAULT_NUM_REDUCERS));
      job.setNumReduceTasks(numReducers);

      // Our Framework mapper and reducer
      job.setMapperClass(FrameworkEnrichmentJoinMapper.class);
      job.setCombinerClass(FrameworkEnrichmentJoinCombiner.class);
      job.setReducerClass(FrameworkEnrichmentJoinReducer.class);

      job.setMapOutputKeyClass(EnrichmentJoinKey.class); // Different
      job.setMapOutputValueClass(MapWritable.class);

      job.setOutputKeyClass(BucketStripped.class);
      job.setOutputValueClass(MapWritable.class); // Different

      job.setPartitionerClass(NaturalKeyPartitioner.class);
      job.setGroupingComparatorClass(NaturalKeyGroupingComparator.class);
      job.setSortComparatorClass(CompositeKeyComparator.class);

      job.setInputFormatClass(AminoMultiInputFormat.class);

      AminoEnrichmentJob aej = (AminoEnrichmentJob) aj;
      // AminoMultiInputFormat.setJoinDataLoader(conf, aej.getEnrichmentDataLoader().newInstance());
      AminoMultiInputFormat.setJoinDataLoaders(conf, aej.getEnrichmentDataLoaders());
      AminoMultiInputFormat.setEnrichWorker(conf, aej.getEnrichWorker().newInstance());

      job.setOutputFormatClass(SequenceFileOutputFormat.class);

      // TODO If it already exists, and its age is less than job running frequency, just reuse it
      // instead of doing the above job...
      if (output.length() == 0) {
        output = getEnrichmentOutputPath(aej, conf);
      }
      System.out.println("Output will be written to: " + PathUtils.getJobDataPath(output));

      SequenceFileOutputFormat.setOutputPath(job, new Path(PathUtils.getJobDataPath(output)));
      JobUtilities.deleteDirectory(conf, output);

      CacheBuilder.buildCaches(AminoDataUtils.getDataLoader(conf), aj, output, conf);

      return returnType;

    } else {
      System.out.println("\n==================== Running Amino Job =================\n");

      // Our Framework mapper and reducer
      job.setMapperClass(FrameworkMapper.class);
      job.setReducerClass(FrameworkReducer.class);

      job.setMapOutputKeyClass(BucketStripped.class);
      job.setMapOutputValueClass(MapWritable.class);

      job.setOutputKeyClass(BucketStripped.class);
      job.setOutputValueClass(AminoWritable.class);

      job.setInputFormatClass(AminoInputFormat.class);

      job.setOutputFormatClass(AminoOutputFormat.class);
      job.setNumReduceTasks(conf.getInt(AMINO_NUM_REDUCERS, DEFAULT_NUM_REDUCERS));

      AminoOutputFormat.setAminoConfigPath(
          job, job.getConfiguration().get(AminoConfiguration.DEFAULT_CONFIGURATION_PATH_KEY));

      String output = conf.get("amino.output");
      System.out.println("Output will be written to: " + PathUtils.getJobDataPath(output));
      AminoOutputFormat.setOutputPath(job, new Path(PathUtils.getJobDataPath(output)));
      JobUtilities.deleteDirectory(conf, output);

      CacheBuilder.buildCaches(AminoDataUtils.getDataLoader(conf), aj, output, conf);
      return JOB_TYPE_NORMAL;
    }
  }

Esempio n. 10

Mostra file

File: ExtractGlobalStats.java Progetto: xuerenlv/mavuno

  @SuppressWarnings("unchecked")
  public int run() throws ClassNotFoundException, InterruptedException, IOException {
    Configuration conf = getConf();

    String inputPath = MavunoUtils.getRequiredParam("Mavuno.ExtractGlobalStats.InputPath", conf);
    String corpusPath = MavunoUtils.getRequiredParam("Mavuno.ExtractGlobalStats.CorpusPath", conf);
    String corpusClass =
        MavunoUtils.getRequiredParam("Mavuno.ExtractGlobalStats.CorpusClass", conf);
    String extractorClass =
        MavunoUtils.getRequiredParam("Mavuno.ExtractGlobalStats.ExtractorClass", conf);
    String extractorArgs =
        MavunoUtils.getRequiredParam("Mavuno.ExtractGlobalStats.ExtractorArgs", conf);
    String extractorTarget =
        MavunoUtils.getRequiredParam("Mavuno.ExtractGlobalStats.ExtractorTarget", conf)
            .toLowerCase();
    String outputPath = MavunoUtils.getRequiredParam("Mavuno.ExtractGlobalStats.OutputPath", conf);

    // split examples
    conf.set("Mavuno.Split.InputPath", inputPath);
    conf.set("Mavuno.Split.OutputPath", outputPath + "/../split");
    conf.set("Mavuno.Split.SplitKey", extractorTarget);
    new Split(conf).run();

    // get splits
    FileStatus[] files = MavunoUtils.getDirectoryListing(conf, outputPath + "/../split");
    int split = 0;
    for (FileStatus file : files) {
      if (!file.getPath().getName().endsWith(".examples")) {
        continue;
      }

      conf.set("Mavuno.ExtractGlobalStats.ExamplesPath", file.getPath().toString());

      sLogger.info("Tool name: ExtractGlobalStats");
      sLogger.info(" - Input path: " + inputPath);
      sLogger.info(" - Examples path: " + file.getPath());
      sLogger.info(" - Example split: " + split);
      sLogger.info(" - Corpus path: " + corpusPath);
      sLogger.info(" - Corpus class: " + corpusClass);
      sLogger.info(" - Extractor class: " + extractorClass);
      sLogger.info(" - Extractor class: " + extractorArgs);
      sLogger.info(" - Extractor target: " + extractorTarget);
      sLogger.info(" - Output path: " + outputPath);

      Job job = new Job(conf);
      job.setJobName("ExtractGlobalStats");
      job.setJarByClass(ExtractGlobalStats.class);

      MavunoUtils.recursivelyAddInputPaths(job, corpusPath);
      FileOutputFormat.setOutputPath(job, new Path(outputPath + "/../split/" + split));

      job.setInputFormatClass((Class<? extends InputFormat>) Class.forName(corpusClass));
      job.setOutputFormatClass(SequenceFileOutputFormat.class);

      FileOutputFormat.setCompressOutput(job, true);
      SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);

      job.setMapOutputKeyClass(ContextPatternWritable.class);
      job.setSortComparatorClass(ContextPatternWritable.Comparator.class);
      job.setPartitionerClass(ContextPatternWritable.FullPartitioner.class);
      job.setMapOutputValueClass(ContextPatternStatsWritable.class);

      job.setOutputKeyClass(ContextPatternWritable.class);
      job.setOutputValueClass(ContextPatternStatsWritable.class);

      job.setMapperClass(MyMapper.class);
      job.setReducerClass(MyReducer.class);

      job.waitForCompletion(true);

      split++;
    }

    // combine splits
    conf.setInt("Mavuno.CombineGlobalStats.TotalSplits", split);
    conf.set("Mavuno.CombineGlobalStats.InputPath", outputPath + "/../split/");
    conf.set("Mavuno.CombineGlobalStats.OutputPath", outputPath);
    new CombineGlobalStats(conf).run();

    MavunoUtils.removeDirectory(conf, outputPath + "/../split");

    return 0;
  }