@Override
  public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), getClass());
    conf.setJobName("UFO count");

    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
      System.err.println("Usage: avro UFO counter <in> <out>");
      System.exit(2);
    }

    FileInputFormat.addInputPath(conf, new Path(otherArgs[0]));
    Path outputPath = new Path(otherArgs[1]);
    FileOutputFormat.setOutputPath(conf, outputPath);
    outputPath.getFileSystem(conf).delete(outputPath);
    Schema input_schema = Schema.parse(getClass().getResourceAsStream("ufo.avsc"));
    AvroJob.setInputSchema(conf, input_schema);
    AvroJob.setMapOutputSchema(
        conf,
        Pair.getPairSchema(Schema.create(Schema.Type.STRING), Schema.create(Schema.Type.LONG)));

    AvroJob.setOutputSchema(conf, OUTPUT_SCHEMA);
    AvroJob.setMapperClass(conf, AvroRecordMapper.class);
    AvroJob.setReducerClass(conf, AvroRecordReducer.class);
    conf.setInputFormat(AvroInputFormat.class);
    JobClient.runJob(conf);

    return 0;
  }
  private void testMapFileOutputCommitterInternal(int version) throws Exception {
    JobConf conf = new JobConf();
    FileOutputFormat.setOutputPath(conf, outDir);
    conf.set(JobContext.TASK_ATTEMPT_ID, attempt);
    conf.setInt(
        org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter
            .FILEOUTPUTCOMMITTER_ALGORITHM_VERSION,
        version);
    JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
    TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
    FileOutputCommitter committer = new FileOutputCommitter();

    // setup
    committer.setupJob(jContext);
    committer.setupTask(tContext);

    // write output
    MapFileOutputFormat theOutputFormat = new MapFileOutputFormat();
    RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(null, conf, partFile, null);
    writeMapFileOutput(theRecordWriter, tContext);

    // do commit
    if (committer.needsTaskCommit(tContext)) {
      committer.commitTask(tContext);
    }
    committer.commitJob(jContext);

    // validate output
    validateMapFileOutputContent(FileSystem.get(conf), outDir);
    FileUtil.fullyDelete(new File(outDir.toString()));
  }
  public void testComplexNameWithRegex() throws Exception {
    OutputStream os = getFileSystem().create(new Path(getInputDir(), "text.txt"));
    Writer wr = new OutputStreamWriter(os);
    wr.write("b a\n");
    wr.close();

    JobConf conf = createJobConf();
    conf.setJobName("name \\Evalue]");

    conf.setInputFormat(TextInputFormat.class);

    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(IdentityMapper.class);

    FileInputFormat.setInputPaths(conf, getInputDir());

    FileOutputFormat.setOutputPath(conf, getOutputDir());

    JobClient.runJob(conf);

    Path[] outputFiles =
        FileUtil.stat2Paths(getFileSystem().listStatus(getOutputDir(), new OutputLogFilter()));
    assertEquals(1, outputFiles.length);
    InputStream is = getFileSystem().open(outputFiles[0]);
    BufferedReader reader = new BufferedReader(new InputStreamReader(is));
    assertEquals("0\tb a", reader.readLine());
    assertNull(reader.readLine());
    reader.close();
  }
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] remainArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    if (remainArgs.length != 2) {
      System.err.println("Usage: wordcount <input> <output>");
      System.exit(1);
    }

    Job job = new Job(conf, "wordcount");
    job.setJarByClass(WordCount.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(Map.class);
    job.setCombinerClass(Reduce.class);
    job.setReducerClass(Reduce.class);

    job.setNumReduceTasks(4);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileSystem.get(conf).delete(new Path(remainArgs[1]), true);

    FileInputFormat.setInputPaths(job, new Path(remainArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(remainArgs[1]));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
  @Override
  public int run(String[] args) throws Exception {
    if (args.length != 2) {
      System.err.printf(
          "Usage: %s [generic options] <input> <output>\n", getClass().getSimpleName());
      ToolRunner.printGenericCommandUsage(System.err);
      return -1;
    }

    JobConf conf = new JobConf(getConf(), getClass());
    conf.setJobName("Max temperature");

    FileInputFormat.addInputPath(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(MaxTemperatureMapper.class);
    conf.setCombinerClass(MaxTemperatureReducer.class);
    conf.setReducerClass(MaxTemperatureReducer.class);

    // vv MaxTemperatureDriverV6
    conf.setProfileEnabled(true);
    conf.setProfileParams(
        "-agentlib:hprof=cpu=samples,heap=sites,depth=6," + "force=n,thread=y,verbose=n,file=%s");
    conf.setProfileTaskRange(true, "0-2");
    // ^^ MaxTemperatureDriverV6

    JobClient.runJob(conf);
    return 0;
  }
  public void testAbort() throws IOException {
    JobConf job = new JobConf();
    setConfForFileOutputCommitter(job);
    JobContext jContext = new JobContextImpl(job, taskID.getJobID());
    TaskAttemptContext tContext = new TaskAttemptContextImpl(job, taskID);
    FileOutputCommitter committer = new FileOutputCommitter();
    FileOutputFormat.setWorkOutputPath(job, committer.getTempTaskOutputPath(tContext));

    // do setup
    committer.setupJob(jContext);
    committer.setupTask(tContext);
    String file = "test.txt";

    // A reporter that does nothing
    Reporter reporter = Reporter.NULL;
    // write output
    FileSystem localFs = FileSystem.getLocal(job);
    TextOutputFormat theOutputFormat = new TextOutputFormat();
    RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(localFs, job, file, reporter);
    writeOutput(theRecordWriter, reporter);

    // do abort
    committer.abortTask(tContext);
    File expectedFile =
        new File(new Path(committer.getTempTaskOutputPath(tContext), file).toString());
    assertFalse("task temp dir still exists", expectedFile.exists());

    committer.abortJob(jContext, JobStatus.State.FAILED);
    expectedFile = new File(new Path(outDir, FileOutputCommitter.TEMP_DIR_NAME).toString());
    assertFalse("job temp dir still exists", expectedFile.exists());
    assertEquals("Output directory not empty", 0, new File(outDir.toString()).listFiles().length);
    FileUtil.fullyDelete(new File(outDir.toString()));
  }
Beispiel #7
0
  private String runJob() throws Exception {
    OutputStream os = getFileSystem().create(new Path(getInputDir(), "text.txt"));
    Writer wr = new OutputStreamWriter(os);
    wr.write("hello1\n");
    wr.write("hello2\n");
    wr.write("hello3\n");
    wr.close();

    JobConf conf = createJobConf();
    conf.setJobName("mr");
    conf.setJobPriority(JobPriority.HIGH);

    conf.setInputFormat(TextInputFormat.class);

    conf.setMapOutputKeyClass(LongWritable.class);
    conf.setMapOutputValueClass(Text.class);

    conf.setOutputFormat(TextOutputFormat.class);
    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(org.apache.hadoop.mapred.lib.IdentityMapper.class);
    conf.setReducerClass(org.apache.hadoop.mapred.lib.IdentityReducer.class);

    FileInputFormat.setInputPaths(conf, getInputDir());
    FileOutputFormat.setOutputPath(conf, getOutputDir());

    return JobClient.runJob(conf).getID().toString();
  }
  public static void runSortJob(String... args) throws Exception {

    Path input = new Path(args[0]);
    Path output = new Path(args[1]);

    JobConf job = new JobConf();

    job.setNumReduceTasks(2);

    job.setInputFormat(KeyValueTextInputFormat.class);
    job.setOutputFormat(TextOutputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    FileInputFormat.setInputPaths(job, input);
    FileOutputFormat.setOutputPath(job, output);

    job.setJarByClass(SampleJob.class);

    output.getFileSystem(job).delete(output, true);

    JobClient jc = new JobClient(job);
    JobClient.setTaskOutputFilter(job, JobClient.TaskStatusFilter.ALL);
    RunningJob rj = jc.submitJob(job);
    try {
      if (!jc.monitorAndPrintJob(job, rj)) {
        System.out.println("Job Failed: " + rj.getFailureInfo());
        throw new IOException("Job failed!");
      }
    } catch (InterruptedException ie) {
      Thread.currentThread().interrupt();
    }
  }
  @Override
  public int run(String[] arg0) throws Exception {
    // config a job and start it
    Configuration conf = getConf();
    Job job = new Job(conf, "Index construction..");
    job.setJarByClass(IndexConstructorDriver.class);
    job.setMapperClass(IndexConstructorMapper.class);
    job.setReducerClass(IndexConstructorReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(InvertedListWritable.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    // can add the dir by the config
    FileSystem fs = FileSystem.get(conf);
    String workdir = conf.get("org.joy.crawler.dir", "crawler/");
    fs.delete(new Path(workdir + "indexOutput/"), true);
    FileInputFormat.addInputPath(job, new Path(workdir + "content/"));
    FileOutputFormat.setOutputPath(job, new Path(workdir + "indexOutput/"));
    System.out.println(
        "indexer starts to work, it begins to construct the index, please wait ...\n");
    return job.waitForCompletion(true) ? 0 : 1;
  }
  private void testAbortInternal(int version) throws IOException, InterruptedException {
    JobConf conf = new JobConf();
    FileOutputFormat.setOutputPath(conf, outDir);
    conf.set(JobContext.TASK_ATTEMPT_ID, attempt);
    conf.setInt(
        org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter
            .FILEOUTPUTCOMMITTER_ALGORITHM_VERSION,
        version);
    JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
    TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
    FileOutputCommitter committer = new FileOutputCommitter();

    // do setup
    committer.setupJob(jContext);
    committer.setupTask(tContext);

    // write output
    TextOutputFormat theOutputFormat = new TextOutputFormat();
    RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(null, conf, partFile, null);
    writeOutput(theRecordWriter, tContext);

    // do abort
    committer.abortTask(tContext);
    File out = new File(outDir.toUri().getPath());
    Path workPath = committer.getWorkPath(tContext, outDir);
    File wp = new File(workPath.toUri().getPath());
    File expectedFile = new File(wp, partFile);
    assertFalse("task temp dir still exists", expectedFile.exists());

    committer.abortJob(jContext, JobStatus.State.FAILED);
    expectedFile = new File(out, FileOutputCommitter.TEMP_DIR_NAME);
    assertFalse("job temp dir still exists", expectedFile.exists());
    assertEquals("Output directory not empty", 0, out.listFiles().length);
    FileUtil.fullyDelete(out);
  }
Beispiel #11
0
  public static void main(String[] args) throws Exception {

    final String NAME_NODE = "hdfs://sandbox.hortonworks.com:8020";
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf);
    job.setJarByClass(WordCount.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(NullWritable.class);
    if (args.length > 2) {
      job.setNumReduceTasks(Integer.parseInt(args[2]));
    }

    job.setMapperClass(CountMapper.class);
    job.setReducerClass(CountReducer.class);
    job.setJarByClass(WordCount.class);
    job.setNumReduceTasks(1);

    FileInputFormat.addInputPath(job, new Path(args[0] + "data/plot_summaries.txt"));
    FileSystem fs = FileSystem.get(conf);
    // handle (e.g. delete) existing output path
    Path outputDestination = new Path(args[0] + args[1]);
    if (fs.exists(outputDestination)) {
      fs.delete(outputDestination, true);
    }

    // set output path & start job1
    FileOutputFormat.setOutputPath(job, outputDestination);
    int jobCompletionStatus = job.waitForCompletion(true) ? 0 : 1;
  }
 /* (non-Javadoc)
  * @see eu.stratosphere.pact.common.io.FileOutputFormat#close()
  */
 @Override
 public void close() throws IOException {
   if (wrt != null) {
     this.wrt.close();
   }
   super.close();
 }
  public static void runJob(String[] args) {
    JobConf conf = new JobConf(CassandraBulkLoader.class);

    if (args.length >= 4) {
      conf.setNumReduceTasks(new Integer(args[3]));
    }

    try {
      // We store the cassandra storage-conf.xml on the HDFS cluster
      DistributedCache.addCacheFile(new URI("/cassandra/storage-conf.xml#storage-conf.xml"), conf);
    } catch (URISyntaxException e) {
      throw new RuntimeException(e);
    }
    conf.setInputFormat(KeyValueTextInputFormat.class);
    conf.setJobName("CassandraBulkLoader_v2");
    conf.setMapperClass(Map.class);
    conf.setReducerClass(Reduce.class);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    FileInputFormat.setInputPaths(conf, new Path(args[1]));
    FileOutputFormat.setOutputPath(conf, new Path(args[2]));
    try {
      JobClient.runJob(conf);
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  }
Beispiel #14
0
  public void inject(Path crawlDb, Path urlDir) throws IOException {
    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    long start = System.currentTimeMillis();
    if (LOG.isInfoEnabled()) {
      LOG.info("Injector: starting at " + sdf.format(start));
      LOG.info("Injector: crawlDb: " + crawlDb);
      LOG.info("Injector: urlDir: " + urlDir);
    }

    Path tempDir =
        new Path(
            getConf().get("mapred.temp.dir", ".")
                + "/inject-temp-"
                + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

    // map text input file to a <url,CrawlDatum> file
    if (LOG.isInfoEnabled()) {
      LOG.info("Injector: Converting injected urls to crawl db entries.");
    }
    JobConf sortJob = new NutchJob(getConf());
    sortJob.setJobName("inject " + urlDir);
    FileInputFormat.addInputPath(sortJob, urlDir);
    sortJob.setMapperClass(InjectMapper.class);

    FileOutputFormat.setOutputPath(sortJob, tempDir);
    sortJob.setOutputFormat(SequenceFileOutputFormat.class);
    sortJob.setOutputKeyClass(Text.class);
    sortJob.setOutputValueClass(CrawlDatum.class);
    sortJob.setLong("injector.current.time", System.currentTimeMillis());
    RunningJob mapJob = JobClient.runJob(sortJob);

    long urlsInjected = mapJob.getCounters().findCounter("injector", "urls_injected").getValue();
    long urlsFiltered = mapJob.getCounters().findCounter("injector", "urls_filtered").getValue();
    LOG.info("Injector: total number of urls rejected by filters: " + urlsFiltered);
    LOG.info(
        "Injector: total number of urls injected after normalization and filtering: "
            + urlsInjected);

    // merge with existing crawl db
    if (LOG.isInfoEnabled()) {
      LOG.info("Injector: Merging injected urls into crawl db.");
    }
    JobConf mergeJob = CrawlDb.createJob(getConf(), crawlDb);
    FileInputFormat.addInputPath(mergeJob, tempDir);
    mergeJob.setReducerClass(InjectReducer.class);
    JobClient.runJob(mergeJob);
    CrawlDb.install(mergeJob, crawlDb);

    // clean up
    FileSystem fs = FileSystem.get(getConf());
    fs.delete(tempDir, true);

    long end = System.currentTimeMillis();
    LOG.info(
        "Injector: finished at "
            + sdf.format(end)
            + ", elapsed: "
            + TimingUtil.elapsedTime(start, end));
  }
Beispiel #15
0
  /**
   * @param process
   * @param tap
   * @param conf
   */
  @Override
  public void sinkConfInit(
      FlowProcess<JobConf> process, Tap<JobConf, RecordReader, OutputCollector> tap, JobConf conf) {
    conf.setOutputFormat(MongoOutputFormat.class);
    MongoConfigUtil.setOutputURI(conf, this.mongoUri);

    FileOutputFormat.setOutputPath(conf, getPath());
  }
 /* (non-Javadoc)
  * @see eu.stratosphere.pact.common.io.FileOutputFormat#open(int)
  */
 @Override
 public void open(int taskNumber) throws IOException {
   super.open(taskNumber);
   this.wrt =
       this.charsetName == null
           ? new OutputStreamWriter(new BufferedOutputStream(this.stream, 4096))
           : new OutputStreamWriter(new BufferedOutputStream(this.stream, 4096), this.charsetName);
 }
  public static int main(String[] args) throws Exception {

    int i;
    String outPath;
    int numMaps = 0, numReds = 0;

    List<String> other_args = new ArrayList<String>();
    for (i = 0; i < args.length; ++i) {
      try {
        if ("-m".equals(args[i])) {
          numMaps = Integer.parseInt(args[++i]);
        } else if ("-r".equals(args[i])) {
          numReds = Integer.parseInt(args[++i]);
        } else {
          other_args.add(args[i]);
        }
      } catch (NumberFormatException except) {
        System.out.println("ERROR: Integer expected instead of " + args[i]);
        printUsage();
      } catch (ArrayIndexOutOfBoundsException except) {
        System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
        printUsage(); // exits
      }
    }
    // Make sure there are exactly 2 parameters left.
    if (other_args.size() != 2) {
      System.out.println(
          "ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2.");
      printUsage();
    }

    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    Date startIteration;
    Date endIteration;
    JobConf conf = new JobConf(Kmeans.class);
    conf.setJobName("kmeans");
    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapOutputKeyClass(IntWritable.class);
    conf.setMapOutputValueClass(ClusterWritable.class);
    conf.setMapperClass(MapClass.class);
    conf.setReducerClass(Reduce.class);
    conf.setNumMapTasks(numMaps);
    conf.setNumReduceTasks(numReds);
    FileInputFormat.setInputPaths(conf, new Path(other_args.get(0)));
    outPath = new String(other_args.get(1));
    FileOutputFormat.setOutputPath(conf, new Path(outPath));
    startIteration = new Date();
    JobClient.runJob(conf);
    endIteration = new Date();
    System.out.println(
        "The iteration took "
            + (endIteration.getTime() - startIteration.getTime()) / 1000
            + " seconds.");
    return 0;
  }
Beispiel #18
0
  public static void main(String[] args) throws IOException {

    /*JobConf conf = new JobConf();

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    conf.setMapperClass(IpCounterMapper.class);
    conf.setCombinerClass(IpCounterReducer.class);
    conf.setReducerClass(IpCounterReducer.class);


    String inputDir = args[0];
    String outputDir = args[1];

    FileInputFormat.setInputPaths(conf, inputDir);
    FileOutputFormat.setOutputPath(conf, new Path(outputDir));

    boolean flag = JobClient.runJob(conf).isSuccessful();

    System.out.println(args.length);*/

    if (args.length < 2) {
      System.out.println("args not right!");
      return;
    }

    JobConf conf = new JobConf(IpCount1.class);

    // set output key class
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    // set mapper & reducer class
    conf.setMapperClass(IpCounterMapper.class);
    conf.setCombinerClass(IpCounterReducer.class);
    conf.setReducerClass(IpCounterReducer.class);

    // set format
    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    String inputDir = args[0];
    String outputDir = args[1];

    // FileInputFormat.setInputPaths(conf, "/user/hadoop/rongxin/locationinput/");
    FileInputFormat.setInputPaths(conf, inputDir);
    FileOutputFormat.setOutputPath(conf, new Path(outputDir));

    boolean flag = JobClient.runJob(conf).isSuccessful();
  }
  @Override
  public void configure(Configuration parameters) {
    super.configure(parameters);

    // read own parameters
    this.blockSize = parameters.getLong(BLOCK_SIZE_PARAMETER_KEY, NATIVE_BLOCK_SIZE);
    if (this.blockSize < 1 && this.blockSize != NATIVE_BLOCK_SIZE)
      throw new IllegalArgumentException("The block size parameter must be set and larger than 0.");
    if (this.blockSize > Integer.MAX_VALUE)
      throw new UnsupportedOperationException(
          "Currently only block size up to Integer.MAX_VALUE are supported");
  }
  @Override
  public void open(int taskNumber, int numTasks) throws IOException {
    super.open(taskNumber, numTasks);

    final long blockSize =
        this.blockSize == NATIVE_BLOCK_SIZE
            ? this.outputFilePath.getFileSystem().getDefaultBlockSize()
            : this.blockSize;

    this.blockBasedInput = new BlockBasedOutput(this.stream, (int) blockSize);
    this.dataOutputStream = new DataOutputStream(this.blockBasedInput);
  }
 /** Configure a job given argv. */
 public static boolean parseArgs(String[] argv, JobConf job) throws IOException {
   if (argv.length < 1) {
     return 0 == printUsage();
   }
   for (int i = 0; i < argv.length; ++i) {
     if (argv.length == i + 1) {
       System.out.println("ERROR: Required parameter missing from " + argv[i]);
       return 0 == printUsage();
     }
     try {
       if ("-m".equals(argv[i])) {
         job.setNumMapTasks(Integer.parseInt(argv[++i]));
       } else if ("-r".equals(argv[i])) {
         job.setNumReduceTasks(Integer.parseInt(argv[++i]));
       } else if ("-inFormat".equals(argv[i])) {
         job.setInputFormat(Class.forName(argv[++i]).asSubclass(InputFormat.class));
       } else if ("-outFormat".equals(argv[i])) {
         job.setOutputFormat(Class.forName(argv[++i]).asSubclass(OutputFormat.class));
       } else if ("-outKey".equals(argv[i])) {
         job.setOutputKeyClass(Class.forName(argv[++i]).asSubclass(WritableComparable.class));
       } else if ("-outValue".equals(argv[i])) {
         job.setOutputValueClass(Class.forName(argv[++i]).asSubclass(Writable.class));
       } else if ("-keepmap".equals(argv[i])) {
         job.set(
             org.apache.hadoop.mapreduce.GenericMRLoadGenerator.MAP_PRESERVE_PERCENT, argv[++i]);
       } else if ("-keepred".equals(argv[i])) {
         job.set(
             org.apache.hadoop.mapreduce.GenericMRLoadGenerator.REDUCE_PRESERVE_PERCENT,
             argv[++i]);
       } else if ("-outdir".equals(argv[i])) {
         FileOutputFormat.setOutputPath(job, new Path(argv[++i]));
       } else if ("-indir".equals(argv[i])) {
         FileInputFormat.addInputPaths(job, argv[++i]);
       } else if ("-inFormatIndirect".equals(argv[i])) {
         job.setClass(
             org.apache.hadoop.mapreduce.GenericMRLoadGenerator.INDIRECT_INPUT_FORMAT,
             Class.forName(argv[++i]).asSubclass(InputFormat.class),
             InputFormat.class);
         job.setInputFormat(IndirectInputFormat.class);
       } else {
         System.out.println("Unexpected argument: " + argv[i]);
         return 0 == printUsage();
       }
     } catch (NumberFormatException except) {
       System.out.println("ERROR: Integer expected instead of " + argv[i]);
       return 0 == printUsage();
     } catch (Exception e) {
       throw (IOException) new IOException().initCause(e);
     }
   }
   return true;
 }
  public void testFailAbort() throws IOException {
    JobConf job = new JobConf();
    job.set(FileSystem.FS_DEFAULT_NAME_KEY, "faildel:///");
    job.setClass("fs.faildel.impl", FakeFileSystem.class, FileSystem.class);
    setConfForFileOutputCommitter(job);
    JobContext jContext = new JobContextImpl(job, taskID.getJobID());
    TaskAttemptContext tContext = new TaskAttemptContextImpl(job, taskID);
    FileOutputCommitter committer = new FileOutputCommitter();
    FileOutputFormat.setWorkOutputPath(job, committer.getTempTaskOutputPath(tContext));

    // do setup
    committer.setupJob(jContext);
    committer.setupTask(tContext);
    String file = "test.txt";

    // A reporter that does nothing
    Reporter reporter = Reporter.NULL;
    // write output
    FileSystem localFs = new FakeFileSystem();
    TextOutputFormat theOutputFormat = new TextOutputFormat();
    RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(localFs, job, file, reporter);
    writeOutput(theRecordWriter, reporter);

    // do abort
    Throwable th = null;
    try {
      committer.abortTask(tContext);
    } catch (IOException ie) {
      th = ie;
    }
    assertNotNull(th);
    assertTrue(th instanceof IOException);
    assertTrue(th.getMessage().contains("fake delete failed"));
    File jobTmpDir = new File(new Path(outDir, FileOutputCommitter.TEMP_DIR_NAME).toString());
    File taskTmpDir = new File(jobTmpDir, "_" + taskID);
    File expectedFile = new File(taskTmpDir, file);
    assertTrue(expectedFile + " does not exists", expectedFile.exists());

    th = null;
    try {
      committer.abortJob(jContext, JobStatus.State.FAILED);
    } catch (IOException ie) {
      th = ie;
    }
    assertNotNull(th);
    assertTrue(th instanceof IOException);
    assertTrue(th.getMessage().contains("fake delete failed"));
    assertTrue("job temp dir does not exists", jobTmpDir.exists());
  }
Beispiel #23
0
  private static void runIOTest(Class<? extends Mapper> mapperClass, Path outputDir)
      throws IOException {
    JobConf job = new JobConf(fsConfig, DFSCIOTest.class);

    FileInputFormat.setInputPaths(job, CONTROL_DIR);
    job.setInputFormat(SequenceFileInputFormat.class);

    job.setMapperClass(mapperClass);
    job.setReducerClass(AccumulatingReducer.class);

    FileOutputFormat.setOutputPath(job, outputDir);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(1);
    JobClient.runJob(job);
  }
Beispiel #24
0
  public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    job.setJarByClass(HadoopNBFilter.class);
    job.setJobName("hadoopnbfilter");
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setNumReduceTasks(Integer.parseInt(args[2]));
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    boolean jobCompleted = job.waitForCompletion(true);
    return jobCompleted ? 0 : 1;
  }
Beispiel #25
0
  public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf(Add1.class);
    conf.setJobName("sumar1");

    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(Map.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    JobClient.runJob(conf);
  }
  // Main function
  public static void main(String[] args) throws Exception {
    // TODO Auto-generated method stub

    JobConf conf = new JobConf(ProcessUnits.class);
    conf.setJobName("max_eletricityunits");
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);
    conf.setMapperClass(EE_Mapper.class);
    conf.setCombinerClass(EE_Reducer.class);
    conf.setReducerClass(EE_Reducer.class);
    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    JobClient.runJob(conf);
  }
  public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf(WordCount.class);
    conf.setJobName("wordcount");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(Map.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    JobClient.runJob(conf);
  }
  public static void seekTest(FileSystem fs, boolean fastCheck) throws Exception {

    fs.delete(READ_DIR, true);

    JobConf job = new JobConf(conf, TestFileSystem.class);
    job.setBoolean("fs.test.fastCheck", fastCheck);

    FileInputFormat.setInputPaths(job, CONTROL_DIR);
    job.setInputFormat(SequenceFileInputFormat.class);

    job.setMapperClass(SeekMapper.class);
    job.setReducerClass(LongSumReducer.class);

    FileOutputFormat.setOutputPath(job, READ_DIR);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    job.setNumReduceTasks(1);
    JobClient.runJob(job);
  }
Beispiel #29
0
  public int run(String[] args) throws Exception {
    if (args.length < 4) {
      System.out.println("ERROR: Please Enter args : input output type(text|seq) splitChar(9=\t)");
      return JobClient.SUCCESS;
    }
    String input = args[0];
    String output = args[1];
    String type = args[2];
    String splitChar = args[3];

    JobConf config = new JobConf(getConf(), getClass());
    config.set("user.split", splitChar);

    config.setJobName("File Filter -" + System.currentTimeMillis());
    config.setNumReduceTasks(10);
    config.setReducerClass(IdentityReducer.class);
    config.setMapperClass(FileTestMapper.class);
    if ("text".equals(type)) {
      config.setInputFormat(TextInputFormat.class);
      TextInputFormat.addInputPath(config, new Path(input));
    } else {
      config.setInputFormat(SequenceFileInputFormat.class);
      SequenceFileInputFormat.addInputPath(config, new Path(input));
    }
    config.setMapOutputKeyClass(Text.class);
    config.setMapOutputValueClass(Text.class);

    config.setOutputKeyClass(Text.class);
    config.setOutputValueClass(Text.class);

    // if output path exists then return
    FileSystem fs = FileSystem.get(config);
    Path outputPath = new Path(output);
    FileOutputFormat.setOutputPath(config, outputPath);

    if (!fs.exists(outputPath)) {
      JobClient.runJob(config);
    } else {
      System.out.println("You has finished this job today ! " + outputPath);
    }

    return JobClient.SUCCESS;
  }
Beispiel #30
0
  public int run(String[] args) throws Exception {
    Path tempDir = new Path("/user/akhfa/temp");

    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(AuthorCounter.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, tempDir);
    System.exit(job.waitForCompletion(true) ? 0 : 1);

    return 0;
  }