@Override
  public int run(final String[] args) throws Exception {
    createTable();
    final Configuration config = getConf();
    final Pipeline pipeline =
        new MRPipeline(CrunchStockDateInserter.class, "PipelineWithFilterFn", config);
    PCollection<String> lines = pipeline.readTextFile(Constants.HDFS_INPUT_PATH + "/2004_2014.csv");
    PCollection<Put> resultPut = CrunchUtils.returnDates(lines);
    System.out.println("********** size ************ : " + resultPut.getSize());

    pipeline.write(resultPut, new HBaseTarget(Constants.STOCK_DATES_TABLE));
    PipelineResult result = pipeline.done();
    return result.succeeded() ? 0 : 1;
  }
  public int run(String[] args) throws Exception {

    Pipeline pipeline = new MRPipeline(SecondarySortingExample.class);
    // Read input
    PCollection<String> lines = pipeline.readTextFile(args[0]);
    // Split each line and count them
    PTable<String, Long> wordcount = lines.parallelDo(new Tokenizer(), Writables.strings()).count();
    // Sort
    PCollection<Pair<String, Long>> sorted =
        Sort.sortPairs(wordcount, ColumnOrder.by(1, Sort.Order.DESCENDING));
    // Write the output
    sorted.write(To.textFile(args[0]));
    // Kick off execution
    PipelineResult result = pipeline.done();
    return result.succeeded() ? 0 : 1;
  }
Пример #3
0
  @Override
  public int execute(Configuration conf) throws Exception {
    Pipeline p = pipelineParams.create(SampleCommand.class, conf);
    PCollection<Record> elements = inputParams.getRecords(p);

    if (sampleSize > 0 && samplingProbability > 0.0) {
      throw new IllegalArgumentException("--size and --prob are mutually exclusive options.");
    }
    PCollection<Record> sample;
    if (sampleSize > 0) {
      sample = ReservoirSampling.sample(elements, sampleSize);
    } else if (samplingProbability > 0.0 && samplingProbability < 1.0) {
      sample = Sample.sample(elements, samplingProbability);
    } else {
      throw new IllegalArgumentException(
          String.format(
              "Invalid input args: sample size = %d, sample prob = %.4f",
              sampleSize, samplingProbability));
    }
    outputParams.write(sample, sampleFile);

    PipelineResult pr = p.done();
    return pr.succeeded() ? 0 : 1;
  }