Пример #1
0
 private PTable<Integer, String> readTable(Pipeline pipeline, String filename) {
   try {
     return pipeline
         .readTextFile(tmpDir.copyResourceFileName(filename))
         .parallelDo(
             "asTable",
             new LineSplitter(),
             Writables.tableOf(Writables.ints(), Writables.strings()));
   } catch (IOException e) {
     throw new RuntimeException(e);
   }
 }
  @Override
  public int run(final String[] args) throws Exception {
    createTable();
    final Configuration config = getConf();
    final Pipeline pipeline =
        new MRPipeline(CrunchStockDateInserter.class, "PipelineWithFilterFn", config);
    PCollection<String> lines = pipeline.readTextFile(Constants.HDFS_INPUT_PATH + "/2004_2014.csv");
    PCollection<Put> resultPut = CrunchUtils.returnDates(lines);
    System.out.println("********** size ************ : " + resultPut.getSize());

    pipeline.write(resultPut, new HBaseTarget(Constants.STOCK_DATES_TABLE));
    PipelineResult result = pipeline.done();
    return result.succeeded() ? 0 : 1;
  }
  public int run(String[] args) throws Exception {

    Pipeline pipeline = new MRPipeline(SecondarySortingExample.class);
    // Read input
    PCollection<String> lines = pipeline.readTextFile(args[0]);
    // Split each line and count them
    PTable<String, Long> wordcount = lines.parallelDo(new Tokenizer(), Writables.strings()).count();
    // Sort
    PCollection<Pair<String, Long>> sorted =
        Sort.sortPairs(wordcount, ColumnOrder.by(1, Sort.Order.DESCENDING));
    // Write the output
    sorted.write(To.textFile(args[0]));
    // Kick off execution
    PipelineResult result = pipeline.done();
    return result.succeeded() ? 0 : 1;
  }
Пример #4
0
  public static void main(String[] args) throws Exception {

    Pipeline pipeline = new MRPipeline(WordCount.class);
    PCollection<String> lines = pipeline.readTextFile(args[0]);

    PCollection<String> words =
        lines.parallelDo(
            "my splitter",
            new DoFn<String, String>() {
              public void process(String line, Emitter<String> emitter) {
                for (String word : line.split("\\s+")) {
                  emitter.emit(word);
                }
              }
            },
            Writables.strings());

    PTable<String, Long> counts = Aggregate.count(words);

    pipeline.writeTextFile(counts, args[1]);
    pipeline.run();
  }
Пример #5
0
 private PCollection<String> getPCollection(Pipeline pipeline) throws IOException {
   String shakesInputPath = tmpDir.copyResourceFileName("shakes.txt");
   PCollection<String> shakespeare = pipeline.readTextFile(shakesInputPath);
   return shakespeare;
 }