/**
 * Copies the named resource file via {@code tmpDir}, reads it as text through the given
 * pipeline, and converts each line into a table entry using {@link LineSplitter}.
 *
 * @param pipeline the pipeline used to read the text file
 * @param filename the resource file name handed to {@code tmpDir.copyResourceFileName}
 * @return a table keyed by integer with string values, as produced by {@code LineSplitter}
 * @throws RuntimeException wrapping any {@link IOException} raised while preparing the input
 */
private PTable<Integer, String> readTable(Pipeline pipeline, String filename) {
  try {
    String copiedPath = tmpDir.copyResourceFileName(filename);
    PCollection<String> textLines = pipeline.readTextFile(copiedPath);
    return textLines.parallelDo(
        "asTable",
        new LineSplitter(),
        Writables.tableOf(Writables.ints(), Writables.strings()));
  } catch (IOException ioFailure) {
    // Callers of this helper do not declare checked exceptions, so rethrow unchecked.
    throw new RuntimeException(ioFailure);
  }
}
/**
 * Builds and executes the pipeline that loads stock dates into HBase.
 *
 * <p>Steps, in order: call {@code createTable()}, read
 * {@code Constants.HDFS_INPUT_PATH + "/2004_2014.csv"} as text, turn the lines into
 * {@code Put}s with {@code CrunchUtils.returnDates}, print the collection's
 * {@code getSize()} value, and write the result to an {@code HBaseTarget} for
 * {@code Constants.STOCK_DATES_TABLE}.
 *
 * @param args command-line arguments; not read by this method
 * @return 0 when the pipeline result reports success, 1 otherwise
 * @throws Exception propagated from table creation or pipeline setup
 */
@Override
public int run(final String[] args) throws Exception {
  createTable();

  final Configuration conf = getConf();
  final Pipeline stockPipeline =
      new MRPipeline(CrunchStockDateInserter.class, "PipelineWithFilterFn", conf);

  final String inputFile = Constants.HDFS_INPUT_PATH + "/2004_2014.csv";
  PCollection<String> csvLines = stockPipeline.readTextFile(inputFile);
  PCollection<Put> datePuts = CrunchUtils.returnDates(csvLines);

  System.out.println("********** size ************ : " + datePuts.getSize());

  stockPipeline.write(datePuts, new HBaseTarget(Constants.STOCK_DATES_TABLE));

  PipelineResult outcome = stockPipeline.done();
  if (outcome.succeeded()) {
    return 0;
  }
  return 1;
}
public int run(String[] args) throws Exception { Pipeline pipeline = new MRPipeline(SecondarySortingExample.class); // Read input PCollection<String> lines = pipeline.readTextFile(args[0]); // Split each line and count them PTable<String, Long> wordcount = lines.parallelDo(new Tokenizer(), Writables.strings()).count(); // Sort PCollection<Pair<String, Long>> sorted = Sort.sortPairs(wordcount, ColumnOrder.by(1, Sort.Order.DESCENDING)); // Write the output sorted.write(To.textFile(args[0])); // Kick off execution PipelineResult result = pipeline.done(); return result.succeeded() ? 0 : 1; }
/**
 * Word-count entry point: reads text from {@code args[0]}, splits every line on runs of
 * whitespace, counts each resulting word, and writes the counts as text to {@code args[1]}.
 *
 * <p>The process exits with status 1 when the pipeline result does not report success.
 *
 * @param args {@code args[0]} is the input path; {@code args[1]} is the output path
 * @throws Exception propagated from pipeline construction or execution
 */
public static void main(String[] args) throws Exception {
  Pipeline pipeline = new MRPipeline(WordCount.class);
  PCollection<String> lines = pipeline.readTextFile(args[0]);

  PCollection<String> words = lines.parallelDo(
      "my splitter",
      new DoFn<String, String>() {
        @Override
        public void process(String line, Emitter<String> emitter) {
          for (String word : line.split("\\s+")) {
            emitter.emit(word);
          }
        }
      },
      Writables.strings());

  PTable<String, Long> counts = Aggregate.count(words);
  pipeline.writeTextFile(counts, args[1]);

  // Finish with done() rather than run(): per the Crunch Pipeline contract, done() runs the
  // remaining work and then cleans up intermediate data. Surface a failed run through the
  // exit status instead of silently discarding the result.
  if (!pipeline.done().succeeded()) {
    System.exit(1);
  }
}
/**
 * Copies the {@code shakes.txt} resource via {@code tmpDir} and reads it as a collection of
 * text lines through the given pipeline.
 *
 * @param pipeline the pipeline used to read the text file
 * @return the lines of the copied {@code shakes.txt} file
 * @throws IOException if copying the resource file fails
 */
private PCollection<String> getPCollection(Pipeline pipeline) throws IOException {
  return pipeline.readTextFile(tmpDir.copyResourceFileName("shakes.txt"));
}