/**
 * Writes the {@code union} fixture through each available write entry point, runs the
 * pipeline once, and then verifies every produced output with {@code checkFileContents}.
 *
 * <p>When {@code typeFamily} is the Avro type family, two Avro outputs are written
 * ({@code union.write} and {@code pipeline.write}). Otherwise three text outputs are
 * written, the third via {@code pipeline.writeTextFile}.
 *
 * @throws IOException declared by the test signature; propagated from the helpers it calls
 */
@Test
public void unionWriteShouldNotThrowNPE() throws IOException {
  // All three names are requested up front, even though the Avro branch only uses two.
  final String firstOut = tmpDir.getFileName("output1");
  final String secondOut = tmpDir.getFileName("output2");
  final String thirdOut = tmpDir.getFileName("output3");

  final boolean avroFamily = typeFamily == AvroTypeFamily.getInstance();

  if (!avroFamily) {
    // Text path: exercise all three ways of attaching a text output.
    union.write(To.textFile(firstOut));
    pipeline.write(union, To.textFile(secondOut));
    pipeline.writeTextFile(union, thirdOut);

    pipeline.run();

    checkFileContents(firstOut);
    checkFileContents(secondOut);
    checkFileContents(thirdOut);
    return;
  }

  // Avro path: only the two Target-based write entry points are exercised.
  union.write(To.avroFile(firstOut));
  pipeline.write(union, To.avroFile(secondOut));

  pipeline.run();

  checkFileContents(firstOut);
  checkFileContents(secondOut);
}
/**
 * Runs a pipeline in three stages: once right after reading {@code shakes.txt}, once after
 * materializing an accept-all filter of it, and once after writing that filtered collection
 * to a text target and materializing the collection returned by {@code write}.
 *
 * <p>The test contains no explicit assertions; it passes as long as none of the calls throw.
 *
 * @throws Exception if any pipeline or file operation fails
 */
@Test
public void materializedColShouldBeWritten() throws Exception {
  File input = tmpDir.copyResourceFile("shakes.txt");
  Pipeline mrPipeline = new MRPipeline(MRPipelineIT.class, tmpDir.getDefaultConfiguration());

  // Stage 1: read the input and run with nothing else planned.
  PCollection<String> lines = mrPipeline.readTextFile(input.getAbsolutePath());
  mrPipeline.run();

  // Stage 2: pass-through filter, materialized, then run.
  PCollection<String> accepted = lines.filter("Filtering data", FilterFns.<String>ACCEPT_ALL());
  accepted.materialize();
  mrPipeline.run();

  // Stage 3: write the already-materialized collection and materialize the write result.
  Target textTarget = To.textFile(tmpDir.getFile("output.txt").getAbsolutePath());
  accepted.write(textTarget).materialize();
  mrPipeline.run();
}
public int run(String[] args) throws Exception { Pipeline pipeline = new MRPipeline(SecondarySortingExample.class); // Read input PCollection<String> lines = pipeline.readTextFile(args[0]); // Split each line and count them PTable<String, Long> wordcount = lines.parallelDo(new Tokenizer(), Writables.strings()).count(); // Sort PCollection<Pair<String, Long>> sorted = Sort.sortPairs(wordcount, ColumnOrder.by(1, Sort.Order.DESCENDING)); // Write the output sorted.write(To.textFile(args[0])); // Kick off execution PipelineResult result = pipeline.done(); return result.succeeded() ? 0 : 1; }