예제 #1
0
  /**
   * Attaches the unioned collection to several output targets, runs the pipeline, and passes
   * each written path to {@code checkFileContents}. Avro targets are used when the type family
   * is the Avro one; text targets are used otherwise.
   */
  @Test
  public void unionWriteShouldNotThrowNPE() throws IOException {
    final String firstPath = tmpDir.getFileName("output1");
    final String secondPath = tmpDir.getFileName("output2");
    final String thirdPath = tmpDir.getFileName("output3");

    final boolean avroFamily = typeFamily == AvroTypeFamily.getInstance();
    if (!avroFamily) {
      // Text case: exercise all three ways of registering a text output.
      union.write(To.textFile(firstPath));
      pipeline.write(union, To.textFile(secondPath));
      pipeline.writeTextFile(union, thirdPath);
      pipeline.run();

      for (String path : new String[] {firstPath, secondPath, thirdPath}) {
        checkFileContents(path);
      }
      return;
    }

    // Avro case: only the two Target-based write calls are used, so thirdPath is not written.
    union.write(To.avroFile(firstPath));
    pipeline.write(union, To.avroFile(secondPath));
    pipeline.run();

    checkFileContents(firstPath);
    checkFileContents(secondPath);
  }
예제 #2
0
 /**
  * Drives an {@code MRPipeline} through three consecutive runs: reading the "shakes.txt"
  * resource, materializing an accept-all filter over it, and finally writing that filtered
  * collection to a text target and materializing the written collection.
  *
  * <p>There are no explicit assertions; the test passes as long as none of the calls throws.
  */
 @Test
 public void materializedColShouldBeWritten() throws Exception {
   // Run 1: read the input resource.
   File input = tmpDir.copyResourceFile("shakes.txt");
   Pipeline mrPipeline = new MRPipeline(MRPipelineIT.class, tmpDir.getDefaultConfiguration());
   PCollection<String> lines = mrPipeline.readTextFile(input.getAbsolutePath());
   mrPipeline.run();

   // Run 2: materialize a filter that accepts every element.
   PCollection<String> accepted = lines.filter("Filtering data", FilterFns.<String>ACCEPT_ALL());
   accepted.materialize();
   mrPipeline.run();

   // Run 3: write the already-materialized collection to a text target and materialize it again.
   File output = tmpDir.getFile("output.txt");
   PCollection<String> written = accepted.write(To.textFile(output.getAbsolutePath()));
   written.materialize();
   mrPipeline.run();
 }
  /**
   * Builds and executes a pipeline that reads text lines, applies {@code Tokenizer} to them,
   * counts the emitted strings, sorts the resulting pairs, and writes them out as text.
   *
   * <p>The output is written to {@code args[1]}. Earlier code wrote to {@code args[0]}, i.e. the
   * same path the input is read from.
   *
   * @param args {@code args[0]} is the input text path; {@code args[1]} is the output path
   * @return 0 if the pipeline reports success; 1 if it fails or fewer than two arguments are given
   * @throws Exception if building or executing the pipeline throws
   */
  public int run(String[] args) throws Exception {
    // Fail fast with a usage hint instead of an ArrayIndexOutOfBoundsException mid-setup.
    if (args == null || args.length < 2) {
      System.err.println("Usage: <input path> <output path>");
      return 1;
    }
    final String inputPath = args[0];
    final String outputPath = args[1];

    Pipeline pipeline = new MRPipeline(SecondarySortingExample.class);
    // Read input
    PCollection<String> lines = pipeline.readTextFile(inputPath);
    // Apply the tokenizer to each line and count the emitted strings
    PTable<String, Long> wordcount = lines.parallelDo(new Tokenizer(), Writables.strings()).count();
    // Sort on column 1, descending.
    // NOTE(review): Crunch's ColumnOrder numbering appears to be 1-based, which would make this
    // order on the word rather than the count - confirm which ordering is intended.
    PCollection<Pair<String, Long>> sorted =
        Sort.sortPairs(wordcount, ColumnOrder.by(1, Sort.Order.DESCENDING));
    // Write the output to its own path, not back onto the input
    sorted.write(To.textFile(outputPath));
    // Kick off execution
    PipelineResult result = pipeline.done();
    return result.succeeded() ? 0 : 1;
  }