Example #1
  public static void main(String[] args) {
    WordCountOptions options =
        PipelineOptionsFactory.fromArgs(args).withValidation().as(WordCountOptions.class);
    Pipeline p = Pipeline.create(options);

    // Concepts #2 and #3: Our pipeline applies the composite CountWords transform, and passes the
    // static FormatAsTextFn() to the MapElements transform.
    p.apply("ReadLines", TextIO.Read.from(options.getInputFile()))
        .apply(new CountWords())
        .apply(MapElements.via(new FormatAsTextFn()))
        .apply("WriteCounts", TextIO.Write.to(options.getOutput()));

    p.run().waitUntilFinish();
  }
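
The pipeline above applies two transforms this excerpt does not define. A minimal sketch in the spirit of Beam's canonical WordCount example (the bodies below are assumed, not taken from this source; older SDK versions name the composite override apply rather than expand) could look like this:

  // Assumes org.apache.beam.sdk.transforms.{Count, DoFn, ParDo, PTransform, SimpleFunction}
  // and org.apache.beam.sdk.values.{KV, PCollection} are imported.

  /** Converts a word/count pair into a printable line of text. */
  public static class FormatAsTextFn extends SimpleFunction<KV<String, Long>, String> {
    @Override
    public String apply(KV<String, Long> input) {
      return input.getKey() + ": " + input.getValue();
    }
  }

  /** A composite transform: lines of text in, per-word counts out. */
  public static class CountWords
      extends PTransform<PCollection<String>, PCollection<KV<String, Long>>> {
    @Override
    public PCollection<KV<String, Long>> expand(PCollection<String> lines) {
      // Split each line on non-letter characters, dropping empty tokens.
      PCollection<String> words =
          lines.apply(
              ParDo.of(
                  new DoFn<String, String>() {
                    @ProcessElement
                    public void processElement(ProcessContext c) {
                      for (String word : c.element().split("[^\\p{L}]+")) {
                        if (!word.isEmpty()) {
                          c.output(word);
                        }
                      }
                    }
                  }));
      // Count the occurrences of each distinct word.
      return words.apply(Count.<String>perElement());
    }
  }

Packaging the splitting and counting steps as one composite PTransform keeps the main pipeline to a single readable apply chain.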
Example #2
  @Test
  public void testE2EBigtableWrite() throws Exception {
    final String tableName = bigtableOptions.getInstanceName().toTableNameStr(tableId);
    final String instanceName = bigtableOptions.getInstanceName().toString();
    final int numRows = 1000;
    final List<KV<ByteString, ByteString>> testData = generateTableData(numRows);

    createEmptyTable(instanceName, tableId);

    Pipeline p = Pipeline.create(options);
    // Generate indices [0, numRows); each index selects one test row to write below.
    p.apply(CountingInput.upTo(numRows))
        .apply(
            ParDo.of(
                new DoFn<Long, KV<ByteString, Iterable<Mutation>>>() {
                  @ProcessElement
                  public void processElement(ProcessContext c) {
                    int index = c.element().intValue();

                    Iterable<Mutation> mutations =
                        ImmutableList.of(
                            Mutation.newBuilder()
                                .setSetCell(
                                    Mutation.SetCell.newBuilder()
                                        .setValue(testData.get(index).getValue())
                                        .setFamilyName(COLUMN_FAMILY_NAME))
                                .build());
                    c.output(KV.of(testData.get(index).getKey(), mutations));
                  }
                }))
        .apply(BigtableIO.write().withBigtableOptions(bigtableOptions).withTableId(tableId));
    // Block until the write pipeline finishes before verifying the table contents.
    p.run().waitUntilFinish();

    // Test number of column families and column family name equality
    Table table = getTable(tableName);
    assertThat(table.getColumnFamilies().keySet(), Matchers.hasSize(1));
    assertThat(table.getColumnFamilies(), Matchers.hasKey(COLUMN_FAMILY_NAME));

    // Test table data equality
    List<KV<ByteString, ByteString>> tableData = getTableData(tableName);
    assertThat(tableData, Matchers.containsInAnyOrder(testData.toArray()));
  }
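
This test leans on helpers the excerpt omits: generateTableData, createEmptyTable, getTable, and getTableData. As one illustration, a hypothetical generateTableData producing deterministic key/value pairs (the zero-padded naming scheme below is an assumption, not taken from this source) might be:

  // Assumes com.google.protobuf.ByteString, java.util.{ArrayList, List},
  // and org.apache.beam.sdk.values.KV are imported.
  private static List<KV<ByteString, ByteString>> generateTableData(int numRows) {
    List<KV<ByteString, ByteString>> testData = new ArrayList<>(numRows);
    for (int i = 0; i < numRows; ++i) {
      testData.add(
          KV.of(
              ByteString.copyFromUtf8(String.format("key%09d", i)),
              ByteString.copyFromUtf8(String.format("value%09d", i))));
    }
    return testData;
  }

Deterministic data lets the final containsInAnyOrder assertion compare the table contents against the generated list without any extra bookkeeping.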
Example #3
  @Test
  public void testSequenceFile() throws Exception {
    populateFile();

    Pipeline p = Pipeline.create(pipelineOptions.getOptions());
    // Class literals are raw, so cast through Class<?> to reach the
    // parameterized FileInputFormat type that HadoopIO.Read.from expects.
    @SuppressWarnings("unchecked")
    Class<? extends FileInputFormat<IntWritable, Text>> inputFormatClass =
        (Class<? extends FileInputFormat<IntWritable, Text>>)
            (Class<?>) SequenceFileInputFormat.class;
    HadoopIO.Read.Bound<IntWritable, Text> read =
        HadoopIO.Read.from(
            inputFile.getAbsolutePath(), inputFormatClass, IntWritable.class, Text.class);
    PCollection<KV<IntWritable, Text>> input =
        p.apply(read)
            .setCoder(
                KvCoder.of(WritableCoder.of(IntWritable.class), WritableCoder.of(Text.class)));
    @SuppressWarnings("unchecked")
    Class<? extends FileOutputFormat<IntWritable, Text>> outputFormatClass =
        (Class<? extends FileOutputFormat<IntWritable, Text>>)
            (Class<?>) TemplatedSequenceFileOutputFormat.class;
    @SuppressWarnings("unchecked")
    HadoopIO.Write.Bound<IntWritable, Text> write =
        HadoopIO.Write.to(
            outputFile.getAbsolutePath(), outputFormatClass, IntWritable.class, Text.class);
    input.apply(write.withoutSharding());
    // Block until the write completes before reading the output file back.
    p.run().waitUntilFinish();

    IntWritable key = new IntWritable();
    Text value = new Text();
    try (Reader reader =
        new Reader(new Configuration(), Reader.file(new Path(outputFile.toURI())))) {
      int i = 0;
      while (reader.next(key, value)) {
        assertEquals(i, key.get());
        assertEquals("value-" + i, value.toString());
        i++;
      }
    }
  }
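
populateFile() is also omitted, but the assertions pin down its contract: record i must pair the key i with the text "value-" + i. A hypothetical implementation using Hadoop's SequenceFile.Writer (the record count of 5 is an assumption) could be:

  // Assumes org.apache.hadoop.io.{IntWritable, SequenceFile, Text} are imported;
  // Configuration and Path are already used by the test above.
  private void populateFile() throws IOException {
    try (SequenceFile.Writer writer =
        SequenceFile.createWriter(
            new Configuration(),
            SequenceFile.Writer.file(new Path(inputFile.toURI())),
            SequenceFile.Writer.keyClass(IntWritable.class),
            SequenceFile.Writer.valueClass(Text.class))) {
      // Record count is an assumption; keys/values must match the assertions above.
      for (int i = 0; i < 5; i++) {
        writer.append(new IntWritable(i), new Text("value-" + i));
      }
    }
  }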