private DataflowPipeline buildPipeline(DataflowPipelineOptions options) {
  DataflowPipeline p = DataflowPipeline.create(options);

  p.apply(TextIO.Read.named("ReadMyFile").from("gs://bucket/object"))
      .apply(TextIO.Write.named("WriteMyFile").to("gs://bucket/object"));

  return p;
}
@Override
protected void testProgram() throws Exception {
  Pipeline p = FlinkTestPipeline.create();

  PCollection<TableRow> input1 = p.apply(Create.of(EVENT_ARRAY));
  PCollection<TableRow> input2 = p.apply(Create.of(CC_ARRAY));

  PCollection<String> output = JoinExamples.joinEvents(input1, input2);

  output.apply(TextIO.Write.to(resultPath));

  p.run();
}
public static void main(String[] args) {
  PipelineOptionsFactory.register(KafkaStreamingWordCountOptions.class);
  KafkaStreamingWordCountOptions options =
      PipelineOptionsFactory.fromArgs(args).as(KafkaStreamingWordCountOptions.class);
  options.setJobName("KafkaExample - WindowSize: " + options.getWindowSize() + " seconds");
  options.setStreaming(true);
  options.setCheckpointingInterval(1000L);
  options.setNumberOfExecutionRetries(5);
  options.setExecutionRetryDelay(3000L);
  options.setRunner(FlinkPipelineRunner.class);

  System.out.println(
      options.getKafkaTopic() + " " + options.getZookeeper() + " "
          + options.getBroker() + " " + options.getGroup());

  Pipeline pipeline = Pipeline.create(options);

  Properties p = new Properties();
  p.setProperty("zookeeper.connect", options.getZookeeper());
  p.setProperty("bootstrap.servers", options.getBroker());
  p.setProperty("group.id", options.getGroup());

  // This is the Flink consumer that reads the input to
  // the program from a Kafka topic.
  FlinkKafkaConsumer08<String> kafkaConsumer =
      new FlinkKafkaConsumer08<>(options.getKafkaTopic(), new SimpleStringSchema(), p);

  // Read from Kafka, split lines into words, and window the words into fixed windows
  // of windowSize seconds, emitting a single on-time pane at the watermark.
  PCollection<String> words = pipeline
      .apply(Read.from(new UnboundedFlinkSource<>(kafkaConsumer)).named("StreamingWordCount"))
      .apply(ParDo.of(new ExtractWordsFn()))
      .apply(Window.<String>into(
              FixedWindows.of(Duration.standardSeconds(options.getWindowSize())))
          .triggering(AfterWatermark.pastEndOfWindow())
          .withAllowedLateness(Duration.ZERO)
          .discardingFiredPanes());

  // Count occurrences of each word per window and write the formatted results.
  PCollection<KV<String, Long>> wordCounts = words.apply(Count.<String>perElement());

  wordCounts
      .apply(ParDo.of(new FormatAsStringFn()))
      .apply(TextIO.Write.to("./outputKafka.txt"));

  pipeline.run();
}
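// The pipeline above references ExtractWordsFn and FormatAsStringFn but does not define
// them in this snippet. The following is a minimal sketch of what such DoFns might look
// like, assuming the classic WordCount behavior of splitting lines on non-word characters
// and rendering each count as "word: count"; the example's actual implementations may differ.
static class ExtractWordsFn extends DoFn<String, String> {
  private static final long serialVersionUID = 0;

  @Override
  public void processElement(ProcessContext c) {
    // Split the line into words and emit each non-empty word as its own element.
    for (String word : c.element().split("[^a-zA-Z']+")) {
      if (!word.isEmpty()) {
        c.output(word);
      }
    }
  }
}

static class FormatAsStringFn extends DoFn<KV<String, Long>, String> {
  private static final long serialVersionUID = 0;

  @Override
  public void processElement(ProcessContext c) {
    // Render each per-window count as a single text line.
    c.output(c.element().getKey() + ": " + c.element().getValue());
  }
}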
/** Returns a Step for a DoFn by creating and translating a pipeline. */
private static Step createPredefinedStep() throws Exception {
  DataflowPipelineOptions options = buildPipelineOptions();
  DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);
  DataflowPipeline pipeline = DataflowPipeline.create(options);
  String stepName = "DoFn1";
  pipeline
      .apply(TextIO.Read.named("ReadMyFile").from("gs://bucket/in"))
      .apply(ParDo.of(new NoOpFn()).named(stepName))
      .apply(TextIO.Write.named("WriteMyFile").to("gs://bucket/out"));
  Job job = translator.translate(pipeline, Collections.<DataflowPackage>emptyList()).getJob();

  assertEquals(3, job.getSteps().size());
  Step step = job.getSteps().get(1);
  assertEquals(stepName, getString(step.getProperties(), PropertyNames.USER_NAME));
  return step;
}
@Test
public void testPredefinedAddStep() throws Exception {
  DataflowPipelineOptions options = buildPipelineOptions();

  DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);
  DataflowPipelineTranslator.registerTransformTranslator(
      EmbeddedTransform.class, new EmbeddedTranslator());

  // Create a predefined step using another pipeline
  Step predefinedStep = createPredefinedStep();

  // Create a pipeline that the predefined step will be embedded into
  DataflowPipeline pipeline = DataflowPipeline.create(options);
  pipeline
      .apply(TextIO.Read.named("ReadMyFile").from("gs://bucket/in"))
      .apply(ParDo.of(new NoOpFn()))
      .apply(new EmbeddedTransform(predefinedStep.clone()))
      .apply(TextIO.Write.named("WriteMyFile").to("gs://bucket/out"));
  Job job = translator.translate(pipeline, Collections.<DataflowPackage>emptyList()).getJob();

  List<Step> steps = job.getSteps();
  assertEquals(4, steps.size());

  // The input to the embedded step should match the output of the step before
  Map<String, Object> step1Out = getOutputPortReference(steps.get(1));
  Map<String, Object> step2In =
      getDictionary(steps.get(2).getProperties(), PropertyNames.PARALLEL_INPUT);
  assertEquals(step1Out, step2In);

  // The output from the embedded step should match the input of the step after
  Map<String, Object> step2Out = getOutputPortReference(steps.get(2));
  Map<String, Object> step3In =
      getDictionary(steps.get(3).getProperties(), PropertyNames.PARALLEL_INPUT);
  assertEquals(step2Out, step3In);

  // The step should not have been modified other than remapping the input
  Step predefinedStepClone = predefinedStep.clone();
  Step embeddedStepClone = steps.get(2).clone();
  predefinedStepClone.getProperties().remove(PropertyNames.PARALLEL_INPUT);
  embeddedStepClone.getProperties().remove(PropertyNames.PARALLEL_INPUT);
  assertEquals(predefinedStepClone, embeddedStepClone);
}
@Override
public PDone apply(PCollection<KV<String, KV<URI, Double>>> wordToUriAndTfIdf) {
  return wordToUriAndTfIdf
      .apply(ParDo.named("Format").of(new DoFn<KV<String, KV<URI, Double>>, String>() {
        private static final long serialVersionUID = 0;

        @Override
        public void processElement(ProcessContext c) {
          c.output(String.format("%s,\t%s,\t%f",
              c.element().getKey(),
              c.element().getValue().getKey(),
              c.element().getValue().getValue()));
        }
      }))
      .apply(TextIO.Write.to(output).withSuffix(".csv"));
}
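// For context, this apply() is the body of a PTransform from the TF-IDF example that
// formats (word, (document URI, TF-IDF score)) pairs as CSV lines and writes them out.
// A hypothetical sketch of how such a transform might be wired into a pipeline, assuming
// an upstream transform producing PCollection<KV<String, KV<URI, Double>>> (here called
// ComputeTfIdf) and an --output pipeline option:
//
//   pipeline
//       .apply(new ReadDocuments(listInputDocuments(options)))
//       .apply(new ComputeTfIdf())
//       .apply(new WriteTfIdf(options.getOutput()));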