示例#1
0
  public static void main(String[] args) throws Exception {
    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
    Pipeline pipeline = Pipeline.create(options);
    pipeline.getCoderRegistry().registerCoder(URI.class, StringDelegateCoder.of(URI.class));

    pipeline
        .apply(new ReadDocuments(listInputDocuments(options)))
        .apply(new ComputeTfIdf())
        .apply(new WriteTfIdf(options.getOutput()));

    pipeline.run();
  }
  /** Sets up and starts streaming pipeline. */
  public static void main(String[] args) {
    PubsubFileInjectorOptions options =
        PipelineOptionsFactory.fromArgs(args).withValidation().as(PubsubFileInjectorOptions.class);

    Pipeline pipeline = Pipeline.create(options);

    pipeline
        .apply(TextIO.Read.from(options.getInput()))
        .apply(
            IntraBundleParallelization.of(PubsubFileInjector.publish(options.getOutputTopic()))
                .withMaxParallelism(20));

    pipeline.run();
  }
  public static void main(String[] args) {
    PipelineOptionsFactory.register(KafkaStreamingWordCountOptions.class);
    KafkaStreamingWordCountOptions options =
        PipelineOptionsFactory.fromArgs(args).as(KafkaStreamingWordCountOptions.class);
    options.setJobName("KafkaExample - WindowSize: " + options.getWindowSize() + " seconds");
    options.setStreaming(true);
    options.setCheckpointingInterval(1000L);
    options.setNumberOfExecutionRetries(5);
    options.setExecutionRetryDelay(3000L);
    options.setRunner(FlinkPipelineRunner.class);

    System.out.println(
        options.getKafkaTopic()
            + " "
            + options.getZookeeper()
            + " "
            + options.getBroker()
            + " "
            + options.getGroup());
    Pipeline pipeline = Pipeline.create(options);

    Properties p = new Properties();
    p.setProperty("zookeeper.connect", options.getZookeeper());
    p.setProperty("bootstrap.servers", options.getBroker());
    p.setProperty("group.id", options.getGroup());

    // this is the Flink consumer that reads the input to
    // the program from a kafka topic.
    FlinkKafkaConsumer08<String> kafkaConsumer =
        new FlinkKafkaConsumer08<>(options.getKafkaTopic(), new SimpleStringSchema(), p);

    PCollection<String> words =
        pipeline
            .apply(Read.from(new UnboundedFlinkSource<>(kafkaConsumer)).named("StreamingWordCount"))
            .apply(ParDo.of(new ExtractWordsFn()))
            .apply(
                Window.<String>into(
                        FixedWindows.of(Duration.standardSeconds(options.getWindowSize())))
                    .triggering(AfterWatermark.pastEndOfWindow())
                    .withAllowedLateness(Duration.ZERO)
                    .discardingFiredPanes());

    PCollection<KV<String, Long>> wordCounts = words.apply(Count.<String>perElement());

    wordCounts.apply(ParDo.of(new FormatAsStringFn())).apply(TextIO.Write.to("./outputKafka.txt"));

    pipeline.run();
  }