@Override protected void testProgram() throws Exception { Pipeline p = FlinkTestPipeline.create(); PCollection<TableRow> input1 = p.apply(Create.of(EVENT_ARRAY)); PCollection<TableRow> input2 = p.apply(Create.of(CC_ARRAY)); PCollection<String> output = JoinExamples.joinEvents(input1, input2); output.apply(TextIO.Write.to(resultPath)); p.run(); }
public static void main(String[] args) { PipelineOptionsFactory.register(KafkaStreamingWordCountOptions.class); KafkaStreamingWordCountOptions options = PipelineOptionsFactory.fromArgs(args).as(KafkaStreamingWordCountOptions.class); options.setJobName("KafkaExample - WindowSize: " + options.getWindowSize() + " seconds"); options.setStreaming(true); options.setCheckpointingInterval(1000L); options.setNumberOfExecutionRetries(5); options.setExecutionRetryDelay(3000L); options.setRunner(FlinkPipelineRunner.class); System.out.println( options.getKafkaTopic() + " " + options.getZookeeper() + " " + options.getBroker() + " " + options.getGroup()); Pipeline pipeline = Pipeline.create(options); Properties p = new Properties(); p.setProperty("zookeeper.connect", options.getZookeeper()); p.setProperty("bootstrap.servers", options.getBroker()); p.setProperty("group.id", options.getGroup()); // this is the Flink consumer that reads the input to // the program from a kafka topic. FlinkKafkaConsumer08<String> kafkaConsumer = new FlinkKafkaConsumer08<>(options.getKafkaTopic(), new SimpleStringSchema(), p); PCollection<String> words = pipeline .apply(Read.from(new UnboundedFlinkSource<>(kafkaConsumer)).named("StreamingWordCount")) .apply(ParDo.of(new ExtractWordsFn())) .apply( Window.<String>into( FixedWindows.of(Duration.standardSeconds(options.getWindowSize()))) .triggering(AfterWatermark.pastEndOfWindow()) .withAllowedLateness(Duration.ZERO) .discardingFiredPanes()); PCollection<KV<String, Long>> wordCounts = words.apply(Count.<String>perElement()); wordCounts.apply(ParDo.of(new FormatAsStringFn())).apply(TextIO.Write.to("./outputKafka.txt")); pipeline.run(); }
@Override public PDone apply(PCollection<KV<String, KV<URI, Double>>> wordToUriAndTfIdf) { return wordToUriAndTfIdf .apply( ParDo.named("Format") .of( new DoFn<KV<String, KV<URI, Double>>, String>() { private static final long serialVersionUID = 0; @Override public void processElement(ProcessContext c) { c.output( String.format( "%s,\t%s,\t%f", c.element().getKey(), c.element().getValue().getKey(), c.element().getValue().getValue())); } })) .apply(TextIO.Write.to(output).withSuffix(".csv")); }