@Override
public void run() {
    // we manually feed data into the Kafka sink
    FlinkKafkaProducer<String> producer = null;
    try {
        producer = new FlinkKafkaProducer<>(kafkaConnectionString, topic, new SimpleStringSchema());
        producer.setRuntimeContext(new MockRuntimeContext(1, 0));
        producer.open(new Configuration());

        final StringBuilder bld = new StringBuilder();
        final Random rnd = new Random();

        while (running) {
            bld.setLength(0);

            // build a random string of 1..100 characters drawn from 'a'..'t'
            int len = rnd.nextInt(100) + 1;
            for (int i = 0; i < len; i++) {
                bld.append((char) (rnd.nextInt(20) + 'a'));
            }

            String next = bld.toString();
            producer.invoke(next);
        }
    }
    catch (Throwable t) {
        // remember the failure so the spawning test can check for it
        this.error = t;
    }
    finally {
        if (producer != null) {
            try {
                producer.close();
            }
            catch (Throwable t) {
                // ignore
            }
        }
    }
}
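// A minimal usage sketch for this generator. The names below are assumptions,
// not part of this excerpt: the enclosing class (which owns the 'running',
// 'error', 'kafkaConnectionString', and 'topic' fields used above) is presumed
// to be a Thread subclass with a shutdown method that sets running = false and
// an accessor for the 'error' field.
//
//   StringGeneratorThread generator = new StringGeneratorThread(brokers, topic); // hypothetical
//   generator.start();       // run() begins pushing random strings into the topic
//   // ... run the actual test against the populated topic ...
//   generator.shutdown();    // hypothetical: sets running = false
//   generator.join();
//   if (generator.getError() != null) {                  // hypothetical accessor
//       throw new Exception("Generator failed", generator.getError());
//   }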
public static void generateLongStringTupleSequence(
        StreamExecutionEnvironment env,
        String brokerConnection,
        String topic,
        int numPartitions,
        final int from,
        final int to) throws Exception {

    TypeInformation<Tuple2<Integer, Integer>> resultType =
            TypeInfoParser.parse("Tuple2<Integer, Integer>");

    env.setParallelism(numPartitions);
    env.getConfig().disableSysoutLogging();
    env.setNumberOfExecutionRetries(0);

    DataStream<Tuple2<Integer, Integer>> stream = env.addSource(
            new RichParallelSourceFunction<Tuple2<Integer, Integer>>() {

                private volatile boolean running = true;

                @Override
                public void run(SourceContext<Tuple2<Integer, Integer>> ctx) throws Exception {
                    // each parallel subtask emits (subtaskIndex, from) .. (subtaskIndex, to)
                    int cnt = from;
                    int partition = getRuntimeContext().getIndexOfThisSubtask();

                    while (running && cnt <= to) {
                        ctx.collect(new Tuple2<Integer, Integer>(partition, cnt));
                        cnt++;
                    }
                }

                @Override
                public void cancel() {
                    running = false;
                }
            });

    stream.addSink(new FlinkKafkaProducer<>(
            topic,
            new TypeInformationSerializationSchema<>(resultType, env.getConfig()),
            FlinkKafkaProducer.getPropertiesFromBrokerList(brokerConnection),
            new Tuple2Partitioner(numPartitions)));

    env.execute("Data generator (Int, Int) stream to topic " + topic);
}
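// A minimal invocation sketch (broker address, topic name, and sizes are
// assumptions for illustration): with numPartitions = 3, each of the three
// source subtasks emits the tuples (subtaskIndex, 1) .. (subtaskIndex, 1000)
// into "test-topic", routed by the Tuple2Partitioner.
//
//   StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironment();
//   generateLongStringTupleSequence(env, "localhost:9092", "test-topic", 3, 1, 1000);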
public static void generateRandomizedIntegerSequence(
        StreamExecutionEnvironment env,
        String brokerConnection,
        String topic,
        final int numPartitions,
        final int numElements,
        final boolean randomizeOrder) throws Exception {

    env.setParallelism(numPartitions);
    env.getConfig().disableSysoutLogging();
    env.setNumberOfExecutionRetries(0);

    DataStream<Integer> stream = env.addSource(
            new RichParallelSourceFunction<Integer>() {

                private volatile boolean running = true;

                @Override
                public void run(SourceContext<Integer> ctx) {
                    // create the sequence: this subtask's values are
                    // subtaskIndex, subtaskIndex + parallelism, subtaskIndex + 2 * parallelism, ...
                    int[] elements = new int[numElements];
                    for (int i = 0, val = getRuntimeContext().getIndexOfThisSubtask();
                            i < numElements;
                            i++, val += getRuntimeContext().getNumberOfParallelSubtasks()) {
                        elements[i] = val;
                    }

                    // scramble the sequence by swapping each position with a random one
                    if (randomizeOrder) {
                        Random rnd = new Random();
                        for (int i = 0; i < elements.length; i++) {
                            int otherPos = rnd.nextInt(elements.length);

                            int tmp = elements[i];
                            elements[i] = elements[otherPos];
                            elements[otherPos] = tmp;
                        }
                    }

                    // emit the sequence
                    int pos = 0;
                    while (running && pos < elements.length) {
                        ctx.collect(elements[pos++]);
                    }
                }

                @Override
                public void cancel() {
                    running = false;
                }
            });

    stream
            .rebalance()
            .addSink(new FlinkKafkaProducer<>(
                    topic,
                    new TypeInformationSerializationSchema<>(
                            BasicTypeInfo.INT_TYPE_INFO, env.getConfig()),
                    FlinkKafkaProducer.getPropertiesFromBrokerList(brokerConnection),
                    new KafkaPartitioner() {
                        @Override
                        public int partition(Object key, int numPartitions) {
                            return ((Integer) key) % numPartitions;
                        }
                    }));

    env.execute("Scrambled int sequence generator");
}
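// A minimal invocation sketch (broker address, topic name, and sizes are
// assumptions for illustration): with numPartitions = 4 and numElements = 1000,
// each of the four source subtasks emits 1000 integers (its subtask index,
// stepped by the parallelism), so the values 0 .. 3999 are written overall,
// scrambled within each subtask's slice. The anonymous KafkaPartitioner above
// routes each value v to Kafka partition v % numPartitions.
//
//   StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironment();
//   generateRandomizedIntegerSequence(env, "localhost:9092", "test-topic", 4, 1000, true);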