@Override @SuppressWarnings("unchecked") public void translateNode( Flatten.FlattenPCollectionList<T> transform, FlinkBatchTranslationContext context) { List<TaggedPValue> allInputs = context.getInputs(transform); DataSet<WindowedValue<T>> result = null; if (allInputs.isEmpty()) { // create an empty dummy source to satisfy downstream operations // we cannot create an empty source in Flink, therefore we have to // add the flatMap that simply never forwards the single element DataSource<String> dummySource = context.getExecutionEnvironment().fromElements("dummy"); result = dummySource .flatMap( new FlatMapFunction<String, WindowedValue<T>>() { @Override public void flatMap(String s, Collector<WindowedValue<T>> collector) throws Exception { // never return anything } }) .returns( new CoderTypeInformation<>( WindowedValue.getFullCoder( (Coder<T>) VoidCoder.of(), GlobalWindow.Coder.INSTANCE))); } else { for (TaggedPValue taggedPc : allInputs) { checkArgument( taggedPc.getValue() instanceof PCollection, "Got non-PCollection input to flatten: %s of type %s", taggedPc.getValue(), taggedPc.getValue().getClass().getSimpleName()); PCollection<T> collection = (PCollection<T>) taggedPc.getValue(); DataSet<WindowedValue<T>> current = context.getInputDataSet(collection); if (result == null) { result = current; } else { result = result.union(current); } } } // insert a dummy filter, there seems to be a bug in Flink // that produces duplicate elements after the union in some cases // if we don't result = result .filter( new FilterFunction<WindowedValue<T>>() { @Override public boolean filter(WindowedValue<T> tWindowedValue) throws Exception { return true; } }) .name("UnionFixFilter"); context.setOutputDataSet(context.getOutput(transform), result); }
public static void main(String args[]) { String path = "src/main/resources/gmm_data2.txt"; ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSource<String> data = env.readTextFile(path); DataSet<DenseVector> parsedData = data.map(new ReadVectors()); // MultivariateNormalDistribution mnd= new MultivariateNormalDistribution(null, null); DataSet<Tuple1<Double>> val = parsedData.map(new TestVectors()); // DataSet<Tuple1<Double>> val2 = parsedData.map(new TestDot()); try { val.print(); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } // DataSet<DenseMatrix> matrix = data.map(new ReadMatrix()); }