private static <IN, OUT, K> PlanUnwrappingReduceGroupOperator<IN, OUT, K> translateSelectorFunctionDistinct( Keys.SelectorFunctionKeys<IN, ?> rawKeys, RichGroupReduceFunction<IN, OUT> function, TypeInformation<IN> inputType, TypeInformation<OUT> outputType, String name, Operator<IN> input) { @SuppressWarnings("unchecked") final Keys.SelectorFunctionKeys<IN, K> keys = (Keys.SelectorFunctionKeys<IN, K>) rawKeys; TypeInformation<Tuple2<K, IN>> typeInfoWithKey = new TupleTypeInfo<Tuple2<K, IN>>(keys.getKeyType(), inputType); KeyExtractingMapper<IN, K> extractor = new KeyExtractingMapper<IN, K>(keys.getKeyExtractor()); PlanUnwrappingReduceGroupOperator<IN, OUT, K> reducer = new PlanUnwrappingReduceGroupOperator<IN, OUT, K>( function, keys, name, outputType, typeInfoWithKey, true); MapOperatorBase<IN, Tuple2<K, IN>, MapFunction<IN, Tuple2<K, IN>>> mapper = new MapOperatorBase<IN, Tuple2<K, IN>, MapFunction<IN, Tuple2<K, IN>>>( extractor, new UnaryOperatorInformation<IN, Tuple2<K, IN>>(inputType, typeInfoWithKey), "Key Extractor"); reducer.setInput(mapper); mapper.setInput(input); // set the mapper's parallelism to the input parallelism to make sure it is chained mapper.setDegreeOfParallelism(input.getDegreeOfParallelism()); return reducer; }
@Override protected org.apache.flink.api.common.operators.base.GroupReduceOperatorBase<?, T, ?> translateToDataFlow(Operator<T> input) { final RichGroupReduceFunction<T, T> function = new DistinctFunction<T>(); String name = getName() != null ? getName() : "Distinct at " + distinctLocationName; if (keys instanceof Keys.ExpressionKeys) { int[] logicalKeyPositions = keys.computeLogicalKeyPositions(); UnaryOperatorInformation<T, T> operatorInfo = new UnaryOperatorInformation<T, T>(getInputType(), getResultType()); GroupReduceOperatorBase<T, T, GroupReduceFunction<T, T>> po = new GroupReduceOperatorBase<T, T, GroupReduceFunction<T, T>>( function, operatorInfo, logicalKeyPositions, name); po.setCombinable(true); po.setInput(input); po.setDegreeOfParallelism(getParallelism()); // make sure that distinct preserves the partitioning for the fields on which they operate if (getType().isTupleType()) { SingleInputSemanticProperties sProps = new SingleInputSemanticProperties(); for (int field : keys.computeLogicalKeyPositions()) { sProps.addForwardedField(field, field); } po.setSemanticProperties(sProps); } return po; } else if (keys instanceof Keys.SelectorFunctionKeys) { @SuppressWarnings("unchecked") Keys.SelectorFunctionKeys<T, ?> selectorKeys = (Keys.SelectorFunctionKeys<T, ?>) keys; PlanUnwrappingReduceGroupOperator<T, T, ?> po = translateSelectorFunctionDistinct( selectorKeys, function, getInputType(), getResultType(), name, input); po.setDegreeOfParallelism(this.getParallelism()); return po; } else { throw new UnsupportedOperationException("Unrecognized key type."); } }