@Override protected org.apache.flink.api.common.operators.base.GroupReduceOperatorBase<?, T, ?> translateToDataFlow(Operator<T> input) { final RichGroupReduceFunction<T, T> function = new DistinctFunction<T>(); String name = getName() != null ? getName() : "Distinct at " + distinctLocationName; if (keys instanceof Keys.ExpressionKeys) { int[] logicalKeyPositions = keys.computeLogicalKeyPositions(); UnaryOperatorInformation<T, T> operatorInfo = new UnaryOperatorInformation<T, T>(getInputType(), getResultType()); GroupReduceOperatorBase<T, T, GroupReduceFunction<T, T>> po = new GroupReduceOperatorBase<T, T, GroupReduceFunction<T, T>>( function, operatorInfo, logicalKeyPositions, name); po.setCombinable(true); po.setInput(input); po.setDegreeOfParallelism(getParallelism()); // make sure that distinct preserves the partitioning for the fields on which they operate if (getType().isTupleType()) { SingleInputSemanticProperties sProps = new SingleInputSemanticProperties(); for (int field : keys.computeLogicalKeyPositions()) { sProps.addForwardedField(field, field); } po.setSemanticProperties(sProps); } return po; } else if (keys instanceof Keys.SelectorFunctionKeys) { @SuppressWarnings("unchecked") Keys.SelectorFunctionKeys<T, ?> selectorKeys = (Keys.SelectorFunctionKeys<T, ?>) keys; PlanUnwrappingReduceGroupOperator<T, T, ?> po = translateSelectorFunctionDistinct( selectorKeys, function, getInputType(), getResultType(), name, input); po.setDegreeOfParallelism(this.getParallelism()); return po; } else { throw new UnsupportedOperationException("Unrecognized key type."); } }
private <P> PartitionOperator( DataSet<T> input, PartitionMethod pMethod, Keys<T> pKeys, Partitioner<P> customPartitioner, TypeInformation<P> partitionerTypeInfo, String partitionLocationName) { super(input, input.getType()); Preconditions.checkNotNull(pMethod); Preconditions.checkArgument( pKeys != null || pMethod == PartitionMethod.REBALANCE, "Partitioning requires keys"); Preconditions.checkArgument( pMethod != PartitionMethod.CUSTOM || customPartitioner != null, "Custom partioning requires a partitioner."); Preconditions.checkArgument( pMethod != PartitionMethod.RANGE, "Range partitioning is not yet supported"); if (pKeys instanceof Keys.ExpressionKeys<?> && !(input.getType() instanceof CompositeType)) { throw new IllegalArgumentException( "Hash Partitioning with key fields only possible on Tuple or POJO DataSets"); } if (customPartitioner != null) { pKeys.validateCustomPartitioner(customPartitioner, partitionerTypeInfo); } this.pMethod = pMethod; this.pKeys = pKeys; this.partitionLocationName = partitionLocationName; this.customPartitioner = customPartitioner; }
protected org.apache.flink.api.common.operators.SingleInputOperator<?, T, ?> translateToDataFlow( Operator<T> input) { String name = "Partition at " + partitionLocationName; // distinguish between partition types if (pMethod == PartitionMethod.REBALANCE) { UnaryOperatorInformation<T, T> operatorInfo = new UnaryOperatorInformation<T, T>(getType(), getType()); PartitionOperatorBase<T> noop = new PartitionOperatorBase<T>(operatorInfo, pMethod, name); noop.setInput(input); noop.setParallelism(getParallelism()); return noop; } else if (pMethod == PartitionMethod.HASH || pMethod == PartitionMethod.CUSTOM) { if (pKeys instanceof Keys.ExpressionKeys) { int[] logicalKeyPositions = pKeys.computeLogicalKeyPositions(); UnaryOperatorInformation<T, T> operatorInfo = new UnaryOperatorInformation<T, T>(getType(), getType()); PartitionOperatorBase<T> noop = new PartitionOperatorBase<T>(operatorInfo, pMethod, logicalKeyPositions, name); noop.setInput(input); noop.setParallelism(getParallelism()); noop.setCustomPartitioner(customPartitioner); return noop; } else if (pKeys instanceof Keys.SelectorFunctionKeys) { @SuppressWarnings("unchecked") Keys.SelectorFunctionKeys<T, ?> selectorKeys = (Keys.SelectorFunctionKeys<T, ?>) pKeys; MapOperatorBase<?, T, ?> po = translateSelectorFunctionPartitioner( selectorKeys, pMethod, getType(), name, input, getParallelism(), customPartitioner); return po; } else { throw new UnsupportedOperationException("Unrecognized key type."); } } else if (pMethod == PartitionMethod.RANGE) { throw new UnsupportedOperationException("Range partitioning not yet supported"); } else { throw new UnsupportedOperationException("Unsupported partitioning method: " + pMethod.name()); } }
/**
 * Check if two sets of keys are compatible to each other (matching types, key counts).
 *
 * <p>The key field types of both key sets are compared position by position using
 * {@code equals}.
 *
 * @param other the keys to compare these keys with
 * @return {@code true} if the keys are compatible; incompatibility is never signaled via the
 *         return value but always via an exception
 * @throws IncompatibleKeysException if the number of key fields differs, or if the key field
 *         types at some position are not equal
 */
public boolean areCompatible(Keys<?> other) throws IncompatibleKeysException {

    final TypeInformation<?>[] ownTypes = this.getKeyFieldTypes();
    final TypeInformation<?>[] otherTypes = other.getKeyFieldTypes();

    // differing key counts can never be compatible
    if (ownTypes.length != otherTypes.length) {
        throw new IncompatibleKeysException(IncompatibleKeysException.SIZE_MISMATCH_MESSAGE);
    }

    // same count: every position must have an equal type
    int position = 0;
    for (TypeInformation<?> ownType : ownTypes) {
        TypeInformation<?> otherType = otherTypes[position++];
        if (!ownType.equals(otherType)) {
            throw new IncompatibleKeysException(ownType, otherType);
        }
    }

    return true;
}
/**
 * Checks whether these expression keys are compatible with the given keys.
 *
 * <p>Against other expression keys, the number of key fields and the type of every key field
 * (compared position by position) must match. Against selector function keys, the check is
 * delegated to the other side via {@code other.areCompatible(this)}.
 *
 * @param other the keys to compare these keys with
 * @return {@code true} if both are expression keys and compatible; for selector function keys,
 *         whatever the delegated check returns
 * @throws IncompatibleKeysException if the key counts or key field types differ, or if
 *         {@code other} is neither expression keys nor selector function keys
 */
@Override
public boolean areCompatible(Keys<?> other) throws IncompatibleKeysException {

    // handle everything that is not an expression key first
    if (!(other instanceof ExpressionKeys)) {
        if (other instanceof SelectorFunctionKeys<?, ?>) {
            return other.areCompatible(this);
        }
        throw new IncompatibleKeysException("The key is not compatible with " + other);
    }

    final ExpressionKeys<?> that = (ExpressionKeys<?>) other;

    if (that.getNumberOfKeyFields() != this.getNumberOfKeyFields()) {
        throw new IncompatibleKeysException(IncompatibleKeysException.SIZE_MISMATCH_MESSAGE);
    }

    // compare the key field types position by position
    final int fieldCount = this.keyFields.size();
    for (int pos = 0; pos < fieldCount; pos++) {
        if (!this.keyFields.get(pos).getType().equals(that.keyFields.get(pos).getType())) {
            throw new IncompatibleKeysException(
                    this.keyFields.get(pos).getType(), that.keyFields.get(pos).getType());
        }
    }

    return true;
}