/**
 * Registers a custom partitioner for this CoGroup operation. The partitioner is invoked on the
 * CoGroup keys to decide which partition a key is assigned to, and it is evaluated on both
 * inputs in the same way.
 *
 * <p>NOTE: A custom partitioner can only be used with single-field CoGroup keys, not with
 * composite CoGroup keys.
 *
 * @param partitioner The custom partitioner to be used. If {@code null}, key validation is
 *     skipped.
 * @return This CoGroup operator, to allow for function chaining.
 */
public CoGroupOperator<I1, I2, OUT> withPartitioner(Partitioner<?> partitioner) {
    final boolean partitionerGiven = partitioner != null;

    if (partitionerGiven) {
        // The same partitioner is used for both inputs, so both key sets have to accept it.
        keys1.validateCustomPartitioner(partitioner, null);
        keys2.validateCustomPartitioner(partitioner, null);
    }

    // Store the partitioner after passing it through the first input's clean(...).
    this.customPartitioner = getInput1().clean(partitioner);

    return this;
}
public CoGroupOperator( DataSet<I1> input1, DataSet<I2> input2, Keys<I1> keys1, Keys<I2> keys2, CoGroupFunction<I1, I2, OUT> function, TypeInformation<OUT> returnType, List<Pair<Integer, Order>> groupSortKeyOrderFirst, List<Pair<Integer, Order>> groupSortKeyOrderSecond, Partitioner<?> customPartitioner, String defaultName) { super(input1, input2, returnType); this.function = function; this.customPartitioner = customPartitioner; this.defaultName = defaultName; this.groupSortKeyOrderFirst = groupSortKeyOrderFirst == null ? Collections.<Pair<Integer, Order>>emptyList() : groupSortKeyOrderFirst; this.groupSortKeyOrderSecond = groupSortKeyOrderSecond == null ? Collections.<Pair<Integer, Order>>emptyList() : groupSortKeyOrderSecond; if (keys1 == null || keys2 == null) { throw new NullPointerException(); } // sanity check solution set key mismatches if (input1 instanceof SolutionSetPlaceHolder) { if (keys1 instanceof ExpressionKeys) { int[] positions = keys1.computeLogicalKeyPositions(); ((SolutionSetPlaceHolder<?>) input1).checkJoinKeyFields(positions); } else { throw new InvalidProgramException( "Currently, the solution set may only be CoGrouped with using tuple field positions."); } } if (input2 instanceof SolutionSetPlaceHolder) { if (keys2 instanceof ExpressionKeys) { int[] positions = keys2.computeLogicalKeyPositions(); ((SolutionSetPlaceHolder<?>) input2).checkJoinKeyFields(positions); } else { throw new InvalidProgramException( "Currently, the solution set may only be CoGrouped with using tuple field positions."); } } this.keys1 = keys1; this.keys2 = keys2; UdfOperatorUtils.analyzeDualInputUdf( this, CoGroupFunction.class, defaultName, function, keys1, keys2); }
/**
 * Registers a custom partitioner for the CoGroup operation being built. The partitioner is
 * invoked on the CoGroup keys to decide which partition a key is assigned to, and it is
 * evaluated on both inputs in the same way.
 *
 * <p>NOTE: A custom partitioner can only be used with single-field CoGroup keys, not with
 * composite CoGroup keys.
 *
 * @param partitioner The custom partitioner to be used. If {@code null}, key validation is
 *     skipped.
 * @return This CoGroup operator, to allow for function chaining.
 */
public CoGroupOperatorWithoutFunction withPartitioner(Partitioner<?> partitioner) {
    final boolean mustValidate = partitioner != null;

    if (mustValidate) {
        // Both key sets are validated because the partitioner is used for both inputs.
        keys1.validateCustomPartitioner(partitioner, null);
        keys2.validateCustomPartitioner(partitioner, null);
    }

    // Store the partitioner after passing it through the first input's clean(...).
    this.customPartitioner = input1.clean(partitioner);

    return this;
}
/**
 * Validates the keys of the second input and creates the intermediate step of a CoGroup
 * transformation. <br>
 * To continue the CoGroup transformation, provide a {@link
 * org.apache.flink.api.common.functions.RichCoGroupFunction} by calling {@link
 * org.apache.flink.api.java.operators.CoGroupOperator.CoGroupOperatorSets.CoGroupOperatorSetsPredicate.CoGroupOperatorWithoutFunction#with(org.apache.flink.api.common.functions.CoGroupFunction)}
 * on the returned object.
 *
 * @param keys2 The CoGroup keys of the second input.
 * @return The intermediate CoGroup step built from the given keys.
 * @throws NullPointerException If {@code keys2} is {@code null}.
 * @throws InvalidProgramException If {@code keys2} is empty, or if the compatibility check
 *     against the keys of the first input fails.
 */
private CoGroupOperatorWithoutFunction createCoGroupOperator(Keys<I2> keys2) {
    if (keys2 == null) {
        throw new NullPointerException();
    } else if (keys2.isEmpty()) {
        throw new InvalidProgramException("The co-group keys must not be empty.");
    }

    // Surface key incompatibilities as an invalid program, keeping the original cause.
    try {
        keys1.areCompatible(keys2);
    } catch (IncompatibleKeysException incompatible) {
        throw new InvalidProgramException(
                "The pair of co-group keys are not compatible with each other.", incompatible);
    }

    return new CoGroupOperatorWithoutFunction(keys2);
}
/**
 * Creates the predicate step holding the CoGroup keys of the first input.
 *
 * @param keys1 The CoGroup keys of the first input.
 * @throws NullPointerException If {@code keys1} is {@code null}.
 * @throws InvalidProgramException If {@code keys1} is empty.
 */
private CoGroupOperatorSetsPredicate(Keys<I1> keys1) {
    if (keys1 == null) {
        throw new NullPointerException();
    } else if (keys1.isEmpty()) {
        throw new InvalidProgramException("The co-group keys must not be empty.");
    }

    // Keys passed validation; keep them for this step.
    this.keys1 = keys1;
}
/**
 * Creates the intermediate CoGroup step for the given keys of the second input. Both group sort
 * order lists start out empty.
 *
 * @param keys2 The CoGroup keys of the second input.
 * @throws NullPointerException If {@code keys2} is {@code null}.
 * @throws InvalidProgramException If {@code keys2} is empty.
 */
private CoGroupOperatorWithoutFunction(Keys<I2> keys2) {
    if (keys2 == null) {
        throw new NullPointerException();
    } else if (keys2.isEmpty()) {
        throw new InvalidProgramException("The co-group keys must not be empty.");
    }

    // No group sorting is configured initially for either input.
    this.groupSortKeyOrderFirst = new ArrayList<>();
    this.groupSortKeyOrderSecond = new ArrayList<>();

    this.keys2 = keys2;
}
@Override protected org.apache.flink.api.common.operators.base.CoGroupOperatorBase<?, ?, OUT, ?> translateToDataFlow(Operator<I1> input1, Operator<I2> input2) { String name = getName() != null ? getName() : "CoGroup at " + defaultName; try { keys1.areCompatible(keys2); } catch (IncompatibleKeysException e) { throw new InvalidProgramException("The types of the key fields do not match.", e); } final org.apache.flink.api.common.operators.base.CoGroupOperatorBase<?, ?, OUT, ?> po; if (keys1 instanceof SelectorFunctionKeys && keys2 instanceof SelectorFunctionKeys) { @SuppressWarnings("unchecked") SelectorFunctionKeys<I1, ?> selectorKeys1 = (SelectorFunctionKeys<I1, ?>) keys1; @SuppressWarnings("unchecked") SelectorFunctionKeys<I2, ?> selectorKeys2 = (SelectorFunctionKeys<I2, ?>) keys2; po = translateSelectorFunctionCoGroup( selectorKeys1, selectorKeys2, function, getResultType(), name, input1, input2); po.setParallelism(getParallelism()); po.setCustomPartitioner(customPartitioner); } else if (keys2 instanceof SelectorFunctionKeys) { int[] logicalKeyPositions1 = keys1.computeLogicalKeyPositions(); @SuppressWarnings("unchecked") SelectorFunctionKeys<I2, ?> selectorKeys2 = (SelectorFunctionKeys<I2, ?>) keys2; po = translateSelectorFunctionCoGroupRight( logicalKeyPositions1, selectorKeys2, function, getInput1Type(), getResultType(), name, input1, input2); po.setParallelism(getParallelism()); po.setCustomPartitioner(customPartitioner); } else if (keys1 instanceof SelectorFunctionKeys) { @SuppressWarnings("unchecked") SelectorFunctionKeys<I1, ?> selectorKeys1 = (SelectorFunctionKeys<I1, ?>) keys1; int[] logicalKeyPositions2 = keys2.computeLogicalKeyPositions(); po = translateSelectorFunctionCoGroupLeft( selectorKeys1, logicalKeyPositions2, function, getInput2Type(), getResultType(), name, input1, input2); } else if (keys1 instanceof Keys.ExpressionKeys && keys2 instanceof Keys.ExpressionKeys) { try { keys1.areCompatible(keys2); } catch (IncompatibleKeysException e) { throw 
new InvalidProgramException("The types of the key fields do not match.", e); } int[] logicalKeyPositions1 = keys1.computeLogicalKeyPositions(); int[] logicalKeyPositions2 = keys2.computeLogicalKeyPositions(); CoGroupOperatorBase<I1, I2, OUT, CoGroupFunction<I1, I2, OUT>> op = new CoGroupOperatorBase<>( function, new BinaryOperatorInformation<>(getInput1Type(), getInput2Type(), getResultType()), logicalKeyPositions1, logicalKeyPositions2, name); op.setFirstInput(input1); op.setSecondInput(input2); po = op; } else { throw new UnsupportedOperationException("Unrecognized or incompatible key types."); } // configure shared characteristics po.setParallelism(getParallelism()); po.setCustomPartitioner(customPartitioner); if (groupSortKeyOrderFirst.size() > 0) { Ordering o = new Ordering(); for (Pair<Integer, Order> entry : groupSortKeyOrderFirst) { o.appendOrdering(entry.getLeft(), null, entry.getRight()); } po.setGroupOrderForInputOne(o); } if (groupSortKeyOrderSecond.size() > 0) { Ordering o = new Ordering(); for (Pair<Integer, Order> entry : groupSortKeyOrderSecond) { o.appendOrdering(entry.getLeft(), null, entry.getRight()); } po.setGroupOrderForInputTwo(o); } return po; }