@Test public void testDistinctPreservesPartitioningOfDistinctFields() { try { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(4); @SuppressWarnings("unchecked") DataSet<Tuple2<Long, Long>> data = env.fromElements(new Tuple2<Long, Long>(0L, 0L), new Tuple2<Long, Long>(1L, 1L)) .map(new IdentityMapper<Tuple2<Long, Long>>()) .setParallelism(4); data.distinct(0).groupBy(0).sum(1).output(new DiscardingOutputFormat<Tuple2<Long, Long>>()); Plan p = env.createProgramPlan(); OptimizedPlan op = compileNoStats(p); SinkPlanNode sink = op.getDataSinks().iterator().next(); SingleInputPlanNode reducer = (SingleInputPlanNode) sink.getInput().getSource(); SingleInputPlanNode distinctReducer = (SingleInputPlanNode) reducer.getInput().getSource(); assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy()); // reducer can be forward, reuses partitioning from distinct assertEquals(ShipStrategyType.FORWARD, reducer.getInput().getShipStrategy()); // distinct reducer is partitioned assertEquals(ShipStrategyType.PARTITION_HASH, distinctReducer.getInput().getShipStrategy()); } catch (Exception e) { e.printStackTrace(); fail(e.getMessage()); } }
@Test public void testCustomPartitioningNotReused() { try { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); @SuppressWarnings("unchecked") DataSet<Tuple3<Long, Long, Long>> input = env.fromElements(new Tuple3<Long, Long, Long>(0L, 0L, 0L)); input .partitionCustom( new Partitioner<Long>() { @Override public int partition(Long key, int numPartitions) { return 0; } }, 0) .map(new IdentityMapper<Tuple3<Long, Long, Long>>()) .withForwardedFields("0", "1", "2") .groupBy(0, 1) .reduceGroup(new IdentityGroupReducerCombinable<Tuple3<Long, Long, Long>>()) .withForwardedFields("0", "1", "2") .groupBy(1) .reduceGroup(new IdentityGroupReducerCombinable<Tuple3<Long, Long, Long>>()) .output(new DiscardingOutputFormat<Tuple3<Long, Long, Long>>()); Plan p = env.createProgramPlan(); OptimizedPlan op = compileNoStats(p); SinkPlanNode sink = op.getDataSinks().iterator().next(); SingleInputPlanNode reducer2 = (SingleInputPlanNode) sink.getInput().getSource(); SingleInputPlanNode combiner = (SingleInputPlanNode) reducer2.getInput().getSource(); SingleInputPlanNode reducer1 = (SingleInputPlanNode) combiner.getInput().getSource(); assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy()); // should be locally forwarding, reusing sort and partitioning assertEquals(ShipStrategyType.PARTITION_HASH, reducer2.getInput().getShipStrategy()); assertEquals(LocalStrategy.COMBININGSORT, reducer2.getInput().getLocalStrategy()); assertEquals(ShipStrategyType.FORWARD, combiner.getInput().getShipStrategy()); assertEquals(LocalStrategy.NONE, combiner.getInput().getLocalStrategy()); assertEquals(ShipStrategyType.FORWARD, reducer1.getInput().getShipStrategy()); assertEquals(LocalStrategy.COMBININGSORT, reducer1.getInput().getLocalStrategy()); } catch (Exception e) { e.printStackTrace(); fail(e.getMessage()); } }
private void checkStandardStrategies( SingleInputPlanNode map, DualInputPlanNode join, SingleInputPlanNode combiner, SingleInputPlanNode reducer, SinkPlanNode sink) { // check ship strategies that are always fix Assert.assertEquals(ShipStrategyType.FORWARD, map.getInput().getShipStrategy()); Assert.assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy()); // check the driver strategies that are always fix Assert.assertEquals(DriverStrategy.COLLECTOR_MAP, map.getDriverStrategy()); Assert.assertEquals(DriverStrategy.SORTED_GROUP_REDUCE, reducer.getDriverStrategy()); Assert.assertEquals(DriverStrategy.NONE, sink.getDriverStrategy()); if (combiner != null) { Assert.assertEquals(DriverStrategy.SORTED_GROUP_COMBINE, combiner.getDriverStrategy()); Assert.assertEquals(LocalStrategy.NONE, combiner.getInput().getLocalStrategy()); } }