@Test public void testDistinctPreservesPartitioningOfDistinctFields() { try { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(4); @SuppressWarnings("unchecked") DataSet<Tuple2<Long, Long>> data = env.fromElements(new Tuple2<Long, Long>(0L, 0L), new Tuple2<Long, Long>(1L, 1L)) .map(new IdentityMapper<Tuple2<Long, Long>>()) .setParallelism(4); data.distinct(0).groupBy(0).sum(1).output(new DiscardingOutputFormat<Tuple2<Long, Long>>()); Plan p = env.createProgramPlan(); OptimizedPlan op = compileNoStats(p); SinkPlanNode sink = op.getDataSinks().iterator().next(); SingleInputPlanNode reducer = (SingleInputPlanNode) sink.getInput().getSource(); SingleInputPlanNode distinctReducer = (SingleInputPlanNode) reducer.getInput().getSource(); assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy()); // reducer can be forward, reuses partitioning from distinct assertEquals(ShipStrategyType.FORWARD, reducer.getInput().getShipStrategy()); // distinct reducer is partitioned assertEquals(ShipStrategyType.PARTITION_HASH, distinctReducer.getInput().getShipStrategy()); } catch (Exception e) { e.printStackTrace(); fail(e.getMessage()); } }
/**
 * Creates the execution environment handed to the user program and remembers it as the last
 * environment created by this factory.
 *
 * <p>In detached mode a {@code DetachedEnvironment} is created, otherwise a {@code
 * ContextEnvironment}; both receive the same client, jar files, classpaths, user code class
 * loader and savepoint path. If {@code defaultParallelism} is positive it is applied to the
 * new environment.
 *
 * @return the newly created environment
 * @throws InvalidProgramException if the factory is in detached mode and an environment has
 *     already been created
 */
@Override
public ExecutionEnvironment createExecutionEnvironment() {
    // detached mode allows only a single environment per factory
    if (isDetached && lastEnvCreated != null) {
        throw new InvalidProgramException(
                "Multiple environments cannot be created in detached mode");
    }

    lastEnvCreated =
            isDetached
                    ? new DetachedEnvironment(
                            client,
                            jarFilesToAttach,
                            classpathsToAttach,
                            userCodeClassLoader,
                            savepointPath)
                    : new ContextEnvironment(
                            client,
                            jarFilesToAttach,
                            classpathsToAttach,
                            userCodeClassLoader,
                            savepointPath);

    // a non-positive default parallelism leaves the environment's own setting untouched
    if (defaultParallelism > 0) {
        lastEnvCreated.setParallelism(defaultParallelism);
    }
    return lastEnvCreated;
}
/**
 * Builds a test plan with an iteration over the "bigFile" input whose step function joins the
 * iterated data set (first join input) with the "smallFile" input (second join input), which
 * is created outside of the iteration.
 *
 * <p>The join always carries the repartition-hash ship strategy hint. The local strategy hint
 * is only set when {@code strategy} is not the empty string.
 *
 * @param strategy value for the local strategy hint, or the empty string to set none
 * @return the program plan created from the environment
 */
private Plan getTestPlanRightStatic(String strategy) {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    // optimizer hints attached to the join
    Configuration joinHints = new Configuration();
    joinHints.setString(
            Optimizer.HINT_SHIP_STRATEGY, Optimizer.HINT_SHIP_STRATEGY_REPARTITION_HASH);
    if (!strategy.isEmpty()) {
        joinHints.setString(Optimizer.HINT_LOCAL_STRATEGY, strategy);
    }

    DataSet<Tuple3<Long, Long, Long>> bigSource =
            env.readCsvFile("file://bigFile")
                    .types(Long.class, Long.class, Long.class)
                    .name("bigFile");

    DataSet<Tuple3<Long, Long, Long>> smallSource =
            env.readCsvFile("file://smallFile")
                    .types(Long.class, Long.class, Long.class)
                    .name("smallFile");

    IterativeDataSet<Tuple3<Long, Long, Long>> loop = bigSource.iterate(10);

    // iterated data set on the first input, data set from outside the loop on the second
    DataSet<Tuple3<Long, Long, Long>> stepResult =
            loop.join(smallSource)
                    .where(0)
                    .equalTo(0)
                    .with(new DummyJoiner())
                    .name("DummyJoiner")
                    .withParameters(joinHints);

    loop.closeWith(stepResult).output(new DiscardingOutputFormat<Tuple3<Long, Long, Long>>());

    return env.createProgramPlan();
}
/**
 * Builds a test plan with an iteration over the "Big" input whose step function joins the
 * "Small" input (first join input, created outside of the iteration) with the iterated data
 * set (second join input).
 *
 * <p>The given {@code strategy} is always set as the local strategy hint of the join.
 *
 * @param strategy value for the local strategy hint of the join
 * @return the program plan created from the environment
 */
private Plan getTestPlanLeftStatic(String strategy) {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    // optimizer hint attached to the join
    Configuration joinHints = new Configuration();
    joinHints.setString(Optimizer.HINT_LOCAL_STRATEGY, strategy);

    @SuppressWarnings("unchecked")
    DataSet<Tuple3<Long, Long, Long>> bigSource =
            env.fromElements(
                            new Tuple3<Long, Long, Long>(1L, 2L, 3L),
                            new Tuple3<Long, Long, Long>(1L, 2L, 3L),
                            new Tuple3<Long, Long, Long>(1L, 2L, 3L))
                    .name("Big");

    @SuppressWarnings("unchecked")
    DataSet<Tuple3<Long, Long, Long>> smallSource =
            env.fromElements(new Tuple3<Long, Long, Long>(1L, 2L, 3L)).name("Small");

    IterativeDataSet<Tuple3<Long, Long, Long>> loop = bigSource.iterate(10);

    // data set from outside the loop on the first input, iterated data set on the second
    DataSet<Tuple3<Long, Long, Long>> stepResult =
            smallSource
                    .join(loop)
                    .where(0)
                    .equalTo(0)
                    .with(new DummyJoiner())
                    .name("DummyJoiner")
                    .withParameters(joinHints);

    loop.closeWith(stepResult).output(new DiscardingOutputFormat<Tuple3<Long, Long, Long>>());

    return env.createProgramPlan();
}
/**
 * Builds the delta iteration test plan.
 *
 * <p>Inside the iteration the workset is first joined with an invariant input (keeping the
 * workset record), and the result is joined with the solution set (keeping the solution set
 * record). The next workset is produced by a group-reduce over that join result; the solution
 * set delta is either the join result itself or an identity map on top of it.
 *
 * @param joinPreservesSolutionSet if {@code true}, the solution set join declares all three
 *     fields of its second input as forwarded; otherwise {@code null} is passed as the
 *     forwarded-fields declaration
 * @param mapBeforeSolutionDelta if {@code true}, an identity mapper is placed between the
 *     solution set join and the solution set delta
 * @return the program plan created from the environment
 */
private Plan getJavaTestPlan(boolean joinPreservesSolutionSet, boolean mapBeforeSolutionDelta) {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    @SuppressWarnings("unchecked")
    DataSet<Tuple3<Long, Long, Long>> solutionSetInput =
            env.fromElements(new Tuple3<Long, Long, Long>(1L, 2L, 3L)).name("Solution Set");

    @SuppressWarnings("unchecked")
    DataSet<Tuple3<Long, Long, Long>> worksetInput =
            env.fromElements(new Tuple3<Long, Long, Long>(1L, 2L, 3L)).name("Workset");

    @SuppressWarnings("unchecked")
    DataSet<Tuple3<Long, Long, Long>> invariantInput =
            env.fromElements(new Tuple3<Long, Long, Long>(1L, 2L, 3L)).name("Invariant Input");

    DeltaIteration<Tuple3<Long, Long, Long>, Tuple3<Long, Long, Long>> deltaIteration =
            solutionSetInput.iterateDelta(worksetInput, 100, 1, 2);

    // step 1: workset joined with the invariant input, emitting the workset side
    DataSet<Tuple3<Long, Long, Long>> joinedWithInvariant =
            deltaIteration
                    .getWorkset()
                    .join(invariantInput)
                    .where(1, 2)
                    .equalTo(1, 2)
                    .with(
                            new RichJoinFunction<
                                    Tuple3<Long, Long, Long>,
                                    Tuple3<Long, Long, Long>,
                                    Tuple3<Long, Long, Long>>() {
                                public Tuple3<Long, Long, Long> join(
                                        Tuple3<Long, Long, Long> first,
                                        Tuple3<Long, Long, Long> second) {
                                    return first;
                                }
                            })
                    .name(JOIN_WITH_INVARIANT_NAME);

    // forwarded-fields declaration for the solution set side; null when nothing is declared
    String[] forwardedFromSolutionSet =
            joinPreservesSolutionSet ? new String[] {"0->0", "1->1", "2->2"} : null;

    // step 2: join with the solution set, emitting the solution set side
    DataSet<Tuple3<Long, Long, Long>> joinedWithSolutionSet =
            joinedWithInvariant
                    .join(deltaIteration.getSolutionSet())
                    .where(1, 0)
                    .equalTo(1, 2)
                    .with(
                            new RichJoinFunction<
                                    Tuple3<Long, Long, Long>,
                                    Tuple3<Long, Long, Long>,
                                    Tuple3<Long, Long, Long>>() {
                                public Tuple3<Long, Long, Long> join(
                                        Tuple3<Long, Long, Long> first,
                                        Tuple3<Long, Long, Long> second) {
                                    return second;
                                }
                            })
                    .name(JOIN_WITH_SOLUTION_SET)
                    .withForwardedFieldsSecond(forwardedFromSolutionSet);

    // step 3: next workset from a group-reduce that emits nothing
    DataSet<Tuple3<Long, Long, Long>> nextWorkset =
            joinedWithSolutionSet
                    .groupBy(1, 2)
                    .reduceGroup(
                            new RichGroupReduceFunction<
                                    Tuple3<Long, Long, Long>, Tuple3<Long, Long, Long>>() {
                                public void reduce(
                                        Iterable<Tuple3<Long, Long, Long>> values,
                                        Collector<Tuple3<Long, Long, Long>> out) {}
                            })
                    .name(NEXT_WORKSET_REDUCER_NAME)
                    .withForwardedFields("1->1", "2->2", "0->0");

    // step 4: solution set delta, optionally behind an identity mapper
    DataSet<Tuple3<Long, Long, Long>> nextSolutionSet;
    if (mapBeforeSolutionDelta) {
        nextSolutionSet =
                joinedWithSolutionSet
                        .map(
                                new RichMapFunction<
                                        Tuple3<Long, Long, Long>, Tuple3<Long, Long, Long>>() {
                                    public Tuple3<Long, Long, Long> map(
                                            Tuple3<Long, Long, Long> value) {
                                        return value;
                                    }
                                })
                        .name(SOLUTION_DELTA_MAPPER_NAME)
                        .withForwardedFields("0->0", "1->1", "2->2");
    } else {
        nextSolutionSet = joinedWithSolutionSet;
    }

    deltaIteration
            .closeWith(nextSolutionSet, nextWorkset)
            .output(new DiscardingOutputFormat<Tuple3<Long, Long, Long>>());

    return env.createProgramPlan();
}
@Test public void testRejectPlanIfSolutionSetKeysAndJoinKeysDontMatch() { try { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(DEFAULT_PARALLELISM); @SuppressWarnings("unchecked") DataSet<Tuple3<Long, Long, Long>> solutionSetInput = env.fromElements(new Tuple3<Long, Long, Long>(1L, 2L, 3L)).name("Solution Set"); @SuppressWarnings("unchecked") DataSet<Tuple3<Long, Long, Long>> worksetInput = env.fromElements(new Tuple3<Long, Long, Long>(1L, 2L, 3L)).name("Workset"); @SuppressWarnings("unchecked") DataSet<Tuple3<Long, Long, Long>> invariantInput = env.fromElements(new Tuple3<Long, Long, Long>(1L, 2L, 3L)).name("Invariant Input"); DeltaIteration<Tuple3<Long, Long, Long>, Tuple3<Long, Long, Long>> iter = solutionSetInput.iterateDelta(worksetInput, 100, 1, 2); DataSet<Tuple3<Long, Long, Long>> result = iter.getWorkset() .join(invariantInput) .where(1, 2) .equalTo(1, 2) .with( new JoinFunction< Tuple3<Long, Long, Long>, Tuple3<Long, Long, Long>, Tuple3<Long, Long, Long>>() { public Tuple3<Long, Long, Long> join( Tuple3<Long, Long, Long> first, Tuple3<Long, Long, Long> second) { return first; } }); try { result .join(iter.getSolutionSet()) .where(1, 0) .equalTo(0, 2) .with( new JoinFunction< Tuple3<Long, Long, Long>, Tuple3<Long, Long, Long>, Tuple3<Long, Long, Long>>() { public Tuple3<Long, Long, Long> join( Tuple3<Long, Long, Long> first, Tuple3<Long, Long, Long> second) { return second; } }); fail("The join should be rejected with key type mismatches."); } catch (InvalidProgramException e) { // expected! } } catch (Exception e) { System.err.println(e.getMessage()); e.printStackTrace(); fail("Test errored: " + e.getMessage()); } }