Example #1
  @Test
  public void testCrossLambda() {
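    // cross with a Java 8 lambda; if the lambda's return type cannot be extracted,
    // the UnsupportedLambdaExpressionException below is expected and counts as success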
    try {
      final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

      DataSet<Tuple2<Integer, String>> left =
          env.fromElements(
              new Tuple2<Integer, String>(1, "hello"),
              new Tuple2<Integer, String>(2, "what's"),
              new Tuple2<Integer, String>(2, "up"));
      DataSet<Tuple2<Integer, String>> right =
          env.fromElements(
              new Tuple2<Integer, String>(1, "not"),
              new Tuple2<Integer, String>(1, "much"),
              new Tuple2<Integer, String>(2, "really"));
      DataSet<Tuple2<Integer, String>> joined =
          left.cross(right)
              .with((t, s) -> new Tuple2<Integer, String>(t.f0 + s.f0, t.f1 + " " + s.f1));

    } catch (UnsupportedLambdaExpressionException e) {
      // Success
      return;
    } catch (Exception e) {
      Assert.fail();
    }
  }
  @SuppressWarnings("serial")
  @Test
  public void testRun() throws Exception {
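    // run connected components on a generated graph and verify the odd/even component assignment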
    int verticesCount = 5000;
    int edgesCount = verticesCount * 2;

    ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
    environment.getConfig().disableSysoutLogging();

    Graph<Long, Long, Long> graph =
        GraphGenerator.generateGraph(verticesCount, edgesCount, environment);

    PCConnectedComponents<Long, Long> algo = new PCConnectedComponents<>(verticesCount);

    List<Tuple2<Long, Long>> result =
        algo.run(graph)
            .map(
                new RichMapFunction<Vertex<Long, Long>, Tuple2<Long, Long>>() {
                  @Override
                  public Tuple2<Long, Long> map(Vertex<Long, Long> value) throws Exception {
                    return new Tuple2<>(value.getId(), value.getValue());
                  }
                })
            .collect();

    ConnectedComponentsData.checkOddEvenResult(result);
  }
  @Test
  public void testAggregationTypes() {
    try {
      final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
      DataSet<Tuple5<Integer, Long, String, Long, Integer>> tupleDs =
          env.fromCollection(emptyTupleData, tupleTypeInfo);

      // should work: multiple aggregates
      tupleDs.aggregate(Aggregations.SUM, 0).and(Aggregations.MIN, 4);

      // should work: nested aggregates
      tupleDs.aggregate(Aggregations.MIN, 2).aggregate(Aggregations.SUM, 1);

      // should not work: SUM on a String field
      try {
        tupleDs.aggregate(Aggregations.SUM, 2);
        Assert.fail();
      } catch (UnsupportedAggregationTypeException iae) {
        // we're good here
      }
    } catch (Exception e) {
      System.err.println(e.getMessage());
      e.printStackTrace();
      Assert.fail(e.getMessage());
    }
  }
  @SuppressWarnings("serial")
  public static void main(String[] args) throws Exception {
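    // PageRank over a page/link graph: each edge is weighted with the transition
    // probability 1 / outDegree of its source vertex before the algorithm runs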

    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Vertex<Long, Double>> pages = getPagesDataSet(env);

    DataSet<Edge<Long, Double>> links = getLinksDataSet(env);

    Graph<Long, Double, Double> network = Graph.fromDataSet(pages, links, env);

    DataSet<Tuple2<Long, Long>> vertexOutDegrees = network.outDegrees();

    // assign the transition probabilities as the edge weights
    Graph<Long, Double, Double> networkWithWeights =
        network.joinWithEdgesOnSource(
            vertexOutDegrees,
            new MapFunction<Tuple2<Double, Long>, Double>() {
              public Double map(Tuple2<Double, Long> value) {
                return value.f0 / value.f1;
              }
            });

    DataSet<Vertex<Long, Double>> pageRanks =
        networkWithWeights
            .run(new PageRank<Long>(numPages, DAMPENING_FACTOR, maxIterations))
            .getVertices();

    // print() is an eager sink and triggers the program execution itself,
    // so a separate env.execute() call is not needed afterwards
    pageRanks.print();
  }
  @Test
  public void testJoinWithTuples() {
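    // a custom partitioner supplied via withPartitioner() must be applied
    // as PARTITION_CUSTOM on both join inputs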
    try {
      final Partitioner<Long> partitioner = new TestPartitionerLong();

      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

      DataSet<Tuple2<Long, Long>> input1 = env.fromElements(new Tuple2<Long, Long>(0L, 0L));
      DataSet<Tuple3<Long, Long, Long>> input2 =
          env.fromElements(new Tuple3<Long, Long, Long>(0L, 0L, 0L));

      input1
          .join(input2, JoinHint.REPARTITION_HASH_FIRST)
          .where(1)
          .equalTo(0)
          .withPartitioner(partitioner)
          .print();

      Plan p = env.createProgramPlan();
      OptimizedPlan op = compileNoStats(p);

      SinkPlanNode sink = op.getDataSinks().iterator().next();
      DualInputPlanNode join = (DualInputPlanNode) sink.getInput().getSource();

      assertEquals(ShipStrategyType.PARTITION_CUSTOM, join.getInput1().getShipStrategy());
      assertEquals(ShipStrategyType.PARTITION_CUSTOM, join.getInput2().getShipStrategy());
      assertEquals(partitioner, join.getInput1().getPartitioner());
      assertEquals(partitioner, join.getInput2().getPartitioner());
    } catch (Exception e) {
      e.printStackTrace();
      fail(e.getMessage());
    }
  }
Example #6
  @SuppressWarnings("serial")
  public static void main(String[] args) throws Exception {
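    // read an edge list, initialize every vertex value with its own ID,
    // and run gather-sum-apply connected components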

    if (!parseParameters(args)) {
      return;
    }

    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Edge<Long, NullValue>> edges = getEdgesDataSet(env);

    Graph<Long, Long, NullValue> graph =
        Graph.fromDataSet(
            edges,
            new MapFunction<Long, Long>() {
              @Override
              public Long map(Long value) throws Exception {
                return value;
              }
            },
            env);

    DataSet<Vertex<Long, Long>> verticesWithMinIds =
        graph.run(new GSAConnectedComponents<Long, Long, NullValue>(maxIterations));

    // emit result
    if (fileOutput) {
      verticesWithMinIds.writeAsCsv(outputPath, "\n", ",");

      // since file sinks are lazy, we trigger the execution explicitly
      env.execute("Connected Components Example");
    } else {
      verticesWithMinIds.print();
    }
  }
  @Test
  public void testRejectWhenSolutionSetKeysDontMatchJoin() {
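    // joins with the solution set must use the delta iteration key (field 1);
    // keying the solution set on any other field has to be rejected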
    try {
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

      @SuppressWarnings("unchecked")
      DataSet<Tuple3<Double, Long, String>> initialSolutionSet =
          env.fromElements(new Tuple3<Double, Long, String>(3.44, 5L, "abc"));

      @SuppressWarnings("unchecked")
      DataSet<Tuple2<Double, String>> initialWorkSet =
          env.fromElements(new Tuple2<Double, String>(1.23, "abc"));

      DeltaIteration<Tuple3<Double, Long, String>, Tuple2<Double, String>> iteration =
          initialSolutionSet.iterateDelta(initialWorkSet, 10, 1);

      try {
        iteration.getWorkset().join(iteration.getSolutionSet()).where(1).equalTo(2);
        fail("Accepted invalid program.");
      } catch (InvalidProgramException e) {
        // all good!
      }

      try {
        iteration.getSolutionSet().join(iteration.getWorkset()).where(2).equalTo(1);
        fail("Accepted invalid program.");
      } catch (InvalidProgramException e) {
        // all good!
      }
    } catch (Exception e) {
      System.err.println(e.getMessage());
      e.printStackTrace();
      fail(e.getMessage());
    }
  }
  @Test
  public void testJoinWithKeySelectorsWrongType() {
    try {
      final Partitioner<Long> partitioner = new TestPartitionerLong();

      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

      DataSet<Pojo2> input1 = env.fromElements(new Pojo2());
      DataSet<Pojo3> input2 = env.fromElements(new Pojo3());

      try {
        input1
            .join(input2, JoinHint.REPARTITION_HASH_FIRST)
            .where(new Pojo2KeySelector())
            .equalTo(new Pojo3KeySelector())
            .withPartitioner(partitioner);

        fail("should throw an exception");
      } catch (InvalidProgramException e) {
        // expected
      }
    } catch (Exception e) {
      e.printStackTrace();
      fail(e.getMessage());
    }
  }
  @Test
  public void testDistinctPreservesPartitioningOfDistinctFields() {
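    // distinct(0) hash-partitions on field 0, so the subsequent groupBy(0).sum(1)
    // can reuse that partitioning and read its input with a FORWARD ship strategy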
    try {
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
      env.setParallelism(4);

      @SuppressWarnings("unchecked")
      DataSet<Tuple2<Long, Long>> data =
          env.fromElements(new Tuple2<Long, Long>(0L, 0L), new Tuple2<Long, Long>(1L, 1L))
              .map(new IdentityMapper<Tuple2<Long, Long>>())
              .setParallelism(4);

      data.distinct(0).groupBy(0).sum(1).output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

      Plan p = env.createProgramPlan();
      OptimizedPlan op = compileNoStats(p);

      SinkPlanNode sink = op.getDataSinks().iterator().next();
      SingleInputPlanNode reducer = (SingleInputPlanNode) sink.getInput().getSource();
      SingleInputPlanNode distinctReducer = (SingleInputPlanNode) reducer.getInput().getSource();

      assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());

      // reducer can be forward, reuses partitioning from distinct
      assertEquals(ShipStrategyType.FORWARD, reducer.getInput().getShipStrategy());

      // distinct reducer is partitioned
      assertEquals(ShipStrategyType.PARTITION_HASH, distinctReducer.getInput().getShipStrategy());
    } catch (Exception e) {
      e.printStackTrace();
      fail(e.getMessage());
    }
  }
  @Test
  public void testJoinWithTuplesWrongType() {
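    // the join key at position 1 is a Long, but the partitioner expects Integer keys,
    // so the program must be rejected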
    try {
      final Partitioner<Integer> partitioner = new TestPartitionerInt();

      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

      DataSet<Tuple2<Long, Long>> input1 = env.fromElements(new Tuple2<Long, Long>(0L, 0L));
      DataSet<Tuple3<Long, Long, Long>> input2 =
          env.fromElements(new Tuple3<Long, Long, Long>(0L, 0L, 0L));

      try {
        input1
            .join(input2, JoinHint.REPARTITION_HASH_FIRST)
            .where(1)
            .equalTo(0)
            .withPartitioner(partitioner);

        fail("should throw an exception");
      } catch (InvalidProgramException e) {
        // expected
      }
    } catch (Exception e) {
      e.printStackTrace();
      fail(e.getMessage());
    }
  }
  @Test
  public void testFieldsAggregate() {

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> tupleDs =
        env.fromCollection(emptyTupleData, tupleTypeInfo);

    // should work
    try {
      tupleDs.aggregate(Aggregations.SUM, 1);
    } catch (Exception e) {
      Assert.fail();
    }

    // should not work: index out of bounds
    try {
      tupleDs.aggregate(Aggregations.SUM, 10);
      Assert.fail();
    } catch (IllegalArgumentException iae) {
      // we're good here
    } catch (Exception e) {
      Assert.fail();
    }

    // should not work: not applied to tuple dataset
    DataSet<Long> longDs = env.fromCollection(emptyLongData, BasicTypeInfo.LONG_TYPE_INFO);
    try {
      longDs.aggregate(Aggregations.MIN, 1);
      Assert.fail();
    } catch (InvalidProgramException uoe) {
      // we're good here
    } catch (Exception e) {
      Assert.fail();
    }
  }
Example #12
  @Test
  public void testJoinProjection6() {

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<CustomType> ds1 = env.fromCollection(customTypeData);
    DataSet<CustomType> ds2 = env.fromCollection(customTypeData);

    // should work
    try {
      ds1.join(ds2)
          .where(
              new KeySelector<CustomType, Long>() {

                @Override
                public Long getKey(CustomType value) {
                  return value.myLong;
                }
              })
          .equalTo(
              new KeySelector<CustomType, Long>() {

                @Override
                public Long getKey(CustomType value) {
                  return value.myLong;
                }
              })
          .projectFirst()
          .projectSecond()
          .types(CustomType.class, CustomType.class);
    } catch (Exception e) {
      System.out.println("FAILED: " + e);
      Assert.fail();
    }
  }
Example #13
  @Test
  public void testJoinKeySelectors1() {

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<CustomType> ds1 = env.fromCollection(customTypeData);
    DataSet<CustomType> ds2 = env.fromCollection(customTypeData);

    // should work
    try {
      ds1.join(ds2)
          .where(
              new KeySelector<CustomType, Long>() {

                @Override
                public Long getKey(CustomType value) {
                  return value.myLong;
                }
              })
          .equalTo(
              new KeySelector<CustomType, Long>() {

                @Override
                public Long getKey(CustomType value) {
                  return value.myLong;
                }
              });
    } catch (Exception e) {
      Assert.fail();
    }
  }
Example #14
  @Test(expected = IllegalArgumentException.class)
  public void testJoinKeyExpressions4() {

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<CustomType> ds1 = env.fromCollection(customTypeData);
    DataSet<CustomType> ds2 = env.fromCollection(customTypeData);

    // should not work, join key non-existent
    ds1.join(ds2).where("myNonExistent").equalTo("myInt");
  }
Example #15
  @Test(expected = InvalidProgramException.class)
  public void testJoinKeyExpressions3() {

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<CustomType> ds1 = env.fromCollection(customTypeData);
    DataSet<CustomType> ds2 = env.fromCollection(customTypeData);

    // should not work, incompatible number of join keys
    ds1.join(ds2).where("myInt", "myString").equalTo("myString");
  }
  @Test
  public void testRightOuter4() {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds1 =
        env.fromCollection(emptyTupleData, tupleTypeInfo);
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds2 =
        env.fromCollection(emptyTupleData, tupleTypeInfo);

    // should work
    ds1.rightOuterJoin(ds2).where(0).equalTo(new IntKeySelector()).with(new DummyJoin());
  }
Example #17
  @Test(expected = IllegalArgumentException.class)
  public void testJoinKeyFields6() {

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds1 =
        env.fromCollection(emptyTupleData, tupleTypeInfo);
    DataSet<CustomType> ds2 = env.fromCollection(customTypeData);

    // should not work, join key fields on custom type
    ds1.join(ds2).where(5).equalTo(0);
  }
  @Test(expected = InvalidProgramException.class)
  public void testRightOuter9() {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds1 =
        env.fromCollection(emptyTupleData, tupleTypeInfo);
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds2 =
        env.fromCollection(emptyTupleData, tupleTypeInfo);

    // key types do not match
    ds1.rightOuterJoin(ds2).where(0).equalTo(1).with(new DummyJoin());
  }
  @Test(expected = CompositeType.InvalidFieldReferenceException.class)
  public void testRightOuter8() {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds1 =
        env.fromCollection(emptyTupleData, tupleTypeInfo);
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds2 =
        env.fromCollection(emptyTupleData, tupleTypeInfo);

    // invalid key reference
    ds1.rightOuterJoin(ds2).where(1).equalTo("f5").with(new DummyJoin());
  }
  @Test(expected = IllegalArgumentException.class)
  public void testRightOuter7() {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds1 =
        env.fromCollection(emptyTupleData, tupleTypeInfo);
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds2 =
        env.fromCollection(emptyTupleData, tupleTypeInfo);

    // invalid key position
    ds1.rightOuterJoin(ds2).where(5).equalTo(0).with(new DummyJoin());
  }
  private void testRightOuterStrategies(JoinHint hint) {

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds1 =
        env.fromCollection(emptyTupleData, tupleTypeInfo);
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds2 =
        env.fromCollection(emptyTupleData, tupleTypeInfo);

    // should work
    ds1.rightOuterJoin(ds2, hint).where(0).equalTo(4).with(new DummyJoin());
  }
Example #22
  @Test(expected = IndexOutOfBoundsException.class)
  public void testJoinProjection14() {

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds1 =
        env.fromCollection(emptyTupleData, tupleTypeInfo);
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds2 =
        env.fromCollection(emptyTupleData, tupleTypeInfo);

    // should not work, index out of range
    ds1.join(ds2).where(0).equalTo(0).projectFirst(0).projectSecond(5).types(Integer.class);
  }
Example #23
  @Test(expected = IllegalArgumentException.class)
  public void testJoinKeyFields5() {

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds1 =
        env.fromCollection(emptyTupleData, tupleTypeInfo);
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds2 =
        env.fromCollection(emptyTupleData, tupleTypeInfo);

    // should not work, negative key field position
    ds1.join(ds2).where(-1).equalTo(-1);
  }
Example #24
  @Test(expected = IllegalArgumentException.class)
  public void testJoinProjection12() {

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds1 =
        env.fromCollection(emptyTupleData, tupleTypeInfo);
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds2 =
        env.fromCollection(emptyTupleData, tupleTypeInfo);

    // should not work, number of types and fields does not match
    ds1.join(ds2).where(0).equalTo(0).projectSecond(2).projectFirst(1).types(String.class);
  }
Example #25
  @Test(expected = InvalidProgramException.class)
  public void testJoinKeyFields3() {

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds1 =
        env.fromCollection(emptyTupleData, tupleTypeInfo);
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds2 =
        env.fromCollection(emptyTupleData, tupleTypeInfo);

    // should not work, incompatible number of join keys
    ds1.join(ds2).where(0, 1).equalTo(2);
  }
Example #26
  public static void main(String[] args) throws Exception {

    // Checking input parameters
    final ParameterTool params = ParameterTool.fromArgs(args);
    System.out.println(
        "Usage: KMeans --points <path> --centroids <path> --output <path> --iterations <n>");

    // set up execution environment
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.getConfig()
        .setGlobalJobParameters(params); // make parameters available in the web interface

    // get input data:
    // read the points and centroids from the provided paths or fall back to default data
    DataSet<Point> points = getPointDataSet(params, env);
    DataSet<Centroid> centroids = getCentroidDataSet(params, env);

    // set number of bulk iterations for KMeans algorithm
    IterativeDataSet<Centroid> loop = centroids.iterate(params.getInt("iterations", 10));

    DataSet<Centroid> newCentroids =
        points
            // compute closest centroid for each point
            .map(new SelectNearestCenter())
            .withBroadcastSet(loop, "centroids")
            // count and sum point coordinates for each centroid
            .map(new CountAppender())
            .groupBy(0)
            .reduce(new CentroidAccumulator())
            // compute new centroids from point counts and coordinate sums
            .map(new CentroidAverager());

    // feed new centroids back into next iteration
    DataSet<Centroid> finalCentroids = loop.closeWith(newCentroids);

    DataSet<Tuple2<Integer, Point>> clusteredPoints =
        points
            // assign points to final clusters
            .map(new SelectNearestCenter())
            .withBroadcastSet(finalCentroids, "centroids");

    // emit result
    if (params.has("output")) {
      clusteredPoints.writeAsCsv(params.get("output"), "\n", " ");

      // since file sinks are lazy, we trigger the execution explicitly
      env.execute("KMeans Example");
    } else {
      System.out.println("Printing result to stdout. Use --output to specify output path.");
      clusteredPoints.print();
    }
  }
  @Test
  public void testCustomPartitioningNotReused() {
    try {
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

      @SuppressWarnings("unchecked")
      DataSet<Tuple3<Long, Long, Long>> input =
          env.fromElements(new Tuple3<Long, Long, Long>(0L, 0L, 0L));

      input
          .partitionCustom(
              new Partitioner<Long>() {
                @Override
                public int partition(Long key, int numPartitions) {
                  return 0;
                }
              },
              0)
          .map(new IdentityMapper<Tuple3<Long, Long, Long>>())
          .withForwardedFields("0", "1", "2")
          .groupBy(0, 1)
          .reduceGroup(new IdentityGroupReducerCombinable<Tuple3<Long, Long, Long>>())
          .withForwardedFields("0", "1", "2")
          .groupBy(1)
          .reduceGroup(new IdentityGroupReducerCombinable<Tuple3<Long, Long, Long>>())
          .output(new DiscardingOutputFormat<Tuple3<Long, Long, Long>>());

      Plan p = env.createProgramPlan();
      OptimizedPlan op = compileNoStats(p);

      SinkPlanNode sink = op.getDataSinks().iterator().next();
      SingleInputPlanNode reducer2 = (SingleInputPlanNode) sink.getInput().getSource();
      SingleInputPlanNode combiner = (SingleInputPlanNode) reducer2.getInput().getSource();
      SingleInputPlanNode reducer1 = (SingleInputPlanNode) combiner.getInput().getSource();

      assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());

      // the custom partitioning on field 0 does not cover the grouping on field 1,
      // so the second reducer is re-partitioned by hash with a combining sort
      assertEquals(ShipStrategyType.PARTITION_HASH, reducer2.getInput().getShipStrategy());
      assertEquals(LocalStrategy.COMBININGSORT, reducer2.getInput().getLocalStrategy());

      assertEquals(ShipStrategyType.FORWARD, combiner.getInput().getShipStrategy());
      assertEquals(LocalStrategy.NONE, combiner.getInput().getLocalStrategy());

      assertEquals(ShipStrategyType.FORWARD, reducer1.getInput().getShipStrategy());
      assertEquals(LocalStrategy.COMBININGSORT, reducer1.getInput().getLocalStrategy());
    } catch (Exception e) {
      e.printStackTrace();
      fail(e.getMessage());
    }
  }
  @Test
  public void testIncompatibleHashAndCustomPartitioning() {
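    // the existing hash and custom partitionings are incompatible for the join,
    // so the first join input is re-partitioned by hash (the second may be hashed or forwarded)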
    try {
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

      DataSet<Tuple3<Long, Long, Long>> input =
          env.fromElements(new Tuple3<Long, Long, Long>(0L, 0L, 0L));

      DataSet<Tuple3<Long, Long, Long>> partitioned =
          input
              .partitionCustom(
                  new Partitioner<Long>() {
                    @Override
                    public int partition(Long key, int numPartitions) {
                      return 0;
                    }
                  },
                  0)
              .map(new IdentityMapper<Tuple3<Long, Long, Long>>())
              .withForwardedFields("0", "1", "2");

      DataSet<Tuple3<Long, Long, Long>> grouped =
          partitioned
              .distinct(0, 1)
              .groupBy(1)
              .sortGroup(0, Order.ASCENDING)
              .reduceGroup(new IdentityGroupReducer<Tuple3<Long, Long, Long>>())
              .withForwardedFields("0", "1");

      grouped
          .join(partitioned, JoinHint.REPARTITION_HASH_FIRST)
          .where(0)
          .equalTo(0)
          .with(new DummyFlatJoinFunction<Tuple3<Long, Long, Long>>())
          .print();

      Plan p = env.createProgramPlan();
      OptimizedPlan op = compileNoStats(p);

      SinkPlanNode sink = op.getDataSinks().iterator().next();
      DualInputPlanNode join = (DualInputPlanNode) sink.getInput().getSource();

      assertEquals(ShipStrategyType.PARTITION_HASH, join.getInput1().getShipStrategy());
      assertTrue(
          join.getInput2().getShipStrategy() == ShipStrategyType.PARTITION_HASH
              || join.getInput2().getShipStrategy() == ShipStrategyType.FORWARD);
    } catch (Exception e) {
      e.printStackTrace();
      fail(e.getMessage());
    }
  }
Example #29
  @Test
  public void testJoinKeyExpressions1() {

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<CustomType> ds1 = env.fromCollection(customTypeData);
    DataSet<CustomType> ds2 = env.fromCollection(customTypeData);

    // should work
    try {
      ds1.join(ds2).where("myInt").equalTo("myInt");
    } catch (Exception e) {
      Assert.fail();
    }
  }
  @Override
  protected void testProgram() throws Exception {
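    // unions an iteration's partial solution with itself inside a bulk iteration
    // and writes the result after two iterations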
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Record> initialInput =
        env.readFile(new PointInFormat(), this.dataPath).setParallelism(1);

    IterativeDataSet<Record> iteration = initialInput.iterate(2);

    DataSet<Record> result = iteration.union(iteration).map(new IdentityMapper());

    iteration.closeWith(result).write(new PointOutFormat(), this.resultPath);

    env.execute();
  }