/**
   * Assembles the "Workset Connected Components" plan around a delta iteration.
   *
   * <p>The vertices are read as single-column, space-separated {@code LongValue} records and passed
   * through {@code DuplicateLongMap}; the mapped records serve as both the initial solution set and
   * the initial workset. In each step the workset is joined with the edges (two space-separated
   * {@code LongValue} columns) on field 0 of both inputs, and the join result is co-grouped with
   * the solution set on field 0. The co-group output is used as the next workset and as the
   * solution set delta.
   *
   * @param numSubTasks default parallelism set on the returned plan
   * @param verticesInput path of the vertex file
   * @param edgeInput path of the edge file
   * @param output path the result is written to
   * @param maxIterations maximum number of iterations of the delta iteration
   * @return the plan, with the iteration result written as two space-separated fields per line
   */
  @SuppressWarnings("unchecked")
  public static Plan getPlan(
      int numSubTasks, String verticesInput, String edgeInput, String output, int maxIterations) {

    // vertex source: one LongValue per line
    FileDataSource vertexSource =
        new FileDataSource(new CsvInputFormat(' ', LongValue.class), verticesInput, "Vertices");

    // run every vertex through DuplicateLongMap before it enters the iteration
    MapOperator idAssigner =
        MapOperator.builder(DuplicateLongMap.class)
            .input(vertexSource)
            .name("Assign Vertex Ids")
            .build();

    // the mapped vertices seed both the solution set and the workset
    DeltaIteration deltaIteration = new DeltaIteration(0, "Connected Components Iteration");
    deltaIteration.setInitialSolutionSet(idAssigner);
    deltaIteration.setInitialWorkset(idAssigner);
    deltaIteration.setMaximumNumberOfIterations(maxIterations);

    // edge source: two LongValue columns per line
    FileDataSource edgeSource =
        new FileDataSource(
            new CsvInputFormat(' ', LongValue.class, LongValue.class), edgeInput, "Edges");

    // join the current workset with the edges on field 0 of both inputs
    JoinOperator neighborJoin =
        JoinOperator.builder(new NeighborWithComponentIDJoin(), LongValue.class, 0, 0)
            .input1(deltaIteration.getWorkset())
            .input2(edgeSource)
            .name("Join Candidate Id With Neighbor")
            .build();

    // co-group the join result with the solution set on field 0
    CoGroupOperator minIdUpdate =
        CoGroupOperator.builder(new MinIdAndUpdate(), LongValue.class, 0, 0)
            .input1(neighborJoin)
            .input2(deltaIteration.getSolutionSet())
            .name("Min Id and Update")
            .build();

    // the same operator closes the loop for both the workset and the solution set delta
    deltaIteration.setNextWorkset(minIdUpdate);
    deltaIteration.setSolutionSetDelta(minIdUpdate);

    // sink: write fields 0 and 1 of the iteration result, space-separated, one record per line
    FileDataSink sink = new FileDataSink(new CsvOutputFormat(), output, deltaIteration, "Result");
    CsvOutputFormat.configureRecordFormat(sink)
        .recordDelimiter('\n')
        .fieldDelimiter(' ')
        .field(LongValue.class, 0)
        .field(LongValue.class, 1);

    // wrap the sink into the plan and apply the requested default parallelism
    Plan connectedComponentsPlan = new Plan(sink, "Workset Connected Components");
    connectedComponentsPlan.setDefaultParallelism(numSubTasks);
    return connectedComponentsPlan;
  }
コード例 #2
0
  /**
   * Statistics that push towards a repartition merge join. If the join blows the data volume up
   * significantly, re-exploiting the sorted order is cheaper.
   *
   * <p>Expected outcome as encoded in the flags below: partitioned shipping with a merge join is
   * acceptable; broadcast shipping and both hash join variants are not.
   */
  @Test
  public void testQueryWithStatsForRepartitionMerge() {
    TPCHQuery3 query = new TPCHQuery3();
    Plan p = query.getPlan(DEFAULT_PARALLELISM_STRING, IN_FILE, IN_FILE, OUT_FILE);
    p.setExecutionConfig(defaultExecutionConfig);
    // set compiler hints
    OperatorResolver cr = getContractResolver(p);
    JoinOperator match = cr.getNode("JoinLiO");
    match.getCompilerHints().setFilterFactor(100f);

    // 100 TiB for each input; upper-case 'L' suffix so the literal is not misread as "1001"
    final long inputSize = 100L * 1024 * 1024 * 1024 * 1024;

    // Pass the plan configured above explicitly, so the execution config and the join hint
    // set on it are part of what gets compiled and checked.
    testQueryGeneric(p, inputSize, inputSize, 0.05f, 100f, false, true, false, false, true);
  }
コード例 #3
0
  /**
   * Applies the given statistics to the plan, compiles it, and verifies that the strategies the
   * optimizer picked are among those the caller allows.
   *
   * <p>Any exception raised while preparing, compiling or inspecting the plan is printed and
   * turned into a test failure.
   *
   * @param p the plan to annotate and compile
   * @param orderSize size passed to the statistics of the {@code ORDERS} source
   * @param lineitemSize size passed to the statistics of the {@code LINEITEM} source
   * @param orderSelectivity filter factor hint set on the mapper
   * @param joinSelectivity filter factor hint set on the join
   * @param broadcastOkay whether a broadcast ship strategy is an acceptable result
   * @param partitionedOkay whether a repartition ship strategy is an acceptable result
   * @param hashJoinFirstOkay whether a hash join building on orders is acceptable
   * @param hashJoinSecondOkay whether a hash join building on lineitem is acceptable
   * @param mergeJoinOkay whether a merge join is acceptable
   */
  private void testQueryGeneric(
      Plan p,
      long orderSize,
      long lineitemSize,
      float orderSelectivity,
      float joinSelectivity,
      boolean broadcastOkay,
      boolean partitionedOkay,
      boolean hashJoinFirstOkay,
      boolean hashJoinSecondOkay,
      boolean mergeJoinOkay) {
    try {
      // look up the operators of the program and attach statistics and hints to them
      OperatorResolver contracts = getContractResolver(p);
      FileDataSource orders = contracts.getNode(ORDERS);
      FileDataSource lineItems = contracts.getNode(LINEITEM);
      MapOperator orderFilter = contracts.getNode(MAPPER_NAME);
      JoinOperator orderLineItemJoin = contracts.getNode(JOIN_NAME);
      setSourceStatistics(orders, orderSize, 100f);
      setSourceStatistics(lineItems, lineitemSize, 140f);
      orderFilter.getCompilerHints().setAvgOutputRecordSize(16f);
      orderFilter.getCompilerHints().setFilterFactor(orderSelectivity);
      orderLineItemJoin.getCompilerHints().setFilterFactor(joinSelectivity);

      // run the optimizer
      final OptimizedPlan optimized = compileWithStats(p);
      final OptimizerPlanNodeResolver planNodes = getOptimizerPlanNodeResolver(optimized);

      // pick the relevant nodes out of the optimized plan
      final SinkPlanNode sink = planNodes.getNode("Output");
      final SingleInputPlanNode reducer = planNodes.getNode("AggLio");
      // the reducer's predecessor counts as the combiner only if it is a single-input node
      final SingleInputPlanNode combiner;
      if (reducer.getPredecessor() instanceof SingleInputPlanNode) {
        combiner = (SingleInputPlanNode) reducer.getPredecessor();
      } else {
        combiner = null;
      }
      final DualInputPlanNode join = planNodes.getNode("JoinLiO");
      final SingleInputPlanNode filteringMapper = planNodes.getNode("FilterO");

      checkStandardStrategies(filteringMapper, join, combiner, reducer, sink);

      // step 1: determine the ship strategy variant and check that it is allowed in this setting
      final boolean viaBroadcast = checkBroadcastShipStrategies(join, reducer, combiner);
      if (viaBroadcast) {
        Assert.assertTrue("Broadcast join incorrectly chosen.", broadcastOkay);
      } else if (checkRepartitionShipStrategies(join, reducer, combiner)) {
        Assert.assertTrue("Partitioned join incorrectly chosen.", partitionedOkay);
      } else {
        // fail() always throws, so step 2 only runs for a recognized ship strategy
        Assert.fail("Plan has neither correct BC join or partitioned join configuration.");
      }

      // step 2: determine the local join strategy; the merge join check depends on the variant
      if (checkHashJoinStrategies(join, reducer, true)) {
        Assert.assertTrue("Hash join (build orders) incorrectly chosen", hashJoinFirstOkay);
      } else if (checkHashJoinStrategies(join, reducer, false)) {
        Assert.assertTrue("Hash join (build lineitem) incorrectly chosen", hashJoinSecondOkay);
      } else if (viaBroadcast
          ? checkBroadcastMergeJoin(join, reducer)
          : checkRepartitionMergeJoin(join, reducer)) {
        Assert.assertTrue("Merge join incorrectly chosen", mergeJoinOkay);
      } else {
        Assert.fail("Plan has no correct hash join or merge join strategies.");
      }
    } catch (Exception e) {
      e.printStackTrace();
      Assert.fail(e.getMessage());
    }
  }