コード例 #1
0
  /**
   * Creates the optimizer node that represents the given delta iteration.
   *
   * <p>The solution set key fields are read from the iteration, and a hash partitioning on those
   * fields is recorded in this node's partitioned properties. The cost weight is the iteration's
   * maximum number of iterations when that value is positive and {@code DEFAULT_COST_WEIGHT}
   * otherwise; in both cases it is capped at {@code OptimizerNode.MAX_DYNAMIC_PATH_COST_WEIGHT}.
   *
   * @param iteration The iteration operator that the node represents.
   * @throws CompilerException If the iteration defines no key fields for the solution set.
   */
  public WorksetIterationNode(DeltaIteration iteration) {
    super(iteration);

    // A delta iteration without solution set keys cannot be planned.
    final int[] keyFields = iteration.getSolutionSetKeyFields();
    final boolean hasKeyFields = keyFields != null && keyFields.length > 0;
    if (!hasKeyFields) {
      throw new CompilerException(
          "Invalid WorksetIteration: No key fields defined for the solution set.");
    }
    this.solutionSetKeyFields = new FieldList(keyFields);

    // Record a hash partitioning on the solution set keys.
    final GlobalProperties hashPartitioned = new GlobalProperties();
    hashPartitioned.setHashPartitioned(this.solutionSetKeyFields);
    this.partitionedProperties = hashPartitioned;

    // Weight by the iteration bound if one is set, never exceeding the global maximum.
    final int maxIterations = iteration.getMaximumNumberOfIterations();
    final int uncappedWeight = maxIterations > 0 ? maxIterations : DEFAULT_COST_WEIGHT;
    this.costWeight = Math.min(uncappedWeight, OptimizerNode.MAX_DYNAMIC_PATH_COST_WEIGHT);

    this.possibleProperties.add(new WorksetOpDescriptor(this.solutionSetKeyFields));
  }
コード例 #2
0
  /**
   * Assembles the plan for the workset-based connected components job.
   *
   * <p>The initial vertices are mapped through {@code DuplicateLongMap} and used as both the
   * initial solution set and the initial workset of a delta iteration keyed on field 0. In each
   * step the workset is joined with the edges, and the join result is co-grouped with the
   * solution set; the co-group output serves as both the next workset and the solution set delta.
   * The iteration result is written as two {@code LongValue} fields per record.
   *
   * @param args Optional positional parameters, each falling back to a default when absent:
   *     {@code [0]} default parallelism (default 1), {@code [1]} vertices input (default ""),
   *     {@code [2]} edges input (default ""), {@code [3]} output (default ""), {@code [4]}
   *     maximum number of iterations (default 1).
   * @return The plan whose sink is the iteration result.
   */
  @SuppressWarnings("unchecked")
  @Override
  public Plan getPlan(String... args) {
    // Start from the defaults and override with whatever parameters were supplied.
    int parallelism = 1;
    String verticesLocation = "";
    String edgesLocation = "";
    String outputLocation = "";
    int iterationLimit = 1;

    if (args.length > 0) {
      parallelism = Integer.parseInt(args[0]);
    }
    if (args.length > 1) {
      verticesLocation = args[1];
    }
    if (args.length > 2) {
      edgesLocation = args[2];
    }
    if (args.length > 3) {
      outputLocation = args[3];
    }
    if (args.length > 4) {
      iterationLimit = Integer.parseInt(args[4]);
    }

    // Source of the initial vertices: one space-delimited long per record.
    CsvInputFormat vertexFormat = new CsvInputFormat(' ', LongValue.class);
    FileDataSource vertexSource = new FileDataSource(vertexFormat, verticesLocation, "Vertices");

    MapOperator idAssignedVertices =
        MapOperator.builder(DuplicateLongMap.class)
            .input(vertexSource)
            .name("Assign Vertex Ids")
            .build();

    // The mapped vertices seed both the solution set and the workset.
    DeltaIteration components = new DeltaIteration(0, "Connected Components Iteration");
    components.setInitialSolutionSet(idAssignedVertices);
    components.setInitialWorkset(idAssignedVertices);
    components.setMaximumNumberOfIterations(iterationLimit);

    // Source of the edges: two space-delimited longs per record.
    CsvInputFormat edgeFormat = new CsvInputFormat(' ', LongValue.class, LongValue.class);
    FileDataSource edgeSource = new FileDataSource(edgeFormat, edgesLocation, "Edges");

    // Join the current workset with the edges on field 0 of both inputs.
    JoinOperator neighborJoin =
        JoinOperator.builder(new NeighborWithComponentIDJoin(), LongValue.class, 0, 0)
            .input1(components.getWorkset())
            .input2(edgeSource)
            .name("Join Candidate Id With Neighbor")
            .build();

    // Co-group the join result with the solution set on field 0 of both inputs.
    CoGroupOperator minIdUpdate =
        CoGroupOperator.builder(new MinIdAndUpdate(), LongValue.class, 0, 0)
            .input1(neighborJoin)
            .input2(components.getSolutionSet())
            .name("Min Id and Update")
            .build();

    // The co-group output closes the iteration as next workset and solution set delta.
    components.setNextWorkset(minIdUpdate);
    components.setSolutionSetDelta(minIdUpdate);

    // Sink that writes the iteration result as two space-separated long fields per line.
    FileDataSink sink =
        new FileDataSink(new CsvOutputFormat(), outputLocation, components, "Result");
    CsvOutputFormat.configureRecordFormat(sink)
        .recordDelimiter('\n')
        .fieldDelimiter(' ')
        .field(LongValue.class, 0)
        .field(LongValue.class, 1);

    // Wrap the sink into the plan and apply the requested default parallelism.
    Plan connectedComponentsPlan = new Plan(sink, "Workset Connected Components");
    connectedComponentsPlan.setDefaultParallelism(parallelism);
    return connectedComponentsPlan;
  }