Ejemplo n.º 1
0
  /**
   * Builds the Plan of the Pairwise Shortest Paths example Pact program. The resulting program
   * computes one iteration of the Pairwise Shortest Paths algorithm.
   *
   * <p>For the first iteration the input can be read in one of two formats: 1) RDF triples with
   * foaf:knows predicates, or 2) text-serialized paths (see PathInFormat and PathOutFormat).
   *
   * <p>Positional arguments, all optional:
   *
   * <ol>
   *   <li>degree of parallelism applied to every operator (default {@code 1})
   *   <li>input path (default: empty string)
   *   <li>output path (default: empty string)
   *   <li>{@code "true"} to select format 1); any other value, or no value, selects format 2)
   * </ol>
   *
   * @param args the positional job parameters described above
   * @return the assembled plan, named "Pairwise Shortest Paths"
   * @throws NumberFormatException if the first argument is present but is not a valid integer
   */
  @Override
  public Plan getPlan(String... args) {

    // Positional job parameters; each one that is missing keeps its default.
    int parallelism = 1;
    if (args.length > 0) {
      parallelism = Integer.parseInt(args[0]);
    }
    String inputPath = "";
    if (args.length > 1) {
      inputPath = args[1];
    }
    String outputPath = "";
    if (args.length > 2) {
      outputPath = args[2];
    }
    boolean readRdfTriples = args.length > 3 && Boolean.parseBoolean(args[3]);

    // The fourth parameter only decides which input format (and source name) is used.
    FileDataSource source =
        readRdfTriples
            ? new FileDataSource(new RDFTripleInFormat(), inputPath, "RDF Triples")
            : new FileDataSource(new PathInFormat(), inputPath, "Paths");

    // NOTE(review): the trailing 0 and 1 are presumably the key field positions of the first and
    // second join input -- confirm against the JoinOperator builder.
    JoinOperator pathJoin =
        JoinOperator.builder(new ConcatPaths(), StringValue.class, 0, 1)
            .name("Concat Paths")
            .build();

    // Co-groups on two StringValue fields: positions (0, 0) plus the additional pair (1, 1).
    CoGroupOperator shortestPathFinder =
        CoGroupOperator.builder(new FindShortestPath(), StringValue.class, 0, 0)
            .keyField(StringValue.class, 1, 1)
            .name("Find Shortest Paths")
            .build();

    FileDataSink sink = new FileDataSink(new PathOutFormat(), outputPath, "New Paths");

    // Every node of the plan runs with the same degree of parallelism.
    source.setDegreeOfParallelism(parallelism);
    pathJoin.setDegreeOfParallelism(parallelism);
    shortestPathFinder.setDegreeOfParallelism(parallelism);
    sink.setDegreeOfParallelism(parallelism);

    // Wire the data flow from source to sink. The join reads the source on both of its inputs;
    // the co-group receives the source as first input and the join result as second input.
    pathJoin.setFirstInput(source);
    pathJoin.setSecondInput(source);
    shortestPathFinder.setFirstInput(source);
    shortestPathFinder.setSecondInput(pathJoin);
    sink.setInput(shortestPathFinder);

    return new Plan(sink, "Pairwise Shortest Paths");
  }
  /**
   * Builds the plan named "Workset Connected Components", which runs a delta iteration over a
   * vertex input and an edge input and writes the iteration result as space-separated pairs.
   *
   * <p>Positional arguments, all optional:
   *
   * <ol>
   *   <li>default parallelism of the plan (default {@code 1})
   *   <li>path of the vertices input (default: empty string)
   *   <li>path of the edges input (default: empty string)
   *   <li>output path (default: empty string)
   *   <li>maximum number of iterations (default {@code 1})
   * </ol>
   *
   * @param args the positional job parameters described above
   * @return the assembled plan
   * @throws NumberFormatException if the first or fifth argument is present but is not a valid
   *     integer
   */
  @SuppressWarnings("unchecked")
  @Override
  public Plan getPlan(String... args) {
    // Positional job parameters; each one that is missing keeps its default.
    final int parallelism;
    if (args.length > 0) {
      parallelism = Integer.parseInt(args[0]);
    } else {
      parallelism = 1;
    }

    String verticesPath = "";
    if (args.length > 1) {
      verticesPath = args[1];
    }
    String edgesPath = "";
    if (args.length > 2) {
      edgesPath = args[2];
    }
    String outputPath = "";
    if (args.length > 3) {
      outputPath = args[3];
    }

    final int iterationLimit;
    if (args.length > 4) {
      iterationLimit = Integer.parseInt(args[4]);
    } else {
      iterationLimit = 1;
    }

    // Source of the initial vertices: one LongValue per record, space-delimited.
    CsvInputFormat vertexFormat = new CsvInputFormat(' ', LongValue.class);
    FileDataSource vertexSource = new FileDataSource(vertexFormat, verticesPath, "Vertices");

    MapOperator idAssigner =
        MapOperator.builder(DuplicateLongMap.class)
            .input(vertexSource)
            .name("Assign Vertex Ids")
            .build();

    // The mapped vertices seed both the solution set and the workset of the delta iteration.
    // NOTE(review): the leading 0 is presumably the key field position of the solution set --
    // confirm against the DeltaIteration constructor.
    DeltaIteration deltaIteration = new DeltaIteration(0, "Connected Components Iteration");
    deltaIteration.setInitialSolutionSet(idAssigner);
    deltaIteration.setInitialWorkset(idAssigner);
    deltaIteration.setMaximumNumberOfIterations(iterationLimit);

    // Source of the edges: two LongValue fields per record, space-delimited.
    CsvInputFormat edgeFormat = new CsvInputFormat(' ', LongValue.class, LongValue.class);
    FileDataSource edgeSource = new FileDataSource(edgeFormat, edgesPath, "Edges");

    // Join the current workset with the edges.
    JoinOperator neighborJoin =
        JoinOperator.builder(new NeighborWithComponentIDJoin(), LongValue.class, 0, 0)
            .input1(deltaIteration.getWorkset())
            .input2(edgeSource)
            .name("Join Candidate Id With Neighbor")
            .build();

    // Co-group the join output with the current solution set.
    CoGroupOperator minIdUpdater =
        CoGroupOperator.builder(new MinIdAndUpdate(), LongValue.class, 0, 0)
            .input1(neighborJoin)
            .input2(deltaIteration.getSolutionSet())
            .name("Min Id and Update")
            .build();

    // The same operator output closes the loop as the next workset and the solution set delta.
    deltaIteration.setNextWorkset(minIdUpdater);
    deltaIteration.setSolutionSetDelta(minIdUpdater);

    // Sink writing the iteration result: two LongValue fields per line, separated by a space.
    FileDataSink sink = new FileDataSink(new CsvOutputFormat(), outputPath, deltaIteration, "Result");
    CsvOutputFormat.configureRecordFormat(sink)
        .recordDelimiter('\n')
        .fieldDelimiter(' ')
        .field(LongValue.class, 0)
        .field(LongValue.class, 1);

    // Assemble the plan and apply the requested default parallelism.
    Plan connectedComponentsPlan = new Plan(sink, "Workset Connected Components");
    connectedComponentsPlan.setDefaultParallelism(parallelism);
    return connectedComponentsPlan;
  }