/** * Assembles the Plan of the Pairwise Shortest Paths example Pact program. The program computes * one iteration of the Pairwise Shortest Paths algorithm. * * <p>For the first iteration, two input formats can be chosen: 1) RDF triples with foaf:knows * predicates 2) Text-serialized paths (see PathInFormat and PathOutFormat) * * <p>To choose 1) set the forth parameter to "true". If set to "false" 2) will be used. */ @Override public Plan getPlan(String... args) { // parse job parameters int numSubTasks = (args.length > 0 ? Integer.parseInt(args[0]) : 1); String paths = (args.length > 1 ? args[1] : ""); String output = (args.length > 2 ? args[2] : ""); boolean rdfInput = (args.length > 3 ? Boolean.parseBoolean(args[3]) : false); FileDataSource pathsInput; if (rdfInput) { pathsInput = new FileDataSource(new RDFTripleInFormat(), paths, "RDF Triples"); } else { pathsInput = new FileDataSource(new PathInFormat(), paths, "Paths"); } pathsInput.setDegreeOfParallelism(numSubTasks); JoinOperator concatPaths = JoinOperator.builder(new ConcatPaths(), StringValue.class, 0, 1) .name("Concat Paths") .build(); concatPaths.setDegreeOfParallelism(numSubTasks); CoGroupOperator findShortestPaths = CoGroupOperator.builder(new FindShortestPath(), StringValue.class, 0, 0) .keyField(StringValue.class, 1, 1) .name("Find Shortest Paths") .build(); findShortestPaths.setDegreeOfParallelism(numSubTasks); FileDataSink result = new FileDataSink(new PathOutFormat(), output, "New Paths"); result.setDegreeOfParallelism(numSubTasks); result.setInput(findShortestPaths); findShortestPaths.setFirstInput(pathsInput); findShortestPaths.setSecondInput(concatPaths); concatPaths.setFirstInput(pathsInput); concatPaths.setSecondInput(pathsInput); return new Plan(result, "Pairwise Shortest Paths"); }
/**
 * Assembles the plan of the "Workset Connected Components" program, built around a
 * {@code DeltaIteration} whose initial solution set and initial workset are both the
 * mapped vertex input.
 *
 * @param args positional job parameters, all optional:
 *             {@code [0]} default parallelism of the plan (default 1),
 *             {@code [1]} location of the vertices file (default ""),
 *             {@code [2]} location of the edges file (default ""),
 *             {@code [3]} location of the output (default ""),
 *             {@code [4]} maximum number of iterations (default 1)
 * @return the plan whose single sink writes the result of the iteration
 * @throws NumberFormatException if {@code args[0]} or {@code args[4]} is present but is not
 *                               a valid integer
 */
@SuppressWarnings("unchecked")
@Override
public Plan getPlan(String... args) {
    // Positional job parameters; every one of them falls back to a default when absent.
    final int parallelism = args.length > 0 ? Integer.parseInt(args[0]) : 1;
    final String verticesPath = args.length > 1 ? args[1] : "";
    final String edgesPath = args.length > 2 ? args[2] : "";
    final String outputPath = args.length > 3 ? args[3] : "";
    final int iterationLimit = args.length > 4 ? Integer.parseInt(args[4]) : 1;

    // Sources: vertices are read as a single long per record, edges as a pair of longs,
    // both with ' ' as the field delimiter.
    FileDataSource vertexSource =
            new FileDataSource(new CsvInputFormat(' ', LongValue.class), verticesPath, "Vertices");
    FileDataSource edgeSource =
            new FileDataSource(
                    new CsvInputFormat(' ', LongValue.class, LongValue.class), edgesPath, "Edges");

    // Map over the raw vertices; its output seeds the iteration.
    MapOperator idAssignment = MapOperator.builder(DuplicateLongMap.class)
            .input(vertexSource)
            .name("Assign Vertex Ids")
            .build();

    // The delta iteration starts with the same data as solution set and as workset.
    DeltaIteration deltaIteration = new DeltaIteration(0, "Connected Components Iteration");
    deltaIteration.setInitialSolutionSet(idAssignment);
    deltaIteration.setInitialWorkset(idAssignment);
    deltaIteration.setMaximumNumberOfIterations(iterationLimit);

    // Step 1: join the current workset with the edges.
    JoinOperator neighborJoin =
            JoinOperator.builder(new NeighborWithComponentIDJoin(), LongValue.class, 0, 0)
                    .input1(deltaIteration.getWorkset())
                    .input2(edgeSource)
                    .name("Join Candidate Id With Neighbor")
                    .build();

    // Step 2: co-group the join output with the solution set.
    CoGroupOperator minIdUpdate =
            CoGroupOperator.builder(new MinIdAndUpdate(), LongValue.class, 0, 0)
                    .input1(neighborJoin)
                    .input2(deltaIteration.getSolutionSet())
                    .name("Min Id and Update")
                    .build();

    // The co-group output serves as both the next workset and the solution set delta.
    deltaIteration.setNextWorkset(minIdUpdate);
    deltaIteration.setSolutionSetDelta(minIdUpdate);

    // Sink for the iteration result: one record per line, two space-separated long fields.
    FileDataSink sink = new FileDataSink(new CsvOutputFormat(), outputPath, deltaIteration, "Result");
    CsvOutputFormat.configureRecordFormat(sink)
            .recordDelimiter('\n')
            .fieldDelimiter(' ')
            .field(LongValue.class, 0)
            .field(LongValue.class, 1);

    // Parallelism is set once on the plan rather than on each operator.
    Plan connectedComponentsPlan = new Plan(sink, "Workset Connected Components");
    connectedComponentsPlan.setDefaultParallelism(parallelism);
    return connectedComponentsPlan;
}