/**
 * Builds the optimizer node that stands for the given delta (workset) iteration.
 *
 * <p>The constructor records the solution set key fields, prepares global properties
 * that are hash partitioned on those keys, derives the cost weight of the node, and
 * registers a {@code WorksetOpDescriptor} for the keys as a possible property.
 *
 * <p>The cost weight is the iteration's maximum number of iterations when that value
 * is positive, otherwise {@code DEFAULT_COST_WEIGHT}; in both cases it is capped at
 * {@code OptimizerNode.MAX_DYNAMIC_PATH_COST_WEIGHT}.
 *
 * @param iteration The iteration operator that the node represents.
 * @throws CompilerException if the iteration defines no solution set key fields.
 */
public WorksetIterationNode(DeltaIteration iteration) {
	super(iteration);

	// A delta iteration is only valid with at least one solution set key.
	final int[] keyPositions = iteration.getSolutionSetKeyFields();
	final boolean hasKeys = keyPositions != null && keyPositions.length > 0;
	if (!hasKeys) {
		throw new CompilerException(
			"Invalid WorksetIteration: No key fields defined for the solution set.");
	}
	this.solutionSetKeyFields = new FieldList(keyPositions);

	// Global properties describing a hash partitioning on the solution set keys.
	this.partitionedProperties = new GlobalProperties();
	this.partitionedProperties.setHashPartitioned(this.solutionSetKeyFields);

	// Use the declared iteration bound as weight if there is one, else the default,
	// and never exceed the maximum weight allowed on a dynamic path.
	final int declaredIterations = iteration.getMaximumNumberOfIterations();
	final int uncappedWeight;
	if (declaredIterations > 0) {
		uncappedWeight = declaredIterations;
	} else {
		uncappedWeight = DEFAULT_COST_WEIGHT;
	}
	this.costWeight = Math.min(uncappedWeight, OptimizerNode.MAX_DYNAMIC_PATH_COST_WEIGHT);

	this.possibleProperties.add(new WorksetOpDescriptor(this.solutionSetKeyFields));
}
/**
 * Assembles the plan for the workset-based connected components job.
 *
 * <p>Positional arguments, all optional:
 * <ol>
 *   <li>{@code args[0]} - default parallelism of the plan (default {@code 1})</li>
 *   <li>{@code args[1]} - path of the vertices input (default {@code ""})</li>
 *   <li>{@code args[2]} - path of the edges input (default {@code ""})</li>
 *   <li>{@code args[3]} - path of the result output (default {@code ""})</li>
 *   <li>{@code args[4]} - maximum number of iterations (default {@code 1})</li>
 * </ol>
 *
 * @param args The job parameters as described above.
 * @return The plan whose sink writes the result of the delta iteration.
 * @throws NumberFormatException if {@code args[0]} or {@code args[4]} is present but
 *         is not a parsable integer.
 */
@SuppressWarnings("unchecked")
@Override
public Plan getPlan(String... args) {
	// Read the positional job parameters, falling back to defaults when absent.
	int parallelism = 1;
	if (args.length > 0) {
		parallelism = Integer.parseInt(args[0]);
	}
	String vertexPath = "";
	if (args.length > 1) {
		vertexPath = args[1];
	}
	String edgePath = "";
	if (args.length > 2) {
		edgePath = args[2];
	}
	String resultPath = "";
	if (args.length > 3) {
		resultPath = args[3];
	}
	int iterationLimit = 1;
	if (args.length > 4) {
		iterationLimit = Integer.parseInt(args[4]);
	}

	// Source of the initial vertices: space-delimited records with one LongValue field.
	CsvInputFormat vertexFormat = new CsvInputFormat(' ', LongValue.class);
	FileDataSource vertexSource = new FileDataSource(vertexFormat, vertexPath, "Vertices");

	// Map step applied to every vertex record (see DuplicateLongMap).
	MapOperator idAssignedVertices = MapOperator.builder(DuplicateLongMap.class)
		.input(vertexSource)
		.name("Assign Vertex Ids")
		.build();

	// The delta iteration starts with the mapped vertices as both solution set and workset.
	DeltaIteration deltaIteration = new DeltaIteration(0, "Connected Components Iteration");
	deltaIteration.setInitialSolutionSet(idAssignedVertices);
	deltaIteration.setInitialWorkset(idAssignedVertices);
	deltaIteration.setMaximumNumberOfIterations(iterationLimit);

	// Source of the edges: space-delimited records with two LongValue fields.
	CsvInputFormat edgeFormat = new CsvInputFormat(' ', LongValue.class, LongValue.class);
	FileDataSource edgeSource = new FileDataSource(edgeFormat, edgePath, "Edges");

	// Join the current workset with the edges.
	JoinOperator candidatesForNeighbors = JoinOperator
		.builder(new NeighborWithComponentIDJoin(), LongValue.class, 0, 0)
		.input1(deltaIteration.getWorkset())
		.input2(edgeSource)
		.name("Join Candidate Id With Neighbor")
		.build();

	// Co-group the join result with the solution set.
	CoGroupOperator minIdUpdate = CoGroupOperator
		.builder(new MinIdAndUpdate(), LongValue.class, 0, 0)
		.input1(candidatesForNeighbors)
		.input2(deltaIteration.getSolutionSet())
		.name("Min Id and Update")
		.build();

	// The co-group output is used both as the next workset and as the solution set delta.
	deltaIteration.setNextWorkset(minIdUpdate);
	deltaIteration.setSolutionSetDelta(minIdUpdate);

	// Sink writing the iteration result as two space-separated LongValue fields per line.
	FileDataSink resultSink =
		new FileDataSink(new CsvOutputFormat(), resultPath, deltaIteration, "Result");
	CsvOutputFormat.configureRecordFormat(resultSink)
		.recordDelimiter('\n')
		.fieldDelimiter(' ')
		.field(LongValue.class, 0)
		.field(LongValue.class, 1);

	// Wrap the sink into the plan and apply the requested parallelism.
	Plan componentsPlan = new Plan(resultSink, "Workset Connected Components");
	componentsPlan.setDefaultParallelism(parallelism);
	return componentsPlan;
}