public static void run( SIRStream str, JInterfaceDeclaration[] interfaces, SIRInterfaceTable[] interfaceTables, SIRStructure[] structs, SIRHelper[] helpers, SIRGlobal global) { System.out.println("Entry to SMP Backend..."); checkArguments(); setScheduler(); if (KjcOptions.smp > 16) { setupLargeConfig(); } // create cores in desired amount and order int[] cores = new int[KjcOptions.smp]; for (int x = 0; x < KjcOptions.smp; x++) cores[x] = coreOrder[x]; chip = new SMPMachine(cores); // create a new structs.h file for typedefs etc. structs_h = new Structs_h(structs); // The usual optimizations and transformation to slice graph CommonPasses commonPasses = new CommonPasses(); // perform standard optimizations, use the number of cores the user wants to target commonPasses.run(str, interfaces, interfaceTables, structs, helpers, global, chip.size()); // perform some standard cleanup on the slice graph. commonPasses.simplifySlices(); // dump slice graph to dot file commonPasses.getSlicer().dumpGraph("traces.dot", null); // partition the slice graph based on the scheduling policy SpaceTimeScheduleAndSlicer graphSchedule = new SpaceTimeScheduleAndSlicer(commonPasses.getSlicer()); scheduler.setGraphSchedule(graphSchedule); scheduler.run(chip.size()); FilterInfo.reset(); // generate schedules for initialization, primepump and steady-state scheduleSlices(graphSchedule); // generate layout for filters scheduler.runLayout(); // dump final slice graph to dot file graphSchedule.getSlicer().dumpGraph("after_slice_partition.dot", scheduler); graphSchedule.getSlicer().dumpGraph("slice_graph.dot", scheduler, false); // if load balancing, find candidiate fission groups to load balance if (KjcOptions.loadbalance) { LoadBalancer.findCandidates(); LoadBalancer.instrumentMainMethods(); } // create all buffers and set the rotation lengths RotatingBuffer.createBuffers(graphSchedule); // now convert to Kopi code plus communication commands backEndBits = new SMPBackEndFactory(chip, scheduler); backEndBits.getBackEndMain().run(graphSchedule, backEndBits); // generate code for file writer CoreCodeStore.generatePrintOutputCode(); if (KjcOptions.numbers > 0) chip.getNthComputeNode(0).getComputeCode().generateNumbersCode(); // emit c code for all cores EmitSMPCode.doit(backEndBits); // dump structs.h file structs_h.writeToFile(); // display final assignment of filters to cores System.out.println("Final filter assignments:"); System.out.println("========================================"); for (int x = 0; x < KjcOptions.smp; x++) { Core core = chip.getNthComputeNode(x); Set<FilterSliceNode> filters = core.getComputeCode().getFilters(); long totalWork = 0; System.out.println("Core " + core.getCoreID() + ": "); for (FilterSliceNode filter : filters) { long work = SliceWorkEstimate.getWork(filter.getParent()); System.out.format("%16d | " + filter + "\n", work); totalWork += work; } System.out.format("%16d | Total\n", totalWork); } // calculate computation to communication ratio if (KjcOptions.sharedbufs) { LinkedList<Slice> slices = DataFlowOrder.getTraversal(graphSchedule.getSlicer().getTopSlices()); HashSet<Slice> compProcessed = new HashSet<Slice>(); HashSet<Slice> commProcessed = new HashSet<Slice>(); long comp = 0; long comm = 0; for (Slice slice : slices) { if (compProcessed.contains(slice)) continue; comp += SliceWorkEstimate.getWork(slice); compProcessed.add(slice); } /* for(Slice slice : slices) { if(commProcessed.contains(slice)) continue; FilterInfo info = FilterInfo.getFilterInfo(slice.getFirstFilter()); int totalItemsReceived = info.totalItemsReceived(SchedulingPhase.STEADY); if(totalItemsReceived == 0) continue; InputSliceNode input = slice.getHead(); Set<InterSliceEdge> sources = input.getSourceSet(SchedulingPhase.STEADY); int numInputRots = totalItemsReceived / input.totalWeights(SchedulingPhase.STEADY); if(!FissionGroupStore.isFizzed(slice)) { for(InterSliceEdge source : sources) { Slice srcSlice = source.getSrc().getParent(); // if(srcSlice.getFirstFilter().isFileInput()) // continue; if(FissionGroupStore.isFizzed(srcSlice)) { // Filter is not fizzed, source is fizzed // Filter must receive (N-1)/N of inputs from different cores comm += numInputRots * input.getWeight(source, SchedulingPhase.STEADY) / KjcOptions.smp * (KjcOptions.smp - 1); } else { // Filter is not fizzed, source is not fizzed // Check to see if on same core // If not, must communicate all elements if(!scheduler.getComputeNode(slice.getFirstFilter()).equals( scheduler.getComputeNode(srcSlice.getFirstFilter()))) { comm += numInputRots * input.getWeight(source, SchedulingPhase.STEADY); } } } } else { for(InterSliceEdge source : sources) { Slice srcSlice = source.getSrc().getParent(); // if(srcSlice.getFirstFilter().isFileInput()) // continue; if(FissionGroupStore.isFizzed(srcSlice)) { // Filter is fizzed, source is also fizzed int totalItemsReceivedPerFizzed = totalItemsReceived / FissionGroupStore.getFissionGroup(slice).fizzedSlices.length; int numInputRotsPerFizzed = numInputRots / FissionGroupStore.getFissionGroup(slice).fizzedSlices.length; System.out.println("totalItemsReceivedPerFizzed: " + totalItemsReceivedPerFizzed); System.out.println("numInputRotsPerFizzed: " + numInputRotsPerFizzed); int inputWeightBeforeSrc = input.weightBefore(source, SchedulingPhase.STEADY); int inputWeightSrc = input.getWeight(source, SchedulingPhase.STEADY); int inputTotalWeight = input.totalWeights(SchedulingPhase.STEADY); System.out.println("inputWeightBeforeSrc: " + inputWeightBeforeSrc); System.out.println("inputWeightSrc: " + inputWeightSrc); System.out.println("copyDown: " + info.copyDown); int numXmit = 0; for(int rot = 0 ; rot < numInputRotsPerFizzed ; rot++) { numXmit += Math.min(inputWeightSrc, Math.max(0, info.copyDown + rot * inputTotalWeight + inputWeightBeforeSrc + inputWeightSrc - totalItemsReceivedPerFizzed)); } System.out.println("numXmit: " + numXmit); comm += KjcOptions.smp * numXmit; } else { // Filter is fizzed, source is not fizzed // Source must send (N-1)/N of outputs to different cores comm += numInputRots * input.getWeight(source, SchedulingPhase.STEADY) / KjcOptions.smp * (KjcOptions.smp - 1); } } } commProcessed.add(slice); } */ // Simple communication estimation for (Slice slice : slices) { if (commProcessed.contains(slice)) continue; FilterInfo info = FilterInfo.getFilterInfo(slice.getFirstFilter()); int totalItemsReceived = info.totalItemsReceived(SchedulingPhase.STEADY); if (totalItemsReceived == 0) continue; comm += totalItemsReceived; if (FissionGroupStore.isFizzed(slice)) { assert info.peek >= info.pop; comm += (info.peek - info.pop) * KjcOptions.smp; } commProcessed.add(slice); } // Simple communication estimation 2 /* for(Slice slice : slices) { if(commProcessed.contains(slice)) continue; FilterInfo info = FilterInfo.getFilterInfo(slice.getFirstFilter()); int totalItemsReceived = info.totalItemsReceived(SchedulingPhase.STEADY); int totalItemsSent = info.totalItemsSent(SchedulingPhase.STEADY); comm += totalItemsReceived; comm += totalItemsSent; if(totalItemsReceived == 0) continue; if(FissionGroupStore.isFizzed(slice)) { assert info.peek >= info.pop; comm += (info.peek - info.pop) * KjcOptions.smp; } commProcessed.add(slice); } */ System.out.println("Final Computation: " + comp); System.out.println("Final Communication: " + comm); System.out.println("Final Comp/Comm Ratio: " + (float) comp / (float) comm); } System.exit(0); }
private void flattenInternal(FlatNode top) { Iterator<FlatNode> dataFlow = DataFlowTraversal.getTraversal(top).iterator(); while (dataFlow.hasNext()) { FlatNode node = dataFlow.next(); System.out.println(node); InputSliceNode input = sliceNodes.inputNodes.get(node.contents); OutputSliceNode output = sliceNodes.outputNodes.get(node.contents); FilterSliceNode filterNode = sliceNodes.filterNodes.get(node.contents); assert input != null && output != null && filterNode != null; // set up the slice Slice slice = new Slice(input); input.setNext(filterNode); filterNode.setPrevious(input); filterNode.setNext(output); output.setPrevious(filterNode); input.setParent(slice); output.setParent(slice); filterNode.setParent(slice); System.out.println(" outputs: " + node.ways); if (node.ways != 0) { assert node.ways == node.getEdges().length && node.ways == node.weights.length; // set up the i/o arcs // set up the splitting... LinkedList<InterSliceEdge> outEdges = new LinkedList<InterSliceEdge>(); LinkedList<Integer> outWeights = new LinkedList<Integer>(); HashMap<InputSliceNode, InterSliceEdge> newEdges = new HashMap<InputSliceNode, InterSliceEdge>(); for (int i = 0; i < node.ways; i++) { if (node.weights[i] == 0) continue; InterSliceEdge edge = new InterSliceEdge(output, sliceNodes.inputNodes.get(node.getEdges()[i].contents)); newEdges.put(sliceNodes.inputNodes.get(node.getEdges()[i].contents), edge); outEdges.add(edge); outWeights.add(node.weights[i]); } edges.put(output, newEdges); LinkedList<LinkedList<InterSliceEdge>> translatedEdges = new LinkedList<LinkedList<InterSliceEdge>>(); if (node.isDuplicateSplitter()) { outWeights = new LinkedList<Integer>(); outWeights.add(new Integer(1)); translatedEdges.add(outEdges); } else { for (int i = 0; i < outEdges.size(); i++) { LinkedList<InterSliceEdge> link = new LinkedList<InterSliceEdge>(); link.add(outEdges.get(i)); translatedEdges.add(link); } } output.set(outWeights, translatedEdges); } else { // no outputs output.setWeights(new int[0]); output.setDests(new InterSliceEdge[0][0]); } if (node.isFilter()) { if (node.getFilter().getPushInt() == 0) { output.setWeights(new int[0]); output.setDests(new InterSliceEdge[0][0]); } } // set up the joining, the edges should exist already from upstream System.out.println(" inputs: " + node.inputs); if (node.inputs != 0) { assert node.inputs == node.incoming.length && node.inputs == node.incomingWeights.length; LinkedList<Integer> inWeights = new LinkedList<Integer>(); LinkedList<InterSliceEdge> inEdges = new LinkedList<InterSliceEdge>(); for (int i = 0; i < node.inputs; i++) { if (node.incomingWeights[i] == 0) continue; inEdges.add(edges.get(sliceNodes.outputNodes.get(node.incoming[i].contents)).get(input)); inWeights.add(node.incomingWeights[i]); } input.set(inWeights, inEdges); } else { input.setWeights(new int[0]); input.setSources(new InterSliceEdge[0]); } if (node.isFilter() && node.getFilter().getPopInt() == 0) { input.setWeights(new int[0]); input.setSources(new InterSliceEdge[0]); } // set up the work hashmaps int workEst = 0; if (sliceNodes.generatedIds.contains(filterNode)) { workEst = 3 * filterNode.getFilter().getSteadyMult(); } else { assert node.isFilter(); workEst = work.getWork((SIRFilter) node.contents); } bottleNeckFilter.put(slice, filterNode); sliceBNWork.put(slice, workEst); workEstimation.put(filterNode.getFilter(), workEst); slice.finish(); if (node.contents instanceof SIRFileReader || node.contents instanceof SIRFileWriter) { System.out.println("Found io " + node.contents); ioList.add(slice); } if (topSlice == null) topSlice = slice; sliceList.add(slice); } }