/**
 * Sets the {@code scheduler} field to the leaf class that implements the scheduling policy
 * named by {@code KjcOptions.partitioner}.
 *
 * <ul>
 *   <li>{@code "tmd"} selects {@code TMDBinPackFissAll}
 *   <li>{@code "oldtmd"} selects {@code TMD}
 *   <li>{@code "smd"} selects {@code SMD}
 * </ul>
 *
 * Any other value, including an unset ({@code null}) option, prints an error to standard error
 * and terminates the JVM with exit status 1.
 */
private static void setScheduler() {
  // Constant-first equals(): a null partitioner option falls through to the
  // "unknown scheduler" error below instead of raising a NullPointerException.
  if ("tmd".equals(KjcOptions.partitioner)) {
    scheduler = new TMDBinPackFissAll();
  } else if ("oldtmd".equals(KjcOptions.partitioner)) {
    scheduler = new TMD();
  } else if ("smd".equals(KjcOptions.partitioner)) {
    scheduler = new SMD();
  } else {
    System.err.println("Unknown Scheduler Type!");
    System.exit(1);
  }
}
/* * File writer code currently works by using fwrite. * * The File* is declared outside any function / method. * * There is special case code for FileReader<bit> since individual * bits can not be read in by any system routine that I know. * The current bits and the count of unprocessed bits are created * outside of */ private static void genFileWriterWork( SIRFileWriter fw, Tape inputTape, int selfID, CodegenPrintWriter p) { if (KjcOptions.asciifileio) { System.err.println("Error: -asciifileio not supported in cluster backend."); System.err.println("Exiting..."); System.exit(1); } String theType = "" + fw.getInputType(); if (theType.equals("bit")) { // the bit type is special since you can not just read or // write a bit. It requires buffering in some larger // integer type. String bits_to_go = bitsToGoName(fw); String the_bits = theBitsName(fw); p.println("unsigned char __buffer[____n/8+1];"); p.println("int __index = 0;"); p.println("for (; 0 < ____n; ____n--) {"); p.indent(); p.println( the_bits + " = (" + bits_type + ") ((" + the_bits + " << 1) | (" + inputTape.getPopName() + "() & 1));"); p.println(bits_to_go + "--;"); p.println("if (" + bits_to_go + " == 0) {"); p.indent(); p.println("__buffer[__index++] = " + the_bits + ";"); p.println(the_bits + " = 0;"); p.println(bits_to_go + " = 8 * sizeof(" + the_bits + ");"); p.outdent(); p.println("}"); p.outdent(); p.println("}"); p.println( "fwrite(__buffer, " + "sizeof(" + the_bits + "), " + "__index, " + fpName(fw) + ");"); } else { // not a bit type. write directly to file without needing to buffer bits. 
p.println("\n int __index;"); p.println(" for (__index=0; __index < ____n; __index++) {"); if (KjcOptions.compressed) { Tape out = RegisterStreams.getFilterInStream(fw); int s = out.getSource(); int d = out.getDest(); String BUFFER = "BUFFER_" + s + "_" + d; String TAIL = "TAIL_" + s + "_" + d; String pop = inputTape.getPopName() + "()"; p.println(" unsigned char __temp = " + pop + ";"); p.println(" FileWriter_write<unsigned char>(__file_descr__" + selfID + ", __temp);"); p.println(" unsigned int __frame_size = __temp;"); p.println(" __temp = " + pop + ";"); p.println(" FileWriter_write<unsigned char>(__file_descr__" + selfID + ", __temp);"); p.println(" __frame_size <<= 8;"); p.println(" __frame_size += __temp;"); p.println(" __temp = " + pop + ";"); p.println(" FileWriter_write<unsigned char>(__file_descr__" + selfID + ", __temp);"); p.println(" __frame_size <<= 8;"); p.println(" __frame_size += __temp;"); p.println(" __temp = " + pop + ";"); p.println(" FileWriter_write<unsigned char>(__file_descr__" + selfID + ", __temp);"); p.println(" __frame_size <<= 8;"); p.println(" __frame_size += __temp;"); // the frame size includes the four bytes used to state the frame size p.println( " FileWriter_write(__file_descr__" + selfID + ", (void *)(" + BUFFER + " + " + TAIL + "), __frame_size - 4);"); p.println(" " + TAIL + " += __frame_size - 4;"); } else { p.println( " FileWriter_write<" + theType + ">(__file_descr__" + selfID + ", " + inputTape.getPopName() + "());"); } p.println(" }\n"); /* NetStream in = RegisterStreams.getFilterInStream(fw); // source and destination of incoming stream int s = in.getSource(); int d = in.getDest(); p.println("#ifdef __FUSED_" + s + "_" + d); p.indent(); { p.println("fwrite(&(BUFFER_" + s + "_" + d + "[TAIL_" + s + "_" + d + "]), " + "sizeof(BUFFER_" + s + "_" + d + "[TAIL_" + s + "_" + d + "]), " + "____n, " + fpName(fw) + ");"); p.println("TAIL_" + s + "_" + d + "+=____n;"); } p.outdent(); p.println("#else"); p.indent(); { 
p.println("fwrite(&(__pop_buf__" + selfID + "[__tail__" + selfID + "]), " + "sizeof(__pop_buf__" + selfID + "[__tail__" + selfID + "]), " + "____n, " + fpName(fw) + ");"); p.println("__tail__" + selfID + "+=____n;"); } p.outdent(); p.println("#endif"); */ if (KjcOptions.numbers > 0) { p.println(" stats_output_count++;"); } } }
public static void run( SIRStream str, JInterfaceDeclaration[] interfaces, SIRInterfaceTable[] interfaceTables, SIRStructure[] structs, SIRHelper[] helpers, SIRGlobal global) { System.out.println("Entry to SMP Backend..."); checkArguments(); setScheduler(); if (KjcOptions.smp > 16) { setupLargeConfig(); } // create cores in desired amount and order int[] cores = new int[KjcOptions.smp]; for (int x = 0; x < KjcOptions.smp; x++) cores[x] = coreOrder[x]; chip = new SMPMachine(cores); // create a new structs.h file for typedefs etc. structs_h = new Structs_h(structs); // The usual optimizations and transformation to slice graph CommonPasses commonPasses = new CommonPasses(); // perform standard optimizations, use the number of cores the user wants to target commonPasses.run(str, interfaces, interfaceTables, structs, helpers, global, chip.size()); // perform some standard cleanup on the slice graph. commonPasses.simplifySlices(); // dump slice graph to dot file commonPasses.getSlicer().dumpGraph("traces.dot", null); // partition the slice graph based on the scheduling policy SpaceTimeScheduleAndSlicer graphSchedule = new SpaceTimeScheduleAndSlicer(commonPasses.getSlicer()); scheduler.setGraphSchedule(graphSchedule); scheduler.run(chip.size()); FilterInfo.reset(); // generate schedules for initialization, primepump and steady-state scheduleSlices(graphSchedule); // generate layout for filters scheduler.runLayout(); // dump final slice graph to dot file graphSchedule.getSlicer().dumpGraph("after_slice_partition.dot", scheduler); graphSchedule.getSlicer().dumpGraph("slice_graph.dot", scheduler, false); // if load balancing, find candidiate fission groups to load balance if (KjcOptions.loadbalance) { LoadBalancer.findCandidates(); LoadBalancer.instrumentMainMethods(); } // create all buffers and set the rotation lengths RotatingBuffer.createBuffers(graphSchedule); // now convert to Kopi code plus communication commands backEndBits = new SMPBackEndFactory(chip, scheduler); 
backEndBits.getBackEndMain().run(graphSchedule, backEndBits); // generate code for file writer CoreCodeStore.generatePrintOutputCode(); if (KjcOptions.numbers > 0) chip.getNthComputeNode(0).getComputeCode().generateNumbersCode(); // emit c code for all cores EmitSMPCode.doit(backEndBits); // dump structs.h file structs_h.writeToFile(); // display final assignment of filters to cores System.out.println("Final filter assignments:"); System.out.println("========================================"); for (int x = 0; x < KjcOptions.smp; x++) { Core core = chip.getNthComputeNode(x); Set<FilterSliceNode> filters = core.getComputeCode().getFilters(); long totalWork = 0; System.out.println("Core " + core.getCoreID() + ": "); for (FilterSliceNode filter : filters) { long work = SliceWorkEstimate.getWork(filter.getParent()); System.out.format("%16d | " + filter + "\n", work); totalWork += work; } System.out.format("%16d | Total\n", totalWork); } // calculate computation to communication ratio if (KjcOptions.sharedbufs) { LinkedList<Slice> slices = DataFlowOrder.getTraversal(graphSchedule.getSlicer().getTopSlices()); HashSet<Slice> compProcessed = new HashSet<Slice>(); HashSet<Slice> commProcessed = new HashSet<Slice>(); long comp = 0; long comm = 0; for (Slice slice : slices) { if (compProcessed.contains(slice)) continue; comp += SliceWorkEstimate.getWork(slice); compProcessed.add(slice); } /* for(Slice slice : slices) { if(commProcessed.contains(slice)) continue; FilterInfo info = FilterInfo.getFilterInfo(slice.getFirstFilter()); int totalItemsReceived = info.totalItemsReceived(SchedulingPhase.STEADY); if(totalItemsReceived == 0) continue; InputSliceNode input = slice.getHead(); Set<InterSliceEdge> sources = input.getSourceSet(SchedulingPhase.STEADY); int numInputRots = totalItemsReceived / input.totalWeights(SchedulingPhase.STEADY); if(!FissionGroupStore.isFizzed(slice)) { for(InterSliceEdge source : sources) { Slice srcSlice = source.getSrc().getParent(); // 
if(srcSlice.getFirstFilter().isFileInput()) // continue; if(FissionGroupStore.isFizzed(srcSlice)) { // Filter is not fizzed, source is fizzed // Filter must receive (N-1)/N of inputs from different cores comm += numInputRots * input.getWeight(source, SchedulingPhase.STEADY) / KjcOptions.smp * (KjcOptions.smp - 1); } else { // Filter is not fizzed, source is not fizzed // Check to see if on same core // If not, must communicate all elements if(!scheduler.getComputeNode(slice.getFirstFilter()).equals( scheduler.getComputeNode(srcSlice.getFirstFilter()))) { comm += numInputRots * input.getWeight(source, SchedulingPhase.STEADY); } } } } else { for(InterSliceEdge source : sources) { Slice srcSlice = source.getSrc().getParent(); // if(srcSlice.getFirstFilter().isFileInput()) // continue; if(FissionGroupStore.isFizzed(srcSlice)) { // Filter is fizzed, source is also fizzed int totalItemsReceivedPerFizzed = totalItemsReceived / FissionGroupStore.getFissionGroup(slice).fizzedSlices.length; int numInputRotsPerFizzed = numInputRots / FissionGroupStore.getFissionGroup(slice).fizzedSlices.length; System.out.println("totalItemsReceivedPerFizzed: " + totalItemsReceivedPerFizzed); System.out.println("numInputRotsPerFizzed: " + numInputRotsPerFizzed); int inputWeightBeforeSrc = input.weightBefore(source, SchedulingPhase.STEADY); int inputWeightSrc = input.getWeight(source, SchedulingPhase.STEADY); int inputTotalWeight = input.totalWeights(SchedulingPhase.STEADY); System.out.println("inputWeightBeforeSrc: " + inputWeightBeforeSrc); System.out.println("inputWeightSrc: " + inputWeightSrc); System.out.println("copyDown: " + info.copyDown); int numXmit = 0; for(int rot = 0 ; rot < numInputRotsPerFizzed ; rot++) { numXmit += Math.min(inputWeightSrc, Math.max(0, info.copyDown + rot * inputTotalWeight + inputWeightBeforeSrc + inputWeightSrc - totalItemsReceivedPerFizzed)); } System.out.println("numXmit: " + numXmit); comm += KjcOptions.smp * numXmit; } else { // Filter is fizzed, source is 
not fizzed // Source must send (N-1)/N of outputs to different cores comm += numInputRots * input.getWeight(source, SchedulingPhase.STEADY) / KjcOptions.smp * (KjcOptions.smp - 1); } } } commProcessed.add(slice); } */ // Simple communication estimation for (Slice slice : slices) { if (commProcessed.contains(slice)) continue; FilterInfo info = FilterInfo.getFilterInfo(slice.getFirstFilter()); int totalItemsReceived = info.totalItemsReceived(SchedulingPhase.STEADY); if (totalItemsReceived == 0) continue; comm += totalItemsReceived; if (FissionGroupStore.isFizzed(slice)) { assert info.peek >= info.pop; comm += (info.peek - info.pop) * KjcOptions.smp; } commProcessed.add(slice); } // Simple communication estimation 2 /* for(Slice slice : slices) { if(commProcessed.contains(slice)) continue; FilterInfo info = FilterInfo.getFilterInfo(slice.getFirstFilter()); int totalItemsReceived = info.totalItemsReceived(SchedulingPhase.STEADY); int totalItemsSent = info.totalItemsSent(SchedulingPhase.STEADY); comm += totalItemsReceived; comm += totalItemsSent; if(totalItemsReceived == 0) continue; if(FissionGroupStore.isFizzed(slice)) { assert info.peek >= info.pop; comm += (info.peek - info.pop) * KjcOptions.smp; } commProcessed.add(slice); } */ System.out.println("Final Computation: " + comp); System.out.println("Final Communication: " + comm); System.out.println("Final Comp/Comm Ratio: " + (float) comp / (float) comm); } System.exit(0); }
/* * File reader code currently works by using fread. * * The File* is declared outside any function / method. * * This code follows the model in the library of stalling (not pushing) * at end of file (detected by fread returning 0). * */ private static void genFileReaderWork( SIRFileReader filter, Tape outputTape, int selfID, CodegenPrintWriter p) { if (KjcOptions.asciifileio) { System.err.println("Error: -asciifileio not supported in cluster backend."); System.err.println("Exiting..."); System.exit(1); } String theType = "" + filter.getOutputType(); // dispatch to special routine for bit type if (theType.equals("bit")) { genFileReaderWorkBit(filter, outputTape, selfID, p); return; } // get source and destination ids of outgoing tape Tape out = RegisterStreams.getFilterOutStream(filter); int s = out.getSource(); int d = out.getDest(); // template code to generate, using symbolic free vars above. StringBuilder sb = new StringBuilder(); sb.append("\n int __index;\n"); sb.append(" for (__index=0; __index < ____n; __index++) {\n"); if (KjcOptions.compressed) { sb.append(" unsigned char __temp = 0;\n"); sb.append(" FileReader_read(__file_descr__"); sb.append(selfID); sb.append(", &__temp, 1);\n"); sb.append(" PUSH(__temp);\n"); sb.append(" unsigned int __frame_size = __temp;\n"); sb.append(" FileReader_read(__file_descr__"); sb.append(selfID); sb.append(", &__temp, 1);\n"); sb.append(" PUSH(__temp);\n"); sb.append(" __frame_size <<= 8;\n"); sb.append(" __frame_size += __temp;\n"); sb.append(" FileReader_read(__file_descr__"); sb.append(selfID); sb.append(", &__temp, 1);\n"); sb.append(" PUSH(__temp);\n"); sb.append(" __frame_size <<= 8;\n"); sb.append(" __frame_size += __temp;\n"); sb.append(" FileReader_read(__file_descr__"); sb.append(selfID); sb.append(", &__temp, 1);\n"); sb.append(" PUSH(__temp);\n"); sb.append(" __frame_size <<= 8;\n"); sb.append(" __frame_size += __temp;\n"); // the frame size includes the four bytes used to state the frame size sb.append(" 
FileReader_read(__file_descr__"); sb.append(selfID); sb.append(", (void *)(BUFFER + HEAD), __frame_size - 4);\n"); sb.append(" HEAD += __frame_size - 4;\n"); } else { sb.append(" PUSH(FileReader_read<"); sb.append(theType); sb.append(">(__file_descr__"); sb.append(selfID); sb.append("));\n"); } sb.append(" }\n"); String template = sb.toString(); /* " #ifdef FUSED" + "\n" + " #ifdef NOMOD" + "\n" + " // read directly into buffer" + "\n" + " if (fread(&(BUFFER[HEAD]), sizeof(BUFFER[HEAD]), ____n, FILEREADER)) {" + "\n" + " HEAD+=____n;" + "\n" + " }" + "\n" + " #else" + "\n" + " // wraparound buffer, might have to read in two pieces (but never" + "\n" + " // more, since we would overwrote what we already wrote on this call)" + "\n" + " if (HEAD+____n <= __BUF_SIZE_MASK) {" + "\n" + " // no overflow" + "\n" + " if (fread(&(BUFFER[HEAD]), sizeof(BUFFER[HEAD]), ____n, FILEREADER)) {" + "\n" + " HEAD+=____n;" + "\n" + " }" + "\n" + " } else {" + "\n" + " // overflow, need two pieces" + "\n" + " int piece1 = __BUF_SIZE_MASK - HEAD;" + "\n" + " int piece2 = ____n - piece1;" + "\n" + " if (fread(&(BUFFER[HEAD]), sizeof(BUFFER[HEAD]), piece1, FILEREADER)) {" + "\n" + " if (fread(&(BUFFER[0]), sizeof(BUFFER[HEAD]), piece2, FILEREADER)) {" + "\n" + " HEAD+=____n;" + "\n" + " HEAD&=__BUF_SIZE_MASK;" + "\n" + " }" + "\n" + " }" + "\n" + " }" + "\n" + " #endif" + "\n" + " #else" + "\n" + " // read as a block" + "\n" + " TYPE __buffer[____n];" + "\n" + " int __index;" + "\n" + " if (fread(__buffer, sizeof(__buffer[0]), ____n, FILEREADER)) {" + "\n" + " for (__index=0; __index < ____n; __index++) {" + "\n" + " // should move push out of loop, but not clear if mult-push implemented yet" + "\n" + " PUSH(__buffer[__index]);" + "\n" + " }" + "\n" + " }" + "\n" + " #endif"; */ // set values of free variables String TYPE = theType; String FILEREADER = fpName(filter); String FUSED = "__FUSED_" + s + "_" + d; String NOMOD = "__NOMOD_" + s + "_" + d; String BUFFER = "BUFFER_" + s + "_" + d; 
String HEAD = "HEAD_" + s + "_" + d; String BUF_SIZE_MASK = "__BUF_SIZE_MASK_" + s + "_" + d; String PUSH = outputTape.getPushName(); // replace templates with correct values template = Utils.replaceAll(template, "TYPE", TYPE); template = Utils.replaceAll(template, "FILEREADER", FILEREADER); template = Utils.replaceAll(template, "FUSED", FUSED); template = Utils.replaceAll(template, "NOMOD", NOMOD); template = Utils.replaceAll(template, "BUFFER", BUFFER); template = Utils.replaceAll(template, "HEAD", HEAD); template = Utils.replaceAll(template, "BUF_SIZE_MASK", BUF_SIZE_MASK); template = Utils.replaceAll(template, "PUSH", PUSH); // output code p.println(template); }