Example #1
0
 /** Set the scheduler field to the correct leaf class that implements a scheduling policy. */
 private static void setScheduler() {
   if (KjcOptions.partitioner.equals("tmd")) {
     scheduler = new TMDBinPackFissAll();
   } else if (KjcOptions.partitioner.equals("oldtmd")) {
     scheduler = new TMD();
   } else if (KjcOptions.partitioner.equals("smd")) {
     scheduler = new SMD();
   } else {
     System.err.println("Unknown Scheduler Type!");
     System.exit(1);
   }
 }
Example #2
0
  /*
   * File writer code currently works by using fwrite.
   *
   * The File* is declared outside any function / method.
   *
   * There is special case code for FileReader<bit> since individual
   * bits can not be read in by any system routine that I know.
   * The current bits and the count of unprocessed bits are created
   * outside of
   */
  private static void genFileWriterWork(
      SIRFileWriter fw, Tape inputTape, int selfID, CodegenPrintWriter p) {

    if (KjcOptions.asciifileio) {
      System.err.println("Error: -asciifileio not supported in cluster backend.");
      System.err.println("Exiting...");
      System.exit(1);
    }

    String theType = "" + fw.getInputType();
    if (theType.equals("bit")) {
      // the bit type is special since you can not just read or
      // write a bit.  It requires buffering in some larger
      // integer type.
      String bits_to_go = bitsToGoName(fw);
      String the_bits = theBitsName(fw);

      p.println("unsigned char __buffer[____n/8+1];");
      p.println("int __index = 0;");
      p.println("for (; 0 < ____n; ____n--) {");
      p.indent();

      p.println(
          the_bits
              + " = ("
              + bits_type
              + ") (("
              + the_bits
              + " << 1) | ("
              + inputTape.getPopName()
              + "() & 1));");
      p.println(bits_to_go + "--;");
      p.println("if (" + bits_to_go + " == 0) {");
      p.indent();
      p.println("__buffer[__index++] = " + the_bits + ";");
      p.println(the_bits + " = 0;");
      p.println(bits_to_go + " = 8 * sizeof(" + the_bits + ");");
      p.outdent();
      p.println("}");
      p.outdent();
      p.println("}");
      p.println(
          "fwrite(__buffer, " + "sizeof(" + the_bits + "), " + "__index, " + fpName(fw) + ");");
    } else {
      // not a bit type.  write directly to file without needing to buffer bits.

      p.println("\n  int __index;");
      p.println("  for (__index=0; __index < ____n; __index++) {");
      if (KjcOptions.compressed) {
        Tape out = RegisterStreams.getFilterInStream(fw);
        int s = out.getSource();
        int d = out.getDest();

        String BUFFER = "BUFFER_" + s + "_" + d;
        String TAIL = "TAIL_" + s + "_" + d;

        String pop = inputTape.getPopName() + "()";
        p.println("    unsigned char __temp = " + pop + ";");
        p.println("    FileWriter_write<unsigned char>(__file_descr__" + selfID + ", __temp);");
        p.println("    unsigned int __frame_size = __temp;");

        p.println("    __temp = " + pop + ";");
        p.println("    FileWriter_write<unsigned char>(__file_descr__" + selfID + ", __temp);");
        p.println("    __frame_size <<= 8;");
        p.println("    __frame_size += __temp;");

        p.println("    __temp = " + pop + ";");
        p.println("    FileWriter_write<unsigned char>(__file_descr__" + selfID + ", __temp);");
        p.println("    __frame_size <<= 8;");
        p.println("    __frame_size += __temp;");

        p.println("    __temp = " + pop + ";");
        p.println("    FileWriter_write<unsigned char>(__file_descr__" + selfID + ", __temp);");
        p.println("    __frame_size <<= 8;");
        p.println("    __frame_size += __temp;");

        // the frame size includes the four bytes used to state the frame size
        p.println(
            "    FileWriter_write(__file_descr__"
                + selfID
                + ", (void *)("
                + BUFFER
                + " + "
                + TAIL
                + "), __frame_size - 4);");
        p.println("    " + TAIL + " += __frame_size - 4;");
      } else {
        p.println(
            "    FileWriter_write<"
                + theType
                + ">(__file_descr__"
                + selfID
                + ", "
                + inputTape.getPopName()
                + "());");
      }
      p.println("  }\n");

      /*
             NetStream in = RegisterStreams.getFilterInStream(fw);
             // source and destination of incoming stream
             int s = in.getSource();
             int d = in.getDest();

             p.println("#ifdef __FUSED_" + s + "_" + d);
             p.indent();
             {
                 p.println("fwrite(&(BUFFER_" + s + "_" + d + "[TAIL_" + s + "_" + d + "]), " +
                           "sizeof(BUFFER_" + s + "_" + d + "[TAIL_" + s + "_" + d + "]), " +
                           "____n, " + fpName(fw) + ");");
                 p.println("TAIL_" + s + "_" + d + "+=____n;");
             }
             p.outdent();
             p.println("#else");
             p.indent();
             {
                 p.println("fwrite(&(__pop_buf__" + selfID + "[__tail__" + selfID + "]), " +
                           "sizeof(__pop_buf__" + selfID + "[__tail__" + selfID + "]), " +
                           "____n, " + fpName(fw) + ");");
                 p.println("__tail__" + selfID + "+=____n;");
             }
             p.outdent();
             p.println("#endif");
      */

      if (KjcOptions.numbers > 0) {
        p.println("  stats_output_count++;");
      }
    }
  }
Example #3
0
  public static void run(
      SIRStream str,
      JInterfaceDeclaration[] interfaces,
      SIRInterfaceTable[] interfaceTables,
      SIRStructure[] structs,
      SIRHelper[] helpers,
      SIRGlobal global) {
    System.out.println("Entry to SMP Backend...");

    checkArguments();
    setScheduler();

    if (KjcOptions.smp > 16) {
      setupLargeConfig();
    }

    // create cores in desired amount and order
    int[] cores = new int[KjcOptions.smp];
    for (int x = 0; x < KjcOptions.smp; x++) cores[x] = coreOrder[x];
    chip = new SMPMachine(cores);

    // create a new structs.h file for typedefs etc.
    structs_h = new Structs_h(structs);

    // The usual optimizations and transformation to slice graph
    CommonPasses commonPasses = new CommonPasses();
    // perform standard optimizations, use the number of cores the user wants to target
    commonPasses.run(str, interfaces, interfaceTables, structs, helpers, global, chip.size());
    // perform some standard cleanup on the slice graph.
    commonPasses.simplifySlices();
    // dump slice graph to dot file
    commonPasses.getSlicer().dumpGraph("traces.dot", null);

    // partition the slice graph based on the scheduling policy
    SpaceTimeScheduleAndSlicer graphSchedule =
        new SpaceTimeScheduleAndSlicer(commonPasses.getSlicer());
    scheduler.setGraphSchedule(graphSchedule);
    scheduler.run(chip.size());
    FilterInfo.reset();

    // generate schedules for initialization, primepump and steady-state
    scheduleSlices(graphSchedule);

    // generate layout for filters
    scheduler.runLayout();

    // dump final slice graph to dot file
    graphSchedule.getSlicer().dumpGraph("after_slice_partition.dot", scheduler);
    graphSchedule.getSlicer().dumpGraph("slice_graph.dot", scheduler, false);

    // if load balancing, find candidiate fission groups to load balance
    if (KjcOptions.loadbalance) {
      LoadBalancer.findCandidates();
      LoadBalancer.instrumentMainMethods();
    }

    // create all buffers and set the rotation lengths
    RotatingBuffer.createBuffers(graphSchedule);

    // now convert to Kopi code plus communication commands
    backEndBits = new SMPBackEndFactory(chip, scheduler);
    backEndBits.getBackEndMain().run(graphSchedule, backEndBits);

    // generate code for file writer
    CoreCodeStore.generatePrintOutputCode();

    if (KjcOptions.numbers > 0) chip.getNthComputeNode(0).getComputeCode().generateNumbersCode();

    // emit c code for all cores
    EmitSMPCode.doit(backEndBits);

    // dump structs.h file
    structs_h.writeToFile();

    // display final assignment of filters to cores
    System.out.println("Final filter assignments:");
    System.out.println("========================================");
    for (int x = 0; x < KjcOptions.smp; x++) {
      Core core = chip.getNthComputeNode(x);
      Set<FilterSliceNode> filters = core.getComputeCode().getFilters();
      long totalWork = 0;

      System.out.println("Core " + core.getCoreID() + ": ");
      for (FilterSliceNode filter : filters) {
        long work = SliceWorkEstimate.getWork(filter.getParent());
        System.out.format("%16d | " + filter + "\n", work);
        totalWork += work;
      }
      System.out.format("%16d | Total\n", totalWork);
    }

    // calculate computation to communication ratio
    if (KjcOptions.sharedbufs) {
      LinkedList<Slice> slices =
          DataFlowOrder.getTraversal(graphSchedule.getSlicer().getTopSlices());
      HashSet<Slice> compProcessed = new HashSet<Slice>();
      HashSet<Slice> commProcessed = new HashSet<Slice>();

      long comp = 0;
      long comm = 0;

      for (Slice slice : slices) {
        if (compProcessed.contains(slice)) continue;

        comp += SliceWorkEstimate.getWork(slice);
        compProcessed.add(slice);
      }

      /*
                  for(Slice slice : slices) {
                      if(commProcessed.contains(slice))
                          continue;

                      FilterInfo info = FilterInfo.getFilterInfo(slice.getFirstFilter());
                      int totalItemsReceived = info.totalItemsReceived(SchedulingPhase.STEADY);

                      if(totalItemsReceived == 0)
                          continue;

                      InputSliceNode input = slice.getHead();
                      Set<InterSliceEdge> sources = input.getSourceSet(SchedulingPhase.STEADY);
                      int numInputRots = totalItemsReceived / input.totalWeights(SchedulingPhase.STEADY);

                      if(!FissionGroupStore.isFizzed(slice)) {
                          for(InterSliceEdge source : sources) {
                              Slice srcSlice = source.getSrc().getParent();

      //                         if(srcSlice.getFirstFilter().isFileInput())
      //                             continue;

                              if(FissionGroupStore.isFizzed(srcSlice)) {
                                  // Filter is not fizzed, source is fizzed
                                  // Filter must receive (N-1)/N of inputs from different cores
                                  comm += numInputRots *
                                      input.getWeight(source, SchedulingPhase.STEADY) /
                                      KjcOptions.smp * (KjcOptions.smp - 1);
                              }
                              else {
                                  // Filter is not fizzed, source is not fizzed
                                  // Check to see if on same core
                                  // If not, must communicate all elements
                                  if(!scheduler.getComputeNode(slice.getFirstFilter()).equals(
                                         scheduler.getComputeNode(srcSlice.getFirstFilter()))) {
                                      comm += numInputRots *
                                          input.getWeight(source, SchedulingPhase.STEADY);
                                  }
                              }
                          }
                      }
                      else {
                          for(InterSliceEdge source : sources) {
                              Slice srcSlice = source.getSrc().getParent();

      //                         if(srcSlice.getFirstFilter().isFileInput())
      //                             continue;

                              if(FissionGroupStore.isFizzed(srcSlice)) {
                                  // Filter is fizzed, source is also fizzed
                                  int totalItemsReceivedPerFizzed = totalItemsReceived /
                                      FissionGroupStore.getFissionGroup(slice).fizzedSlices.length;
                                  int numInputRotsPerFizzed = numInputRots /
                                      FissionGroupStore.getFissionGroup(slice).fizzedSlices.length;

                                  System.out.println("totalItemsReceivedPerFizzed: " + totalItemsReceivedPerFizzed);
                                  System.out.println("numInputRotsPerFizzed: " + numInputRotsPerFizzed);

                                  int inputWeightBeforeSrc =
                                      input.weightBefore(source, SchedulingPhase.STEADY);
                                  int inputWeightSrc = input.getWeight(source, SchedulingPhase.STEADY);
                                  int inputTotalWeight = input.totalWeights(SchedulingPhase.STEADY);

                                  System.out.println("inputWeightBeforeSrc: " + inputWeightBeforeSrc);
                                  System.out.println("inputWeightSrc: " + inputWeightSrc);
                                  System.out.println("copyDown: " + info.copyDown);

                                  int numXmit = 0;

                                  for(int rot = 0 ; rot < numInputRotsPerFizzed ; rot++) {
                                      numXmit += Math.min(inputWeightSrc,
                                                          Math.max(0,
                                                                   info.copyDown +
                                                                   rot * inputTotalWeight +
                                                                   inputWeightBeforeSrc + inputWeightSrc -
                                                                   totalItemsReceivedPerFizzed));
                                  }

                                  System.out.println("numXmit: " + numXmit);

                                  comm += KjcOptions.smp * numXmit;
                              }
                              else {
                                  // Filter is fizzed, source is not fizzed
                                  // Source must send (N-1)/N of outputs to different cores
                                  comm += numInputRots *
                                      input.getWeight(source, SchedulingPhase.STEADY) /
                                      KjcOptions.smp * (KjcOptions.smp - 1);
                              }
                          }
                      }

                      commProcessed.add(slice);
                  }
                  */

      // Simple communication estimation
      for (Slice slice : slices) {
        if (commProcessed.contains(slice)) continue;

        FilterInfo info = FilterInfo.getFilterInfo(slice.getFirstFilter());
        int totalItemsReceived = info.totalItemsReceived(SchedulingPhase.STEADY);

        if (totalItemsReceived == 0) continue;

        comm += totalItemsReceived;

        if (FissionGroupStore.isFizzed(slice)) {
          assert info.peek >= info.pop;
          comm += (info.peek - info.pop) * KjcOptions.smp;
        }

        commProcessed.add(slice);
      }

      // Simple communication estimation 2
      /*
      for(Slice slice : slices) {
          if(commProcessed.contains(slice))
              continue;

          FilterInfo info = FilterInfo.getFilterInfo(slice.getFirstFilter());
          int totalItemsReceived = info.totalItemsReceived(SchedulingPhase.STEADY);
          int totalItemsSent = info.totalItemsSent(SchedulingPhase.STEADY);

          comm += totalItemsReceived;
          comm += totalItemsSent;

          if(totalItemsReceived == 0)
              continue;

          if(FissionGroupStore.isFizzed(slice)) {
              assert info.peek >= info.pop;
              comm += (info.peek - info.pop) * KjcOptions.smp;
          }

          commProcessed.add(slice);
      }
      */

      System.out.println("Final Computation: " + comp);
      System.out.println("Final Communication: " + comm);
      System.out.println("Final Comp/Comm Ratio: " + (float) comp / (float) comm);
    }

    System.exit(0);
  }
Example #4
0
  /*
   * File reader code currently works by using fread.
   *
   * The File* is declared outside any function / method.
   *
   * This code follows the model in the library of stalling (not pushing)
   * at end of file (detected by fread returning 0).
   *
   */
  private static void genFileReaderWork(
      SIRFileReader filter, Tape outputTape, int selfID, CodegenPrintWriter p) {

    if (KjcOptions.asciifileio) {
      System.err.println("Error: -asciifileio not supported in cluster backend.");
      System.err.println("Exiting...");
      System.exit(1);
    }

    String theType = "" + filter.getOutputType();
    // dispatch to special routine for bit type
    if (theType.equals("bit")) {
      genFileReaderWorkBit(filter, outputTape, selfID, p);
      return;
    }

    // get source and destination ids of outgoing tape
    Tape out = RegisterStreams.getFilterOutStream(filter);
    int s = out.getSource();
    int d = out.getDest();

    // template code to generate, using symbolic free vars above.
    StringBuilder sb = new StringBuilder();
    sb.append("\n  int __index;\n");
    sb.append("  for (__index=0; __index < ____n; __index++) {\n");
    if (KjcOptions.compressed) {
      sb.append("    unsigned char __temp = 0;\n");
      sb.append("    FileReader_read(__file_descr__");
      sb.append(selfID);
      sb.append(", &__temp, 1);\n");
      sb.append("    PUSH(__temp);\n");
      sb.append("    unsigned int __frame_size = __temp;\n");

      sb.append("    FileReader_read(__file_descr__");
      sb.append(selfID);
      sb.append(", &__temp, 1);\n");
      sb.append("    PUSH(__temp);\n");
      sb.append("    __frame_size <<= 8;\n");
      sb.append("    __frame_size += __temp;\n");

      sb.append("    FileReader_read(__file_descr__");
      sb.append(selfID);
      sb.append(", &__temp, 1);\n");
      sb.append("    PUSH(__temp);\n");
      sb.append("    __frame_size <<= 8;\n");
      sb.append("    __frame_size += __temp;\n");

      sb.append("    FileReader_read(__file_descr__");
      sb.append(selfID);
      sb.append(", &__temp, 1);\n");
      sb.append("    PUSH(__temp);\n");
      sb.append("    __frame_size <<= 8;\n");
      sb.append("    __frame_size += __temp;\n");

      // the frame size includes the four bytes used to state the frame size
      sb.append("    FileReader_read(__file_descr__");
      sb.append(selfID);
      sb.append(", (void *)(BUFFER + HEAD), __frame_size - 4);\n");
      sb.append("    HEAD += __frame_size - 4;\n");
    } else {
      sb.append("    PUSH(FileReader_read<");
      sb.append(theType);
      sb.append(">(__file_descr__");
      sb.append(selfID);
      sb.append("));\n");
    }
    sb.append("  }\n");

    String template = sb.toString();

    /*
               "  #ifdef FUSED" + "\n" +
               "    #ifdef NOMOD" + "\n" +
               "      // read directly into buffer" + "\n" +
               "      if (fread(&(BUFFER[HEAD]), sizeof(BUFFER[HEAD]), ____n, FILEREADER)) {" + "\n" +
               "        HEAD+=____n;" + "\n" +
               "      }" + "\n" +
               "    #else" + "\n" +
               "      // wraparound buffer, might have to read in two pieces (but never" + "\n" +
               "      // more, since we would overwrote what we already wrote on this call)" + "\n" +
               "      if (HEAD+____n <= __BUF_SIZE_MASK) {" + "\n" +
               "          // no overflow" + "\n" +
               "          if (fread(&(BUFFER[HEAD]), sizeof(BUFFER[HEAD]), ____n, FILEREADER)) {" + "\n" +
               "             HEAD+=____n;" + "\n" +
               "          }" + "\n" +
               "      } else {" + "\n" +
               "          // overflow, need two pieces" + "\n" +
               "          int piece1 = __BUF_SIZE_MASK - HEAD;" + "\n" +
               "          int piece2 = ____n - piece1;" + "\n" +
               "          if (fread(&(BUFFER[HEAD]), sizeof(BUFFER[HEAD]), piece1, FILEREADER)) {" + "\n" +
               "              if (fread(&(BUFFER[0]), sizeof(BUFFER[HEAD]), piece2, FILEREADER)) {" + "\n" +
               "                HEAD+=____n;" + "\n" +
               "                HEAD&=__BUF_SIZE_MASK;" + "\n" +
               "              }" + "\n" +
               "          }" + "\n" +
               "      }" + "\n" +
               "     #endif" + "\n" +
               "  #else" + "\n" +
               "    // read as a block" + "\n" +
               "    TYPE __buffer[____n];" + "\n" +
               "    int __index;" + "\n" +
               "    if (fread(__buffer, sizeof(__buffer[0]), ____n, FILEREADER)) {" + "\n" +
               "      for (__index=0; __index < ____n; __index++) {" + "\n" +
               "       // should move push out of loop, but not clear if mult-push implemented yet" + "\n" +
               "       PUSH(__buffer[__index]);" + "\n" +
               "      }" + "\n" +
               "    }" + "\n" +
               "  #endif";
    */

    // set values of free variables
    String TYPE = theType;
    String FILEREADER = fpName(filter);
    String FUSED = "__FUSED_" + s + "_" + d;
    String NOMOD = "__NOMOD_" + s + "_" + d;
    String BUFFER = "BUFFER_" + s + "_" + d;
    String HEAD = "HEAD_" + s + "_" + d;
    String BUF_SIZE_MASK = "__BUF_SIZE_MASK_" + s + "_" + d;
    String PUSH = outputTape.getPushName();

    // replace templates with correct values
    template = Utils.replaceAll(template, "TYPE", TYPE);
    template = Utils.replaceAll(template, "FILEREADER", FILEREADER);
    template = Utils.replaceAll(template, "FUSED", FUSED);
    template = Utils.replaceAll(template, "NOMOD", NOMOD);
    template = Utils.replaceAll(template, "BUFFER", BUFFER);
    template = Utils.replaceAll(template, "HEAD", HEAD);
    template = Utils.replaceAll(template, "BUF_SIZE_MASK", BUF_SIZE_MASK);
    template = Utils.replaceAll(template, "PUSH", PUSH);

    // output code
    p.println(template);
  }