Пример #1
0
  /**
   * Estimate the cost of the array reference {@code se} with respect to {@code loop}.
   *
   * <p>Outcomes, in the order they are checked:
   *
   * <ul>
   *   <li>the loop has no primary induction variable: the loop's trip count is returned as is;
   *   <li>a subscript is not affine according to the top loop: {@code null} is returned;
   *   <li>the induction variable occurs in a subscript other than the last one: the trip count
   *       is returned as is;
   *   <li>the induction variable has a zero coefficient in the last subscript (invariant reuse):
   *       {@code new Cost(1.0, 0)} is returned;
   *   <li>the absolute stride (step * coefficient) does not exceed the machine's cache size value
   *       (unit reuse): the trip count is multiplied by the stride and divided by that size;
   *   <li>otherwise (no reuse): the trip count is returned as is.
   * </ul>
   *
   * <p>Note that the {@code Cost} obtained from {@code loop.getTripCount()} is the object that is
   * scaled and returned in the unit-reuse case.
   *
   * @param loop the loop whose induction variable is examined
   * @param se the subscript expression of the array reference
   * @return the estimated cost, or {@code null} if a subscript is not affine
   */
  public Cost computeRefCost(LoopHeaderChord loop, SubscriptExpr se) {
    Cost tripCount = loop.getTripCount();
    InductionVar primary = loop.getPrimaryInductionVar();
    if (primary == null) {
      return tripCount;
    }

    int lastIndex = se.numSubscripts() - 1;
    LoopHeaderChord topLoop = loop.getTopLoop();
    VariableDecl indexVar = primary.getVar();

    // Scan every subscript except the last one (C subscript order). If the
    // loop's index variable shows up in any of them, the answer is simply the
    // trip count.
    int pos = 0;
    while (pos < lastIndex) {
      AffineExpr leading = topLoop.isAffine(se.getSubscript(pos));
      if (leading == null) {
        return null;
      }
      if (leading.hasTerm(indexVar)) {
        return tripCount;
      }
      pos++;
    }

    // Only the last subscript is left to inspect.
    AffineExpr lastAffine = topLoop.isAffine(se.getSubscript(lastIndex));
    if (lastAffine == null) {
      return null;
    }

    int termIndex = lastAffine.getTermIndexOrig(indexVar);
    long coeff = (termIndex >= 0) ? lastAffine.getCoefficient(termIndex) : 0;

    if (coeff == 0) {
      // Invariant reuse: the last subscript does not vary with this loop.
      return new Cost(1.0, 0);
    }

    long step = loop.getStepValue();
    long stride = Math.abs(step * coeff);

    Type pointedTo = se.getCoreType().getPointedTo();
    int byteSize = pointedTo.memorySizeAsInt(Machine.currentMachine);
    int cacheUnit = Machine.currentMachine.getCacheSize(byteSize); // cache line or block size

    if (stride <= cacheUnit) {
      // Unit reuse: scale the trip count by stride / cacheUnit.
      tripCount.multiply(stride);
      tripCount.divide(cacheUnit);
    }

    // Falls through unchanged when there is no reuse.
    return tripCount;
  }
Пример #2
0
  /**
   * Visit the loops of the current routine and attempt a permutation on each eligible nest.
   *
   * <p>The vector returned by the loop tree is used as a work list. A loop that has no primary
   * induction variable or is not perfectly nested is not permuted itself; its inner loops are
   * appended to the work list instead. An eligible loop is handed to {@code tryPermute} only when
   * its nested level is at least 2.
   */
  public void perform() {
    if (trace) {
      System.out.println("** LP " + scribble.getRoutineDecl().getName());
    }

    // The work list grows while it is being scanned, so its size is re-read
    // on every iteration.
    Vector<LoopHeaderChord> pending = scribble.getLoopTree().getInnerLoops();

    int next = 0;
    while (next < pending.size()) {
      LoopHeaderChord candidate = pending.elementAt(next);
      next++;

      InductionVar primary = candidate.getPrimaryInductionVar();

      if (trace) {
        System.out.println("   lp " + candidate.nestedLevel() + " " + candidate);
      }

      boolean eligible = (primary != null) && candidate.isPerfectlyNested();
      if (!eligible) {
        // Descend: look for candidates among the loops nested inside.
        pending.addVectors(candidate.getInnerLoops());
      } else if (candidate.nestedLevel() >= 2) {
        // A simple nest (level below 2) has nothing to permute.
        tryPermute(candidate);
      }
    }
  }
Пример #3
0
  /**
   * Attempt to reorder the tightly nested loops that start at {@code topLoop}.
   *
   * <p>Steps:
   *
   * <ol>
   *   <li>Give up if there is no tightly nested loop list, if the last loop of the nest fails
   *       {@code legalLoop} (skipped when {@code unsafe} is set), if the subscripts cannot be
   *       collected, or if no data dependence graph is available.
   *   <li>Compute a cost per loop (reference cost sum multiplied by the trip product) and let
   *       {@code sortByCost} produce the desired order; stop if it reports no permutation.
   *   <li>Stop if {@code isLegal} rejects the order against the dependence vectors.
   *   <li>Realize the order with repeated adjacent interchanges, skipping any pair in which a
   *       loop is not DD-complete or inhibits permutation.
   * </ol>
   *
   * <p>Side effects: assigns the {@code graph} field, reorders the vector returned by
   * {@code getTightlyNestedLoops()}, and calls {@code performLoopInterchange} for each swap.
   *
   * @param topLoop the loop at the top of the nest to examine
   */
  private void tryPermute(LoopHeaderChord topLoop) {
    Vector<LoopHeaderChord> nest = topLoop.getTightlyNestedLoops();
    if (nest == null) {
      return;
    }

    int depth = nest.size();
    LoopHeaderChord lastLoop = nest.get(depth - 1);
    if (!(unsafe || legalLoop(lastLoop))) {
      return;
    }

    // Cost each loop of the nest. A loop's cost is the cost of executing the
    // nest with that loop in the innermost position.

    Table<Declaration, SubscriptExpr> subscripts = new Table<Declaration, SubscriptExpr>();
    if (!topLoop.getSubscriptsRecursive(subscripts)) {
      return;
    }

    graph = topLoop.getDDGraph(false);
    if (graph == null) {
      return;
    }

    if (trace) {
      System.out.println("     " + graph);
    }

    int[] order = new int[depth];
    Cost[] costs = new Cost[depth];
    Vector<RefGroup> groups = new Vector<RefGroup>(20);

    for (int pos = 0; pos < depth; pos++) {
      LoopHeaderChord current = nest.elementAt(pos);
      String tag = "   " + pos + " ";
      if (trace) {
        System.out.println(tag + current);
      }

      computeRefGroups(current.getNestedLevel(), 2, 2, subscripts, groups);

      Cost cost = computeRefCostSum(current, groups);
      if (trace) {
        System.out.println(tag + cost);
      }

      costs[pos] = cost;
      order[pos] = pos; // position 0 is the outermost loop

      Cost trips = tripProduct(nest, current);
      if (trace) {
        System.out.println(tag + trips);
      }

      // The array slot holds this same object, so it sees the scaling too.
      cost.multiply(trips);
      if (trace) {
        System.out.println(tag + cost);
      }
    }

    if (!sortByCost(costs, order)) {
      return; // already in the preferred order
    }

    if (trace) {
      StringBuilder line = new StringBuilder("   permute " + depth + ":");
      for (int idx : order) {
        line.append(" ").append(idx);
      }
      System.out.println(line);
    }

    int[][] ddVec = getDDVec(subscripts, depth);
    if (trace) {
      printDDInfo(ddVec, depth);
    }

    if (!isLegal(order, ddVec)) {
      return;
    }

    if (trace) {
      System.out.println("   permute " + depth);
    }

    // rank[k] is the target position of the loop currently at position k.
    // Sorting the rank vector with adjacent swaps yields exactly the
    // interchanges that have to be performed.
    int[] rank = new int[depth];
    for (int pos = 0; pos < depth; pos++) {
      rank[order[pos]] = pos;
    }

    if (trace) {
      printOrder(rank);
    }

    boolean swapped;
    do {
      swapped = false;

      for (int outerPos = 0; outerPos < depth - 1; outerPos++) {
        int innerPos = outerPos + 1;
        int outerRank = rank[outerPos];
        int innerRank = rank[innerPos];

        if (innerRank >= outerRank) {
          continue; // this pair is already ordered
        }

        LoopHeaderChord innerLoop = nest.elementAt(innerPos);
        LoopHeaderChord outerLoop = nest.elementAt(outerPos);

        boolean outerOk = outerLoop.isDDComplete() && !outerLoop.inhibitLoopPermute();
        if (!outerOk) {
          continue;
        }

        boolean innerOk = innerLoop.isDDComplete() && !innerLoop.inhibitLoopPermute();
        if (!innerOk) {
          continue;
        }

        swapped = true;

        rank[outerPos] = innerRank;
        rank[innerPos] = outerRank;

        nest.setElementAt(innerLoop, outerPos);
        nest.setElementAt(outerLoop, innerPos);

        performLoopInterchange(innerLoop, outerLoop);
      }
    } while (swapped);
  }
Пример #4
0
  /**
   * Return true if this is a legal loop. A legal loop contains no function calls, contains no
   * decision chord other than the loop's own loop test, and has no scalar variable cycles. A
   * cycle exists when the variable is referenced before it is defined. We go to some trouble to
   * allow loops containing cycles such as
   *
   * <pre>
   *   s = s + a(i,j)
   * </pre>
   *
   * to be permuted.
   *
   * <p>The check walks the chords reachable from the loop header using a work stack borrowed from
   * {@code WorkArea}; the stack is returned on every exit path.
   *
   * @param loop the header of the loop to check
   * @return true if none of the disqualifying conditions above was found
   */
  private boolean legalLoop(LoopHeaderChord loop) {
    Stack<Chord> wl = WorkArea.<Chord>getStack("legalLoop");
    References refs = scribble.getRefs();

    // Start a fresh traversal from the loop header.
    Chord.nextVisit();
    wl.push(loop);
    loop.setVisited();

    // Mark every loop exit as visited up front.
    // NOTE(review): presumably this keeps pushOutCfgEdges from walking past
    // the loop exits - confirm against Chord.pushOutCfgEdges.
    int n = loop.numLoopExits();
    for (int i = 0; i < n; i++) loop.getLoopExit(i).setVisited();

    boolean legal = true;

    outer:
    while (!wl.empty()) {
      Chord c = wl.pop();

      // A call inside the loop disqualifies it.
      if (c.getCall(true) != null) {
        legal = false;
        break;
      }

      // So does any decision chord other than the loop's own test.
      if ((c instanceof DecisionChord) && (c != loop.getLoopTest())) {
        legal = false;
        break;
      }

      if (c.isAssignChord() && !c.isPhiExpr()) {
        ExprChord ec = (ExprChord) c;
        Expr lhs = ec.getLValue();
        // NOTE(review): rhs is not read anywhere below.
        Expr rhs = ec.getRValue();

        // The variable is both defined and used in the loop and
        // we don't know how it is used.  For example, it could be
        //   s = s + 1
        //   c(i,j) = s
        // or
        //   c(i,j) = s
        //   s = s + 1
        // We want to allow
        //   s = s + c(i,j)
        // because we know that s is not used to specify the
        // value of an array element.  We want to allow
        //   s = ...
        //     = s
        // since there is no cycle.

        // Only direct assignments to a variable other than this loop's index
        // are examined.
        if (lhs instanceof LoadDeclAddressExpr) {
          LoadDeclAddressExpr ldae = (LoadDeclAddressExpr) lhs;
          VariableDecl vd = ldae.getDecl().returnVariableDecl();
          if ((vd != null) && !loop.isLoopIndex(vd)) {
            // First pass over the uses: the assignment is a cycle if it is
            // itself one of the chords that use the variable (s = s + ...).
            boolean cycle = false;
            Iterator<Chord> it1 = refs.getUseChords(vd);
            while (it1.hasNext()) {
              Chord s = it1.next();
              cycle |= (s == c);
            }

            // Second pass: look at every other use whose loop header is this
            // loop; uses belonging to a different loop header are skipped.
            Iterator<Chord> it2 = refs.getUseChords(vd);
            while (it2.hasNext()) {
              Chord s = it2.next();
              if (c == s) continue;

              if (s.getLoopHeader() != loop) continue;

              if (cycle) {
                // There was a cycle and another use.
                //   s = s + 1
                //     = s
                legal = false;
                break outer;
              }

              // Check for a use before the def: follow getNextChord() from the
              // use; reaching the assignment means the use comes first.
              while (true) {
                s = s.getNextChord();
                if (s == null) break;
                if (s == c) {
                  legal = false;
                  break outer;
                }
              }
            }
          }
        }
      }

      // Continue the traversal with this chord's outgoing CFG edges.
      c.pushOutCfgEdges(wl);
    }

    // Hand the work stack back; all exits from the loop above land here.
    WorkArea.<Chord>returnStack(wl);

    return legal;
  }