private boolean sortByCost(Cost[] loopCostList, int[] loopIndex) { int maxlooptemp = loopCostList.length - 1; boolean exchanged = true; boolean permuted = false; while (exchanged) { // Bubble sort the LoopCosts. exchanged = false; for (int i = 0; i < maxlooptemp; i++) { int j = i + 1; Cost lci = loopCostList[i]; Cost lci1 = loopCostList[j]; if (lci1 == null) continue; if (lci.lessThan(lci1)) { int temp = loopIndex[i]; loopIndex[i] = loopIndex[j]; loopIndex[j] = temp; loopCostList[i] = lci1; loopCostList[j] = lci; exchanged = true; } } permuted |= exchanged; maxlooptemp--; } return permuted; }
public Cost computeRefCost(LoopHeaderChord loop, SubscriptExpr se) { Cost tripL = loop.getTripCount(); InductionVar ivar = loop.getPrimaryInductionVar(); if (ivar == null) return tripL; // Compute // tripL = (ubL - lbL + stepL) / stepL int nsubs = se.numSubscripts(); // number of subscripts in the subscript LoopHeaderChord tloop = loop.getTopLoop(); VariableDecl iv = ivar.getVar(); for (int i = 0; i < nsubs - 1; i++) { // for c code // If the loop index variable of this loop appears in any of the // subscripts other then the last, then return tripL. Expr sI = se.getSubscript(i); AffineExpr ae = tloop.isAffine(sI); if (ae == null) return null; if (ae.hasTerm(iv)) return tripL; } int fs = nsubs - 1; Expr s0 = se.getSubscript(fs); AffineExpr ae = tloop.isAffine(s0); if (ae == null) return null; int at = ae.getTermIndexOrig(iv); long coeff = 0; if (at >= 0) coeff = ae.getCoefficient(at); if (coeff == 0) // Invariant Reuse. return new Cost(1.0, 0); long stepL = loop.getStepValue(); long stride = stepL * coeff; if (stride < 0) stride = -stride; // Unit Reuse. Type et = se.getCoreType().getPointedTo(); int bs = et.memorySizeAsInt(Machine.currentMachine); int cs = Machine.currentMachine.getCacheSize(bs); if (stride <= cs) { // cache line or block size tripL.multiply(stride); tripL.divide(cs); return tripL; } // No Reuse. return tripL; }
/**
 * Adds up the reference cost, with respect to {@code loop}, of the
 * representative subscript expression of every reference group.
 * Groups for which {@code computeRefCost} yields {@code null} contribute
 * nothing to the sum.
 *
 * @param loop the loop the references are being costed against
 * @param refGroups the reference groups to sum over
 * @return a newly created {@code Cost} holding the sum
 */
private Cost computeRefCostSum(LoopHeaderChord loop, Vector<RefGroup> refGroups) {
  Cost total = new Cost();

  for (RefGroup group : refGroups) {
    Cost refCost = computeRefCost(loop, group.getRepresentative());
    if (refCost != null) {
      total.add(refCost);
    }
  }

  return total;
}
/**
 * Tries to reorder the tightly nested loops rooted at {@code topLoop}
 * according to their estimated costs.
 *
 * <p>The steps are:
 * <ol>
 *   <li>Give up if there is no tight nest, if the last loop of the nest
 *       fails {@code legalLoop} (unless {@code unsafe} is set), if the
 *       subscripts cannot be collected, or if there is no data dependence
 *       graph.</li>
 *   <li>Compute a cost for every loop of the nest: the summed reference
 *       cost multiplied by the value of {@code tripProduct}. The cost for a
 *       loop is the cost of executing the nest with that loop in the
 *       innermost nesting.</li>
 *   <li>Sort the costs; give up if the order did not change or if
 *       {@code isLegal} rejects the new order.</li>
 *   <li>Bubble the loops into the new order using adjacent interchanges,
 *       skipping any pair in which a loop is not DD-complete or inhibits
 *       permutation.</li>
 * </ol>
 *
 * @param topLoop the outermost loop of the candidate nest
 */
private void tryPermute(LoopHeaderChord topLoop) {
  Vector<LoopHeaderChord> nest = topLoop.getTightlyNestedLoops();
  if (nest == null) {
    return;
  }

  int depth = nest.size();
  LoopHeaderChord lastLoop = nest.get(depth - 1);
  if (!(unsafe || legalLoop(lastLoop))) {
    return;
  }

  Table<Declaration, SubscriptExpr> arrayRefs = new Table<Declaration, SubscriptExpr>();
  if (!topLoop.getSubscriptsRecursive(arrayRefs)) {
    return;
  }

  graph = topLoop.getDDGraph(false);
  if (graph == null) {
    return;
  }

  if (trace) {
    System.out.println(" " + graph);
  }

  // Cost every loop of the nest. order[k] records which original nest
  // position sits at slot k; the outermost loop is at position 0.
  int[] order = new int[depth];
  Cost[] costs = new Cost[depth];
  Vector<RefGroup> refGroups = new Vector<RefGroup>(20);

  for (int pos = 0; pos < depth; pos++) {
    LoopHeaderChord current = nest.get(pos);
    if (trace) {
      System.out.println(" " + pos + " " + current);
    }

    computeRefGroups(current.getNestedLevel(), 2, 2, arrayRefs, refGroups);

    Cost loopCost = computeRefCostSum(current, refGroups);
    if (trace) {
      System.out.println(" " + pos + " " + loopCost);
    }

    Cost trips = tripProduct(nest, current);
    if (trace) {
      System.out.println(" " + pos + " " + trips);
    }

    loopCost.multiply(trips);
    if (trace) {
      System.out.println(" " + pos + " " + loopCost);
    }

    costs[pos] = loopCost;
    order[pos] = pos;
  }

  if (!sortByCost(costs, order)) {
    return; // The nest is already in cost order.
  }

  if (trace) {
    System.out.print(" permute " + depth);
    System.out.print(":");
    for (int loopNumber : order) {
      System.out.print(" " + loopNumber);
    }
    System.out.println("");
  }

  int[][] ddVec = getDDVec(arrayRefs, depth);
  if (trace) {
    printDDInfo(ddVec, depth);
  }

  if (!isLegal(order, ddVec)) {
    return;
  }

  if (trace) {
    System.out.println(" permute " + depth);
  }

  // Invert the sorted order: rank[p] is the slot that the loop currently
  // at nest position p should end up in. Sorting the rank vector with
  // adjacent swaps yields the sequence of interchanges to perform.
  int[] rank = new int[depth];
  for (int slot = 0; slot < depth; slot++) {
    rank[order[slot]] = slot;
  }

  if (trace) {
    printOrder(rank);
  }

  int lastPair = depth - 1;
  boolean changed;
  do {
    changed = false;

    for (int outerPos = 0; outerPos < lastPair; outerPos++) {
      int innerPos = outerPos + 1;
      int outerRank = rank[outerPos];
      int innerRank = rank[innerPos];
      if (outerRank <= innerRank) {
        continue; // This adjacent pair is already in the desired order.
      }

      LoopHeaderChord innerLoop = nest.get(innerPos);
      LoopHeaderChord outerLoop = nest.get(outerPos);

      boolean outerMovable = outerLoop.isDDComplete() && !outerLoop.inhibitLoopPermute();
      if (!outerMovable) {
        continue;
      }

      boolean innerMovable = innerLoop.isDDComplete() && !innerLoop.inhibitLoopPermute();
      if (!innerMovable) {
        continue;
      }

      changed = true;

      rank[outerPos] = innerRank;
      rank[innerPos] = outerRank;

      nest.setElementAt(innerLoop, outerPos);
      nest.setElementAt(outerLoop, innerPos);

      performLoopInterchange(innerLoop, outerLoop);
    }
  } while (changed);
}