/* * Solves for times by processing samples in the active list in parallel. */ private void solveParallel( final ActiveList al, final float[][][] t, final int m, final float[][][] times, final int[][][] marks) { int mbmin = 64; // target minimum number of samples per block int nbmax = 256; // maximum number of blocks final float[][] dtask = new float[nbmax][]; final ActiveList[] bltask = new ActiveList[nbmax]; while (!al.isEmpty()) { final int n = al.size(); // number of samples in active (A) list final int mbmax = max(mbmin, 1 + (n - 1) / nbmax); // max samples per block final int nb = 1 + (n - 1) / mbmax; // number of blocks <= nbmax final int mb = 1 + (n - 1) / nb; // evenly distribute samples per block Parallel.loop( nb, new Parallel.LoopInt() { // for all blocks, ... public void compute(int ib) { if (bltask[ib] == null) { // if necessary for this block, make ... dtask[ib] = new float[6]; // work array for tensor coefficients bltask[ib] = new ActiveList(); // and an empty active list } int i = ib * mb; // beginning of block int j = min(i + mb, n); // beginning of next block (or end) for (int k = i; k < j; ++k) { // for each sample in block, ... Sample s = al.get(k); // get k'th sample from A list solveOne(t, m, times, marks, s, bltask[ib], dtask[ib]); // do sample } bltask[ib].setAllAbsent(); // needed when merging B lists below } }); // Merge samples from all B lists to a new A list. All samples // in B lists are currently marked as absent in the A list. As // samples in B lists are appended to the A list, their absent // flags are set to false, so that no sample is appended more // than once to the new A list. al.clear(); for (int ib = 0; ib < nb; ++ib) { if (bltask[ib] != null) { al.appendIfAbsent(bltask[ib]); bltask[ib].clear(); } } } }
/* * Solves for times by sequentially processing each sample in active list. */ private void solveSerial( ActiveList al, float[][][] t, int m, float[][][] times, int[][][] marks) { float[] d = new float[6]; ActiveList bl = new ActiveList(); int ntotal = 0; while (!al.isEmpty()) { // al.shuffle(); // demonstrate that solution depends on order int n = al.size(); ntotal += n; for (int i = 0; i < n; ++i) { Sample s = al.get(i); solveOne(t, m, times, marks, s, bl, d); } bl.setAllAbsent(); al.clear(); al.appendIfAbsent(bl); bl.clear(); } trace("solveSerial: ntotal=" + ntotal); trace(" nratio=" + (float) ntotal / (float) (_n1 * _n2 * _n3)); }
/* * Solves for times by processing samples in the active list in parallel. */ private void solveParallelX( final ActiveList al, final float[][][] t, final int m, final float[][][] times, final int[][][] marks) { int nthread = Runtime.getRuntime().availableProcessors(); ExecutorService es = Executors.newFixedThreadPool(nthread); CompletionService<Void> cs = new ExecutorCompletionService<Void>(es); ActiveList[] bl = new ActiveList[nthread]; float[][] d = new float[nthread][]; for (int ithread = 0; ithread < nthread; ++ithread) { bl[ithread] = new ActiveList(); d[ithread] = new float[6]; } final AtomicInteger ai = new AtomicInteger(); int ntotal = 0; // int niter = 0; while (!al.isEmpty()) { ai.set(0); // initialize the shared block index to zero final int n = al.size(); // number of samples in active (A) list ntotal += n; final int mb = 32; // size of blocks of samples final int nb = 1 + (n - 1) / mb; // number of blocks of samples int ntask = min(nb, nthread); // number of tasks (threads to be used) for (int itask = 0; itask < ntask; ++itask) { // for each task, ... final ActiveList bltask = bl[itask]; // task-specific B list final float[] dtask = d[itask]; // task-specific work array cs.submit( new Callable<Void>() { // submit new task public Void call() { for (int ib = ai.getAndIncrement(); ib < nb; ib = ai.getAndIncrement()) { int i = ib * mb; // beginning of block int j = min(i + mb, n); // beginning of next block (or end) for (int k = i; k < j; ++k) { // for each sample in block, ... Sample s = al.get(k); // get k'th sample from A list solveOne(t, m, times, marks, s, bltask, dtask); // process sample } } bltask.setAllAbsent(); // needed when merging B lists below return null; } }); } try { for (int itask = 0; itask < ntask; ++itask) cs.take(); } catch (InterruptedException e) { throw new RuntimeException(e); } // Merge samples from all B lists to a new A list. As samples // are appended, their absent flags are set to false, so that // each sample is appended no more than once to the new A list. al.clear(); for (int itask = 0; itask < ntask; ++itask) { al.appendIfAbsent(bl[itask]); bl[itask].clear(); } // ++niter; } es.shutdown(); // trace("solveParallel: ntotal="+ntotal); // trace(" nratio="+(float)ntotal/(float)(_n1*_n2*_n3)); }