public final void sumOverThreadsAndMPI() { for (int ThreadNo = 0; ThreadNo < NumberofThreads; ThreadNo++) { TotalNumberofPoints += NumberofPoints[ThreadNo]; } // Note - parallel for try { forallChunked( 0, SALSAUtility.ThreadCount - 1, (threadIndex) -> { int beginindex = ParallelArrayRanges[threadIndex].getStartIndex(); int indexlength = ParallelArrayRanges[threadIndex].getLength(); for (int ArrayLoop = beginindex; ArrayLoop < beginindex + indexlength; ArrayLoop++) { TotalVectorSum[ArrayLoop] = 0.0; for (int ThreadNo = 0; ThreadNo < NumberofThreads; ThreadNo++) { TotalVectorSum[ArrayLoop] += VectorSum[ThreadNo][ArrayLoop]; } } }); } catch (SuspendableException e) { SALSAUtility.printAndThrowRuntimeException(e.getMessage()); } if (SALSAUtility.MPI_Size > 1) { SALSAUtility.StartSubTimer(SALSAUtility.MPIREDUCETiming1); // Note - MPI Call - Allreduce - double - sum TotalNumberofPoints = SALSAUtility.mpiOps.allReduce(TotalNumberofPoints, MPI.SUM); // Note - MPI Call - Allreduce - double[] - sum SALSAUtility.mpiOps.allReduce(TotalVectorSum, MPI.SUM); SALSAUtility.StopSubTimer(SALSAUtility.MPIREDUCETiming1); } }
public final void sumOverThreadsAndMPI() { SALSAUtility.StartSubTimer(SALSAUtility.ThreadTiming); // Note - parallel for try { forallChunked( 0, SALSAUtility.ThreadCount - 1, (threadIndex) -> { int beginindex = ParallelArrayRanges[threadIndex].getStartIndex(); int indexlength = ParallelArrayRanges[threadIndex].getLength(); for (int ArrayLoop = beginindex; ArrayLoop < beginindex + indexlength; ArrayLoop++) { double tmp = 0.0; for (int ThreadNo = 0; ThreadNo < NumberofThreads; ThreadNo++) { tmp += VectorSum[ThreadNo][ArrayLoop]; } TotalVectorSum[ArrayLoop] = tmp; } }); } catch (SuspendableException e) { SALSAUtility.printAndThrowRuntimeException(e.getMessage()); } SALSAUtility.StopSubTimer(SALSAUtility.ThreadTiming); if (SALSAUtility.MPI_Size > 1) { SALSAUtility.StartSubTimer(SALSAUtility.MPIREDUCETiming1); // Note - MPI Call - Allreduce - double - sum TotalNumberofPoints = SALSAUtility.mpiOps.allReduce(TotalNumberofPoints, MPI.SUM); int bigsize = TotalVectorSum.length; if (bigsize <= 4096) { // Note - MPI Call - Allreduce - double[] - sum SALSAUtility.mpiOps.allReduce(TotalVectorSum, MPI.SUM); } else { double[] buffer = new double[4096]; int start = 0; while (start < bigsize) { int whatsLeft = Math.min(bigsize - start, 4096); System.arraycopy(TotalVectorSum, start, buffer, 0, whatsLeft); // Note - MPI Call - Allreduce - double[] - sum SALSAUtility.mpiOps.allReduce(buffer, MPI.SUM); System.arraycopy(buffer, 0, TotalVectorSum, start, whatsLeft); start += whatsLeft; } } SALSAUtility.StopSubTimer(SALSAUtility.MPIREDUCETiming1); } }