public final void sumOverThreadsAndMPI() { for (int ThreadNo = 0; ThreadNo < NumberofThreads; ThreadNo++) { TotalNumberofPoints += NumberofPoints[ThreadNo]; } // Note - parallel for try { forallChunked( 0, SALSAUtility.ThreadCount - 1, (threadIndex) -> { int beginindex = ParallelArrayRanges[threadIndex].getStartIndex(); int indexlength = ParallelArrayRanges[threadIndex].getLength(); for (int ArrayLoop = beginindex; ArrayLoop < beginindex + indexlength; ArrayLoop++) { TotalVectorSum[ArrayLoop] = 0.0; for (int ThreadNo = 0; ThreadNo < NumberofThreads; ThreadNo++) { TotalVectorSum[ArrayLoop] += VectorSum[ThreadNo][ArrayLoop]; } } }); } catch (SuspendableException e) { SALSAUtility.printAndThrowRuntimeException(e.getMessage()); } if (SALSAUtility.MPI_Size > 1) { SALSAUtility.StartSubTimer(SALSAUtility.MPIREDUCETiming1); // Note - MPI Call - Allreduce - double - sum TotalNumberofPoints = SALSAUtility.mpiOps.allReduce(TotalNumberofPoints, MPI.SUM); // Note - MPI Call - Allreduce - double[] - sum SALSAUtility.mpiOps.allReduce(TotalVectorSum, MPI.SUM); SALSAUtility.StopSubTimer(SALSAUtility.MPIREDUCETiming1); } }
public final void sumOverThreadsAndMPI() { for (int ThreadNo = 0; ThreadNo < NumberofThreads; ThreadNo++) { TotalNumberofPoints += NumberofPoints[ThreadNo]; Totalmean += mean[ThreadNo]; Totalsquare += square[ThreadNo]; } if (SALSAUtility.MPI_Size > 1) { SALSAUtility.StartSubTimer(SALSAUtility.MPIREDUCETiming1); // Note - MPI Call - Allreduce - double - sum TotalNumberofPoints = SALSAUtility.mpiOps.allReduce(TotalNumberofPoints, MPI.SUM); // Note - MPI Call - Allreduce - double - sum Totalmean = SALSAUtility.mpiOps.allReduce(Totalmean, MPI.SUM); // Note - MPI Call - Allreduce - double - sum Totalsquare = SALSAUtility.mpiOps.allReduce(Totalsquare, MPI.SUM); SALSAUtility.StopSubTimer(SALSAUtility.MPIREDUCETiming1); } if (TotalNumberofPoints < 0.5) { return; } Totalmean = Totalmean / TotalNumberofPoints; Totalsquare = (Totalsquare / TotalNumberofPoints) - Totalmean * Totalmean; Totalsigma = Math.sqrt(Math.max(0.0, Totalsquare)); }
public final void sumOverThreadsAndMPI() { for (int ThreadNo = 0; ThreadNo < NumberofThreads; ThreadNo++) { TotalNumberofPoints += NumberofPoints[ThreadNo]; TotalInt += Intvalue[ThreadNo]; } if (SALSAUtility.MPI_Size > 1) { SALSAUtility.StartSubTimer(SALSAUtility.MPIREDUCETiming1); // Note - MPI Call - Allreduce - int - sum TotalNumberofPoints = SALSAUtility.mpiOps.allReduce(TotalNumberofPoints, MPI.SUM); // Note - MPI Call - Allreduce - int - sum TotalInt = SALSAUtility.mpiOps.allReduce(TotalInt, MPI.SUM); SALSAUtility.StopSubTimer(SALSAUtility.MPIREDUCETiming1); } }
public final void sumOverThreadsAndMPI() { for (int ThreadNo = 0; ThreadNo < NumberofThreads; ThreadNo++) { TotalNumberofPoints += NumberofPoints[ThreadNo]; TotalOr = Orvalue[ThreadNo] || TotalOr; } if (SALSAUtility.MPI_Size > 1) { SALSAUtility.StartSubTimer(SALSAUtility.MPIREDUCETiming1); // Note - MPI Call - Allreduce - double - sum TotalNumberofPoints = SALSAUtility.mpiOps.allReduce(TotalNumberofPoints, MPI.SUM); // Note - MPI Call - Allreduce - boolean - or TotalOr = SALSAUtility.mpiOps.allReduce(TotalOr, MPI.LOR); SALSAUtility.StopSubTimer(SALSAUtility.MPIREDUCETiming1); } }
public final void sumOverThreadsAndMPI() { for (int ThreadNo = 0; ThreadNo < NumberofThreads; ThreadNo++) { TotalNumberofPoints += NumberofPoints[ThreadNo]; TotalMax = Math.max(TotalMax, Maxvalue[ThreadNo]); } if (SALSAUtility.MPI_Size > 1) { SALSAUtility.StartSubTimer(SALSAUtility.MPIREDUCETiming1); // Note - MPI Call - Allreduce - double - sum TotalNumberofPoints = SALSAUtility.mpiOps.allReduce(TotalNumberofPoints, MPI.SUM); // Note - MPI Call - Allreduce - double - max TotalMax = SALSAUtility.mpiOps.allReduce(TotalMax, MPI.MAX); SALSAUtility.StopSubTimer(SALSAUtility.MPIREDUCETiming1); } }
public final void sumOverThreadsAndMPI() { for (int ThreadNo = 0; ThreadNo < NumberofThreads; ThreadNo++) { TotalNumberofPoints += NumberofPoints[ThreadNo]; for (int ArrayLoop = 0; ArrayLoop < ArraySize; ArrayLoop++) { TotalVectorSum[ArrayLoop] += VectorSum[ThreadNo][ArrayLoop]; } } if (SALSAUtility.MPI_Size > 1) { SALSAUtility.StartSubTimer(SALSAUtility.MPIREDUCETiming1); // Note - MPI Call - Allreduce - int - sum TotalNumberofPoints = SALSAUtility.mpiOps.allReduce(TotalNumberofPoints, MPI.SUM); // Note - MPI Call - Allreduce - int[] - sum SALSAUtility.mpiOps.allReduce(TotalVectorSum, MPI.SUM); SALSAUtility.StopSubTimer(SALSAUtility.MPIREDUCETiming1); } }
public final void sumOverThreadsAndMPI() { SALSAUtility.StartSubTimer(SALSAUtility.ThreadTiming); // Note - parallel for try { forallChunked( 0, SALSAUtility.ThreadCount - 1, (threadIndex) -> { int beginindex = ParallelArrayRanges[threadIndex].getStartIndex(); int indexlength = ParallelArrayRanges[threadIndex].getLength(); for (int ArrayLoop = beginindex; ArrayLoop < beginindex + indexlength; ArrayLoop++) { double tmp = 0.0; for (int ThreadNo = 0; ThreadNo < NumberofThreads; ThreadNo++) { tmp += VectorSum[ThreadNo][ArrayLoop]; } TotalVectorSum[ArrayLoop] = tmp; } }); } catch (SuspendableException e) { SALSAUtility.printAndThrowRuntimeException(e.getMessage()); } SALSAUtility.StopSubTimer(SALSAUtility.ThreadTiming); if (SALSAUtility.MPI_Size > 1) { SALSAUtility.StartSubTimer(SALSAUtility.MPIREDUCETiming1); // Note - MPI Call - Allreduce - double - sum TotalNumberofPoints = SALSAUtility.mpiOps.allReduce(TotalNumberofPoints, MPI.SUM); int bigsize = TotalVectorSum.length; if (bigsize <= 4096) { // Note - MPI Call - Allreduce - double[] - sum SALSAUtility.mpiOps.allReduce(TotalVectorSum, MPI.SUM); } else { double[] buffer = new double[4096]; int start = 0; while (start < bigsize) { int whatsLeft = Math.min(bigsize - start, 4096); System.arraycopy(TotalVectorSum, start, buffer, 0, whatsLeft); // Note - MPI Call - Allreduce - double[] - sum SALSAUtility.mpiOps.allReduce(buffer, MPI.SUM); System.arraycopy(buffer, 0, TotalVectorSum, start, whatsLeft); start += whatsLeft; } } SALSAUtility.StopSubTimer(SALSAUtility.MPIREDUCETiming1); } }
public final void sumOverThreadsAndMPI() { for (int ThreadNo = 0; ThreadNo < NumberofThreads; ThreadNo++) { TotalNumberofPoints += NumberofPoints[ThreadNo]; for (int ArrayLoop = 0; ArrayLoop < ArraySize; ArrayLoop++) { TotalVectorMax[ArrayLoop] = Math.max(TotalVectorMax[ArrayLoop], VectorMax[ThreadNo][ArrayLoop]); } } if (SALSAUtility.MPI_Size > 1) { SALSAUtility.StartSubTimer(SALSAUtility.MPIREDUCETiming1); // Note - MPI Call - Allreduce - double - sum TotalNumberofPoints = SALSAUtility.mpiOps.allReduce(TotalNumberofPoints, MPI.SUM); // Note - MPI Call - Allreduce - double[] - max SALSAUtility.mpiOps.allReduce(TotalVectorMax, MPI.MAX); SALSAUtility.StopSubTimer(SALSAUtility.MPIREDUCETiming1); } }
public final void sumOverThreadsAndMPI() { for (int ThreadNo = 0; ThreadNo < NumberofThreads; ThreadNo++) { if (IndexValue[ThreadNo] < 0) { continue; } TotalNumberofPoints += NumberofPoints[ThreadNo]; if (MinMaxPointer != 0) { if ((TotalIndexValue >= 0) && (TotalMaxOrMin > MaxOrMinvalue[ThreadNo])) { continue; } } else { if ((TotalIndexValue >= 0) && (TotalMaxOrMin <= MaxOrMinvalue[ThreadNo])) { continue; } } TotalMaxOrMin = MaxOrMinvalue[ThreadNo]; TotalIndexValue = IndexValue[ThreadNo]; } if (SALSAUtility.MPI_Size > 1) { SALSAUtility.StartSubTimer(SALSAUtility.MPIREDUCETiming1); if (MinMaxPointer != 0) { // Note - MPI Call - Allreduce - MPIReducePlusIndex - max with index salsa.mpi.MPIReducePlusIndex result = SALSAUtility.mpiOps.allReduce( new salsa.mpi.MPIReducePlusIndex(TotalIndexValue, TotalMaxOrMin), salsa.mpi.MPIReducePlusIndex.Op.MAX_WITH_INDEX); TotalMaxOrMin = result.getValue(); TotalIndexValue = result.getIndex(); } else { // Note - MPI Call - Allreduce - MPIReducePlusIndex - min with index salsa.mpi.MPIReducePlusIndex result = SALSAUtility.mpiOps.allReduce( new salsa.mpi.MPIReducePlusIndex(TotalIndexValue, TotalMaxOrMin), salsa.mpi.MPIReducePlusIndex.Op.MIN_WITH_INDEX); TotalMaxOrMin = result.getValue(); TotalIndexValue = result.getIndex(); } // Note - MPI Call - Allreduce - double - sum TotalNumberofPoints = SALSAUtility.mpiOps.allReduce(TotalNumberofPoints, MPI.SUM); SALSAUtility.StopSubTimer(SALSAUtility.MPIREDUCETiming1); } }
public final void sumOverThreadsAndMPI() { for (int threadNo = 0; threadNo < NumberOfThreads; threadNo++) { TotalNumberofPoints += NumberOfPoints[threadNo]; for (int i = 0; i < OuterDimension; ++i) { for (int j = 0; j < InnerDimension; ++j) { TotalSum[i][j] += Sum[threadNo][i][j]; } } } if (SALSAUtility.MPI_Size > 1) { SALSAUtility.StartSubTimer(SALSAUtility.MPIREDUCETiming1); // Note - MPI Call - Allreduce - double - sum TotalNumberofPoints = SALSAUtility.mpiOps.allReduce(TotalNumberofPoints, MPI.SUM); for (int i = 0; i < OuterDimension; ++i) { // Note - MPI Call - Allreduce - double[] - sum SALSAUtility.mpiOps.allReduce(TotalSum[i], MPI.SUM); } SALSAUtility.StopSubTimer(SALSAUtility.MPIREDUCETiming1); } }
public final void sumOverThreadsAndMPI() { for (int ThreadNo = 0; ThreadNo < NumberofThreads; ThreadNo++) { TotalNumberofPoints += NumberofPoints[ThreadNo]; Totalmean1 += mean1[ThreadNo]; Totalmean2 += mean2[ThreadNo]; Totalsquare1 += square1[ThreadNo]; Totalsquare2 += square2[ThreadNo]; Totalcross12 += cross12[ThreadNo]; } if (SALSAUtility.MPI_Size > 1) { SALSAUtility.StartSubTimer(SALSAUtility.MPIREDUCETiming1); // Note - MPI Call - Allreduce - double - sum TotalNumberofPoints = SALSAUtility.mpiOps.allReduce(TotalNumberofPoints, MPI.SUM); // Note - MPI Call - Allreduce - double - sum Totalmean1 = SALSAUtility.mpiOps.allReduce(Totalmean1, MPI.SUM); // Note - MPI Call - Allreduce - double - sum Totalmean2 = SALSAUtility.mpiOps.allReduce(Totalmean2, MPI.SUM); // Note - MPI Call - Allreduce - double - sum Totalsquare1 = SALSAUtility.mpiOps.allReduce(Totalsquare1, MPI.SUM); // Note - MPI Call - Allreduce - double - sum Totalsquare2 = SALSAUtility.mpiOps.allReduce(Totalsquare2, MPI.SUM); // Note - MPI Call - Allreduce - double - sum Totalcross12 = SALSAUtility.mpiOps.allReduce(Totalcross12, MPI.SUM); SALSAUtility.StopSubTimer(SALSAUtility.MPIREDUCETiming1); } if (TotalNumberofPoints < 0.5) { return; } Totalmean1 = Totalmean1 / TotalNumberofPoints; Totalmean2 = Totalmean2 / TotalNumberofPoints; Totalsquare1 = (Totalsquare1 / TotalNumberofPoints) - Totalmean1 * Totalmean1; Totalsquare2 = (Totalsquare2 / TotalNumberofPoints) - Totalmean2 * Totalmean2; Totalcross12 = (Totalcross12 / TotalNumberofPoints) - Totalmean1 * Totalmean2; Totalsigma1 = Math.sqrt(Totalsquare1); Totalsigma2 = Math.sqrt(Totalsquare2); Totalcross12 = Totalcross12 / (Totalsigma1 * Totalsigma2); }
public final void sumOverThreadsAndMPI() { for (int ThreadNo = 0; ThreadNo < NumberofThreads; ThreadNo++) { TotalNumberofPoints += NumberofPoints[ThreadNo]; for (int ArrayLoop = 0; ArrayLoop < ArraySize; ArrayLoop++) { Totalmean[ArrayLoop] += mean[ThreadNo][ArrayLoop]; } } if (SALSAUtility.MPI_Size > 1) { SALSAUtility.StartSubTimer(SALSAUtility.MPIREDUCETiming1); // Note - MPI Call - Allreduce - double - sum TotalNumberofPoints = SALSAUtility.mpiOps.allReduce(TotalNumberofPoints, MPI.SUM); // Note - MPI Call - Allreduce - double[] - sum SALSAUtility.mpiOps.allReduce(Totalmean, MPI.SUM); SALSAUtility.StopSubTimer(SALSAUtility.MPIREDUCETiming1); } if (TotalNumberofPoints < 0.5) { return; } for (int ArrayLoop = 0; ArrayLoop < ArraySize; ArrayLoop++) { Totalmean[ArrayLoop] = Totalmean[ArrayLoop] / TotalNumberofPoints; } }
/**
 * Emits a one-line summary of the two means, the two sigmas and the correlation value.
 *
 * <p>Nothing is printed when {@code SALSAUtility.DebugPrintOption} is 0 or when this process
 * is not MPI rank 0.
 *
 * @param label text placed at the start of the line
 * @param FPformat {@link String#format} pattern applied to each of the five numbers
 */
public final void print(String label, String FPformat) {
    boolean printingEnabled = SALSAUtility.DebugPrintOption != 0;
    boolean isRankZero = SALSAUtility.MPI_Rank == 0;
    if (!printingEnabled || !isRankZero) {
        return;
    }

    StringBuilder line = new StringBuilder();
    line.append(label);
    line.append(" means ");
    line.append(String.format(FPformat, Totalmean1));
    line.append(" ");
    line.append(String.format(FPformat, Totalmean2));
    line.append(" sigmas ");
    line.append(String.format(FPformat, Totalsigma1));
    line.append(" ");
    line.append(String.format(FPformat, Totalsigma2));
    line.append(" correl ");
    line.append(String.format(FPformat, Totalcross12));

    SALSAUtility.SALSAPrint(1, line.toString());
}
public final void sumOverThreadsAndMPI() { for (int storeloop = 0; storeloop < Numbertofind; storeloop++) { TotalMinValue[storeloop] = -1.0; TotalIndexValue[storeloop] = -1; } TotalWorst = -1; for (int ThreadNo = 0; ThreadNo < NumberofThreads; ThreadNo++) { TotalNumberofPoints += NumberofPoints[ThreadNo]; for (int storeloop = 0; storeloop < Numbertofind; storeloop++) { if (IndexValuebythread[ThreadNo][storeloop] < 0) { continue; // End this thread } tangible.RefObject<Integer> tempRef_TotalWorst = new tangible.RefObject<>(TotalWorst); FindMinimumSet( MinValuebythread[ThreadNo][storeloop], IndexValuebythread[ThreadNo][storeloop], tempRef_TotalWorst, TotalMinValue, TotalIndexValue, Numbertofind); TotalWorst = tempRef_TotalWorst.argValue; } } if (SALSAUtility.MPI_Size > 1) { SALSAUtility.StartSubTimer(SALSAUtility.MPIREDUCETiming1); // Note - MPI Call - Allreduce - double - sum TotalNumberofPoints = SALSAUtility.mpiOps.allReduce(TotalNumberofPoints, MPI.SUM); SALSAUtility.StopSubTimer(SALSAUtility.MPIREDUCETiming1); } // Sort in absolute order and accumulate over processes. 
This takes Numbertofindsteps for (int OrderLoop = 0; OrderLoop < Numbertofind; OrderLoop++) { int localindex = -1; // unset double localvalue = -1.0; int loopused = -1; for (int internalloop = 0; internalloop < Numbertofind; internalloop++) { // Find minimum if (TotalIndexValue[internalloop] < 0) { continue; } if ((localindex < 0) || (TotalMinValue[internalloop] < localvalue)) { localindex = TotalIndexValue[internalloop]; localvalue = TotalMinValue[internalloop]; loopused = internalloop; } } int oldlocalindex = localindex; if (SALSAUtility.MPI_Size > 1) { SALSAUtility.StartSubTimer(SALSAUtility.MPIREDUCETiming1); // Note - MPI Call - Allreduce - MPIReducePlusIndex - min with index salsa.mpi.MPIReducePlusIndex result = SALSAUtility.mpiOps.allReduce( new salsa.mpi.MPIReducePlusIndex(localindex, localvalue), salsa.mpi.MPIReducePlusIndex.Op.MIN_WITH_INDEX); localvalue = result.getValue(); localindex = result.getIndex(); SALSAUtility.StopSubTimer(SALSAUtility.MPIREDUCETiming1); } OrderedMinValue[OrderLoop] = localvalue; OrderedIndexValue[OrderLoop] = localindex; if ((oldlocalindex >= 0) && (OrderedIndexValue[OrderLoop] == oldlocalindex)) { TotalIndexValue[loopused] = -1; TotalMinValue[loopused] = -1.0; } } // Loop over Order Loop }