Example #1
0
        /**
         * Derives a cache memory configuration from a memory-ratio setting expressed in percent.
         *
         * <p>The ratio is applied to the maximum size of the old generation memory pool; that
         * budget is then split between the node cache, the relationship cache and the two lookup
         * tables.
         *
         * @param s the configured ratio in percent, or the {@code DEFAULT} sentinel instance
         * @return the memory configuration derived from the ratio
         */
        @Override
        public HPCMemoryConfig apply(String s) {
          // Deliberate identity comparison: only the DEFAULT sentinel instance itself selects the
          // built-in ratio, never a string that merely has the same characters.
          //noinspection StringEquality
          final boolean ratioIsDefault = s == DEFAULT;
          final String ratioPercent = ratioIsDefault ? "50.0" : s;
          final HPCMemoryConfig.Source origin =
              ratioIsDefault
                  ? HPCMemoryConfig.Source.DEFAULT_MEMORY_RATIO
                  : HPCMemoryConfig.Source.EXPLICIT_MEMORY_RATIO;

          // Old generation size: try the G1 pool, then the PS pool, and as a conservative
          // last resort assume half of the heap.
          long oldGenBytes =
              memoryPoolMax("java.lang:type=MemoryPool,name=G1 Old Gen")
                  .or(memoryPoolMax("java.lang:type=MemoryPool,name=PS Old Gen"))
                  .or(heap() / 2)
                  .get();

          double fraction = FLOAT.apply(ratioPercent) / 100.0;
          long budget = (long) (fraction * oldGenBytes);

          // 30% node cache, 40% relationship cache, 10% for each lookup table.
          long nodeCacheBytes = (long) (budget * 0.3);
          long relCacheBytes = (long) (budget * 0.4);
          return new HPCMemoryConfig(
              nodeCacheBytes,
              relCacheBytes,
              asPercentageOfHeap(budget * 0.1),
              asPercentageOfHeap(budget * 0.1),
              origin);
        }
Example #2
0
            /**
             * Replaces the ratio-based configuration with an explicit one as soon as any of the
             * four explicit cache settings is present.
             *
             * @param basedOnRatio configuration previously derived from the memory ratio
             * @param settings lookup from setting name to its raw string value, {@code null} when
             *     the setting is absent
             * @return {@code basedOnRatio} when no explicit setting is present, otherwise a new
             *     configuration built from the explicit values and their fallbacks
             */
            @Override
            public HPCMemoryConfig apply(
                HPCMemoryConfig basedOnRatio, Function<String, String> settings) {
              final String nodeSizeSetting =
                  settings.apply(HighPerformanceCacheSettings.node_cache_size.name());
              final String relSizeSetting =
                  settings.apply(HighPerformanceCacheSettings.relationship_cache_size.name());
              final String nodeArraySetting =
                  settings.apply(HighPerformanceCacheSettings.node_cache_array_fraction.name());
              final String relArraySetting =
                  settings.apply(
                      HighPerformanceCacheSettings.relationship_cache_array_fraction.name());

              final boolean nothingExplicit =
                  nodeSizeSetting == null
                      && relSizeSetting == null
                      && nodeArraySetting == null
                      && relArraySetting == null;
              if (nothingExplicit) {
                return basedOnRatio;
              }

              // At least one explicit value: switch to explicit mode. Missing cache sizes fall
              // back to an eighth of the heap, missing array fractions to 1.0.
              long nodeCacheBytes;
              if (nodeSizeSetting == null) {
                nodeCacheBytes = heap() / 8;
              } else {
                nodeCacheBytes = BYTES.apply(nodeSizeSetting);
              }

              long relCacheBytes;
              if (relSizeSetting == null) {
                relCacheBytes = heap() / 8;
              } else {
                relCacheBytes = BYTES.apply(relSizeSetting);
              }

              float nodeArrRatio = 1.0f;
              if (nodeArraySetting != null) {
                nodeArrRatio = FLOAT.apply(nodeArraySetting);
              }

              float relArrRatio = 1.0f;
              if (relArraySetting != null) {
                relArrRatio = FLOAT.apply(relArraySetting);
              }

              // Record whether the explicit values replace a ratio the user set herself, so the
              // accidental override can be detected.
              final HPCMemoryConfig.Source origin;
              if (basedOnRatio.source() == HPCMemoryConfig.Source.DEFAULT_MEMORY_RATIO) {
                origin = HPCMemoryConfig.Source.SPECIFIC;
              } else {
                origin = HPCMemoryConfig.Source.SPECIFIC_OVERRIDING_RATIO;
              }

              return new HPCMemoryConfig(
                  nodeCacheBytes, relCacheBytes, nodeArrRatio, relArrRatio, origin);
            }
 /**
  * Best-effort conversion of a string value, trying the boolean, integer and float converters in
  * that order and returning the first result that does not fail.
  *
  * @param value the value to convert
  * @return the converted value, or {@code value} itself when it is not a {@code String} or when
  *     every converter rejects it with an {@code IllegalArgumentException}
  */
 @Override
 public Object convert(Object value) {
   if (value instanceof String) {
     try {
       return BOOLEAN.convert(value);
     } catch (IllegalArgumentException notBoolean) {
       // Rejected by the boolean converter; try the next one.
     }
     try {
       return INTEGER.convert(value);
     } catch (IllegalArgumentException notInteger) {
       // Rejected by the integer converter; try the next one.
     }
     try {
       return FLOAT.convert(value);
     } catch (IllegalArgumentException notFloat) {
       // Rejected by every converter; fall through and hand back the input.
     }
   }
   return value;
 }
Example #4
0
  /**
   * Applies the numeric updates of one supernode (columns {@code fsupc..krep}) to every column of
   * the current panel ({@code w} columns starting at {@code jcol}).
   *
   * <p>For each panel column the length of the segment to update, {@code krep - kfnz + 1}, picks
   * the strategy; columns whose {@code repfnz} entry for {@code krep} is {@code EMPTY} are skipped:
   *
   * <ul>
   *   <li>length 1: a single column-column update;
   *   <li>length 2 or 3: an unrolled two- or three-column update;
   *   <li>length 4 or more: columns are handled in pairs. Each column of a pair gets its own
   *       triangular solve, and one {@code dmatvec2} call then produces both matrix-vector
   *       products. A column still waiting for a partner when the loop ends is processed alone
   *       afterwards.
   * </ul>
   *
   * <p>Results are written into {@code dense}. When {@code SCATTER_FOUND} is set, rows touched for
   * the first time in a column are also recorded in {@code spa_marker}, {@code panel_lsub} and
   * {@code w_lsub_end}. The per-column arrays ({@code repfnz}, {@code dense}, {@code spa_marker},
   * {@code panel_lsub}) are addressed as flat arrays with a stride of {@code n} per panel column.
   *
   * <p>{@code tempv} is used as scratch space: both {@code tri} slots alias it (at offsets 0 and
   * {@code n}) and every entry that is written is reset to zero after being scattered.
   */
  static void pdgstrf_bmod1D_mv2(
      final int pnum, /* process number */
      final int n, /* number of rows in the matrix */
      final int w, /* current panel width */
      final int jcol, /* leading column of the current panel */
      final int fsupc, /* leading column of the updating s-node */
      final int krep, /* last column of the updating s-node */
      final int nsupc, /* number of columns in the updating s-node */
      int nsupr, /* number of rows in the updating supernode */
      int nrow, /* number of rows below the diagonal block of
                   the updating supernode */
      int repfnz[], /* in */
      int panel_lsub[], /* modified */
      int w_lsub_end[], /* modified */
      int spa_marker[], /* modified; size n-by-w */
      double dense[], /* modified */
      double tempv[], /* working array - zeros on entry/exit */
      GlobalLU_t Glu, /* modified */
      Gstat_t Gstat /* modified */) {
    /*
     * -- SuperLU MT routine (version 2.0) --
     * Lawrence Berkeley National Lab, Univ. of California Berkeley,
     * and Xerox Palo Alto Research Center.
     * September 10, 2007
     *
     * Purpose
     * =======
     *
     *    Performs numeric block updates (sup-panel) in topological order.
     *    It features: col-col, 2cols-col, 3cols-col, and sup-col updates.
     *    Results are returned in SPA dense[*,w].
     *
     */

    double zero = 0.0;
    double one = 1.0;

    /* Strides and scalars handed to dtrsv/dgemv when USE_VENDOR_BLAS is set. */
    int incx = 1, incy = 1;
    double alpha = one, beta = zero;

    double ukj, ukj1, ukj2;
    int luptr, luptr1, luptr2;
    int segsze;
    int lptr; /* start of row subscripts of the updating supernode */
    int i, j, kfnz, krep_ind, isub, irow, no_zeros, twocols;
    int jj; /* index through each column in the panel */
    int kfnz2[], jj2[]; /* detect two identical columns */
    kfnz2 = new int[2];
    jj2 = new int[2];
    int repfnz_col[], repfnz_col1[]; /* repfnz[] for a column in the panel */
    double dense_col[], dense_col1[]; /* dense[] for a column in the panel */
    /* Array/offset pairs stand in for pointers into the shared work array. */
    double[] tri[], matvec[];
    tri = new double[2][];
    matvec = new double[2][];
    int[] matvec_offset = {0, 0};
    int col_marker[], col_marker1[]; /* each column of the spa_marker[*,w] */
    int col_lsub[], col_lsub1[]; /* each column of the panel_lsub[*,w] */
    int lsub[], xlsub_end[];
    double lusup[];
    int xlusup[];
    float flopcnt;

    double utime[] = Gstat.utime;
    double f_time = 0;

    lsub = Glu.lsub;
    xlsub_end = Glu.xlsub_end;
    lusup = Glu.lusup;
    xlusup = Glu.xlusup;
    lptr = Glu.xlsub[fsupc];
    krep_ind = lptr + nsupc - 1; /* position in lsub[] of the supernode's last column */
    twocols = 0; /* 1 while a long-segment column is waiting for a partner */
    /* Both triangular-solve buffers live in tempv: slot 0 at offset 0, slot 1 at offset n. */
    tri[0] = tempv;
    tri[1] = tempv;
    int[] tri_offset = {0, n};

    if (DEBUG) {
      if (jcol == BADPAN && krep == BADREP) {
        printf(
            "(%d) dbmod1D[1] jcol %d, fsupc %d, krep %d, nsupc %d, nsupr %d, nrow %d\n",
            pnum, jcol, fsupc, krep, nsupc, nsupr, nrow);
        PrintInt10("lsub[xlsub[2774]", nsupr, lsub, lptr);
      }
    }

    /* -----------------------------------------------
     * Sequence through each column in the panel ...
     * ----------------------------------------------- */
    /* The *_col arrays alias the flat inputs; the *_offset values advance by n per column. */
    repfnz_col = repfnz;
    dense_col = dense;
    col_marker = spa_marker;
    col_lsub = panel_lsub;
    int repfnz_col_offset = 0, dense_col_offset = 0;
    int col_marker_offset = 0, col_lsub_offset = 0;

    for (jj = jcol;
        jj < jcol + w;
        ++jj, col_marker_offset += n, col_lsub_offset += n, repfnz_col_offset += n,
            dense_col_offset += n) {

      kfnz = repfnz_col[repfnz_col_offset + krep];
      if (kfnz == EMPTY) continue; /* skip any zero segment */

      segsze = krep - kfnz + 1;
      luptr = xlusup[fsupc];

      /* NOTE(review): the right-hand side is evaluated in int arithmetic before being widened
       * to float - confirm it cannot overflow for the matrix sizes in use. */
      flopcnt = segsze * (segsze - 1) + 2 * nrow * segsze;
      Gstat.procstat[pnum].fcops += flopcnt;

      /* Case 1: Update U-segment of size 1 -- col-col update */
      if (segsze == 1) {
        if (TIMING) {
          f_time = SuperLU_timer_();
        }
        ukj = dense_col[dense_col_offset + lsub[krep_ind]];
        /* Move to the entries of the supernode's last column that lie below the first nsupc rows. */
        luptr += nsupr * (nsupc - 1) + nsupc;
        if (DEBUG) {
          if (krep == BADCOL && jj == -1) {
            printf(
                "(%d) dbmod1D[segsze=1]: k %d, j %d, ukj %.10e\n", pnum, lsub[krep_ind], jj, ukj);
            PrintInt10("segsze=1", nsupr, lsub, lptr);
          }
        }
        for (i = lptr + nsupc; i < xlsub_end[fsupc]; i++) {
          irow = lsub[i];
          dense_col[dense_col_offset + irow] -= ukj * lusup[luptr];
          ++luptr;
          if (SCATTER_FOUND) {
            /* First time this row is touched in column jj: mark it and append it to the column's
             * row list. */
            if (col_marker[col_marker_offset + irow] != jj) {
              col_marker[col_marker_offset + irow] = jj;
              col_lsub[col_lsub_offset + w_lsub_end[jj - jcol]++] = irow;
            }
          }
        }
        if (TIMING) {
          utime[FLOAT.ordinal()] += SuperLU_timer_() - f_time;
        }

      } else if (segsze <= 3) {
        /* Case 2: segment of size 2 or 3 -- unrolled 2cols-col / 3cols-col update */

        if (TIMING) {
          f_time = SuperLU_timer_();
        }
        ukj = dense_col[dense_col_offset + lsub[krep_ind]];
        /* One position earlier than in case 1: the loops below pre-increment before reading. */
        luptr += nsupr * (nsupc - 1) + nsupc - 1;
        ukj1 = dense_col[dense_col_offset + lsub[krep_ind - 1]];
        luptr1 = luptr - nsupr; /* same row, previous column of the supernode */
        if (segsze == 2) {
          ukj -= ukj1 * lusup[luptr1];
          dense_col[dense_col_offset + lsub[krep_ind]] = ukj;
          /*#pragma ivdep*/
          for (i = lptr + nsupc; i < xlsub_end[fsupc]; ++i) {
            irow = lsub[i];
            ++luptr;
            ++luptr1;
            dense_col[dense_col_offset + irow] -= (ukj * lusup[luptr] + ukj1 * lusup[luptr1]);
            if (SCATTER_FOUND) {
              if (col_marker[col_marker_offset + irow] != jj) {
                col_marker[col_marker_offset + irow] = jj;
                col_lsub[col_lsub_offset + w_lsub_end[jj - jcol]++] = irow;
              }
            }
          }
        } else {
          /* segsze == 3 */
          ukj2 = dense_col[dense_col_offset + lsub[krep_ind - 2]];
          luptr2 = luptr1 - nsupr; /* same row, two columns back */
          ukj1 -= ukj2 * lusup[luptr2 - 1];
          ukj = ukj - ukj1 * lusup[luptr1] - ukj2 * lusup[luptr2];
          dense_col[dense_col_offset + lsub[krep_ind]] = ukj;
          dense_col[dense_col_offset + lsub[krep_ind - 1]] = ukj1;
          for (i = lptr + nsupc; i < xlsub_end[fsupc]; ++i) {
            irow = lsub[i];
            ++luptr;
            ++luptr1;
            ++luptr2;
            dense_col[dense_col_offset + irow] -=
                (ukj * lusup[luptr] + ukj1 * lusup[luptr1] + ukj2 * lusup[luptr2]);
            if (SCATTER_FOUND) {
              if (col_marker[col_marker_offset + irow] != jj) {
                col_marker[col_marker_offset + irow] = jj;
                col_lsub[col_lsub_offset + w_lsub_end[jj - jcol]++] = irow;
              }
            }
          }
        }
        if (TIMING) {
          utime[FLOAT.ordinal()] += SuperLU_timer_() - f_time;
        }
      } else {
          /* segsze >= 4 */
        if (twocols == 1) {
          jj2[1] = jj; /* got two columns */
          twocols = 0;

          for (j = 0; j < 2; ++j) {
              /* Do two tri-solves */
            /* i first holds the flat offset of panel column jj2[j]; it is reused as a loop
             * counter further down. */
            i = n * (jj2[j] - jcol);
            repfnz_col1 = repfnz;
            int repfnz_col1_offset = i;
            dense_col1 = dense;
            int dense_col1_offset = i;
            kfnz2[j] = repfnz_col1[repfnz_col1_offset + krep];
            no_zeros = kfnz2[j] - fsupc;
            segsze = krep - kfnz2[j] + 1;
            /* The matrix-vector result is stored right behind the segment in the same buffer. */
            matvec[j] = tri[j];
            matvec_offset[j] = tri_offset[j] + segsze;

            /* Gather U[*,j] segment from dense[*] to tri[*]. */
            isub = lptr + no_zeros;
            for (i = 0; i < segsze; ++i) {
              irow = lsub[isub];
              tri[j][tri_offset[j] + i] = dense_col1[dense_col1_offset + irow]; /* Gather */
              ++isub;
            }

            if (TIMING) {
              f_time = SuperLU_timer_();
            }
            /* start effective triangle */
            luptr = xlusup[fsupc] + nsupr * no_zeros + no_zeros;

            if (USE_VENDOR_BLAS) {
              dtrsv("L", "N", "U", segsze, lusup, luptr, nsupr, tri[j], tri_offset[j], incx);
            } else {
              dlsolve(nsupr, segsze, lusup, luptr, tri[j], tri_offset[j]);
            }

            if (TIMING) {
              utime[FLOAT.ordinal()] += SuperLU_timer_() - f_time;
            }
          } /* end for j ... two tri-solves */

          if (TIMING) {
            f_time = SuperLU_timer_();
          }

          /* When the two segments start at different rows, the longer one first gets a
           * single-destination product over the columns it does not share with the other. */
          if (kfnz2[0] < kfnz2[1]) {
              /* First column is bigger */
            no_zeros = kfnz2[0] - fsupc;
            segsze = kfnz2[1] - kfnz2[0];
            luptr = xlusup[fsupc] + nsupr * no_zeros + nsupc;
            if (USE_VENDOR_BLAS) {
              dgemv(
                  "N",
                  nrow,
                  segsze,
                  alpha,
                  lusup,
                  luptr,
                  nsupr,
                  tri[0],
                  tri_offset[0],
                  incx,
                  beta,
                  matvec[0],
                  matvec_offset[0],
                  incy);
            } else {
              dmatvec(
                  nsupr,
                  nrow,
                  segsze,
                  lusup,
                  luptr,
                  tri[0],
                  tri_offset[0],
                  matvec[0],
                  matvec_offset[0]);
            }

          } else if (kfnz2[0] > kfnz2[1]) {
            /* Second column is bigger */
            no_zeros = kfnz2[1] - fsupc;
            segsze = kfnz2[0] - kfnz2[1];
            luptr = xlusup[fsupc] + nsupr * no_zeros + nsupc;
            if (USE_VENDOR_BLAS) {
              dgemv(
                  "N",
                  nrow,
                  segsze,
                  alpha,
                  lusup,
                  luptr,
                  nsupr,
                  tri[1],
                  tri_offset[1],
                  incx,
                  beta,
                  matvec[1],
                  matvec_offset[1],
                  incy);
            } else {
              dmatvec(
                  nsupr,
                  nrow,
                  segsze,
                  lusup,
                  luptr,
                  tri[1],
                  tri_offset[1],
                  matvec[1],
                  matvec_offset[1]);
            }
          }

          /* Do matrix-vector multiply with two destinations */
          /* Covers the part common to both segments, starting at the later of the two first rows. */
          kfnz = SUPERLU_MAX(kfnz2[0], kfnz2[1]);
          no_zeros = kfnz - fsupc;
          segsze = krep - kfnz + 1;
          luptr = xlusup[fsupc] + nsupr * no_zeros + nsupc;

          dmatvec2(
              nsupr,
              nrow,
              segsze,
              lusup,
              luptr,
              tri[0],
              tri_offset[0] + kfnz - kfnz2[0],
              tri[1],
              tri_offset[1] + kfnz - kfnz2[1],
              matvec[0],
              matvec_offset[0],
              matvec[1],
              matvec_offset[1]);

          if (TIMING) {
            utime[FLOAT.ordinal()] += SuperLU_timer_() - f_time;
          }

          for (j = 0; j < 2; ++j) {
            i = n * (jj2[j] - jcol);
            dense_col1 = dense;
            int dense_col1_offset = i;
            col_marker1 = spa_marker;
            int col_marker1_offset = i;
            col_lsub1 = panel_lsub;
            int col_lsub1_offset = i;
            no_zeros = kfnz2[j] - fsupc;
            segsze = krep - kfnz2[j] + 1;

            /* Scatter tri[*] into SPA dense[*]. */
            isub = lptr + no_zeros;
            for (i = 0; i < segsze; i++) {
              irow = lsub[isub];
              dense_col1[dense_col1_offset + irow] = tri[j][tri_offset[j] + i]; /* Scatter */
              tri[j][tri_offset[j] + i] = zero; /* leave the work array zeroed */
              ++isub;
              if (DEBUG) {
                /* NOTE(review): prints through dense_col/dense_col_offset (the loop's current
                 * column), not dense_col1 - confirm this is intended. */
                if (jj == -1 && krep == 3423)
                  printf(
                      "(%d) dbmod1D[scatter] jj %d, dense_col[%d] %e\n",
                      pnum, jj, irow, dense_col[dense_col_offset + irow]);
              }
            }

            /* Scatter matvec[*] into SPA dense[*]. */
            /* isub carries on from the loop above. */
            /*#pragma ivdep*/
            for (i = 0; i < nrow; i++) {
              irow = lsub[isub];
              dense_col1[dense_col1_offset + irow] -=
                  matvec[j][matvec_offset[j] + i]; /* Scatter-add */
              if (SCATTER_FOUND) {
                if (col_marker1[col_marker1_offset + irow] != jj2[j]) {
                  col_marker1[col_marker1_offset + irow] = jj2[j];
                  col_lsub1[col_lsub1_offset + w_lsub_end[jj2[j] - jcol]++] = irow;
                }
              }
              matvec[j][matvec_offset[j] + i] = zero; /* leave the work array zeroed */
              ++isub;
            }
          } /* end for two destination update */

        } else {
            /* wait for a second column */
          jj2[0] = jj;
          twocols = 1;
        }
      } /* else segsze >= 4 */
    } /* for jj ... */

    if (twocols == 1) {
        /* one more column left */
      /* Same gather / solve / multiply / scatter sequence as above, for the single column
       * jj2[0] using buffer slot 0 only. */
      i = n * (jj2[0] - jcol);
      repfnz_col1 = repfnz;
      int repfnz_col1_offset = i;
      dense_col1 = dense;
      int dense_col1_offset = i;
      col_marker1 = spa_marker;
      int col_marker1_offset = i;
      col_lsub1 = panel_lsub;
      int col_lsub1_offset = i;
      kfnz = repfnz_col1[repfnz_col1_offset + krep];
      no_zeros = kfnz - fsupc;
      segsze = krep - kfnz + 1;

      /* Gather U[*,j] segment from dense[*] to tri[*]. */
      isub = lptr + no_zeros;
      for (i = 0; i < segsze; ++i) {
        irow = lsub[isub];
        tri[0][tri_offset[0] + i] = dense_col1[dense_col1_offset + irow]; /* Gather */
        ++isub;
      }

      if (TIMING) {
        f_time = SuperLU_timer_();
      }
      /* start effective triangle */
      luptr = xlusup[fsupc] + nsupr * no_zeros + no_zeros;
      if (USE_VENDOR_BLAS) {
        dtrsv("L", "N", "U", segsze, lusup, luptr, nsupr, tri[0], tri_offset[0], incx);
      } else {
        dlsolve(nsupr, segsze, lusup, luptr, tri[0], tri_offset[0]);
      }

      luptr += segsze; /* Dense matrix-vector */
      matvec[0] = tri[0];
      matvec_offset[0] = tri_offset[0] + segsze;

      if (USE_VENDOR_BLAS) {
        dgemv(
            "N",
            nrow,
            segsze,
            alpha,
            lusup,
            luptr,
            nsupr,
            tri[0],
            tri_offset[0],
            incx,
            beta,
            matvec[0],
            matvec_offset[0],
            incy);
      } else {
        dmatvec(
            nsupr, nrow, segsze, lusup, luptr, tri[0], tri_offset[0], matvec[0], matvec_offset[0]);
      }
      if (TIMING) {
        utime[FLOAT.ordinal()] += SuperLU_timer_() - f_time;
      }

      /* Scatter tri[*] into SPA dense[*]. */
      isub = lptr + no_zeros;
      for (i = 0; i < segsze; i++) {
        irow = lsub[isub];
        dense_col1[dense_col1_offset + irow] = tri[0][tri_offset[0] + i]; /* Scatter */
        tri[0][tri_offset[0] + i] = zero; /* leave the work array zeroed */
        ++isub;
        if (DEBUG) {
          /* NOTE(review): dense_col_offset here still holds its value from the end of the panel
           * loop, not the offset of column jj2[0] - confirm before enabling this print. */
          if (jj == -1 && krep == 3423)
            printf(
                "(%d) dbmod1D[scatter] jj %d, dense_col[%d] %e\n",
                pnum, jj, irow, dense_col[dense_col_offset + irow]);
        }
      }

      /* Scatter matvec[*] into SPA dense[*]. */
      for (i = 0; i < nrow; i++) {
        irow = lsub[isub];
        dense_col1[dense_col1_offset + irow] -= matvec[0][matvec_offset[0] + i]; /* Scatter-add */
        if (SCATTER_FOUND) {
          if (col_marker1[col_marker1_offset + irow] != jj2[0]) {
            col_marker1[col_marker1_offset + irow] = jj2[0];
            col_lsub1[col_lsub1_offset + w_lsub_end[jj2[0] - jcol]++] = irow;
          }
        }
        matvec[0][matvec_offset[0] + i] = zero; /* leave the work array zeroed */
        ++isub;
      }
    } /* if twocols == 1 */
  }