  /**
   * Compute variable importances, based on Gedeon '97: "Data Mining of Inputs: Analysing
   * Magnitude and Functional Measures".
   *
   * @return variable importances for input features
   */
  public float[] computeVariableImportances() {
    float[] vi = new float[units[0]];
    Arrays.fill(vi, 0f);

    float[][] Qik = new float[units[0]][units[2]]; // importance of input i on output k
    float[] sum_wj = new float[units[1]]; // sum of incoming weights into first hidden layer
    float[] sum_wk =
        new float[units[2]]; // sum of incoming weights into output layer (or second hidden layer)
    for (float[] Qi : Qik) Arrays.fill(Qi, 0f);
    Arrays.fill(sum_wj, 0f);
    Arrays.fill(sum_wk, 0f);

    // compute sum of absolute incoming weights
    for (int j = 0; j < units[1]; j++) {
      for (int i = 0; i < units[0]; i++) {
        float wij = get_weights(0).get(j, i);
        sum_wj[j] += Math.abs(wij);
    for (int k = 0; k < units[2]; k++) {
      for (int j = 0; j < units[1]; j++) {
        float wjk = get_weights(1).get(k, j);
        sum_wk[k] += Math.abs(wjk);
    // compute importance of input i on output k as product of connecting weights going through j
    for (int i = 0; i < units[0]; i++) {
      for (int k = 0; k < units[2]; k++) {
        for (int j = 0; j < units[1]; j++) {
          float wij = get_weights(0).get(j, i);
          float wjk = get_weights(1).get(k, j);
          // Qik[i][k] += Math.abs(wij)/sum_wj[j] * wjk; //Wong,Gedeon,Taggart '95
          Qik[i][k] += Math.abs(wij) / sum_wj[j] * Math.abs(wjk) / sum_wk[k]; // Gedeon '97
    // normalize Qik over all outputs k
    for (int k = 0; k < units[2]; k++) {
      float sumQk = 0;
      for (int i = 0; i < units[0]; i++) sumQk += Qik[i][k];
      for (int i = 0; i < units[0]; i++) Qik[i][k] /= sumQk;
    // importance for feature i is the sum over k of i->k importances
    for (int i = 0; i < units[0]; i++) vi[i] = ArrayUtils.sum(Qik[i]);

    // normalize importances such that max(vi) = 1
    ArrayUtils.div(vi, ArrayUtils.maxValue(vi));

    // zero out missing categorical variables if they were never seen
    if (_saw_missing_cats != null) {
      for (int i = 0; i < _saw_missing_cats.length; ++i) {
        assert (data_info._catMissing[i] == 1); // have a missing bucket for each categorical
        if (!_saw_missing_cats[i]) vi[data_info._catOffsets[i + 1] - 1] = 0;
    return vi;
Beispiel #2
 // Call builder specific score code and then correct probabilities
 // if it is necessary.
 // The value returned by score1 is used as the normaliser for fs when the model is a classifier.
 void score2(Chunk chks[], double weight, double offset, double fs[ /*nclass*/], int row) {
   double sum = score1(chks, weight, offset, fs, row);
   if (isClassifier()) {
     // Only rescale when the sum is finite, strictly positive and not already 1.
     if (!Double.isInfinite(sum) && sum > 0f && sum != 1f) ArrayUtils.div(fs, sum);
     // NOTE(review): the statement under this condition is incomplete in this copy - only the
     // argument list survives and the leading call expression is missing. Restore it from the
     // upstream source before compiling; presumably it adjusts fs using the prior and model
     // class distributions - confirm.
     if (_parms._balance_classes)
           fs, _model._output._priorClassDist, _model._output._modelClassDist);
Beispiel #3
    public void map(Chunk[] cs) {
      int N = cs.length - (_hasWeight ? 1 : 0);
      assert _centers[0].length == N;
      _cMeans = new double[_k][N];
      _cSqr = new double[_k];
      _size = new long[_k];
      // Space for cat histograms
      _cats = new long[_k][N][];
      for (int clu = 0; clu < _k; clu++)
        for (int col = 0; col < N; col++)
          _cats[clu][col] = _isCats[col] == null ? null : new long[cs[col].vec().cardinality()];
      _worst_err = 0;

      // Find closest cluster center for each row
      double[] values = new double[N]; // Temp data to hold row as doubles
      ClusterDist cd = new ClusterDist();
      for (int row = 0; row < cs[0]._len; row++) {
        double weight = _hasWeight ? cs[N].atd(row) : 1;
        if (weight == 0) continue; // skip holdout rows
        assert (weight == 1); // K-Means only works for weight 1 (or weight 0 for holdout)
        data(values, cs, row, _means, _mults, _modes); // Load row as doubles
        closest(_centers, values, _isCats, cd); // Find closest cluster center
        int clu = cd._cluster;
        assert clu != -1; // No broken rows
        _cSqr[clu] += cd._dist;

        // Add values and increment counter for chosen cluster
        for (int col = 0; col < N; col++)
          if (_isCats[col] != null) _cats[clu][col][(int) values[col]]++; // Histogram the cats
          else _cMeans[clu][col] += values[col]; // Sum the column centers
        // Track worst row
        if (cd._dist > _worst_err) {
          _worst_err = cd._dist;
          _worst_row = cs[0].start() + row;
      // Scale back down to local mean
      for (int clu = 0; clu < _k; clu++)
        if (_size[clu] != 0) ArrayUtils.div(_cMeans[clu], _size[clu]);
      _centers = null;
      _means = _mults = null;
      _modes = null;
 /**
  * Divide all weights/biases by a real-valued number.
  *
  * @param N the divisor applied to every stored value
  */
 protected void div(float N) {
   // Weight storage of every layer, then every bias vector.
   for (int i = 0; i < dense_row_weights.length; ++i) ArrayUtils.div(get_weights(i).raw(), N);
   for (Storage.Vector bias : biases) ArrayUtils.div(bias.raw(), N);
   // Average activations are optional and only processed when present.
   if (avg_activations != null)
     for (Storage.Vector avgac : avg_activations) ArrayUtils.div(avgac.raw(), N);
   // Momentum terms for weights and biases, only when has_momenta() reports them.
   if (has_momenta()) {
     for (int i = 0; i < dense_row_weights_momenta.length; ++i)
       ArrayUtils.div(get_weights_momenta(i).raw(), N);
     for (Storage.Vector bias_momenta : biases_momenta) ArrayUtils.div(bias_momenta.raw(), N);
   // NOTE(review): the closing brace of the has_momenta() block above, and those that end this
   // method, are not present in this copy; the indentation suggests the adaDelta() branch is a
   // sibling of has_momenta(), not nested in it - confirm against the upstream source.
   if (adaDelta()) {
     for (int i = 0; i < dense_row_ada_dx_g.length; ++i) {
       ArrayUtils.div(get_ada_dx_g(i).raw(), N);
Beispiel #5
  // Matrix covariance.  Compute covariance between all columns from each Frame
  // against each other.  Return a matrix of covariances which is frx.numCols
  // wide and fry.numCols tall.
  //
  // Returns a ValNum when only a single column (pair) is involved and a ValFrame named after
  // fry's columns otherwise. Modes Everything and AllObs take the first branch, which uses the
  // per-column mean(); every other mode takes the two-pass branch, which divides column sums by
  // fry.numRows() - NACount to obtain the means.
  // NOTE(review): several closing braces and the leading part of two call statements are not
  // present in this copy of the method; the code lines are left exactly as found.
  private Val array(Frame frx, Frame fry, Mode mode, boolean symmetric) {
    Vec[] vecxs = frx.vecs();
    int ncolx = vecxs.length;
    Vec[] vecys = fry.vecs();
    int ncoly = vecys.length;

    if (mode.equals(Mode.Everything) || mode.equals(Mode.AllObs)) {

      // AllObs rejects any NA up front; the y columns are only checked when not symmetric.
      if (mode.equals(Mode.AllObs)) {
        for (Vec v : vecxs)
          if (v.naCnt() != 0)
            throw new IllegalArgumentException("Mode is 'all.obs' but NAs are present");
        if (!symmetric)
          for (Vec v : vecys)
            if (v.naCnt() != 0)
              throw new IllegalArgumentException("Mode is 'all.obs' but NAs are present");
      // One task slot per y column.
      CoVarTaskEverything[] cvs = new CoVarTaskEverything[ncoly];

      double[] xmeans = new double[ncolx];
      // NOTE(review): the loop bound is ncoly, but xmeans and vecxs both have ncolx entries.
      // With ncoly > ncolx this indexes out of bounds; with ncoly < ncolx the trailing means
      // stay 0. Looks like the bound should be ncolx - confirm.
      for (int x = 0; x < ncoly; x++) xmeans[x] = vecxs[x].mean();

      if (symmetric) {
        // 1-col returns scalar
        if (ncoly == 1)
          return new ValNum(
              vecys[0].naCnt() == 0 ? vecys[0].sigma() * vecys[0].sigma() : Double.NaN);

        // idx starts as 0..ncoly-1 (idx[0] is already 0 from array initialisation).
        int[] idx = new int[ncoly];
        for (int y = 1; y < ncoly; y++) idx[y] = y;
        int[] first_index = new int[] {0};
        // compute covariances between column_i and column_i+1, column_i+2, ...
        Frame reduced_fr;
        for (int y = 0; y < ncoly - 1; y++) {
          idx = ArrayUtils.removeIds(idx, first_index);
          reduced_fr = new Frame(frx.vecs(idx));
          cvs[y] =
              new CoVarTaskEverything(vecys[y].mean(), xmeans)
                  .dfork(new Frame(vecys[y]).add(reduced_fr));

        double[][] res_array = new double[ncoly][ncoly];

        // fill in the diagonals (variances) using sigma from rollupstats
        for (int y = 0; y < ncoly; y++)
          res_array[y][y] =
              vecys[y].naCnt() == 0 ? vecys[y].sigma() * vecys[y].sigma() : Double.NaN;

        // arrange the results into the bottom left of res_array. each successive cvs is 1 smaller
        // in length
        // NOTE(review): the statement in this loop is truncated - only trailing arguments of a
        // call survive. Presumably a copy of the scaled covariances into res_array[y] starting
        // at offset y + 1; restore from the upstream source.
        for (int y = 0; y < ncoly - 1; y++)
              ArrayUtils.div(cvs[y].getResult()._covs, (fry.numRows() - 1)),
              y + 1,
              ncoly - y - 1);

        // copy over the bottom left of res_array to its top right
        for (int y = 0; y < ncoly - 1; y++) {
          for (int x = y + 1; x < ncoly; x++) {
            res_array[x][y] = res_array[y][x];
        // set Frame
        Vec[] res = new Vec[ncoly];
        Key<Vec>[] keys = Vec.VectorGroup.VG_LEN1.addVecs(ncoly);
        for (int y = 0; y < ncoly; y++) {
          res[y] = Vec.makeVec(res_array[y], keys[y]);
        return new ValFrame(new Frame(fry._names, res));

      // Launch tasks; each does all Xs vs one Y
      for (int y = 0; y < ncoly; y++)
        cvs[y] =
            new CoVarTaskEverything(vecys[y].mean(), xmeans).dfork(new Frame(vecys[y]).add(frx));

      // 1-col returns scalar
      if (ncolx == 1 && ncoly == 1) {
        return new ValNum(cvs[0].getResult()._covs[0] / (fry.numRows() - 1));

      // Gather all the Xs-vs-Y covariance arrays; divide by rows
      Vec[] res = new Vec[ncoly];
      Key<Vec>[] keys = Vec.VectorGroup.VG_LEN1.addVecs(ncoly);
      for (int y = 0; y < ncoly; y++)
        res[y] =
            Vec.makeVec(ArrayUtils.div(cvs[y].getResult()._covs, (fry.numRows() - 1)), keys[y]);

      return new ValFrame(new Frame(fry._names, res));
    } else { // if (mode.equals(Mode.CompleteObs)) {
      // two-pass algorithm for computation of variance for numerical stability

      if (symmetric) {
        if (ncoly == 1) return new ValNum(vecys[0].sigma() * vecys[0].sigma());

        // First pass: column sums and NA count, turned into means over the remaining rows.
        CoVarTaskCompleteObsMeanSym taskCompleteObsMeanSym =
            new CoVarTaskCompleteObsMeanSym().doAll(fry);
        long NACount = taskCompleteObsMeanSym._NACount;
        double[] ymeans = ArrayUtils.div(taskCompleteObsMeanSym._ysum, fry.numRows() - NACount);

        // 1 task with all Ys
        CoVarTaskCompleteObsSym cvs = new CoVarTaskCompleteObsSym(ymeans).doAll(new Frame(fry));
        double[][] res_array = new double[ncoly][ncoly];

        // NOTE(review): the statement in this loop is truncated - the leading call and some of
        // its arguments are missing. Restore from the upstream source.
        for (int y = 0; y < ncoly; y++) {
              ArrayUtils.div(cvs._covs[y], (fry.numRows() - 1 - NACount)),
              ncoly - y);

        // copy over the bottom left of res_array to its top right
        for (int y = 0; y < ncoly - 1; y++) {
          for (int x = y + 1; x < ncoly; x++) {
            res_array[x][y] = res_array[y][x];
        // set Frame
        Vec[] res = new Vec[ncoly];
        Key<Vec>[] keys = Vec.VectorGroup.VG_LEN1.addVecs(ncoly);
        for (int y = 0; y < ncoly; y++) {
          res[y] = Vec.makeVec(res_array[y], keys[y]);
        return new ValFrame(new Frame(fry._names, res));

      // First pass over fry followed by frx: sums for both sides and the NA count.
      CoVarTaskCompleteObsMean taskCompleteObsMean =
          new CoVarTaskCompleteObsMean(ncoly, ncolx).doAll(new Frame(fry).add(frx));
      long NACount = taskCompleteObsMean._NACount;
      double[] ymeans = ArrayUtils.div(taskCompleteObsMean._ysum, fry.numRows() - NACount);
      double[] xmeans = ArrayUtils.div(taskCompleteObsMean._xsum, fry.numRows() - NACount);

      // 1 task with all Xs and Ys
      CoVarTaskCompleteObs cvs =
          new CoVarTaskCompleteObs(ymeans, xmeans).doAll(new Frame(fry).add(frx));

      // 1-col returns scalar
      if (ncolx == 1 && ncoly == 1) {
        return new ValNum(cvs._covs[0][0] / (fry.numRows() - 1 - NACount));

      // Gather all the Xs-vs-Y covariance arrays; divide by rows
      Vec[] res = new Vec[ncoly];
      Key<Vec>[] keys = Vec.VectorGroup.VG_LEN1.addVecs(ncoly);
      for (int y = 0; y < ncoly; y++)
        res[y] = Vec.makeVec(ArrayUtils.div(cvs._covs[y], (fry.numRows() - 1 - NACount)), keys[y]);

      return new ValFrame(new Frame(fry._names, res));
Beispiel #6
  /**
   * Extracts the values, applies regularization to numerics, adds appropriate offsets to
   * categoricals, and adapts response according to the CaseMode/CaseValue if set.
   */
  public final void map(Chunk[] chunks, NewChunk[] outputs) {
    if (_jobKey != null && !Job.isRunning(_jobKey)) throw new JobCancelledException();
    final int nrows = chunks[0]._len;
    final long offset = chunks[0].start();
    boolean doWork = chunkInit();
    if (!doWork) return;
    final boolean obs_weights = _dinfo._weights && !_fr.vecs()[_dinfo.weightChunkId()].isConst();
    final double global_weight_sum =
        obs_weights ? _fr.vecs()[_dinfo.weightChunkId()].mean() * _fr.numRows() : 0;

    DataInfo.Row row = _dinfo.newDenseRow();
    double[] weight_map = null;
    double relative_chunk_weight = 1;
    // TODO: store node-local helper arrays in _dinfo -> avoid re-allocation and construction
    if (obs_weights) {
      weight_map = new double[nrows];
      double weight_sum = 0;
      for (int i = 0; i < nrows; ++i) {
        row = _dinfo.extractDenseRow(chunks, i, row);
        weight_sum += row.weight;
        weight_map[i] = weight_sum;
        assert (i == 0 || row.weight == 0 || weight_map[i] > weight_map[i - 1]);
      if (weight_sum > 0) {
        ArrayUtils.div(weight_map, weight_sum); // normalize to 0...1
        relative_chunk_weight = global_weight_sum * nrows / _fr.numRows() / weight_sum;
      } else return; // nothing to do here - all rows have 0 weight

    // Example:
    // _useFraction = 0.8 -> 1 repeat with fraction = 0.8
    // _useFraction = 1.0 -> 1 repeat with fraction = 1.0
    // _useFraction = 1.1 -> 2 repeats with fraction = 0.55
    // _useFraction = 2.1 -> 3 repeats with fraction = 0.7
    // _useFraction = 3.0 -> 3 repeats with fraction = 1.0
    final int repeats = (int) Math.ceil(_useFraction * relative_chunk_weight);
    final float fraction = (float) (_useFraction * relative_chunk_weight) / repeats;
    assert (fraction <= 1.0);

    final boolean sample = (fraction < 0.999 || obs_weights || _shuffle);
    final Random skip_rng =
            ? RandomUtils.getRNG(
                (0x8734093502429734L + _seed + offset) * (_iteration + 0x9823423497823423L))
            : null;

    long num_processed_rows = 0;
    for (int rep = 0; rep < repeats; ++rep) {
      for (int row_idx = 0; row_idx < nrows; ++row_idx) {
        int r = sample ? -1 : 0;
        // only train with a given number of training samples (fraction*nrows)
        if (sample && !obs_weights && skip_rng.nextDouble() > fraction) continue;
        if (obs_weights
            && num_processed_rows % 2
                == 0) { // every second row is randomly sampled -> that way we won't "forget" rare
          // rows
          // importance sampling based on inverse of cumulative distribution
          double key = skip_rng.nextDouble();
          r = Arrays.binarySearch(weight_map, 0, nrows, key);
          //          Log.info(Arrays.toString(weight_map));
          //          Log.info("key: " + key + " idx: " + (r >= 0 ? r : (-r-1)));
          if (r < 0) r = -r - 1;
          assert (r == 0 || weight_map[r] > weight_map[r - 1]);
        } else if (r == -1) {
          do {
            r = skip_rng.nextInt(nrows); // random sampling (with replacement)
          // if we have weights, and we did the %2 skipping above, then we need to find an alternate
          // row with non-zero weight
          while (obs_weights
              && ((r == 0 && weight_map[0] == 0) || (r > 0 && weight_map[r] == weight_map[r - 1])));
        } else {
          assert (!obs_weights);
          r = row_idx; // linear scan - slightly faster
        assert (r >= 0 && r <= nrows);

        row = _dinfo.extractDenseRow(chunks, r, row);
        if (!row.bad) {
          assert (row.weight
              > 0); // check that we never process a row that was held out via row.weight = 0
          long seed = offset + rep * nrows + r;
          if (outputs != null && outputs.length > 0) processRow(seed++, row, outputs);
          else processRow(seed++, row);
    assert (fraction != 1 || num_processed_rows == repeats * nrows);