/**
 * Compute Variable Importance, based on GEDEON: DATA MINING OF INPUTS: ANALYSING MAGNITUDE AND
 * FUNCTIONAL MEASURES
 *
 * @return variable importances for input features
 */
public float[] computeVariableImportances() {
  float[] vi = new float[units[0]];
  Arrays.fill(vi, 0f);

  float[][] Qik = new float[units[0]][units[2]]; // importance of input i on output k
  float[] sum_wj = new float[units[1]]; // sum of incoming weights into first hidden layer
  float[] sum_wk = new float[units[2]]; // sum of incoming weights into output layer (or second hidden layer)
  for (float[] Qi : Qik) Arrays.fill(Qi, 0f);
  Arrays.fill(sum_wj, 0f);
  Arrays.fill(sum_wk, 0f);

  // compute sum of absolute incoming weights
  for (int j = 0; j < units[1]; j++) {
    for (int i = 0; i < units[0]; i++) {
      float wij = get_weights(0).get(j, i);
      sum_wj[j] += Math.abs(wij);
    }
  }
  for (int k = 0; k < units[2]; k++) {
    for (int j = 0; j < units[1]; j++) {
      float wjk = get_weights(1).get(k, j);
      sum_wk[k] += Math.abs(wjk);
    }
  }

  // compute importance of input i on output k as product of connecting weights going through j
  for (int i = 0; i < units[0]; i++) {
    for (int k = 0; k < units[2]; k++) {
      for (int j = 0; j < units[1]; j++) {
        float wij = get_weights(0).get(j, i);
        float wjk = get_weights(1).get(k, j);
        // Qik[i][k] += Math.abs(wij)/sum_wj[j] * wjk; //Wong,Gedeon,Taggart '95
        Qik[i][k] += Math.abs(wij) / sum_wj[j] * Math.abs(wjk) / sum_wk[k]; // Gedeon '97
      }
    }
  }

  // normalize Qik over all outputs k
  for (int k = 0; k < units[2]; k++) {
    float sumQk = 0;
    for (int i = 0; i < units[0]; i++) sumQk += Qik[i][k];
    for (int i = 0; i < units[0]; i++) Qik[i][k] /= sumQk;
  }

  // importance for feature i is the sum over k of i->k importances
  for (int i = 0; i < units[0]; i++) vi[i] = ArrayUtils.sum(Qik[i]);

  // normalize importances such that max(vi) = 1
  ArrayUtils.div(vi, ArrayUtils.maxValue(vi));

  // zero out missing categorical variables if they were never seen
  if (_saw_missing_cats != null) {
    for (int i = 0; i < _saw_missing_cats.length; ++i) {
      assert (data_info._catMissing[i] == 1); // have a missing bucket for each categorical
      if (!_saw_missing_cats[i]) vi[data_info._catOffsets[i + 1] - 1] = 0;
    }
  }
  return vi;
}
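// Illustrative sketch (not H2O code): the same Gedeon '97 aggregation on plain row-major weight
// matrices w1[hidden][inputs] and w2[outputs][hidden]. All names here (gedeonImportances, w1, w2)
// are hypothetical and only meant to make the normalization steps above easier to follow.
static float[] gedeonImportances(float[][] w1, float[][] w2) {
  int nIn = w1[0].length, nHid = w1.length, nOut = w2.length;
  float[] sumWj = new float[nHid]; // per-hidden-unit sum of |w_ij|
  float[] sumWk = new float[nOut]; // per-output-unit sum of |w_jk|
  for (int j = 0; j < nHid; j++) for (int i = 0; i < nIn; i++) sumWj[j] += Math.abs(w1[j][i]);
  for (int k = 0; k < nOut; k++) for (int j = 0; j < nHid; j++) sumWk[k] += Math.abs(w2[k][j]);
  float[][] q = new float[nIn][nOut]; // Qik: contribution of input i to output k
  for (int i = 0; i < nIn; i++)
    for (int k = 0; k < nOut; k++)
      for (int j = 0; j < nHid; j++)
        q[i][k] += Math.abs(w1[j][i]) / sumWj[j] * Math.abs(w2[k][j]) / sumWk[k];
  float[] vi = new float[nIn];
  for (int k = 0; k < nOut; k++) { // normalize per output, then sum over outputs
    float s = 0;
    for (int i = 0; i < nIn; i++) s += q[i][k];
    for (int i = 0; i < nIn; i++) vi[i] += q[i][k] / s;
  }
  float max = 0;
  for (float v : vi) max = Math.max(max, v);
  for (int i = 0; i < nIn; i++) vi[i] /= max; // scale so the most important feature is 1
  return vi;
}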
// Call builder-specific score code and then correct probabilities if necessary.
void score2(Chunk chks[], double weight, double offset, double fs[ /*nclass*/], int row) {
  double sum = score1(chks, weight, offset, fs, row);
  if (isClassifier()) {
    if (!Double.isInfinite(sum) && sum > 0f && sum != 1f) ArrayUtils.div(fs, sum);
    if (_parms._balance_classes)
      GenModel.correctProbabilities(
          fs, _model._output._priorClassDist, _model._output._modelClassDist);
  }
}
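// Conceptual sketch only (an assumption about what the correction amounts to, not
// GenModel.correctProbabilities itself): when classes were over/under-sampled for training,
// each predicted probability is rescaled by the ratio of the original prior distribution to the
// sampled model distribution, and the vector is renormalized to sum to 1.
static void correctProbabilitiesSketch(double[] probs, double[] priorDist, double[] modelDist) {
  double sum = 0;
  for (int c = 0; c < probs.length; c++) {
    probs[c] *= priorDist[c] / modelDist[c]; // undo the sampling-induced class shift (assumed)
    sum += probs[c];
  }
  if (sum > 0) for (int c = 0; c < probs.length; c++) probs[c] /= sum; // renormalize
}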
@Override
public void map(Chunk[] cs) {
  int N = cs.length - (_hasWeight ? 1 : 0);
  assert _centers[0].length == N;
  _cMeans = new double[_k][N];
  _cSqr = new double[_k];
  _size = new long[_k];
  // Space for cat histograms
  _cats = new long[_k][N][];
  for (int clu = 0; clu < _k; clu++)
    for (int col = 0; col < N; col++)
      _cats[clu][col] = _isCats[col] == null ? null : new long[cs[col].vec().cardinality()];
  _worst_err = 0;

  // Find closest cluster center for each row
  double[] values = new double[N]; // Temp data to hold row as doubles
  ClusterDist cd = new ClusterDist();
  for (int row = 0; row < cs[0]._len; row++) {
    double weight = _hasWeight ? cs[N].atd(row) : 1;
    if (weight == 0) continue; // skip holdout rows
    assert (weight == 1); // K-Means only works for weight 1 (or weight 0 for holdout)
    data(values, cs, row, _means, _mults, _modes); // Load row as doubles
    closest(_centers, values, _isCats, cd); // Find closest cluster center
    int clu = cd._cluster;
    assert clu != -1; // No broken rows
    _cSqr[clu] += cd._dist;

    // Add values and increment counter for chosen cluster
    for (int col = 0; col < N; col++)
      if (_isCats[col] != null) _cats[clu][col][(int) values[col]]++; // Histogram the cats
      else _cMeans[clu][col] += values[col]; // Sum the column centers
    _size[clu]++;

    // Track worst row
    if (cd._dist > _worst_err) {
      _worst_err = cd._dist;
      _worst_row = cs[0].start() + row;
    }
  }

  // Scale back down to local mean
  for (int clu = 0; clu < _k; clu++)
    if (_size[clu] != 0) ArrayUtils.div(_cMeans[clu], _size[clu]);
  _centers = null;
  _means = _mults = null;
  _modes = null;
}
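// Minimal sketch of the nearest-center search the loop above relies on (hypothetical helper, not
// H2O's closest()/ClusterDist): squared Euclidean distance over numeric columns only.
static int nearestCenter(double[][] centers, double[] row) {
  int best = -1;
  double bestDist = Double.MAX_VALUE;
  for (int clu = 0; clu < centers.length; clu++) {
    double dist = 0;
    for (int col = 0; col < row.length; col++) {
      double d = centers[clu][col] - row[col];
      dist += d * d;
    }
    if (dist < bestDist) {
      bestDist = dist;
      best = clu;
    }
  }
  return best;
}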
/**
 * Divide all weights/biases by a real-valued number
 *
 * @param N the divisor
 */
protected void div(float N) {
  for (int i = 0; i < dense_row_weights.length; ++i) ArrayUtils.div(get_weights(i).raw(), N);
  for (Storage.Vector bias : biases) ArrayUtils.div(bias.raw(), N);
  if (avg_activations != null)
    for (Storage.Vector avgac : avg_activations) ArrayUtils.div(avgac.raw(), N);
  if (has_momenta()) {
    for (int i = 0; i < dense_row_weights_momenta.length; ++i)
      ArrayUtils.div(get_weights_momenta(i).raw(), N);
    for (Storage.Vector bias_momenta : biases_momenta) ArrayUtils.div(bias_momenta.raw(), N);
  }
  if (adaDelta()) {
    for (int i = 0; i < dense_row_ada_dx_g.length; ++i) {
      ArrayUtils.div(get_ada_dx_g(i).raw(), N);
    }
  }
}
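// Hedged usage note: a typical pattern is to sum per-node copies of the parameters in a reduce
// step and then call div(numberOfContributingNodes) to turn the sums back into elementwise means.
// A standalone sketch of the per-array scaling (what ArrayUtils.div presumably does in place;
// helper name is hypothetical):
static float[] divInPlace(float[] raw, float n) {
  for (int i = 0; i < raw.length; i++) raw[i] /= n; // divide every weight/bias by the scalar
  return raw;
}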
// Matrix covariance. Compute covariance between all columns from each Frame
// against each other. Return a matrix of covariances which is frx.numCols
// wide and fry.numCols tall.
private Val array(Frame frx, Frame fry, Mode mode, boolean symmetric) {
  Vec[] vecxs = frx.vecs();
  int ncolx = vecxs.length;
  Vec[] vecys = fry.vecs();
  int ncoly = vecys.length;

  if (mode.equals(Mode.Everything) || mode.equals(Mode.AllObs)) {
    if (mode.equals(Mode.AllObs)) {
      for (Vec v : vecxs)
        if (v.naCnt() != 0)
          throw new IllegalArgumentException("Mode is 'all.obs' but NAs are present");
      if (!symmetric)
        for (Vec v : vecys)
          if (v.naCnt() != 0)
            throw new IllegalArgumentException("Mode is 'all.obs' but NAs are present");
    }
    CoVarTaskEverything[] cvs = new CoVarTaskEverything[ncoly];

    double[] xmeans = new double[ncolx];
    for (int x = 0; x < ncolx; x++) xmeans[x] = vecxs[x].mean();

    if (symmetric) {
      // 1-col returns scalar
      if (ncoly == 1)
        return new ValNum(
            vecys[0].naCnt() == 0 ? vecys[0].sigma() * vecys[0].sigma() : Double.NaN);

      int[] idx = new int[ncoly];
      for (int y = 1; y < ncoly; y++) idx[y] = y;
      int[] first_index = new int[] {0};

      // compute covariances between column_i and column_i+1, column_i+2, ...
      Frame reduced_fr;
      for (int y = 0; y < ncoly - 1; y++) {
        idx = ArrayUtils.removeIds(idx, first_index);
        reduced_fr = new Frame(frx.vecs(idx));
        cvs[y] =
            new CoVarTaskEverything(vecys[y].mean(), xmeans)
                .dfork(new Frame(vecys[y]).add(reduced_fr));
      }

      double[][] res_array = new double[ncoly][ncoly];

      // fill in the diagonals (variances) using sigma from rollupstats
      for (int y = 0; y < ncoly; y++)
        res_array[y][y] =
            vecys[y].naCnt() == 0 ? vecys[y].sigma() * vecys[y].sigma() : Double.NaN;

      // arrange the results into the bottom left of res_array; each successive cvs is 1 smaller
      // in length
      for (int y = 0; y < ncoly - 1; y++)
        System.arraycopy(
            ArrayUtils.div(cvs[y].getResult()._covs, (fry.numRows() - 1)),
            0,
            res_array[y],
            y + 1,
            ncoly - y - 1);

      // copy over the bottom left of res_array to its top right
      for (int y = 0; y < ncoly - 1; y++) {
        for (int x = y + 1; x < ncoly; x++) {
          res_array[x][y] = res_array[y][x];
        }
      }

      // set Frame
      Vec[] res = new Vec[ncoly];
      Key<Vec>[] keys = Vec.VectorGroup.VG_LEN1.addVecs(ncoly);
      for (int y = 0; y < ncoly; y++) {
        res[y] = Vec.makeVec(res_array[y], keys[y]);
      }
      return new ValFrame(new Frame(fry._names, res));
    }

    // Launch tasks; each does all Xs vs one Y
    for (int y = 0; y < ncoly; y++)
      cvs[y] =
          new CoVarTaskEverything(vecys[y].mean(), xmeans).dfork(new Frame(vecys[y]).add(frx));

    // 1-col returns scalar
    if (ncolx == 1 && ncoly == 1) {
      return new ValNum(cvs[0].getResult()._covs[0] / (fry.numRows() - 1));
    }

    // Gather all the Xs-vs-Y covariance arrays; divide by rows
    Vec[] res = new Vec[ncoly];
    Key<Vec>[] keys = Vec.VectorGroup.VG_LEN1.addVecs(ncoly);
    for (int y = 0; y < ncoly; y++)
      res[y] =
          Vec.makeVec(ArrayUtils.div(cvs[y].getResult()._covs, (fry.numRows() - 1)), keys[y]);
    return new ValFrame(new Frame(fry._names, res));

  } else { // if (mode.equals(Mode.CompleteObs)) {

    // two-pass algorithm for computation of variance for numerical stability
    if (symmetric) {
      if (ncoly == 1) return new ValNum(vecys[0].sigma() * vecys[0].sigma());

      CoVarTaskCompleteObsMeanSym taskCompleteObsMeanSym =
          new CoVarTaskCompleteObsMeanSym().doAll(fry);
      long NACount = taskCompleteObsMeanSym._NACount;
      double[] ymeans = ArrayUtils.div(taskCompleteObsMeanSym._ysum, fry.numRows() - NACount);

      // 1 task with all Ys
      CoVarTaskCompleteObsSym cvs = new CoVarTaskCompleteObsSym(ymeans).doAll(new Frame(fry));

      double[][] res_array = new double[ncoly][ncoly];
      for (int y = 0; y < ncoly; y++) {
        System.arraycopy(
            ArrayUtils.div(cvs._covs[y], (fry.numRows() - 1 - NACount)),
            y,
            res_array[y],
            y,
            ncoly - y);
      }

      // copy over the bottom left of res_array to its top right
      for (int y = 0; y < ncoly - 1; y++) {
        for (int x = y + 1; x < ncoly; x++) {
          res_array[x][y] = res_array[y][x];
        }
      }

      // set Frame
      Vec[] res = new Vec[ncoly];
      Key<Vec>[] keys = Vec.VectorGroup.VG_LEN1.addVecs(ncoly);
      for (int y = 0; y < ncoly; y++) {
        res[y] = Vec.makeVec(res_array[y], keys[y]);
      }
      return new ValFrame(new Frame(fry._names, res));
    }

    CoVarTaskCompleteObsMean taskCompleteObsMean =
        new CoVarTaskCompleteObsMean(ncoly, ncolx).doAll(new Frame(fry).add(frx));
    long NACount = taskCompleteObsMean._NACount;
    double[] ymeans = ArrayUtils.div(taskCompleteObsMean._ysum, fry.numRows() - NACount);
    double[] xmeans = ArrayUtils.div(taskCompleteObsMean._xsum, fry.numRows() - NACount);

    // 1 task with all Xs and Ys
    CoVarTaskCompleteObs cvs =
        new CoVarTaskCompleteObs(ymeans, xmeans).doAll(new Frame(fry).add(frx));

    // 1-col returns scalar
    if (ncolx == 1 && ncoly == 1) {
      return new ValNum(cvs._covs[0][0] / (fry.numRows() - 1 - NACount));
    }

    // Gather all the Xs-vs-Y covariance arrays; divide by rows
    Vec[] res = new Vec[ncoly];
    Key<Vec>[] keys = Vec.VectorGroup.VG_LEN1.addVecs(ncoly);
    for (int y = 0; y < ncoly; y++)
      res[y] = Vec.makeVec(ArrayUtils.div(cvs._covs[y], (fry.numRows() - 1 - NACount)), keys[y]);
    return new ValFrame(new Frame(fry._names, res));
  }
}
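// Standalone sketch of what every covariance entry computed above boils down to, assuming two
// complete (NA-free) numeric columns: the sum of centered cross-products divided by (n - 1),
// matching the (fry.numRows() - 1) and (fry.numRows() - 1 - NACount) divisors used above.
// The helper name is hypothetical.
static double sampleCovariance(double[] x, double[] y) {
  int n = x.length;
  double xmean = 0, ymean = 0;
  for (int i = 0; i < n; i++) {
    xmean += x[i];
    ymean += y[i];
  }
  xmean /= n;
  ymean /= n;
  double s = 0;
  for (int i = 0; i < n; i++) s += (x[i] - xmean) * (y[i] - ymean);
  return s / (n - 1); // unbiased sample covariance
}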
/**
 * Extracts the values, applies regularization to numerics, adds appropriate offsets to
 * categoricals, and adapts response according to the CaseMode/CaseValue if set.
 */
@Override
public final void map(Chunk[] chunks, NewChunk[] outputs) {
  if (_jobKey != null && !Job.isRunning(_jobKey)) throw new JobCancelledException();
  final int nrows = chunks[0]._len;
  final long offset = chunks[0].start();
  boolean doWork = chunkInit();
  if (!doWork) return;
  final boolean obs_weights = _dinfo._weights && !_fr.vecs()[_dinfo.weightChunkId()].isConst();
  final double global_weight_sum =
      obs_weights ? _fr.vecs()[_dinfo.weightChunkId()].mean() * _fr.numRows() : 0;

  DataInfo.Row row = _dinfo.newDenseRow();
  double[] weight_map = null;
  double relative_chunk_weight = 1;
  // TODO: store node-local helper arrays in _dinfo -> avoid re-allocation and construction
  if (obs_weights) {
    weight_map = new double[nrows];
    double weight_sum = 0;
    for (int i = 0; i < nrows; ++i) {
      row = _dinfo.extractDenseRow(chunks, i, row);
      weight_sum += row.weight;
      weight_map[i] = weight_sum;
      assert (i == 0 || row.weight == 0 || weight_map[i] > weight_map[i - 1]);
    }
    if (weight_sum > 0) {
      ArrayUtils.div(weight_map, weight_sum); // normalize to 0...1
      relative_chunk_weight = global_weight_sum * nrows / _fr.numRows() / weight_sum;
    } else return; // nothing to do here - all rows have 0 weight
  }

  // Example:
  // _useFraction = 0.8 -> 1 repeat with fraction = 0.8
  // _useFraction = 1.0 -> 1 repeat with fraction = 1.0
  // _useFraction = 1.1 -> 2 repeats with fraction = 0.55
  // _useFraction = 2.1 -> 3 repeats with fraction = 0.7
  // _useFraction = 3.0 -> 3 repeats with fraction = 1.0
  final int repeats = (int) Math.ceil(_useFraction * relative_chunk_weight);
  final float fraction = (float) (_useFraction * relative_chunk_weight) / repeats;
  assert (fraction <= 1.0);

  final boolean sample = (fraction < 0.999 || obs_weights || _shuffle);
  final Random skip_rng =
      sample
          ? RandomUtils.getRNG(
              (0x8734093502429734L + _seed + offset) * (_iteration + 0x9823423497823423L))
          : null;

  long num_processed_rows = 0;
  for (int rep = 0; rep < repeats; ++rep) {
    for (int row_idx = 0; row_idx < nrows; ++row_idx) {
      int r = sample ? -1 : 0;
      // only train with a given number of training samples (fraction*nrows)
      if (sample && !obs_weights && skip_rng.nextDouble() > fraction) continue;
      if (obs_weights && num_processed_rows % 2 == 0) {
        // every second row is randomly sampled -> that way we won't "forget" rare rows
        // importance sampling based on inverse of cumulative distribution
        double key = skip_rng.nextDouble();
        r = Arrays.binarySearch(weight_map, 0, nrows, key);
        // Log.info(Arrays.toString(weight_map));
        // Log.info("key: " + key + " idx: " + (r >= 0 ? r : (-r-1)));
        if (r < 0) r = -r - 1;
        assert (r == 0 || weight_map[r] > weight_map[r - 1]);
      } else if (r == -1) {
        do {
          r = skip_rng.nextInt(nrows); // random sampling (with replacement)
        }
        // if we have weights, and we did the %2 skipping above, then we need to find an
        // alternate row with non-zero weight
        while (obs_weights
            && ((r == 0 && weight_map[0] == 0)
                || (r > 0 && weight_map[r] == weight_map[r - 1])));
      } else {
        assert (!obs_weights);
        r = row_idx; // linear scan - slightly faster
      }
      assert (r >= 0 && r <= nrows);

      row = _dinfo.extractDenseRow(chunks, r, row);
      if (!row.bad) {
        assert (row.weight > 0); // check that we never process a row that was held out via row.weight = 0
        long seed = offset + rep * nrows + r;
        if (outputs != null && outputs.length > 0) processRow(seed++, row, outputs);
        else processRow(seed++, row);
      }
      num_processed_rows++;
    }
  }
  assert (fraction != 1 || num_processed_rows == repeats * nrows);
  chunkDone(num_processed_rows);
}
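// Illustrative sketch of the importance-sampling trick used above (hypothetical helper): with a
// cumulative weight array normalized to end at 1.0, drawing a uniform key and inverting it via
// binary search picks each row with probability proportional to its observation weight.
static int sampleRowByWeight(double[] cumulativeWeights, java.util.Random rng) {
  double key = rng.nextDouble(); // uniform in [0, 1)
  int r = java.util.Arrays.binarySearch(cumulativeWeights, 0, cumulativeWeights.length, key);
  if (r < 0) r = -r - 1; // insertion point = first row whose cumulative weight exceeds key
  return r;
}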