/** * Compute Variable Importance, based on GEDEON: DATA MINING OF INPUTS: ANALYSING MAGNITUDE AND * FUNCTIONAL MEASURES * * @return variable importances for input features */ public float[] computeVariableImportances() { float[] vi = new float[units[0]]; Arrays.fill(vi, 0f); float[][] Qik = new float[units[0]][units[2]]; // importance of input i on output k float[] sum_wj = new float[units[1]]; // sum of incoming weights into first hidden layer float[] sum_wk = new float[units[2]]; // sum of incoming weights into output layer (or second hidden layer) for (float[] Qi : Qik) Arrays.fill(Qi, 0f); Arrays.fill(sum_wj, 0f); Arrays.fill(sum_wk, 0f); // compute sum of absolute incoming weights for (int j = 0; j < units[1]; j++) { for (int i = 0; i < units[0]; i++) { float wij = get_weights(0).get(j, i); sum_wj[j] += Math.abs(wij); } } for (int k = 0; k < units[2]; k++) { for (int j = 0; j < units[1]; j++) { float wjk = get_weights(1).get(k, j); sum_wk[k] += Math.abs(wjk); } } // compute importance of input i on output k as product of connecting weights going through j for (int i = 0; i < units[0]; i++) { for (int k = 0; k < units[2]; k++) { for (int j = 0; j < units[1]; j++) { float wij = get_weights(0).get(j, i); float wjk = get_weights(1).get(k, j); // Qik[i][k] += Math.abs(wij)/sum_wj[j] * wjk; //Wong,Gedeon,Taggart '95 Qik[i][k] += Math.abs(wij) / sum_wj[j] * Math.abs(wjk) / sum_wk[k]; // Gedeon '97 } } } // normalize Qik over all outputs k for (int k = 0; k < units[2]; k++) { float sumQk = 0; for (int i = 0; i < units[0]; i++) sumQk += Qik[i][k]; for (int i = 0; i < units[0]; i++) Qik[i][k] /= sumQk; } // importance for feature i is the sum over k of i->k importances for (int i = 0; i < units[0]; i++) vi[i] = ArrayUtils.sum(Qik[i]); // normalize importances such that max(vi) = 1 ArrayUtils.div(vi, ArrayUtils.maxValue(vi)); // zero out missing categorical variables if they were never seen if (_saw_missing_cats != null) { for (int i = 0; i < _saw_missing_cats.length; 
++i) { assert (data_info._catMissing[i] == 1); // have a missing bucket for each categorical if (!_saw_missing_cats[i]) vi[data_info._catOffsets[i + 1] - 1] = 0; } } return vi; }
/**
 * Verifies ArrayUtils.sum for both long[] and int[] inputs: empty arrays, all-zero
 * arrays, a small mixed array, and an int[] whose true sum exceeds Integer.MAX_VALUE
 * (the int[] overload must accumulate into a long without overflowing).
 */
public void testSum() {
  long[] emptyLongs = new long[0];
  long[] zeroLongs = new long[10];
  long[] smallLongs = {1, 2, 2};
  assertEquals(0, ArrayUtils.sum(emptyLongs));
  assertEquals(0, ArrayUtils.sum(zeroLongs));
  assertEquals(5, ArrayUtils.sum(smallLongs));

  int[] emptyInts = new int[0];
  int[] zeroInts = new int[10];
  int[] smallInts = {1, 2, 2};
  assertEquals(0, ArrayUtils.sum(emptyInts));
  assertEquals(0, ArrayUtils.sum(zeroInts));
  assertEquals(5, ArrayUtils.sum(smallInts));

  // overflow case: the expected value does not fit in an int
  int[] overflowing = {Integer.MAX_VALUE, Integer.MAX_VALUE};
  assertEquals(2L * Integer.MAX_VALUE, ArrayUtils.sum(overflowing));
}
/**
 * Unique identifier for this model's state, based on raw numbers: the seed, the model
 * size times the processed-sample count, and summary statistics (mean/rms) of biases,
 * weights and learning rates.
 *
 * @return a checksum that changes whenever any of those underlying numbers change
 */
protected long checksum_impl() {
  long checksum = parameters._seed;
  checksum ^= size() * get_processed_total();
  // fold the summaries in via alternating xor and multiply with arbitrary constants,
  // so each contributes to the final value; order of operations is significant
  checksum ^= (long) (2234.3424 * ArrayUtils.sum(mean_bias));
  checksum *= (long) (9234.1343 * ArrayUtils.sum(rms_bias));
  checksum ^= (long) (9723.9734 * ArrayUtils.sum(mean_weight));
  checksum *= (long) (9234.1783 * ArrayUtils.sum(rms_weight));
  // offsets (Math.E, Math.PI) keep these terms nonzero even when the sums are zero
  checksum ^= (long) (4273.2344 * (Math.E + ArrayUtils.sum(mean_rate)));
  checksum *= (long) (3378.1999 * (Math.PI + ArrayUtils.sum(rms_rate)));
  return checksum;
}