public final double loss(double u, double a, Loss loss) {
  assert loss.isForNumeric() : "Loss function " + loss + " not applicable to numerics";
  switch (loss) {
    case Quadratic:
      return (u - a) * (u - a);
    case Absolute:
      return Math.abs(u - a);
    case Huber:
      return Math.abs(u - a) <= 1 ? 0.5 * (u - a) * (u - a) : Math.abs(u - a) - 0.5;
    case Poisson:
      assert a >= 0 : "Poisson loss L(u,a) requires variable a >= 0";
      return Math.exp(u) + (a == 0 ? 0 : -a * u + a * Math.log(a) - a);  // Since \lim_{a->0} a*log(a) = 0
    case Hinge:
      // return Math.max(1 - a*u, 0);
      return Math.max(1 - (a == 0 ? -u : u), 0);  // Booleans are coded {0,1} instead of {-1,1}
    case Logistic:
      // return Math.log(1 + Math.exp(-a * u));
      return Math.log(1 + Math.exp(a == 0 ? u : -u));  // Booleans are coded {0,1} instead of {-1,1}
    case Periodic:
      return 1 - Math.cos((a - u) * (2 * Math.PI) / _period);
    default:
      throw new RuntimeException("Unknown loss function " + loss);
  }
}
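/*
 * Hedged illustration (not part of the original class): the Hinge and Logistic
 * branches above fold the textbook {-1,+1} label into the {0,1} boolean coding by
 * flipping the sign of u when a == 0. A minimal standalone sketch of that
 * equivalence, assuming nothing beyond plain Java; the class and method names
 * here are hypothetical.
 */
class LossCodingSketch {
  static double hinge01(double u, double a)  { return Math.max(1 - (a == 0 ? -u : u), 0); }  // a in {0,1}
  static double hingePm1(double u, double a) { return Math.max(1 - a * u, 0); }              // a in {-1,+1}

  public static void main(String[] args) {
    double u = 0.3;
    // Coding a=0 as -1 and a=1 as +1 should give identical hinge losses.
    System.out.println(hinge01(u, 0) + " == " + hingePm1(u, -1));
    System.out.println(hinge01(u, 1) + " == " + hingePm1(u, +1));
  }
}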
/**
 * Compute Variable Importance, based on GEDEON: DATA MINING OF INPUTS:
 * ANALYSING MAGNITUDE AND FUNCTIONAL MEASURES
 *
 * @return variable importances for input features
 */
public float[] computeVariableImportances() {
  float[] vi = new float[units[0]];
  Arrays.fill(vi, 0f);

  float[][] Qik = new float[units[0]][units[2]];  // importance of input i on output k
  float[] sum_wj = new float[units[1]];           // sum of incoming weights into first hidden layer
  float[] sum_wk = new float[units[2]];           // sum of incoming weights into output layer (or second hidden layer)
  for (float[] Qi : Qik) Arrays.fill(Qi, 0f);
  Arrays.fill(sum_wj, 0f);
  Arrays.fill(sum_wk, 0f);

  // compute sum of absolute incoming weights
  for (int j = 0; j < units[1]; j++) {
    for (int i = 0; i < units[0]; i++) {
      float wij = get_weights(0).get(j, i);
      sum_wj[j] += Math.abs(wij);
    }
  }
  for (int k = 0; k < units[2]; k++) {
    for (int j = 0; j < units[1]; j++) {
      float wjk = get_weights(1).get(k, j);
      sum_wk[k] += Math.abs(wjk);
    }
  }
  // compute importance of input i on output k as product of connecting weights going through j
  for (int i = 0; i < units[0]; i++) {
    for (int k = 0; k < units[2]; k++) {
      for (int j = 0; j < units[1]; j++) {
        float wij = get_weights(0).get(j, i);
        float wjk = get_weights(1).get(k, j);
        // Qik[i][k] += Math.abs(wij)/sum_wj[j] * wjk; //Wong,Gedeon,Taggart '95
        Qik[i][k] += Math.abs(wij) / sum_wj[j] * Math.abs(wjk) / sum_wk[k];  // Gedeon '97
      }
    }
  }
  // normalize Qik over all outputs k
  for (int k = 0; k < units[2]; k++) {
    float sumQk = 0;
    for (int i = 0; i < units[0]; i++) sumQk += Qik[i][k];
    for (int i = 0; i < units[0]; i++) Qik[i][k] /= sumQk;
  }
  // importance for feature i is the sum over k of i->k importances
  for (int i = 0; i < units[0]; i++) vi[i] = ArrayUtils.sum(Qik[i]);
  // normalize importances such that max(vi) = 1
  ArrayUtils.div(vi, ArrayUtils.maxValue(vi));
  // zero out missing categorical variables if they were never seen
  if (_saw_missing_cats != null) {
    for (int i = 0; i < _saw_missing_cats.length; ++i) {
      assert (data_info._catMissing[i] == 1);  // have a missing bucket for each categorical
      if (!_saw_missing_cats[i]) vi[data_info._catOffsets[i + 1] - 1] = 0;
    }
  }
  return vi;
}
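/*
 * Hedged illustration (not part of the original class): the same Gedeon '97
 * aggregation on a toy 2-3-2 network with plain double arrays, so the Qik
 * normalization steps can be followed without H2O's weight storage. All names
 * and the toy weight values here are hypothetical.
 */
class GedeonSketch {
  public static void main(String[] args) {
    double[][] w1 = {{0.5, -1.0}, {2.0, 0.1}, {-0.3, 0.7}};  // hidden x input
    double[][] w2 = {{1.0, -0.5, 0.2}, {0.4, 0.9, -1.1}};    // output x hidden
    int nIn = 2, nHid = 3, nOut = 2;
    // sums of absolute incoming weights per hidden unit and per output unit
    double[] sumWj = new double[nHid], sumWk = new double[nOut];
    for (int j = 0; j < nHid; j++) for (int i = 0; i < nIn; i++) sumWj[j] += Math.abs(w1[j][i]);
    for (int k = 0; k < nOut; k++) for (int j = 0; j < nHid; j++) sumWk[k] += Math.abs(w2[k][j]);
    // importance of input i on output k, routed through every hidden unit j
    double[][] Qik = new double[nIn][nOut];
    for (int i = 0; i < nIn; i++)
      for (int k = 0; k < nOut; k++)
        for (int j = 0; j < nHid; j++)
          Qik[i][k] += Math.abs(w1[j][i]) / sumWj[j] * Math.abs(w2[k][j]) / sumWk[k];
    // normalize per output, sum over outputs, then scale so the largest importance is 1
    for (int k = 0; k < nOut; k++) {
      double s = 0;
      for (int i = 0; i < nIn; i++) s += Qik[i][k];
      for (int i = 0; i < nIn; i++) Qik[i][k] /= s;
    }
    double[] vi = new double[nIn];
    double max = 0;
    for (int i = 0; i < nIn; i++) {
      for (int k = 0; k < nOut; k++) vi[i] += Qik[i][k];
      max = Math.max(max, vi[i]);
    }
    for (int i = 0; i < nIn; i++) System.out.println("importance of input " + i + ": " + vi[i] / max);
  }
}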
public final double regularize(double[] u, Regularizer regularization) {
  if (u == null) return 0;

  double ureg = 0;
  switch (regularization) {
    case None:
      return 0;
    case Quadratic:
      for (int i = 0; i < u.length; i++) ureg += u[i] * u[i];
      return ureg;
    case L2:
      for (int i = 0; i < u.length; i++) ureg += u[i] * u[i];
      return Math.sqrt(ureg);
    case L1:
      for (int i = 0; i < u.length; i++) ureg += Math.abs(u[i]);
      return ureg;
    case NonNegative:
      for (int i = 0; i < u.length; i++) {
        if (u[i] < 0) return Double.POSITIVE_INFINITY;
      }
      return 0;
    case OneSparse:
      int card = 0;
      for (int i = 0; i < u.length; i++) {
        if (u[i] < 0) return Double.POSITIVE_INFINITY;
        else if (u[i] > 0) card++;
      }
      return card == 1 ? 0 : Double.POSITIVE_INFINITY;
    case UnitOneSparse:
      int ones = 0, zeros = 0;
      for (int i = 0; i < u.length; i++) {
        if (u[i] == 1) ones++;
        else if (u[i] == 0) zeros++;
        else return Double.POSITIVE_INFINITY;
      }
      return ones == 1 && zeros == u.length - 1 ? 0 : Double.POSITIVE_INFINITY;
    case Simplex:
      double sum = 0, absum = 0;
      for (int i = 0; i < u.length; i++) {
        if (u[i] < 0) return Double.POSITIVE_INFINITY;
        else {
          sum += u[i];
          absum += Math.abs(u[i]);
        }
      }
      return MathUtils.equalsWithinRecSumErr(sum, 1.0, u.length, absum) ? 0 : Double.POSITIVE_INFINITY;
    default:
      throw new RuntimeException("Unknown regularization function " + regularization);
  }
}
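/*
 * Hedged illustration (not part of the original class): the constraint-style
 * regularizers (NonNegative, OneSparse, UnitOneSparse, Simplex) act as indicator
 * functions, returning 0 for feasible vectors and +Infinity otherwise, while
 * Quadratic/L2/L1 return a finite penalty. A tiny standalone sketch of the
 * Simplex case, with a plain absolute tolerance standing in for
 * MathUtils.equalsWithinRecSumErr; names here are hypothetical.
 */
class RegularizerSketch {
  // Simplex-style check: non-negative entries that sum (approximately) to one.
  static double simplexIndicator(double[] u, double tol) {
    double sum = 0;
    for (double x : u) {
      if (x < 0) return Double.POSITIVE_INFINITY;
      sum += x;
    }
    return Math.abs(sum - 1.0) <= tol ? 0 : Double.POSITIVE_INFINITY;
  }

  public static void main(String[] args) {
    System.out.println(simplexIndicator(new double[]{0.2, 0.3, 0.5}, 1e-12));  // 0.0 (feasible)
    System.out.println(simplexIndicator(new double[]{0.6, 0.6}, 1e-12));       // Infinity (sum != 1)
  }
}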
public final double lgrad(double u, double a, Loss loss) {
  assert loss.isForNumeric() : "Loss function " + loss + " not applicable to numerics";
  switch (loss) {
    case Quadratic:
      return 2 * (u - a);
    case Absolute:
      return Math.signum(u - a);
    case Huber:
      return Math.abs(u - a) <= 1 ? u - a : Math.signum(u - a);
    case Poisson:
      assert a >= 0 : "Poisson loss L(u,a) requires variable a >= 0";
      return Math.exp(u) - a;
    case Hinge:
      // return a*u <= 1 ? -a : 0;
      return a == 0 ? (-u <= 1 ? 1 : 0) : (u <= 1 ? -1 : 0);  // Booleans are coded as {0,1} instead of {-1,1}
    case Logistic:
      // return -a/(1+Math.exp(a*u));
      return a == 0 ? 1 / (1 + Math.exp(-u)) : -1 / (1 + Math.exp(u));  // Booleans are coded as {0,1} instead of {-1,1}
    case Periodic:
      return ((2 * Math.PI) / _period) * Math.sin((a - u) * (2 * Math.PI) / _period);
    default:
      throw new RuntimeException("Unknown loss function " + loss);
  }
}
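/*
 * Hedged illustration (not part of the original class): lgrad is meant to be the
 * derivative of loss with respect to u, so a central finite difference on the
 * Logistic formula above should match the closed-form gradient. A minimal
 * standalone sketch with the two formulas copied inline; names are hypothetical.
 */
class GradCheckSketch {
  static double logisticLoss(double u, double a) { return Math.log(1 + Math.exp(a == 0 ? u : -u)); }
  static double logisticGrad(double u, double a) { return a == 0 ? 1 / (1 + Math.exp(-u)) : -1 / (1 + Math.exp(u)); }

  public static void main(String[] args) {
    double u = 0.7, a = 1, h = 1e-6;
    // central difference approximation of dL/du should agree with the analytic gradient
    double numeric = (logisticLoss(u + h, a) - logisticLoss(u - h, a)) / (2 * h);
    System.out.println("numeric: " + numeric + "  analytic: " + logisticGrad(u, a));
  }
}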
@Override public void map(Chunk[] chks) {
  _gss = new double[_nclass][];
  _rss = new double[_nclass][];
  // For all tree/klasses
  for (int k = 0; k < _nclass; k++) {
    final DTree tree = _trees[k];
    final int leaf = _leafs[k];
    if (tree == null) continue;        // Empty class is ignored
    // A leaf-biased array of all active Tree leaves.
    final double gs[] = _gss[k] = new double[tree._len - leaf];
    final double rs[] = _rss[k] = new double[tree._len - leaf];
    final Chunk nids = chk_nids(chks, k);  // Node-ids for this tree/class
    final Chunk ress = chk_work(chks, k);  // Residuals for this tree/class
    // If we have all constant responses, then we do not split even the
    // root and the residuals should be zero.
    if (tree.root() instanceof LeafNode) continue;
    for (int row = 0; row < nids._len; row++) {      // For all rows
      int nid = (int) nids.at80(row);                // Get Node to decide from
      if (nid < 0) continue;                         // Missing response
      if (tree.node(nid) instanceof UndecidedNode)   // If we bottomed out the tree
        nid = tree.node(nid)._pid;                   // Then take parent's decision
      DecidedNode dn = tree.decided(nid);            // Must have a decision point
      if (dn._split._col == -1)                      // Unable to decide?
        dn = tree.decided(nid = dn._pid);            // Then take parent's decision
      int leafnid = dn.ns(chks, row);                // Decide down to a leafnode
      assert leaf <= leafnid && leafnid < tree._len;
      assert tree.node(leafnid) instanceof LeafNode;
      // Note: I can see which leaf/region I end up in, but I do not care for
      // the prediction presented by the tree. For GBM, we compute the
      // sum-of-residuals (and sum/abs/mult residuals) for all rows in the
      // leaf, and get our prediction from that.
      nids.set0(row, leafnid);
      assert !ress.isNA0(row);
      double res = ress.at0(row);
      double ares = Math.abs(res);
      gs[leafnid - leaf] += _nclass > 1 ? ares * (1 - ares) : 1;
      rs[leafnid - leaf] += res;
    }
  }
}
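/*
 * Hedged illustration (not part of the original class): in the single-class
 * (regression) case the denominator gs simply counts rows per leaf, so rs/gs is
 * the mean residual in that leaf -- the squared-error-optimal leaf value. A tiny
 * standalone sketch of that reduction, assuming residuals have already been
 * routed to one leaf; names and values here are hypothetical.
 */
class LeafGammaSketch {
  public static void main(String[] args) {
    double[] residuals = {0.5, -0.2, 0.9, 0.4};  // residuals routed to one leaf
    double rs = 0, gs = 0;
    for (double res : residuals) {
      rs += res;   // sum of residuals, as accumulated above
      gs += 1;     // the _nclass == 1 branch above adds 1 per row
    }
    System.out.println("leaf value (mean residual): " + rs / gs);  // 0.4
  }
}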
@Override protected void compute2() {
  CoxPHModel model = null;
  try {
    Scope.enter();
    _parms.read_lock_frames(CoxPH.this);
    init(true);
    applyScoringFrameSideEffects();

    // The model to be built
    model = new CoxPHModel(dest(), _parms, new CoxPHModel.CoxPHOutput(CoxPH.this));
    model.delete_and_lock(_key);

    applyTrainingFrameSideEffects();

    int nResponses = 1;
    boolean useAllFactorLevels = false;
    final DataInfo dinfo = new DataInfo(Key.make(), _modelBuilderTrain, null, nResponses, useAllFactorLevels,
                                        DataInfo.TransformType.DEMEAN, TransformType.NONE,
                                        true, false, false, false, false, false);
    initStats(model, dinfo);

    final int n_offsets = (model._parms.offset_columns == null) ? 0 : model._parms.offset_columns.length;
    final int n_coef = dinfo.fullN() - n_offsets;
    final double[] step = MemoryManager.malloc8d(n_coef);
    final double[] oldCoef = MemoryManager.malloc8d(n_coef);
    final double[] newCoef = MemoryManager.malloc8d(n_coef);
    Arrays.fill(step, Double.NaN);
    Arrays.fill(oldCoef, Double.NaN);
    for (int j = 0; j < n_coef; ++j)
      newCoef[j] = model._parms.init;
    double oldLoglik = -Double.MAX_VALUE;
    final int n_time = (int) (model._output.max_time - model._output.min_time + 1);
    final boolean has_start_column = (model._parms.start_column != null);
    final boolean has_weights_column = (model._parms.weights_column != null);
    for (int i = 0; i <= model._parms.iter_max; ++i) {
      model._output.iter = i;

      final CoxPHTask coxMR = new CoxPHTask(self(), dinfo, newCoef, model._output.min_time, n_time, n_offsets,
                                            has_start_column, has_weights_column).doAll(dinfo._adaptedFrame);

      final double newLoglik = calcLoglik(model, coxMR);
      if (newLoglik > oldLoglik) {
        if (i == 0)
          calcCounts(model, coxMR);

        calcModelStats(model, newCoef, newLoglik);
        calcCumhaz_0(model, coxMR);

        if (newLoglik == 0)
          model._output.lre = -Math.log10(Math.abs(oldLoglik - newLoglik));
        else
          model._output.lre = -Math.log10(Math.abs((oldLoglik - newLoglik) / newLoglik));
        if (model._output.lre >= model._parms.lre_min)
          break;

        Arrays.fill(step, 0);
        for (int j = 0; j < n_coef; ++j)
          for (int k = 0; k < n_coef; ++k)
            step[j] -= model._output.var_coef[j][k] * model._output.gradient[k];
        for (int j = 0; j < n_coef; ++j)
          if (Double.isNaN(step[j]) || Double.isInfinite(step[j]))
            break;

        oldLoglik = newLoglik;
        System.arraycopy(newCoef, 0, oldCoef, 0, oldCoef.length);
      } else {
        for (int j = 0; j < n_coef; ++j)
          step[j] /= 2;
      }

      for (int j = 0; j < n_coef; ++j)
        newCoef[j] = oldCoef[j] - step[j];
    }

    model.update(_key);
  } catch (Throwable t) {
    Job thisJob = DKV.getGet(_key);
    if (thisJob._state == JobState.CANCELLED) {
      Log.info("Job cancelled by user.");
    } else {
      t.printStackTrace();
      failed(t);
      throw t;
    }
  } finally {
    updateModelOutput();
    _parms.read_unlock_frames(CoxPH.this);
    Scope.exit();
    done();  // Job done!
  }
  tryComplete();
}
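/*
 * Hedged illustration (not part of the original class): the iteration above
 * follows a Newton-Raphson pattern with step halving -- when the new coefficients
 * improve the log-likelihood a fresh Newton step is taken, otherwise the previous
 * step is halved and retried. A 1-D standalone sketch of that control flow on a
 * toy concave objective f(x) = -(x - 2)^2; all names here are hypothetical.
 */
class StepHalvingSketch {
  static double f(double x) { return -(x - 2) * (x - 2); }  // toy log-likelihood
  static double g(double x) { return -2 * (x - 2); }        // gradient
  static double h()         { return -2; }                  // second derivative

  public static void main(String[] args) {
    double oldX = Double.NaN, newX = 10, step = Double.NaN, oldF = -Double.MAX_VALUE;
    for (int i = 0; i < 20; i++) {
      double newF = f(newX);
      if (newF > oldF) {        // improvement: accept the point and take a Newton step
        step = g(newX) / h();   // Newton step: (second derivative)^-1 * gradient
        oldF = newF;
        oldX = newX;
      } else {                  // no improvement: halve the last step and retry
        step /= 2;
      }
      newX = oldX - step;
    }
    System.out.println("converged near x = " + newX);  // ~2
  }
}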