/**
 * Compute Variable Importance, based on GEDEON: DATA MINING OF INPUTS: ANALYSING MAGNITUDE AND
 * FUNCTIONAL MEASURES
 *
 * @return variable importances for input features
 */
public float[] computeVariableImportances() {
  float[] vi = new float[units[0]];
  Arrays.fill(vi, 0f);

  float[][] Qik = new float[units[0]][units[2]]; // importance of input i on output k
  float[] sum_wj = new float[units[1]]; // sum of incoming weights into first hidden layer
  float[] sum_wk = new float[units[2]]; // sum of incoming weights into output layer (or second hidden layer)
  for (float[] Qi : Qik) Arrays.fill(Qi, 0f);
  Arrays.fill(sum_wj, 0f);
  Arrays.fill(sum_wk, 0f);

  // compute sum of absolute incoming weights
  for (int j = 0; j < units[1]; j++) {
    for (int i = 0; i < units[0]; i++) {
      float wij = get_weights(0).get(j, i);
      sum_wj[j] += Math.abs(wij);
    }
  }
  for (int k = 0; k < units[2]; k++) {
    for (int j = 0; j < units[1]; j++) {
      float wjk = get_weights(1).get(k, j);
      sum_wk[k] += Math.abs(wjk);
    }
  }
  // compute importance of input i on output k as product of connecting weights going through j
  for (int i = 0; i < units[0]; i++) {
    for (int k = 0; k < units[2]; k++) {
      for (int j = 0; j < units[1]; j++) {
        float wij = get_weights(0).get(j, i);
        float wjk = get_weights(1).get(k, j);
        // Qik[i][k] += Math.abs(wij)/sum_wj[j] * wjk; //Wong,Gedeon,Taggart '95
        Qik[i][k] += Math.abs(wij) / sum_wj[j] * Math.abs(wjk) / sum_wk[k]; // Gedeon '97
      }
    }
  }
  // normalize Qik over all outputs k
  for (int k = 0; k < units[2]; k++) {
    float sumQk = 0;
    for (int i = 0; i < units[0]; i++) sumQk += Qik[i][k];
    for (int i = 0; i < units[0]; i++) Qik[i][k] /= sumQk;
  }
  // importance for feature i is the sum over k of i->k importances
  for (int i = 0; i < units[0]; i++) vi[i] = ArrayUtils.sum(Qik[i]);

  // normalize importances such that max(vi) = 1
  ArrayUtils.div(vi, ArrayUtils.maxValue(vi));

  // zero out missing categorical variables if they were never seen
  if (_saw_missing_cats != null) {
    for (int i = 0; i < _saw_missing_cats.length; ++i) {
      assert (data_info._catMissing[i] == 1); // have a missing bucket for each categorical
      if (!_saw_missing_cats[i]) vi[data_info._catOffsets[i + 1] - 1] = 0;
    }
  }
  return vi;
}
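// Illustrative only: a minimal, standalone sketch of the Gedeon '97 aggregation used above,
// applied to a hand-built 2-input / 2-hidden / 1-output network. The class name and weight
// values are made up for illustration; it is not part of the model class, but shows how the
// normalized |w| products through the hidden layer yield per-input importances.
class GedeonImportanceSketch {
  public static void main(String[] args) {
    double[][] wIH = {{0.5, -1.0}, {0.2, 0.3}}; // wIH[j][i]: input i -> hidden j
    double[][] wHO = {{1.0, -0.4}};             // wHO[k][j]: hidden j -> output k
    int nIn = 2, nHid = 2, nOut = 1;

    double[] sumWj = new double[nHid]; // sum of |w| into each hidden unit
    double[] sumWk = new double[nOut]; // sum of |w| into each output unit
    for (int j = 0; j < nHid; j++) for (int i = 0; i < nIn; i++) sumWj[j] += Math.abs(wIH[j][i]);
    for (int k = 0; k < nOut; k++) for (int j = 0; j < nHid; j++) sumWk[k] += Math.abs(wHO[k][j]);

    double[] vi = new double[nIn];
    for (int i = 0; i < nIn; i++)
      for (int k = 0; k < nOut; k++)
        for (int j = 0; j < nHid; j++)
          vi[i] += Math.abs(wIH[j][i]) / sumWj[j] * Math.abs(wHO[k][j]) / sumWk[k];

    // With a single output the importances already sum to 1: ~[0.352, 0.648]
    System.out.println(java.util.Arrays.toString(vi));
  }
}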
public final double loss(double u, double a, Loss loss) {
  assert loss.isForNumeric() : "Loss function " + loss + " not applicable to numerics";
  switch (loss) {
    case Quadratic:
      return (u - a) * (u - a);
    case Absolute:
      return Math.abs(u - a);
    case Huber:
      return Math.abs(u - a) <= 1 ? 0.5 * (u - a) * (u - a) : Math.abs(u - a) - 0.5;
    case Poisson:
      assert a >= 0 : "Poisson loss L(u,a) requires variable a >= 0";
      return Math.exp(u) + (a == 0 ? 0 : -a * u + a * Math.log(a) - a); // Since \lim_{a->0} a*log(a) = 0
    case Hinge:
      // return Math.max(1-a*u,0);
      return Math.max(1 - (a == 0 ? -u : u), 0); // Booleans are coded {0,1} instead of {-1,1}
    case Logistic:
      // return Math.log(1 + Math.exp(-a * u));
      return Math.log(1 + Math.exp(a == 0 ? u : -u)); // Booleans are coded {0,1} instead of {-1,1}
    case Periodic:
      return 1 - Math.cos((a - u) * (2 * Math.PI) / _period);
    default:
      throw new RuntimeException("Unknown loss function " + loss);
  }
}
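// Illustrative only: a standalone re-implementation of the Huber branch above (delta fixed at 1),
// showing where it switches from the quadratic to the linear regime. Class and method names are
// made up; this is a sketch, not the model's API.
class HuberSketch {
  static double huber(double u, double a) {
    double r = Math.abs(u - a);
    return r <= 1 ? 0.5 * r * r : r - 0.5;
  }

  public static void main(String[] args) {
    System.out.println(huber(0.5, 0.0)); // 0.125  (quadratic regime)
    System.out.println(huber(1.0, 0.0)); // 0.5    (both regimes agree at the boundary |u-a| = 1)
    System.out.println(huber(3.0, 0.0)); // 2.5    (linear regime, less sensitive to outliers)
  }
}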
public final double regularize(double[] u, Regularizer regularization) {
  if (u == null) return 0;
  double ureg = 0;

  switch (regularization) {
    case None:
      return 0;
    case Quadratic:
      for (int i = 0; i < u.length; i++) ureg += u[i] * u[i];
      return ureg;
    case L2:
      for (int i = 0; i < u.length; i++) ureg += u[i] * u[i];
      return Math.sqrt(ureg);
    case L1:
      for (int i = 0; i < u.length; i++) ureg += Math.abs(u[i]);
      return ureg;
    case NonNegative:
      for (int i = 0; i < u.length; i++) {
        if (u[i] < 0) return Double.POSITIVE_INFINITY;
      }
      return 0;
    case OneSparse:
      int card = 0;
      for (int i = 0; i < u.length; i++) {
        if (u[i] < 0) return Double.POSITIVE_INFINITY;
        else if (u[i] > 0) card++;
      }
      return card == 1 ? 0 : Double.POSITIVE_INFINITY;
    case UnitOneSparse:
      int ones = 0, zeros = 0;
      for (int i = 0; i < u.length; i++) {
        if (u[i] == 1) ones++;
        else if (u[i] == 0) zeros++;
        else return Double.POSITIVE_INFINITY;
      }
      return ones == 1 && zeros == u.length - 1 ? 0 : Double.POSITIVE_INFINITY;
    case Simplex:
      double sum = 0, absum = 0;
      for (int i = 0; i < u.length; i++) {
        if (u[i] < 0) return Double.POSITIVE_INFINITY;
        else {
          sum += u[i];
          absum += Math.abs(u[i]);
        }
      }
      return MathUtils.equalsWithinRecSumErr(sum, 1.0, u.length, absum)
          ? 0
          : Double.POSITIVE_INFINITY;
    default:
      throw new RuntimeException("Unknown regularization function " + regularization);
  }
}
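// Illustrative only: the L1, Quadratic and L2 penalties above evaluated on the same vector,
// to show how the three differ in value (L2 is the unsquared norm, Quadratic the squared one).
// This standalone sketch does not use the model's Regularizer enum.
class RegularizerSketch {
  public static void main(String[] args) {
    double[] u = {3.0, -4.0};
    double l1 = Math.abs(u[0]) + Math.abs(u[1]);  // 7.0
    double quadratic = u[0] * u[0] + u[1] * u[1]; // 25.0 (squared L2 norm)
    double l2 = Math.sqrt(quadratic);             // 5.0  (unsquared L2 norm)
    System.out.println(l1 + " " + quadratic + " " + l2);
  }
}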
// Read the 'tree' columns, do model-specific math and put the results in the
// fs[] array, and return the sum. Dividing any fs[] element by the sum
// turns the results into a probability distribution.
@Override
protected float score1(Chunk chks[], float fs[ /*nclass*/], int row) {
  if (_nclass == 1) // Regression?
    return (float) chk_tree(chks, 0).at0(row); // Regression: prediction is the tree value
  if (_nclass == 2) { // The Boolean Optimization
    // This optimization assumes the 2nd tree of a 2-class system is the
    // inverse of the first. Fill in the missing tree
    fs[1] = (float) Math.exp(chk_tree(chks, 0).at0(row));
    fs[2] = 1.0f / fs[1]; // exp(-d) == 1/exp(d)
    return fs[1] + fs[2];
  }
  float sum = 0;
  for (int k = 0; k < _nclass; k++) // Sum across likelihoods
    sum += (fs[k + 1] = (float) Math.exp(chk_tree(chks, k).at0(row)));
  return sum;
}
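// Illustrative only: for the 2-class shortcut above, dividing fs[1] = exp(d) and fs[2] = exp(-d)
// by their sum reproduces the logistic probability 1/(1+exp(-2d)). The score value below is
// made up; this sketch is independent of the tree chunks.
class TwoClassSoftmaxSketch {
  public static void main(String[] args) {
    double d = 0.7; // hypothetical per-row tree score
    double f1 = Math.exp(d), f2 = Math.exp(-d);
    double p1 = f1 / (f1 + f2);
    System.out.println(p1 + " vs " + 1.0 / (1.0 + Math.exp(-2 * d))); // identical values
  }
}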
public final double lgrad(double u, double a, Loss loss) {
  assert loss.isForNumeric() : "Loss function " + loss + " not applicable to numerics";
  switch (loss) {
    case Quadratic:
      return 2 * (u - a);
    case Absolute:
      return Math.signum(u - a);
    case Huber:
      return Math.abs(u - a) <= 1 ? u - a : Math.signum(u - a);
    case Poisson:
      assert a >= 0 : "Poisson loss L(u,a) requires variable a >= 0";
      return Math.exp(u) - a;
    case Hinge:
      // return a*u <= 1 ? -a : 0;
      return a == 0 ? (-u <= 1 ? 1 : 0) : (u <= 1 ? -1 : 0); // Booleans are coded as {0,1} instead of {-1,1}
    case Logistic:
      // return -a/(1+Math.exp(a*u));
      return a == 0 ? 1 / (1 + Math.exp(-u)) : -1 / (1 + Math.exp(u)); // Booleans are coded as {0,1} instead of {-1,1}
    case Periodic:
      return ((2 * Math.PI) / _period) * Math.sin((a - u) * (2 * Math.PI) / _period);
    default:
      throw new RuntimeException("Unknown loss function " + loss);
  }
}
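// Illustrative only: a finite-difference sanity check of the Quadratic branch, confirming that
// lgrad is the derivative of loss with respect to u. The formulas are re-implemented standalone;
// the names are hypothetical and not the model's API.
class GradientCheckSketch {
  static double quadLoss(double u, double a) { return (u - a) * (u - a); }
  static double quadGrad(double u, double a) { return 2 * (u - a); }

  public static void main(String[] args) {
    double u = 1.3, a = 0.4, eps = 1e-6;
    double numeric = (quadLoss(u + eps, a) - quadLoss(u - eps, a)) / (2 * eps);
    System.out.println(numeric + " ~= " + quadGrad(u, a)); // both ~1.8
  }
}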
TotSS(double[] means, double[] mults, int[] modes, String[][] isCats, int[] card) {
  _means = means;
  _mults = mults;
  _modes = modes;
  _tss = 0;
  _isCats = isCats;
  _card = card;

  // Mean of numeric col is zero when standardized
  _gc = mults != null ? new double[means.length] : Arrays.copyOf(means, means.length);
  for (int i = 0; i < means.length; i++) {
    if (isCats[i] != null)
      _gc[i] = Math.min(Math.round(means[i]), _card[i] - 1); // TODO: Should set to majority class of categorical column
  }
}
private static void addFolder(FileSystem fs, Path p, JsonArray succeeded, JsonArray failed) {
  try {
    if (fs == null) return;
    for (FileStatus file : fs.listStatus(p)) {
      Path pfs = file.getPath();
      if (file.isDir()) {
        addFolder(fs, pfs, succeeded, failed);
      } else {
        Key k = Key.make(pfs.toString());
        long size = file.getLen();
        Value val = null;
        if (pfs.getName().endsWith(Extensions.JSON)) {
          JsonParser parser = new JsonParser();
          JsonObject json = parser.parse(new InputStreamReader(fs.open(pfs))).getAsJsonObject();
          JsonElement v = json.get(Constants.VERSION);
          if (v == null) throw new RuntimeException("Missing version");
          JsonElement type = json.get(Constants.TYPE);
          if (type == null) throw new RuntimeException("Missing type");
          Class c = Class.forName(type.getAsString());
          OldModel model = (OldModel) c.newInstance();
          model.fromJson(json);
        } else if (pfs.getName().endsWith(Extensions.HEX)) { // Hex file?
          FSDataInputStream s = fs.open(pfs);
          int sz = (int) Math.min(1L << 20, size); // Read up to the 1st meg
          byte[] mem = MemoryManager.malloc1(sz);
          s.readFully(mem);
          // Convert to a ValueArray (hope it fits in 1Meg!)
          ValueArray ary = new ValueArray(k, 0).read(new AutoBuffer(mem));
          val = new Value(k, ary, Value.HDFS);
        } else if (size >= 2 * ValueArray.CHUNK_SZ) {
          val = new Value(k, new ValueArray(k, size), Value.HDFS); // ValueArray byte wrapper over a large file
        } else {
          val = new Value(k, (int) size, Value.HDFS); // Plain Value
          val.setdsk();
        }
        DKV.put(k, val);
        Log.info("PersistHdfs: DKV.put(" + k + ")");
        JsonObject o = new JsonObject();
        o.addProperty(Constants.KEY, k.toString());
        o.addProperty(Constants.FILE, pfs.toString());
        o.addProperty(Constants.VALUE_SIZE, file.getLen());
        succeeded.add(o);
      }
    }
  } catch (Exception e) {
    Log.err(e);
    JsonObject o = new JsonObject();
    o.addProperty(Constants.FILE, p.toString());
    o.addProperty(Constants.ERROR, e.getMessage());
    failed.add(o);
  }
}
public static double mloss(double[] u, int a, Loss multi_loss) {
  assert multi_loss.isForCategorical()
      : "Loss function " + multi_loss + " not applicable to categoricals";
  if (a < 0 || a > u.length - 1)
    throw new IllegalArgumentException("Index must be between 0 and " + String.valueOf(u.length - 1));

  double sum = 0;
  switch (multi_loss) {
    case Categorical:
      for (int i = 0; i < u.length; i++) sum += Math.max(1 + u[i], 0);
      sum += Math.max(1 - u[a], 0) - Math.max(1 + u[a], 0);
      return sum;
    case Ordinal:
      for (int i = 0; i < u.length - 1; i++) sum += Math.max(a > i ? 1 - u[i] : 1, 0);
      return sum;
    default:
      throw new RuntimeException("Unknown multidimensional loss function " + multi_loss);
  }
}
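// Illustrative only: the Categorical branch above evaluated by hand on a small score vector.
// The correction term means the true class a contributes max(1 - u[a], 0) while every other
// class contributes max(1 + u[i], 0). The scores below are made up.
class MultiLossSketch {
  public static void main(String[] args) {
    double[] u = {2.0, -0.5, 0.1}; // hypothetical per-class scores
    int a = 0;                     // true class
    double sum = 0;
    for (int i = 0; i < u.length; i++) sum += Math.max(1 + u[i], 0);
    sum += Math.max(1 - u[a], 0) - Math.max(1 + u[a], 0);
    // Equivalent per-term view: max(1-2,0) + max(1-0.5,0) + max(1+0.1,0) = 0 + 0.5 + 1.1 = 1.6
    System.out.println(sum);
  }
}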
protected void calcModelStats(CoxPHModel model, final double[] newCoef, final double newLoglik) {
  CoxPHModel.CoxPHParameters p = model._parms;
  CoxPHModel.CoxPHOutput o = model._output;

  final int n_coef = o.coef.length;
  final Matrix inv_hessian = new Matrix(o.hessian).inverse();
  for (int j = 0; j < n_coef; ++j) {
    for (int k = 0; k <= j; ++k) {
      final double elem = -inv_hessian.get(j, k);
      o.var_coef[j][k] = elem;
      o.var_coef[k][j] = elem;
    }
  }
  for (int j = 0; j < n_coef; ++j) {
    o.coef[j] = newCoef[j];
    o.exp_coef[j] = Math.exp(o.coef[j]);
    o.exp_neg_coef[j] = Math.exp(-o.coef[j]);
    o.se_coef[j] = Math.sqrt(o.var_coef[j][j]);
    o.z_coef[j] = o.coef[j] / o.se_coef[j];
  }
  if (o.iter == 0) {
    o.null_loglik = newLoglik;
    o.maxrsq = 1 - Math.exp(2 * o.null_loglik / o.n);
    o.score_test = 0;
    for (int j = 0; j < n_coef; ++j) {
      double sum = 0;
      for (int k = 0; k < n_coef; ++k) sum += o.var_coef[j][k] * o.gradient[k];
      o.score_test += o.gradient[j] * sum;
    }
  }
  o.loglik = newLoglik;
  o.loglik_test = -2 * (o.null_loglik - o.loglik);
  o.rsq = 1 - Math.exp(-o.loglik_test / o.n);
  o.wald_test = 0;
  for (int j = 0; j < n_coef; ++j) {
    double sum = 0;
    for (int k = 0; k < n_coef; ++k) sum -= o.hessian[j][k] * (o.coef[k] - p.init);
    o.wald_test += (o.coef[j] - p.init) * sum;
  }
}
private TwoDimTable createModelSummaryTable(KMeansModel.KMeansOutput output) {
  List<String> colHeaders = new ArrayList<>();
  List<String> colTypes = new ArrayList<>();
  List<String> colFormat = new ArrayList<>();
  colHeaders.add("Number of Rows");
  colTypes.add("long");
  colFormat.add("%d");
  colHeaders.add("Number of Clusters");
  colTypes.add("long");
  colFormat.add("%d");
  colHeaders.add("Number of Categorical Columns");
  colTypes.add("long");
  colFormat.add("%d");
  colHeaders.add("Number of Iterations");
  colTypes.add("long");
  colFormat.add("%d");
  colHeaders.add("Within Cluster Sum of Squares");
  colTypes.add("double");
  colFormat.add("%.5f");
  colHeaders.add("Total Sum of Squares");
  colTypes.add("double");
  colFormat.add("%.5f");
  colHeaders.add("Between Cluster Sum of Squares");
  colTypes.add("double");
  colFormat.add("%.5f");

  final int rows = 1;
  TwoDimTable table =
      new TwoDimTable(
          "Model Summary",
          null,
          new String[rows],
          colHeaders.toArray(new String[0]),
          colTypes.toArray(new String[0]),
          colFormat.toArray(new String[0]),
          "");
  int row = 0;
  int col = 0;
  table.set(row, col++, Math.round(_train.numRows() * (hasWeightCol() ? _train.lastVec().mean() : 1)));
  table.set(row, col++, output._centers_raw.length);
  table.set(row, col++, output._categorical_column_count);
  table.set(row, col++, output._iterations);
  table.set(row, col++, output._tot_withinss);
  table.set(row, col++, output._totss);
  table.set(row, col++, output._betweenss);
  return table;
}
@SuppressWarnings("unused") @Override protected void init() { super.init(); // Initialize local variables _mtry = (mtries == -1) ? // classification: mtry=sqrt(_ncols), regression: mtry=_ncols/3 (classification ? Math.max((int) Math.sqrt(_ncols), 1) : Math.max(_ncols / 3, 1)) : mtries; if (!(1 <= _mtry && _mtry <= _ncols)) throw new IllegalArgumentException( "Computed mtry should be in interval <1,#cols> but it is " + _mtry); if (!(0.0 < sample_rate && sample_rate <= 1.0)) throw new IllegalArgumentException( "Sample rate should be interval (0,1> but it is " + sample_rate); if (DEBUG_DETERMINISTIC && seed == -1) _seed = 0x1321e74a0192470cL; // fixed version of seed else if (seed == -1) _seed = _seedGenerator.nextLong(); else _seed = seed; if (sample_rate == 1f && validation != null) Log.warn( Sys.DRF__, "Sample rate is 100% and no validation dataset is required. There are no OOB data to perform validation!"); }
@Override
protected float[] score0(double[] data, float[] preds) {
  float[] p = super.score0(data, preds);
  if (nclasses() > 1) { // classification
    // Because we call Math.exp, we have to be numerically stable or else
    // we get Infinities, and then shortly NaN's. Shift the scores so the
    // largest exponent is 0 (its exp is 1) and all the others are smaller.
    // See notes here: http://www.hongliangjie.com/2011/01/07/logsum/
    float maxval = Float.NEGATIVE_INFINITY;
    float dsum = 0;
    if (nclasses() == 2) p[2] = -p[1];
    // Find a max
    for (int k = 1; k < p.length; k++) maxval = Math.max(maxval, p[k]);
    assert !Float.isInfinite(maxval)
        : "Something is wrong with GBM trees since returned prediction is " + Arrays.toString(p);
    for (int k = 1; k < p.length; k++) dsum += (p[k] = (float) Math.exp(p[k] - maxval));
    div(p, dsum);
    p[0] = getPrediction(p, data);
  } else { // regression
    // do nothing for regression
  }
  return p;
}
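// Illustrative only: the max-subtraction trick used above, shown standalone on raw scores large
// enough that a naive Math.exp() would overflow to Infinity; the shifted version gives the same
// softmax distribution. The score values are made up.
class StableSoftmaxSketch {
  public static void main(String[] args) {
    double[] scores = {1000.0, 998.0, 990.0}; // naive Math.exp(1000) overflows to Infinity
    double max = Double.NEGATIVE_INFINITY;
    for (double s : scores) max = Math.max(max, s);
    double sum = 0;
    double[] p = new double[scores.length];
    for (int k = 0; k < scores.length; k++) sum += (p[k] = Math.exp(scores[k] - max));
    for (int k = 0; k < scores.length; k++) p[k] /= sum;
    System.out.println(java.util.Arrays.toString(p)); // ~[0.881, 0.119, 0.00004]
  }
}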
public static double impute(double u, Loss loss) {
  assert loss.isForNumeric() : "Loss function " + loss + " not applicable to numerics";
  switch (loss) {
    case Quadratic:
    case Absolute:
    case Huber:
    case Periodic:
      return u;
    case Poisson:
      return Math.exp(u) - 1;
    case Hinge:
    case Logistic:
      return u > 0 ? 1 : 0; // Booleans are coded as {0,1} instead of {-1,1}
    default:
      throw new RuntimeException("Unknown loss function " + loss);
  }
}
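// Illustrative only: for the Hinge/Logistic cases above, impute() maps any positive reconstructed
// score back to the Boolean level 1 and everything else to 0. The scores below are made up.
class ImputeSketch {
  public static void main(String[] args) {
    double[] scores = {-2.3, 0.0, 0.7};
    for (double u : scores) System.out.println(u + " -> " + (u > 0 ? 1 : 0)); // -2.3->0, 0.0->0, 0.7->1
  }
}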
/**
 * Initialization of neural net weights cf.
 * http://machinelearning.wustl.edu/mlpapers/paper_files/AISTATS2010_GlorotB10.pdf
 */
private void randomizeWeights() {
  for (int w = 0; w < dense_row_weights.length; ++w) {
    final Random rng =
        water.util.RandomUtils.getRNG(get_params()._seed + 0xBAD5EED + w + 1); // to match NeuralNet behavior
    final double range = Math.sqrt(6. / (units[w] + units[w + 1]));
    for (int i = 0; i < get_weights(w).rows(); i++) {
      for (int j = 0; j < get_weights(w).cols(); j++) {
        if (get_params()._initial_weight_distribution
            == DeepLearningParameters.InitialWeightDistribution.UniformAdaptive) {
          // cf. http://machinelearning.wustl.edu/mlpapers/paper_files/AISTATS2010_GlorotB10.pdf
          if (w == dense_row_weights.length - 1 && _classification)
            // Softmax might need an extra factor 4, since it's like a sigmoid
            get_weights(w).set(i, j, (float) (4. * uniformDist(rng, -range, range)));
          else get_weights(w).set(i, j, (float) uniformDist(rng, -range, range));
        } else if (get_params()._initial_weight_distribution
            == DeepLearningParameters.InitialWeightDistribution.Uniform) {
          get_weights(w)
              .set(i, j, (float) uniformDist(
                  rng, -get_params()._initial_weight_scale, get_params()._initial_weight_scale));
        } else if (get_params()._initial_weight_distribution
            == DeepLearningParameters.InitialWeightDistribution.Normal) {
          get_weights(w)
              .set(i, j, (float) (rng.nextGaussian() * get_params()._initial_weight_scale));
        }
      }
    }
  }
}
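// Illustrative only: the UniformAdaptive (Glorot/Xavier) range computed above, shown for a
// hypothetical layer with 784 inputs and 200 outputs, and one weight drawn uniformly from
// (-range, range) the way uniformDist(rng, -range, range) would. Layer sizes are made up.
class GlorotRangeSketch {
  public static void main(String[] args) {
    int fanIn = 784, fanOut = 200;
    double range = Math.sqrt(6.0 / (fanIn + fanOut)); // ~0.078
    java.util.Random rng = new java.util.Random(42);
    double w = -range + 2 * range * rng.nextDouble(); // uniform sample in (-range, range)
    System.out.println("range=" + range + " sample=" + w);
  }
}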
@Override
public void map(Chunk[] chks) {
  _gss = new double[_nclass][];
  _rss = new double[_nclass][];
  // For all tree/klasses
  for (int k = 0; k < _nclass; k++) {
    final DTree tree = _trees[k];
    final int leaf = _leafs[k];
    if (tree == null) continue; // Empty class is ignored
    // A leaf-biased array of all active Tree leaves.
    final double gs[] = _gss[k] = new double[tree._len - leaf];
    final double rs[] = _rss[k] = new double[tree._len - leaf];
    final Chunk nids = chk_nids(chks, k); // Node-ids for this tree/class
    final Chunk ress = chk_work(chks, k); // Residuals for this tree/class
    // If we have all constant responses, then we do not split even the
    // root and the residuals should be zero.
    if (tree.root() instanceof LeafNode) continue;
    for (int row = 0; row < nids._len; row++) { // For all rows
      int nid = (int) nids.at80(row); // Get Node to decide from
      if (nid < 0) continue; // Missing response
      if (tree.node(nid) instanceof UndecidedNode) // If we bottomed out the tree
        nid = tree.node(nid)._pid; // Then take parent's decision
      DecidedNode dn = tree.decided(nid); // Must have a decision point
      if (dn._split._col == -1) // Unable to decide?
        dn = tree.decided(nid = dn._pid); // Then take parent's decision
      int leafnid = dn.ns(chks, row); // Decide down to a leafnode
      assert leaf <= leafnid && leafnid < tree._len;
      assert tree.node(leafnid) instanceof LeafNode;
      // Note: I can tell which leaf/region I end up in, but I do not care about
      // the prediction presented by the tree. For GBM, we compute the
      // sum-of-residuals (and sum/abs/mult residuals) for all rows in the
      // leaf, and get our prediction from that.
      nids.set0(row, leafnid);
      assert !ress.isNA0(row);
      double res = ress.at0(row);
      double ares = Math.abs(res);
      gs[leafnid - leaf] += _nclass > 1 ? ares * (1 - ares) : 1;
      rs[leafnid - leaf] += res;
    }
  }
}
@Override
public float progress() {
  if (_status == Status.Done) return 1.0f;
  return Math.min(0.99f, (float) ((double) _count / (double) _nchunks));
}
@Override
protected void compute2() {
  CoxPHModel model = null;
  try {
    Scope.enter();
    _parms.read_lock_frames(CoxPH.this);
    init(true);
    applyScoringFrameSideEffects();

    // The model to be built
    model = new CoxPHModel(dest(), _parms, new CoxPHModel.CoxPHOutput(CoxPH.this));
    model.delete_and_lock(_key);

    applyTrainingFrameSideEffects();

    int nResponses = 1;
    boolean useAllFactorLevels = false;
    final DataInfo dinfo =
        new DataInfo(Key.make(), _modelBuilderTrain, null, nResponses, useAllFactorLevels,
            DataInfo.TransformType.DEMEAN, TransformType.NONE,
            true, false, false, false, false, false);
    initStats(model, dinfo);

    final int n_offsets =
        (model._parms.offset_columns == null) ? 0 : model._parms.offset_columns.length;
    final int n_coef = dinfo.fullN() - n_offsets;
    final double[] step = MemoryManager.malloc8d(n_coef);
    final double[] oldCoef = MemoryManager.malloc8d(n_coef);
    final double[] newCoef = MemoryManager.malloc8d(n_coef);
    Arrays.fill(step, Double.NaN);
    Arrays.fill(oldCoef, Double.NaN);
    for (int j = 0; j < n_coef; ++j) newCoef[j] = model._parms.init;
    double oldLoglik = -Double.MAX_VALUE;
    final int n_time = (int) (model._output.max_time - model._output.min_time + 1);
    final boolean has_start_column = (model._parms.start_column != null);
    final boolean has_weights_column = (model._parms.weights_column != null);

    for (int i = 0; i <= model._parms.iter_max; ++i) {
      model._output.iter = i;

      final CoxPHTask coxMR =
          new CoxPHTask(self(), dinfo, newCoef, model._output.min_time, n_time, n_offsets,
                  has_start_column, has_weights_column)
              .doAll(dinfo._adaptedFrame);

      final double newLoglik = calcLoglik(model, coxMR);
      if (newLoglik > oldLoglik) {
        if (i == 0) calcCounts(model, coxMR);

        calcModelStats(model, newCoef, newLoglik);
        calcCumhaz_0(model, coxMR);

        if (newLoglik == 0) model._output.lre = -Math.log10(Math.abs(oldLoglik - newLoglik));
        else model._output.lre = -Math.log10(Math.abs((oldLoglik - newLoglik) / newLoglik));
        if (model._output.lre >= model._parms.lre_min) break;

        // Newton step: step = var_coef * gradient (negated below)
        Arrays.fill(step, 0);
        for (int j = 0; j < n_coef; ++j)
          for (int k = 0; k < n_coef; ++k)
            step[j] -= model._output.var_coef[j][k] * model._output.gradient[k];
        for (int j = 0; j < n_coef; ++j)
          if (Double.isNaN(step[j]) || Double.isInfinite(step[j])) break;

        oldLoglik = newLoglik;
        System.arraycopy(newCoef, 0, oldCoef, 0, oldCoef.length);
      } else {
        // Step halving: the log-likelihood did not improve, so retry with half the step
        for (int j = 0; j < n_coef; ++j) step[j] /= 2;
      }

      for (int j = 0; j < n_coef; ++j) newCoef[j] = oldCoef[j] - step[j];
    }

    model.update(_key);
  } catch (Throwable t) {
    Job thisJob = DKV.getGet(_key);
    if (thisJob._state == JobState.CANCELLED) {
      Log.info("Job cancelled by user.");
    } else {
      t.printStackTrace();
      failed(t);
      throw t;
    }
  } finally {
    updateModelOutput();
    _parms.read_unlock_frames(CoxPH.this);
    Scope.exit();
    done(); // Job done!
  }
  tryComplete();
}
@Override
protected void processRow(long gid, Row row) {
  n++;
  double[] response = row.response;
  int ncats = row.nBins;
  int[] cats = row.binIds; // categorical offsets matching the nBins count
  double[] nums = row.numVals;
  final double weight = _has_weights_column ? response[0] : 1.0;
  if (weight <= 0) throw new IllegalArgumentException("weights must be positive values");
  final long event = (long) response[response.length - 1];
  final int t1 =
      _has_start_column ? (int) (((long) response[response.length - 3] + 1) - _min_time) : -1;
  final int t2 = (int) (((long) response[response.length - 2]) - _min_time);
  if (t1 > t2)
    throw new IllegalArgumentException("start times must be strictly less than stop times");
  final int numStart = _dinfo.numStart();
  sumWeights += weight;
  for (int j = 0; j < ncats; ++j) sumWeightedCatX[cats[j]] += weight;
  for (int j = 0; j < nums.length; ++j) sumWeightedNumX[j] += weight * nums[j];
  double logRisk = 0;
  for (int j = 0; j < ncats; ++j) logRisk += _beta[cats[j]];
  for (int j = 0; j < nums.length - _n_offsets; ++j) logRisk += nums[j] * _beta[numStart + j];
  for (int j = nums.length - _n_offsets; j < nums.length; ++j) logRisk += nums[j];
  final double risk = weight * Math.exp(logRisk);
  logRisk *= weight;
  if (event > 0) {
    countEvents[t2]++;
    sizeEvents[t2] += weight;
    sumLogRiskEvents[t2] += logRisk;
    sumRiskEvents[t2] += risk;
  } else sizeCensored[t2] += weight;
  if (_has_start_column) {
    for (int t = t1; t <= t2; ++t) sizeRiskSet[t] += weight;
    for (int t = t1; t <= t2; ++t) rcumsumRisk[t] += risk;
  } else {
    sizeRiskSet[t2] += weight;
    rcumsumRisk[t2] += risk;
  }

  final int ntotal = ncats + (nums.length - _n_offsets);
  final int numStartIter = numStart - ncats;
  for (int jit = 0; jit < ntotal; ++jit) {
    final boolean jIsCat = jit < ncats;
    final int j = jIsCat ? cats[jit] : numStartIter + jit;
    final double x1 = jIsCat ? 1.0 : nums[jit - ncats];
    final double xRisk = x1 * risk;
    if (event > 0) {
      sumXEvents[t2][j] += weight * x1;
      sumXRiskEvents[t2][j] += xRisk;
    }
    if (_has_start_column) {
      for (int t = t1; t <= t2; ++t) rcumsumXRisk[t][j] += xRisk;
    } else {
      rcumsumXRisk[t2][j] += xRisk;
    }
    for (int kit = 0; kit < ntotal; ++kit) {
      final boolean kIsCat = kit < ncats;
      final int k = kIsCat ? cats[kit] : numStartIter + kit;
      final double x2 = kIsCat ? 1.0 : nums[kit - ncats];
      final double xxRisk = x2 * xRisk;
      if (event > 0) sumXXRiskEvents[t2][j][k] += xxRisk;
      if (_has_start_column) {
        for (int t = t1; t <= t2; ++t) rcumsumXXRisk[t][j][k] += xxRisk;
      } else {
        rcumsumXXRisk[t2][j][k] += xxRisk;
      }
    }
  }
}
/**
 * Main constructor
 *
 * @param params Model parameters
 * @param dinfo Data Info
 * @param nClasses number of classes (1 for regression, 0 for autoencoder)
 * @param train User-given training data frame, prepared by AdaptTestTrain
 * @param valid User-specified validation data frame, prepared by AdaptTestTrain
 */
public DeepLearningModelInfo(
    final DeepLearningParameters params,
    final DataInfo dinfo,
    int nClasses,
    Frame train,
    Frame valid) {
  _classification = nClasses > 1;
  _train = train;
  _valid = valid;
  data_info = dinfo;
  parameters = (DeepLearningParameters) params.clone(); // make a copy, don't change model's parameters
  DeepLearningParameters.Sanity.modifyParms(
      parameters, parameters, nClasses); // sanitize the model_info's parameters

  final int num_input = dinfo.fullN();
  final int num_output =
      get_params()._autoencoder ? num_input : (_classification ? train.lastVec().cardinality() : 1);
  if (!get_params()._autoencoder) assert (num_output == nClasses);

  _saw_missing_cats = dinfo._cats > 0 ? new boolean[data_info._cats] : null;
  assert (num_input > 0);
  assert (num_output > 0);
  if (has_momenta() && adaDelta())
    throw new IllegalArgumentException(
        "Cannot have non-zero momentum and adaptive rate at the same time.");
  final int layers = get_params()._hidden.length;
  // units (# neurons for each layer)
  units = new int[layers + 2];
  if (get_params()._max_categorical_features <= Integer.MAX_VALUE - dinfo._nums)
    units[0] = Math.min(dinfo._nums + get_params()._max_categorical_features, num_input);
  else units[0] = num_input;
  System.arraycopy(get_params()._hidden, 0, units, 1, layers);
  units[layers + 1] = num_output;

  boolean printLevels = units[0] > 1000L;
  boolean warn = units[0] > 100000L;
  if (printLevels) {
    final String[][] domains = dinfo._adaptedFrame.domains();
    int[] levels = new int[domains.length];
    for (int i = 0; i < levels.length; ++i) {
      levels[i] = domains[i] != null ? domains[i].length : 0;
    }
    Arrays.sort(levels);
    if (warn) {
      Log.warn("===================================================================================================================================");
      Log.warn(
          num_input
              + " input features"
              + (dinfo._cats > 0 ? " (after categorical one-hot encoding)" : "")
              + ". Can be slow and require a lot of memory.");
    }
    if (levels[levels.length - 1] > 0) {
      int levelcutoff = levels[levels.length - 1 - Math.min(10, levels.length - 1)];
      int count = 0;
      for (int i = 0;
          i < dinfo._adaptedFrame.numCols() - (get_params()._autoencoder ? 0 : 1) && count < 10;
          ++i) {
        if (dinfo._adaptedFrame.domains()[i] != null
            && dinfo._adaptedFrame.domains()[i].length >= levelcutoff) {
          if (warn) {
            Log.warn(
                "Categorical feature '"
                    + dinfo._adaptedFrame._names[i]
                    + "' has cardinality "
                    + dinfo._adaptedFrame.domains()[i].length
                    + ".");
          } else {
            Log.info(
                "Categorical feature '"
                    + dinfo._adaptedFrame._names[i]
                    + "' has cardinality "
                    + dinfo._adaptedFrame.domains()[i].length
                    + ".");
          }
        }
        count++;
      }
    }
    if (warn) {
      Log.warn("Suggestions:");
      Log.warn(" *) Limit the size of the first hidden layer");
      if (dinfo._cats > 0) {
        Log.warn(
            " *) Limit the total number of one-hot encoded features with the parameter 'max_categorical_features'");
        Log.warn(
            " *) Run h2o.interaction(...,pairwise=F) on high-cardinality categorical columns to limit the factor count, see http://learn.h2o.ai");
      }
      Log.warn("===================================================================================================================================");
    }
  }

  // weights (to connect layers)
  dense_row_weights = new Storage.DenseRowMatrix[layers + 1];
  dense_col_weights = new Storage.DenseColMatrix[layers + 1];

  // decide format of weight matrices row-major or col-major
  if (get_params()._col_major)
    dense_col_weights[0] = new Storage.DenseColMatrix(units[1], units[0]);
  else dense_row_weights[0] = new Storage.DenseRowMatrix(units[1], units[0]);
  for (int i = 1; i <= layers; ++i)
    dense_row_weights[i] = new Storage.DenseRowMatrix(units[i + 1] /*rows*/, units[i] /*cols*/);

  // biases (only for hidden layers and output layer)
  biases = new Storage.DenseVector[layers + 1];
  for (int i = 0; i <= layers; ++i) biases[i] = new Storage.DenseVector(units[i + 1]);
  // average activation (only for hidden layers)
  if (get_params()._autoencoder && get_params()._sparsity_beta > 0) {
    avg_activations = new Storage.DenseVector[layers];
    mean_a = new float[layers];
    for (int i = 0; i < layers; ++i) avg_activations[i] = new Storage.DenseVector(units[i + 1]);
  }
  allocateHelperArrays();
  // for diagnostics
  mean_rate = new float[units.length];
  rms_rate = new float[units.length];
  mean_bias = new float[units.length];
  rms_bias = new float[units.length];
  mean_weight = new float[units.length];
  rms_weight = new float[units.length];
}
// public final double[] rproxgrad_x(double[] u, double alpha) { return rproxgrad(u, alpha,
// _gamma_x, _regularization_x, RandomUtils.getRNG(_seed)); }
// public final double[] rproxgrad_y(double[] u, double alpha) { return rproxgrad(u, alpha,
// _gamma_y, _regularization_y, RandomUtils.getRNG(_seed)); }
static double[] rproxgrad(
    double[] u, double alpha, double gamma, Regularizer regularization, Random rand) {
  if (u == null || alpha == 0 || gamma == 0) return u;
  double[] v = new double[u.length];

  switch (regularization) {
    case None:
      return u;
    case Quadratic:
      for (int i = 0; i < u.length; i++) v[i] = u[i] / (1 + 2 * alpha * gamma);
      return v;
    case L2:
      // Proof uses Moreau decomposition; see section 6.5.1 of Parikh and Boyd
      // https://web.stanford.edu/~boyd/papers/pdf/prox_algs.pdf
      double weight = 1 - alpha * gamma / ArrayUtils.l2norm(u);
      if (weight < 0) return v; // Zero vector
      for (int i = 0; i < u.length; i++) v[i] = weight * u[i];
      return v;
    case L1:
      for (int i = 0; i < u.length; i++)
        v[i] = Math.max(u[i] - alpha * gamma, 0) + Math.min(u[i] + alpha * gamma, 0);
      return v;
    case NonNegative:
      for (int i = 0; i < u.length; i++) v[i] = Math.max(u[i], 0);
      return v;
    case OneSparse:
      int idx = ArrayUtils.maxIndex(u, rand);
      v[idx] = u[idx] > 0 ? u[idx] : 1e-6;
      return v;
    case UnitOneSparse:
      idx = ArrayUtils.maxIndex(u, rand);
      v[idx] = 1;
      return v;
    case Simplex:
      // Proximal gradient algorithm by Chen and Ye in http://arxiv.org/pdf/1101.6081v2.pdf
      // 1) Sort input vector u in ascending order: u[1] <= ... <= u[n]
      int n = u.length;
      int[] idxs = new int[n];
      for (int i = 0; i < n; i++) idxs[i] = i;
      ArrayUtils.sort(idxs, u);

      // 2) Calculate cumulative sum of u in descending order
      // cumsum(u) = (..., u[n-2]+u[n-1]+u[n], u[n-1]+u[n], u[n])
      double[] ucsum = new double[n];
      ucsum[n - 1] = u[idxs[n - 1]];
      for (int i = n - 2; i >= 0; i--) ucsum[i] = ucsum[i + 1] + u[idxs[i]];

      // 3) Let t_i = (\sum_{j=i+1}^n u[j] - 1)/(n - i)
      // For i = n-1,...,1, set optimal t* to first t_i >= u[i]
      double t = (ucsum[0] - 1) / n; // Default t* = (\sum_{j=1}^n u[j] - 1)/n
      for (int i = n - 1; i >= 1; i--) {
        double tmp = (ucsum[i] - 1) / (n - i);
        if (tmp >= u[idxs[i - 1]]) {
          t = tmp;
          break;
        }
      }

      // 4) Return max(u - t*, 0) as projection of u onto simplex
      double[] x = new double[u.length];
      for (int i = 0; i < u.length; i++) x[i] = Math.max(u[i] - t, 0);
      return x;
    default:
      throw new RuntimeException("Unknown regularization function " + regularization);
  }
}
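// Illustrative only: the L1 branch above is soft-thresholding. Values within +/-(alpha*gamma)
// of zero are snapped to zero and the rest are shrunk toward it. The step-size/weight product
// and the input vector below are made up.
class SoftThresholdSketch {
  public static void main(String[] args) {
    double alphaGamma = 0.5; // hypothetical alpha * gamma
    double[] u = {2.0, 0.3, -1.2, -0.4};
    double[] v = new double[u.length];
    for (int i = 0; i < u.length; i++)
      v[i] = Math.max(u[i] - alphaGamma, 0) + Math.min(u[i] + alphaGamma, 0);
    System.out.println(java.util.Arrays.toString(v)); // [1.5, 0.0, -0.7, 0.0]
  }
}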
private void randomRow(
    Vec[] vecs, Random rand, double[] center, double[] means, double[] mults, int[] modes) {
  long row = Math.max(0, (long) (rand.nextDouble() * vecs[0].length()) - 1);
  data(center, vecs, row, means, mults, modes);
}
protected double calcLoglik(CoxPHModel model, final CoxPHTask coxMR) {
  CoxPHModel.CoxPHParameters p = model._parms;
  CoxPHModel.CoxPHOutput o = model._output;
  final int n_coef = o.coef.length;
  final int n_time = coxMR.sizeEvents.length;
  double newLoglik = 0;
  for (int j = 0; j < n_coef; ++j) o.gradient[j] = 0;
  for (int j = 0; j < n_coef; ++j)
    for (int k = 0; k < n_coef; ++k) o.hessian[j][k] = 0;

  switch (p.ties) {
    case efron:
      final double[] newLoglik_t = MemoryManager.malloc8d(n_time);
      final double[][] gradient_t = malloc2DArray(n_time, n_coef);
      final double[][][] hessian_t = malloc3DArray(n_time, n_coef, n_coef);
      ForkJoinTask[] fjts = new ForkJoinTask[n_time];
      for (int t = n_time - 1; t >= 0; --t) {
        final int _t = t;
        fjts[t] =
            new RecursiveAction() {
              @Override
              protected void compute() {
                final double sizeEvents_t = coxMR.sizeEvents[_t];
                if (sizeEvents_t > 0) {
                  final long countEvents_t = coxMR.countEvents[_t];
                  final double sumLogRiskEvents_t = coxMR.sumLogRiskEvents[_t];
                  final double sumRiskEvents_t = coxMR.sumRiskEvents[_t];
                  final double rcumsumRisk_t = coxMR.rcumsumRisk[_t];
                  final double avgSize = sizeEvents_t / countEvents_t;
                  newLoglik_t[_t] = sumLogRiskEvents_t;
                  System.arraycopy(coxMR.sumXEvents[_t], 0, gradient_t[_t], 0, n_coef);
                  for (long e = 0; e < countEvents_t; ++e) {
                    final double frac = ((double) e) / ((double) countEvents_t);
                    final double term = rcumsumRisk_t - frac * sumRiskEvents_t;
                    newLoglik_t[_t] -= avgSize * Math.log(term);
                    for (int j = 0; j < n_coef; ++j) {
                      final double djTerm =
                          coxMR.rcumsumXRisk[_t][j] - frac * coxMR.sumXRiskEvents[_t][j];
                      final double djLogTerm = djTerm / term;
                      gradient_t[_t][j] -= avgSize * djLogTerm;
                      for (int k = 0; k < n_coef; ++k) {
                        final double dkTerm =
                            coxMR.rcumsumXRisk[_t][k] - frac * coxMR.sumXRiskEvents[_t][k];
                        final double djkTerm =
                            coxMR.rcumsumXXRisk[_t][j][k] - frac * coxMR.sumXXRiskEvents[_t][j][k];
                        hessian_t[_t][j][k] -=
                            avgSize * (djkTerm / term - (djLogTerm * (dkTerm / term)));
                      }
                    }
                  }
                }
              }
            };
      }
      ForkJoinTask.invokeAll(fjts);
      for (int t = 0; t < n_time; ++t) newLoglik += newLoglik_t[t];
      for (int t = 0; t < n_time; ++t)
        for (int j = 0; j < n_coef; ++j) o.gradient[j] += gradient_t[t][j];
      for (int t = 0; t < n_time; ++t)
        for (int j = 0; j < n_coef; ++j)
          for (int k = 0; k < n_coef; ++k) o.hessian[j][k] += hessian_t[t][j][k];
      break;
    case breslow:
      for (int t = n_time - 1; t >= 0; --t) {
        final double sizeEvents_t = coxMR.sizeEvents[t];
        if (sizeEvents_t > 0) {
          final double sumLogRiskEvents_t = coxMR.sumLogRiskEvents[t];
          final double rcumsumRisk_t = coxMR.rcumsumRisk[t];
          newLoglik += sumLogRiskEvents_t;
          newLoglik -= sizeEvents_t * Math.log(rcumsumRisk_t);
          for (int j = 0; j < n_coef; ++j) {
            final double dlogTerm = coxMR.rcumsumXRisk[t][j] / rcumsumRisk_t;
            o.gradient[j] += coxMR.sumXEvents[t][j];
            o.gradient[j] -= sizeEvents_t * dlogTerm;
            for (int k = 0; k < n_coef; ++k)
              o.hessian[j][k] -=
                  sizeEvents_t
                      * (((coxMR.rcumsumXXRisk[t][j][k] / rcumsumRisk_t)
                          - (dlogTerm * (coxMR.rcumsumXRisk[t][k] / rcumsumRisk_t))));
          }
        }
      }
      break;
    default:
      throw new IllegalArgumentException("ties method must be either efron or breslow");
  }
  return newLoglik;
}
// Progress reporting for the job/progress page
@Override
public float progress() {
  return Math.min(1f, _iteration / (float) _maxIter);
}