protected void calcCounts(CoxPHModel model, final CoxPHTask coxMR) {
  CoxPHModel.CoxPHParameters p = model._parms;
  CoxPHModel.CoxPHOutput o = model._output;
  // Rows dropped by the MRTask (e.g. rows with missing values) are reported as missing.
  o.n_missing = o.n - coxMR.n;
  o.n = coxMR.n;
  // Weighted means of the expanded categorical and numerical predictors; the numerical means are
  // shifted back by _normSub because the task operates on de-meaned data.
  for (int j = 0; j < o.x_mean_cat.length; j++)
    o.x_mean_cat[j] = coxMR.sumWeightedCatX[j] / coxMR.sumWeights;
  for (int j = 0; j < o.x_mean_num.length; j++)
    o.x_mean_num[j] = coxMR.dinfo()._normSub[j] + coxMR.sumWeightedNumX[j] / coxMR.sumWeights;
  System.arraycopy(coxMR.dinfo()._normSub, o.x_mean_num.length, o.mean_offset, 0, o.mean_offset.length);
  // Keep only the time points at which an event or a censoring actually occurred.
  int nz = 0;
  for (int t = 0; t < coxMR.countEvents.length; ++t) {
    o.total_event += coxMR.countEvents[t];
    if (coxMR.sizeEvents[t] > 0 || coxMR.sizeCensored[t] > 0) {
      o.time[nz]     = o.min_time + t;
      o.n_risk[nz]   = coxMR.sizeRiskSet[t];
      o.n_event[nz]  = coxMR.sizeEvents[t];
      o.n_censor[nz] = coxMR.sizeCensored[t];
      nz++;
    }
  }
  // Without a start column, the risk set at time t is everyone still under observation at t,
  // i.e. the reverse cumulative sum of the per-time risk-set counts.
  if (p.start_column == null)
    for (int t = o.n_risk.length - 2; t >= 0; --t)
      o.n_risk[t] += o.n_risk[t + 1];
}
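// A minimal sketch (hypothetical helper, not part of the original class) of the last step in
// calcCounts: when no start column is given, the number at risk at each event time is obtained
// as a reverse cumulative sum over the per-time counts, so earlier times include everyone who
// is still under observation at later times.
private static void toReverseCumulativeSum(double[] nRisk) {
  for (int t = nRisk.length - 2; t >= 0; --t)
    nRisk[t] += nRisk[t + 1];  // nRisk[t] becomes nRisk[t] + nRisk[t+1] + ... + nRisk[last]
}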
protected void initStats(final CoxPHModel model, final DataInfo dinfo) {
  CoxPHModel.CoxPHParameters p = model._parms;
  CoxPHModel.CoxPHOutput o = model._output;

  o.n = p.stop_column.length();
  o.data_info = dinfo;
  final int n_offsets = (p.offset_columns == null) ? 0 : p.offset_columns.length;
  final int n_coef = o.data_info.fullN() - n_offsets;
  final String[] coefNames = o.data_info.coefNames();
  o.coef_names = new String[n_coef];
  System.arraycopy(coefNames, 0, o.coef_names, 0, n_coef);
  // Per-coefficient statistics plus the gradient and Hessian used by the Newton-Raphson iterations.
  o.coef         = MemoryManager.malloc8d(n_coef);
  o.exp_coef     = MemoryManager.malloc8d(n_coef);
  o.exp_neg_coef = MemoryManager.malloc8d(n_coef);
  o.se_coef      = MemoryManager.malloc8d(n_coef);
  o.z_coef       = MemoryManager.malloc8d(n_coef);
  o.gradient     = MemoryManager.malloc8d(n_coef);
  o.hessian      = malloc2DArray(n_coef, n_coef);
  o.var_coef     = malloc2DArray(n_coef, n_coef);
  // Means of the expanded categorical and numerical predictors, plus the offset columns,
  // whose names come last in coefNames.
  o.x_mean_cat   = MemoryManager.malloc8d(n_coef - (o.data_info._nums - n_offsets));
  o.x_mean_num   = MemoryManager.malloc8d(o.data_info._nums - n_offsets);
  o.mean_offset  = MemoryManager.malloc8d(n_offsets);
  o.offset_names = new String[n_offsets];
  System.arraycopy(coefNames, n_coef, o.offset_names, 0, n_offsets);

  final Vec start_column = p.start_column;
  final Vec stop_column  = p.stop_column;
  o.min_time = p.start_column == null ? (long) stop_column.min() : (long) start_column.min() + 1;
  o.max_time = (long) stop_column.max();

  // One slot per distinct stop time for the survival counts and the baseline cumulative hazard.
  final int n_time = new Vec.CollectDomain().doAll(stop_column).domain().length;
  o.time         = MemoryManager.malloc8(n_time);
  o.n_risk       = MemoryManager.malloc8d(n_time);
  o.n_event      = MemoryManager.malloc8d(n_time);
  o.n_censor     = MemoryManager.malloc8d(n_time);
  o.cumhaz_0     = MemoryManager.malloc8d(n_time);
  o.var_cumhaz_1 = MemoryManager.malloc8d(n_time);
  o.var_cumhaz_2 = malloc2DArray(n_time, n_coef);
}
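// A minimal sketch (hypothetical helper, not part of the original class) of the name split
// performed above: DataInfo.coefNames() lists the expanded predictor names first and the offset
// columns last, so the first fullN() - n_offsets entries become coefficient names and the
// trailing n_offsets entries become offset names.
private static String[][] splitCoefAndOffsetNames(String[] coefNames, int nOffsets) {
  final int nCoef = coefNames.length - nOffsets;
  return new String[][] {
    java.util.Arrays.copyOfRange(coefNames, 0, nCoef),                 // model coefficient names
    java.util.Arrays.copyOfRange(coefNames, nCoef, coefNames.length)   // offset column names
  };
}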
private static double[][] destandardize(double[][] centers, String[][] isCats, double[] means, double[] mults) {
  int K = centers.length;
  int N = centers[0].length;
  double[][] value = new double[K][N];
  for (int clu = 0; clu < K; clu++) {
    System.arraycopy(centers[clu], 0, value[clu], 0, N);
    if (mults != null) {  // Reverse standardization
      for (int col = 0; col < N; col++)
        if (isCats[col] == null)
          value[clu][col] = value[clu][col] / mults[col] + means[col];
    }
  }
  return value;
}
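// A minimal sketch (hypothetical, not part of the original class) of the per-column reversal used
// above: standardization maps x to z = (x - mean) * mult (with mult typically 1/sd), so the
// original scale is recovered with x = z / mult + mean. Categorical columns (isCats[col] != null)
// are never standardized and are copied through unchanged.
private static double destandardizeValue(double z, double mean, double mult) {
  return z / mult + mean;  // e.g. mean = 5.0, mult = 0.5, z = 1.2  ->  1.2 / 0.5 + 5.0 = 7.4
}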
@Override
protected void compute2() {
  CoxPHModel model = null;
  try {
    Scope.enter();
    _parms.read_lock_frames(CoxPH.this);
    init(true);
    applyScoringFrameSideEffects();

    // The model to be built
    model = new CoxPHModel(dest(), _parms, new CoxPHModel.CoxPHOutput(CoxPH.this));
    model.delete_and_lock(_key);

    applyTrainingFrameSideEffects();

    int nResponses = 1;
    boolean useAllFactorLevels = false;
    final DataInfo dinfo = new DataInfo(Key.make(), _modelBuilderTrain, null, nResponses, useAllFactorLevels,
                                        DataInfo.TransformType.DEMEAN, DataInfo.TransformType.NONE,
                                        true, false, false, false, false, false);
    initStats(model, dinfo);

    final int n_offsets = (model._parms.offset_columns == null) ? 0 : model._parms.offset_columns.length;
    final int n_coef = dinfo.fullN() - n_offsets;
    final double[] step    = MemoryManager.malloc8d(n_coef);
    final double[] oldCoef = MemoryManager.malloc8d(n_coef);
    final double[] newCoef = MemoryManager.malloc8d(n_coef);
    Arrays.fill(step,    Double.NaN);
    Arrays.fill(oldCoef, Double.NaN);
    for (int j = 0; j < n_coef; ++j)
      newCoef[j] = model._parms.init;
    double oldLoglik = -Double.MAX_VALUE;
    final int n_time = (int) (model._output.max_time - model._output.min_time + 1);
    final boolean has_start_column   = (model._parms.start_column != null);
    final boolean has_weights_column = (model._parms.weights_column != null);

    // Newton-Raphson iterations with step halving: a step is accepted only if the partial
    // log-likelihood improves; otherwise the previous step is halved and retried.
    for (int i = 0; i <= model._parms.iter_max; ++i) {
      model._output.iter = i;

      final CoxPHTask coxMR = new CoxPHTask(self(), dinfo, newCoef, model._output.min_time, n_time, n_offsets,
                                            has_start_column, has_weights_column).doAll(dinfo._adaptedFrame);

      final double newLoglik = calcLoglik(model, coxMR);
      if (newLoglik > oldLoglik) {
        if (i == 0)
          calcCounts(model, coxMR);

        calcModelStats(model, newCoef, newLoglik);
        calcCumhaz_0(model, coxMR);

        // Stop once the log relative error of the log-likelihood reaches lre_min.
        if (newLoglik == 0)
          model._output.lre = -Math.log10(Math.abs(oldLoglik - newLoglik));
        else
          model._output.lre = -Math.log10(Math.abs((oldLoglik - newLoglik) / newLoglik));
        if (model._output.lre >= model._parms.lre_min)
          break;

        // Newton step: step = -var_coef * gradient, so that newCoef = oldCoef + var_coef * gradient.
        Arrays.fill(step, 0);
        for (int j = 0; j < n_coef; ++j)
          for (int k = 0; k < n_coef; ++k)
            step[j] -= model._output.var_coef[j][k] * model._output.gradient[k];
        for (int j = 0; j < n_coef; ++j)
          if (Double.isNaN(step[j]) || Double.isInfinite(step[j]))
            break;

        oldLoglik = newLoglik;
        System.arraycopy(newCoef, 0, oldCoef, 0, oldCoef.length);
      } else {
        // No improvement: halve the previous step before applying it again.
        for (int j = 0; j < n_coef; ++j)
          step[j] /= 2;
      }

      for (int j = 0; j < n_coef; ++j)
        newCoef[j] = oldCoef[j] - step[j];
    }

    model.update(_key);
  } catch (Throwable t) {
    Job thisJob = DKV.getGet(_key);
    if (thisJob._state == JobState.CANCELLED) {
      Log.info("Job cancelled by user.");
    } else {
      t.printStackTrace();
      failed(t);
      throw t;
    }
  } finally {
    updateModelOutput();
    _parms.read_unlock_frames(CoxPH.this);
    Scope.exit();
    done();  // Job done!
  }
  tryComplete();
}
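// A minimal sketch (hypothetical helper, not part of the original class) of the update rule the
// loop above implements: a Newton-Raphson step for maximizing the partial log-likelihood. With
// gradient g and var_coef V (taken here to be the inverse of the observed information matrix),
// the proposed step is step = -V * g and the next candidate is newCoef = oldCoef - step, i.e.
// oldCoef + V * g; when the log-likelihood fails to improve, the previous step is halved instead.
private static double[] newtonCandidate(double[] oldCoef, double[][] varCoef, double[] gradient) {
  final int n = oldCoef.length;
  final double[] newCoef = new double[n];
  for (int j = 0; j < n; ++j) {
    double step = 0;
    for (int k = 0; k < n; ++k)
      step -= varCoef[j][k] * gradient[k];  // step = -(V * g)[j]
    newCoef[j] = oldCoef[j] - step;         // oldCoef[j] + (V * g)[j]
  }
  return newCoef;
}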