// Stopping criteria
boolean isDone(KMeansModel model, double[][] newCenters, double[][] oldCenters) {
  if (!isRunning()) return true;   // Stopped/cancelled

  // Stopped for running out of iterations
  if (model._output._iterations >= _parms._max_iterations) return true;

  // Compute average change in standardized cluster centers
  if (oldCenters == null) return false;   // No prior iteration, not stopping
  double average_change = 0;
  for (int clu = 0; clu < _parms._k; clu++)
    average_change += hex.genmodel.GenModel.KMeans_distance(oldCenters[clu], newCenters[clu], _isCats, null, null);
  average_change /= _parms._k;   // Average change per cluster

  model._output._avg_centroids_chg = ArrayUtils.copyAndFillOf(
      model._output._avg_centroids_chg, model._output._avg_centroids_chg.length + 1, average_change);
  model._output._training_time_ms = ArrayUtils.copyAndFillOf(
      model._output._training_time_ms, model._output._training_time_ms.length + 1, System.currentTimeMillis());
  return average_change < TOLERANCE;
}
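// A minimal, standalone sketch (not H2O code) of the convergence test above, assuming
// plain squared Euclidean distance on purely numeric, standardized centroids; the
// production code delegates to hex.genmodel.GenModel.KMeans_distance, which also
// handles categorical columns. Class and method names here are hypothetical.
final class CentroidShiftSketch {
  // Average per-cluster squared distance between matched old and new centroids.
  static double averageShift(double[][] oldCenters, double[][] newCenters) {
    double total = 0;
    for (int clu = 0; clu < oldCenters.length; clu++) {
      double d2 = 0;
      for (int col = 0; col < oldCenters[clu].length; col++) {
        double diff = newCenters[clu][col] - oldCenters[clu][col];
        d2 += diff * diff;
      }
      total += d2;
    }
    return total / oldCenters.length;
  }

  // Declare convergence once the average centroid movement drops below a tolerance.
  static boolean converged(double[][] oldCenters, double[][] newCenters, double tolerance) {
    return averageShift(oldCenters, newCenters) < tolerance;
  }
}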
protected void calcCounts(CoxPHModel model, final CoxPHTask coxMR) {
  CoxPHModel.CoxPHParameters p = model._parms;
  CoxPHModel.CoxPHOutput o = model._output;

  o.n_missing = o.n - coxMR.n;
  o.n = coxMR.n;
  for (int j = 0; j < o.x_mean_cat.length; j++)
    o.x_mean_cat[j] = coxMR.sumWeightedCatX[j] / coxMR.sumWeights;
  for (int j = 0; j < o.x_mean_num.length; j++)
    o.x_mean_num[j] = coxMR.dinfo()._normSub[j] + coxMR.sumWeightedNumX[j] / coxMR.sumWeights;
  System.arraycopy(coxMR.dinfo()._normSub, o.x_mean_num.length, o.mean_offset, 0, o.mean_offset.length);

  int nz = 0;
  for (int t = 0; t < coxMR.countEvents.length; ++t) {
    o.total_event += coxMR.countEvents[t];
    if (coxMR.sizeEvents[t] > 0 || coxMR.sizeCensored[t] > 0) {
      o.time[nz]     = o.min_time + t;
      o.n_risk[nz]   = coxMR.sizeRiskSet[t];
      o.n_event[nz]  = coxMR.sizeEvents[t];
      o.n_censor[nz] = coxMR.sizeCensored[t];
      nz++;
    }
  }
  if (p.start_column == null)
    for (int t = o.n_risk.length - 2; t >= 0; --t)
      o.n_risk[t] += o.n_risk[t + 1];
}
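// Standalone illustration (hypothetical numbers, not H2O code) of the suffix sum at the
// end of calcCounts: when no start column is given, n_risk[t] becomes the number of
// subjects still at risk at event time t, i.e. the count at t plus all later counts.
final class RiskSetSuffixSumSketch {
  public static void main(String[] args) {
    double[] n_risk = {3, 2, 4, 1};   // subjects whose stop time falls at each event time
    for (int t = n_risk.length - 2; t >= 0; --t)
      n_risk[t] += n_risk[t + 1];
    System.out.println(java.util.Arrays.toString(n_risk)); // [10.0, 7.0, 5.0, 1.0]
  }
}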
protected void initStats(final CoxPHModel model, final DataInfo dinfo) {
  CoxPHModel.CoxPHParameters p = model._parms;
  CoxPHModel.CoxPHOutput o = model._output;

  o.n = p.stop_column.length();
  o.data_info = dinfo;
  final int n_offsets = (p.offset_columns == null) ? 0 : p.offset_columns.length;
  final int n_coef = o.data_info.fullN() - n_offsets;
  final String[] coefNames = o.data_info.coefNames();
  o.coef_names = new String[n_coef];
  System.arraycopy(coefNames, 0, o.coef_names, 0, n_coef);
  o.coef         = MemoryManager.malloc8d(n_coef);
  o.exp_coef     = MemoryManager.malloc8d(n_coef);
  o.exp_neg_coef = MemoryManager.malloc8d(n_coef);
  o.se_coef      = MemoryManager.malloc8d(n_coef);
  o.z_coef       = MemoryManager.malloc8d(n_coef);
  o.gradient     = MemoryManager.malloc8d(n_coef);
  o.hessian      = malloc2DArray(n_coef, n_coef);
  o.var_coef     = malloc2DArray(n_coef, n_coef);
  o.x_mean_cat   = MemoryManager.malloc8d(n_coef - (o.data_info._nums - n_offsets));
  o.x_mean_num   = MemoryManager.malloc8d(o.data_info._nums - n_offsets);
  o.mean_offset  = MemoryManager.malloc8d(n_offsets);
  o.offset_names = new String[n_offsets];
  System.arraycopy(coefNames, n_coef, o.offset_names, 0, n_offsets);

  final Vec start_column = p.start_column;
  final Vec stop_column  = p.stop_column;
  o.min_time = p.start_column == null ? (long) stop_column.min() : (long) start_column.min() + 1;
  o.max_time = (long) stop_column.max();

  final int n_time = new Vec.CollectDomain().doAll(stop_column).domain().length;
  o.time         = MemoryManager.malloc8(n_time);
  o.n_risk       = MemoryManager.malloc8d(n_time);
  o.n_event      = MemoryManager.malloc8d(n_time);
  o.n_censor     = MemoryManager.malloc8d(n_time);
  o.cumhaz_0     = MemoryManager.malloc8d(n_time);
  o.var_cumhaz_1 = MemoryManager.malloc8d(n_time);
  o.var_cumhaz_2 = malloc2DArray(n_time, n_coef);
}
private static double[][] destandardize(double[][] centers, String[][] isCats, double[] means, double[] mults) {
  int K = centers.length;
  int N = centers[0].length;
  double[][] value = new double[K][N];
  for (int clu = 0; clu < K; clu++) {
    System.arraycopy(centers[clu], 0, value[clu], 0, N);
    if (mults != null) {   // Reverse standardization
      for (int col = 0; col < N; col++)
        if (isCats[col] == null)
          value[clu][col] = value[clu][col] / mults[col] + means[col];
    }
  }
  return value;
}
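// Small self-contained check (hypothetical values, not H2O code) that destandardize()
// inverts the forward transform x_std = (x - mean) * mult, i.e. x = x_std / mult + mean,
// where mult is assumed to be 1/sd for a standardized numeric column.
final class DestandardizeRoundTripSketch {
  public static void main(String[] args) {
    double mean = 5.0, sd = 2.0, mult = 1.0 / sd;
    double x = 9.0;
    double xStd  = (x - mean) * mult;      // forward standardization: 2.0
    double xBack = xStd / mult + mean;     // reverse, as in destandardize(): 9.0
    System.out.println(x + " -> " + xStd + " -> " + xBack);
  }
}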
public static class PCAParameters extends Model.Parameters {
  public DataInfo.TransformType _transform = DataInfo.TransformType.NONE; // Data transformation
  public Method _pca_method = Method.GramSVD;    // Method for computing PCA
  public int _k = 1;                             // Number of principal components
  public int _max_iterations = 1000;             // Max iterations
  public long _seed = System.nanoTime();         // RNG seed
  // public Key<Frame> _loading_key;
  public String _loading_name;                   // Loading only generated if pca_method = Power
  public boolean _keep_loading = true;
  public boolean _use_all_factor_levels = false; // When expanding categoricals, should first level be kept or dropped?
  public boolean _compute_metrics = true;        // Should a second pass be made through data to compute metrics?

  public enum Method {
    GramSVD, Power, GLRM
  }
}
/**
 * Train a Deep Learning neural net model
 *
 * @param model Input model (e.g., from initModel(), or from a previous training run)
 * @return Trained model
 */
public final DeepLearningModel trainModel(DeepLearningModel model) {
  Frame validScoreFrame = null;
  Frame train, trainScoreFrame;
  try {
    // if (checkpoint == null && !quiet_mode) logStart();
    // if checkpoint is given, some Job's params might be uninitialized (but the restarted model's parameters are correct)
    if (model == null) {
      model = DKV.get(dest()).get();
    }
    Log.info("Model category: " + (_parms._autoencoder ? "Auto-Encoder" : isClassifier() ? "Classification" : "Regression"));
    final long model_size = model.model_info().size();
    Log.info("Number of model parameters (weights/biases): " + String.format("%,d", model_size));
    model.write_lock(_job);
    _job.update(0, "Setting up training data...");
    final DeepLearningParameters mp = model.model_info().get_params();

    // temporary frames of the same "name" as the orig _train/_valid (asking the parameter's Key, not the actual frame)
    // Note: don't put into DKV or they would overwrite the _train/_valid frames!
    Frame tra_fr = new Frame(mp._train, _train.names(), _train.vecs());
    Frame val_fr = _valid != null ? new Frame(mp._valid, _valid.names(), _valid.vecs()) : null;

    train = tra_fr;
    if (model._output.isClassifier() && mp._balance_classes) {
      _job.update(0, "Balancing class distribution of training data...");
      float[] trainSamplingFactors = new float[train.lastVec().domain().length]; // leave initialized to 0 -> will be filled up below
      if (mp._class_sampling_factors != null) {
        if (mp._class_sampling_factors.length != train.lastVec().domain().length)
          throw new IllegalArgumentException("class_sampling_factors must have " + train.lastVec().domain().length + " elements");
        trainSamplingFactors = mp._class_sampling_factors.clone(); // clone: don't modify the original
      }
      train = sampleFrameStratified(
          train, train.lastVec(), train.vec(model._output.weightsName()), trainSamplingFactors,
          (long) (mp._max_after_balance_size * train.numRows()), mp._seed, true, false);
      Vec l = train.lastVec();
      Vec w = train.vec(model._output.weightsName());
      MRUtils.ClassDist cd = new MRUtils.ClassDist(l);
      model._output._modelClassDist = _weights != null ? cd.doAll(l, w).rel_dist() : cd.doAll(l).rel_dist();
    }
    model.training_rows = train.numRows();
    if (_weights != null && _weights.min() == 0 && _weights.max() == 1 && _weights.isInt()) {
      model.training_rows = Math.round(train.numRows() * _weights.mean());
      Log.warn("Not counting " + (train.numRows() - model.training_rows) + " rows with weight=0 towards an epoch.");
    }
    Log.info("One epoch corresponds to " + model.training_rows + " training data rows.");
    trainScoreFrame = sampleFrame(train, mp._score_training_samples, mp._seed); // training scoring dataset is always sampled uniformly from the training dataset
    if (trainScoreFrame != train) Scope.track(trainScoreFrame);

    if (!_parms._quiet_mode)
      Log.info("Number of chunks of the training data: " + train.anyVec().nChunks());
    if (val_fr != null) {
      model.validation_rows = val_fr.numRows();
      // validation scoring dataset can be sampled in multiple ways from the given validation dataset
      if (model._output.isClassifier() && mp._balance_classes
          && mp._score_validation_sampling == DeepLearningParameters.ClassSamplingMethod.Stratified) {
        _job.update(0, "Sampling validation data (stratified)...");
        validScoreFrame = sampleFrameStratified(
            val_fr, val_fr.lastVec(), val_fr.vec(model._output.weightsName()), null,
            mp._score_validation_samples > 0 ? mp._score_validation_samples : val_fr.numRows(),
            mp._seed + 1, false /* no oversampling */, false);
      } else {
        _job.update(0, "Sampling validation data...");
        validScoreFrame = sampleFrame(val_fr, mp._score_validation_samples, mp._seed + 1);
        if (validScoreFrame != val_fr) Scope.track(validScoreFrame);
      }
      if (!_parms._quiet_mode)
        Log.info("Number of chunks of the validation data: " + validScoreFrame.anyVec().nChunks());
    }

    // Set train_samples_per_iteration size (cannot be done earlier since this depends on whether stratified sampling is done)
    model.actual_train_samples_per_iteration = computeTrainSamplesPerIteration(mp, model.training_rows, model);
    // Determine whether shuffling is enforced
    if (mp._replicate_training_data
        && (model.actual_train_samples_per_iteration == model.training_rows * (mp._single_node_mode ? 1 : H2O.CLOUD.size()))
        && !mp._shuffle_training_data && H2O.CLOUD.size() > 1 && !mp._reproducible) {
      if (!mp._quiet_mode)
        Log.info("Enabling training data shuffling, because all nodes train on the full dataset (replicated training data).");
      mp._shuffle_training_data = true;
    }
    if (!mp._shuffle_training_data
        && model.actual_train_samples_per_iteration == model.training_rows
        && train.anyVec().nChunks() == 1) {
      if (!mp._quiet_mode)
        Log.info("Enabling training data shuffling to avoid training rows in the same order over and over (no Hogwild since there's only 1 chunk).");
      mp._shuffle_training_data = true;
    }

    // if (!mp._quiet_mode) Log.info("Initial model:\n" + model.model_info());
    long now = System.currentTimeMillis();
    model._timeLastIterationEnter = now;
    if (_parms._autoencoder) {
      _job.update(0, "Scoring null model of autoencoder...");
      if (!mp._quiet_mode) Log.info("Scoring the null model of the autoencoder.");
      model.doScoring(trainScoreFrame, validScoreFrame, _job._key, 0, false); // get the null model reconstruction error
    }
    // put the initial version of the model into DKV
    model.update(_job);
    model.total_setup_time_ms += now - _job.start_time();
    Log.info("Total setup time: " + PrettyPrint.msecs(model.total_setup_time_ms, true));
    Log.info("Starting to train the Deep Learning model.");
    _job.update(0, "Training...");

    // main loop
    for (; ; ) {
      model.iterations++;
      model.set_model_info(
          mp._epochs == 0 ? model.model_info() :
          H2O.CLOUD.size() > 1 && mp._replicate_training_data ?
              (mp._single_node_mode ?
                  new DeepLearningTask2(_job._key, train, model.model_info(), rowFraction(train, mp, model), model.iterations)
                      .doAll(Key.make(H2O.SELF)).model_info() :    // replicated data + single node mode
                  new DeepLearningTask2(_job._key, train, model.model_info(), rowFraction(train, mp, model), model.iterations)
                      .doAllNodes().model_info()) :                // replicated data + multi-node mode
              new DeepLearningTask(_job._key, model.model_info(), rowFraction(train, mp, model), model.iterations)
                  .doAll(train).model_info());                     // distributed data (always in multi-node mode)
      if (stop_requested() && !timeout()) break;                   // cancellation
      if (!model.doScoring(trainScoreFrame, validScoreFrame, _job._key, model.iterations, false))
        break;                                                     // finished training (or early stopping or convergence)
      if (timeout()) break;                                        // stop after scoring
    }

    // replace the model with the best model so far (if it's better)
    if (!stop_requested() && _parms._overwrite_with_best_model
        && model.actual_best_model_key != null && _parms._nfolds == 0) {
      DeepLearningModel best_model = DKV.getGet(model.actual_best_model_key);
      if (best_model != null && best_model.loss() < model.loss()
          && Arrays.equals(best_model.model_info().units, model.model_info().units)) {
        if (!_parms._quiet_mode)
          Log.info("Setting the model to be the best model so far (based on scoring history).");
        DeepLearningModelInfo mi = best_model.model_info().deep_clone();
        // Don't cheat - count full amount of training samples, since that's the amount of
        // training it took to train (without finding anything better)
        mi.set_processed_global(model.model_info().get_processed_global());
        mi.set_processed_local(model.model_info().get_processed_local());
        model.set_model_info(mi);
        model.update(_job);
        model.doScoring(trainScoreFrame, validScoreFrame, _job._key, model.iterations, true);
        assert (best_model.loss() == model.loss());
      }
    }
    // store coefficient names for future use
    // possibly change
    model.model_info().data_info().coefNames();

    if (!_parms._quiet_mode) {
      Log.info("==============================================================================================================================================================================");
      if (stop_requested()) {
        Log.info("Deep Learning model training was interrupted.");
      } else {
        Log.info("Finished training the Deep Learning model.");
        Log.info(model);
      }
      Log.info("==============================================================================================================================================================================");
    }
  } finally {
    if (model != null) {
      model.deleteElasticAverageModels();
      model.unlock(_job);
      if (model.actual_best_model_key != null) {
        assert (model.actual_best_model_key != model._key);
        DKV.remove(model.actual_best_model_key);
      }
    }
  }
  return model;
}
protected double doScoringAndSaveModel(boolean finalScoring, boolean oob, boolean build_tree_one_node) {
  double training_r2 = Double.NaN; // Training R^2 value, if computed
  long now = System.currentTimeMillis();
  if (_firstScore == 0) _firstScore = now;
  long sinceLastScore = now - _timeLastScoreStart;
  boolean updated = false;
  new ProgressUpdate("Built " + _model._output._ntrees + " trees so far (out of " + _parms._ntrees + ").").fork(_progressKey);

  // Now model already contains tid-trees in serialized form
  if (_parms._score_each_iteration || finalScoring ||
      (now - _firstScore < 4000) ||          // Score every time for 4 secs
      // Throttle scoring to keep the cost sane; limit to a 10% duty cycle & every 4 secs
      (sinceLastScore > 4000 &&              // Limit scoring updates to every 4sec
       (double) (_timeLastScoreEnd - _timeLastScoreStart) / sinceLastScore < 0.1)) {  // 10% duty cycle
    checkMemoryFootPrint();
    // If validation is specified we use a model for scoring, so we need to
    // update it! First we save model with trees (i.e., make them available
    // for scoring) and then update it with resulting error
    _model.update(_key);
    updated = true;

    Log.info("============================================================== ");
    SharedTreeModel.SharedTreeOutput out = _model._output;
    _timeLastScoreStart = now;
    // Score on training data
    new ProgressUpdate("Scoring the model.").fork(_progressKey);
    Score sc = new Score(this, true, oob, _model._output.getModelCategory()).doAll(train(), build_tree_one_node);
    ModelMetrics mm = sc.makeModelMetrics(_model, _parms.train());
    out._training_metrics = mm;
    if (oob) out._training_metrics._description = "Metrics reported on Out-Of-Bag training samples";
    out._scored_train[out._ntrees].fillFrom(mm);
    if (out._ntrees > 0) Log.info("Training " + out._scored_train[out._ntrees].toString());

    // Score again on validation data
    if (_parms._valid != null) {
      Score scv = new Score(this, false, false, _model._output.getModelCategory()).doAll(valid(), build_tree_one_node);
      ModelMetrics mmv = scv.makeModelMetrics(_model, _parms.valid());
      out._validation_metrics = mmv;
      out._scored_valid[out._ntrees].fillFrom(mmv);
      if (out._ntrees > 0) Log.info("Validation " + out._scored_valid[out._ntrees].toString());
    }

    if (out._ntrees > 0) { // Compute variable importances
      out._model_summary = createModelSummaryTable(out);
      out._scoring_history = createScoringHistoryTable(out);
      out._varimp = new hex.VarImp(_improvPerVar, out._names);
      out._variable_importances = hex.ModelMetrics.calcVarImp(out._varimp);
      Log.info(out._model_summary.toString());
      // For Debugging:
      // Log.info(out._scoring_history.toString());
      // Log.info(out._variable_importances.toString());
    }

    ConfusionMatrix cm = mm.cm();
    if (cm != null) {
      if (cm._cm.length <= _parms._max_confusion_matrix_size) {
        Log.info(cm.toASCII());
      } else {
        Log.info("Confusion Matrix is too large (max_confusion_matrix_size=" + _parms._max_confusion_matrix_size
            + "): " + _nclass + " classes.");
      }
    }
    _timeLastScoreEnd = System.currentTimeMillis();
  }

  // Double update - after either scoring or variable importance
  if (updated) _model.update(_key);
  return training_r2;
}
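// Standalone sketch (hypothetical names, not H2O code) of the scoring throttle used in the
// guard above: score unconditionally during the first 4 seconds, afterwards only when at
// least 4 seconds have passed since the last scoring pass and scoring consumed less than
// 10% of that interval (a 10% duty cycle).
final class ScoringThrottleSketch {
  static boolean shouldScore(long now, long firstScore, long lastScoreStart, long lastScoreEnd,
                             boolean scoreEachIteration, boolean finalScoring) {
    long sinceLastScore = now - lastScoreStart;
    return scoreEachIteration
        || finalScoring
        || (now - firstScore < 4000)
        || (sinceLastScore > 4000
            && (double) (lastScoreEnd - lastScoreStart) / sinceLastScore < 0.1);
  }
}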
public static void userMain(String[] args) {
  H2O.main(args);
  TestUtil.stall_till_cloudsize(NODES);
  List<Class> tests = new ArrayList<Class>();

  // Classes to test:
  // tests = JUnitRunner.all();

  // Neural Net - deprecated
  // tests.add(NeuralNetSpiralsTest.class); // compare NeuralNet vs reference
  // tests.add(NeuralNetIrisTest.class);    // compare NeuralNet vs reference

  // Chunk tests
  // tests.add(C0LChunkTest.class);
  // tests.add(C0DChunkTest.class);
  // tests.add(C1ChunkTest.class);
  // tests.add(C1NChunkTest.class);
  // tests.add(C1SChunkTest.class);
  // tests.add(C2ChunkTest.class);
  // tests.add(C2SChunkTest.class);
  // tests.add(C4ChunkTest.class);
  // tests.add(C4FChunkTest.class);
  // tests.add(C4SChunkTest.class);
  // tests.add(C8ChunkTest.class);
  // tests.add(C8DChunkTest.class);
  // tests.add(C16ChunkTest.class);
  // tests.add(CBSChunkTest.class);
  // tests.add(CX0ChunkTest.class);
  // tests.add(CXIChunkTest.class);
  // tests.add(CXDChunkTest.class);
  // tests.add(VecTest.class);

  // Deep Learning tests
  // tests.add(DeepLearningVsNeuralNet.class);                // only passes for NODES=1, not clear why
  // tests.add(DeepLearningAutoEncoderTest.class);            // test Deep Learning convergence
  // tests.add(DeepLearningAutoEncoderCategoricalTest.class); // test Deep Learning convergence
  // tests.add(DeepLearningSpiralsTest.class);                // test Deep Learning convergence
  // tests.add(DeepLearningIrisTest.Short.class);             // compare Deep Learning vs reference
  // tests.add(DeepLearningIrisTest.Long.class);              // compare Deep Learning vs reference
  tests.add(DeepLearningProstateTest.Short.class);            // test Deep Learning
  // tests.add(DeepLearningMissingTest.class);                // test Deep Learning
  // tests.add(DeepLearningProstateTest.Long.class);          // test Deep Learning
  // tests.add(NeuronsTest.class);                            // test Deep Learning
  // tests.add(MRUtilsTest.class);                            // test MR sampling/rebalancing
  // tests.add(DropoutTest.class);                            // test NN Dropout

  // tests.add(ParserTest2.class);
  // tests.add(ParserTest2.ParseAllSmalldata.class);
  // tests.add(KMeans2Test.class);
  // tests.add(KMeans2RandomTest.class);
  // tests.add(GLMRandomTest.Short.class);
  // tests.add(SpeeDRFTest.class);
  // tests.add(SpeeDRFTest2.class);
  //// tests.add(GLMTest2.class);
  // tests.add(DRFTest.class);
  // tests.add(DRFTest2.class);
  // tests.add(GBMTest.class);
  // tests.add(KMeans2Test.class);
  // tests.add(PCATest.class);
  // tests.add(NetworkTestTest.class);

  // Uncomment this to sleep here and use the browser.
  // try { Thread.sleep(10000000); } catch (Exception _) {}

  JUnitCore junit = new JUnitCore();
  junit.addListener(new LogListener());
  Result result = junit.run(tests.toArray(new Class[0]));
  if (result.getFailures().size() == 0) {
    Log.info("SUCCESS!");
    System.exit(0);
  } else {
    Log.info("FAIL!");
    System.exit(1);
  }
}
public static class GLRMParameters extends Model.Parameters {
  public String algoName() { return "GLRM"; }
  public String fullName() { return "Generalized Low Rank Modeling"; }
  public String javaName() { return GLRMModel.class.getName(); }

  public DataInfo.TransformType _transform = DataInfo.TransformType.NONE; // Data transformation (demean to compare with PCA)
  public int _k = 1;                                                      // Rank of resulting XY matrix
  public GLRM.Initialization _init = GLRM.Initialization.PlusPlus;        // Initialization of Y matrix
  public SVDParameters.Method _svd_method = SVDParameters.Method.Randomized; // SVD initialization method (for _init = SVD)
  public Key<Frame> _user_y;                                              // User-specified Y matrix (for _init = User)
  public Key<Frame> _user_x;                                              // User-specified X matrix (for _init = User)
  public boolean _expand_user_y = true;  // Should categorical columns in _user_y be expanded via one-hot encoding? (for _init = User)

  // Loss functions
  public Loss _loss = Loss.Quadratic;          // Default loss function for numeric cols
  public Loss _multi_loss = Loss.Categorical;  // Default loss function for categorical cols
  public int _period = 1;                      // Length of the period when _loss = Periodic
  public Loss[] _loss_by_col;                  // Override default loss function for specific columns
  public int[] _loss_by_col_idx;

  // Regularization functions
  public Regularizer _regularization_x = Regularizer.None; // Regularization function for X matrix
  public Regularizer _regularization_y = Regularizer.None; // Regularization function for Y matrix
  public double _gamma_x = 0;                  // Regularization weight on X matrix
  public double _gamma_y = 0;                  // Regularization weight on Y matrix

  // Optional parameters
  public int _max_iterations = 1000;           // Max iterations
  public int _max_updates = 2 * _max_iterations; // Max number of updates (X or Y)
  public double _init_step_size = 1.0;         // Initial step size (decrease until we hit min_step_size)
  public double _min_step_size = 1e-4;         // Min step size
  public long _seed = System.nanoTime();       // RNG seed

  @Override
  protected long nFoldSeed() { return _seed; }

  // public Key<Frame> _representation_key;    // Key to save X matrix
  public String _representation_name;
  public boolean _recover_svd = false;         // Recover singular values and eigenvectors of XY at the end?
  public boolean _impute_original = false;     // Reconstruct original training data by reversing _transform?
  public boolean _verbose = true;              // Log when objective increases each iteration?
  // Quadratic -> Gaussian distribution ~ exp(-(a-u)^2)
  // Absolute -> Laplace distribution ~ exp(-|a-u|)
  public enum Loss {
    Quadratic(true), Absolute(true), Huber(true), Poisson(true), Periodic(true), // One-dimensional loss (numeric)
    Logistic(true, true), Hinge(true, true),                                     // Boolean loss (categorical)
    Categorical(false), Ordinal(false);                                          // Multi-dimensional loss (categorical)

    private boolean forNumeric;
    private boolean forBinary;
    Loss(boolean forNumeric) { this(forNumeric, false); }
    Loss(boolean forNumeric, boolean forBinary) {
      this.forNumeric = forNumeric;
      this.forBinary = forBinary;
    }

    public boolean isForNumeric() { return forNumeric; }
    public boolean isForCategorical() { return !forNumeric; }
    public boolean isForBinary() { return forBinary; }
  }

  // Non-negative matrix factorization (NNMF): r_x = r_y = NonNegative
  // Orthogonal NNMF: r_x = OneSparse, r_y = NonNegative
  // K-means clustering: r_x = UnitOneSparse, r_y = 0 (\gamma_y = 0)
  // Quadratic mixture: r_x = Simplex, r_y = 0 (\gamma_y = 0)
  public enum Regularizer {
    None, Quadratic, L2, L1, NonNegative, OneSparse, UnitOneSparse, Simplex
  }

  // Check if all elements of _loss_by_col are equal to a specific loss function
  private final boolean allLossEquals(Loss loss) {
    if (null == _loss_by_col) return false;
    boolean res = true;
    for (int i = 0; i < _loss_by_col.length; i++) {
      if (_loss_by_col[i] != loss) {
        res = false;
        break;
      }
    }
    return res;
  }

  // Closed form solution only if quadratic loss, no regularization or quadratic regularization
  // (same for X and Y), and no missing values
  public final boolean hasClosedForm() {
    long na_cnt = 0;
    Frame train = _train.get();
    for (int i = 0; i < train.numCols(); i++)
      na_cnt += train.vec(i).naCnt();
    return hasClosedForm(na_cnt);
  }

  public final boolean hasClosedForm(long na_cnt) {
    boolean loss_quad = (null == _loss_by_col && _loss == Quadratic)
        || (null != _loss_by_col && allLossEquals(Quadratic)
            && (_loss_by_col.length == _train.get().numCols() || _loss == Quadratic));

    return na_cnt == 0
        && ((loss_quad
            && (_gamma_x == 0 || _regularization_x == Regularizer.None || _regularization_x == GLRMParameters.Regularizer.Quadratic)
            && (_gamma_y == 0 || _regularization_y == Regularizer.None || _regularization_y == GLRMParameters.Regularizer.Quadratic)));
  }

  // L(u,a): Loss function
  public final double loss(double u, double a) { return loss(u, a, _loss); }

  public final double loss(double u, double a, Loss loss) {
    assert loss.isForNumeric() : "Loss function " + loss + " not applicable to numerics";
    switch (loss) {
      case Quadratic:
        return (u - a) * (u - a);
      case Absolute:
        return Math.abs(u - a);
      case Huber:
        return Math.abs(u - a) <= 1 ? 0.5 * (u - a) * (u - a) : Math.abs(u - a) - 0.5;
      case Poisson:
        assert a >= 0 : "Poisson loss L(u,a) requires variable a >= 0";
        return Math.exp(u) + (a == 0 ? 0 : -a * u + a * Math.log(a) - a); // Since \lim_{a->0} a*log(a) = 0
      case Hinge:
        // return Math.max(1-a*u,0);
        return Math.max(1 - (a == 0 ? -u : u), 0); // Booleans are coded {0,1} instead of {-1,1}
      case Logistic:
        // return Math.log(1 + Math.exp(-a * u));
        return Math.log(1 + Math.exp(a == 0 ? u : -u)); // Booleans are coded {0,1} instead of {-1,1}
      case Periodic:
        return 1 - Math.cos((a - u) * (2 * Math.PI) / _period);
      default:
        throw new RuntimeException("Unknown loss function " + loss);
    }
  }

  // \grad_u L(u,a): Gradient of loss function with respect to u
  public final double lgrad(double u, double a) { return lgrad(u, a, _loss); }

  public final double lgrad(double u, double a, Loss loss) {
    assert loss.isForNumeric() : "Loss function " + loss + " not applicable to numerics";
    switch (loss) {
      case Quadratic:
        return 2 * (u - a);
      case Absolute:
        return Math.signum(u - a);
      case Huber:
        return Math.abs(u - a) <= 1 ? u - a : Math.signum(u - a);
      case Poisson:
        assert a >= 0 : "Poisson loss L(u,a) requires variable a >= 0";
        return Math.exp(u) - a;
      case Hinge:
        // return a*u <= 1 ? -a : 0;
        return a == 0 ? (-u <= 1 ? 1 : 0) : (u <= 1 ? -1 : 0); // Booleans are coded as {0,1} instead of {-1,1}
      case Logistic:
        // return -a/(1+Math.exp(a*u));
        return a == 0 ? 1 / (1 + Math.exp(-u)) : -1 / (1 + Math.exp(u)); // Booleans are coded as {0,1} instead of {-1,1}
      case Periodic:
        return ((2 * Math.PI) / _period) * Math.sin((a - u) * (2 * Math.PI) / _period);
      default:
        throw new RuntimeException("Unknown loss function " + loss);
    }
  }

  // L(u,a): Multidimensional loss function
  public final double mloss(double[] u, int a) { return mloss(u, a, _multi_loss); }

  public static double mloss(double[] u, int a, Loss multi_loss) {
    assert multi_loss.isForCategorical() : "Loss function " + multi_loss + " not applicable to categoricals";
    if (a < 0 || a > u.length - 1)
      throw new IllegalArgumentException("Index must be between 0 and " + String.valueOf(u.length - 1));

    double sum = 0;
    switch (multi_loss) {
      case Categorical:
        for (int i = 0; i < u.length; i++) sum += Math.max(1 + u[i], 0);
        sum += Math.max(1 - u[a], 0) - Math.max(1 + u[a], 0);
        return sum;
      case Ordinal:
        for (int i = 0; i < u.length - 1; i++) sum += Math.max(a > i ? 1 - u[i] : 1, 0);
        return sum;
      default:
        throw new RuntimeException("Unknown multidimensional loss function " + multi_loss);
    }
  }

  // \grad_u L(u,a): Gradient of multidimensional loss function with respect to u
  public final double[] mlgrad(double[] u, int a) { return mlgrad(u, a, _multi_loss); }

  public static double[] mlgrad(double[] u, int a, Loss multi_loss) {
    assert multi_loss.isForCategorical() : "Loss function " + multi_loss + " not applicable to categoricals";
    if (a < 0 || a > u.length - 1)
      throw new IllegalArgumentException("Index must be between 0 and " + String.valueOf(u.length - 1));

    double[] grad = new double[u.length];
    switch (multi_loss) {
      case Categorical:
        for (int i = 0; i < u.length; i++) grad[i] = (1 + u[i] > 0) ? 1 : 0;
        grad[a] = (1 - u[a] > 0) ? -1 : 0;
        return grad;
      case Ordinal:
        for (int i = 0; i < u.length - 1; i++) grad[i] = (a > i && 1 - u[i] > 0) ? -1 : 0;
        return grad;
      default:
        throw new RuntimeException("Unknown multidimensional loss function " + multi_loss);
    }
  }

  // r_i(x_i), r_j(y_j): Regularization function for single row x_i or column y_j
  public final double regularize_x(double[] u) { return regularize(u, _regularization_x); }
  public final double regularize_y(double[] u) { return regularize(u, _regularization_y); }

  public final double regularize(double[] u, Regularizer regularization) {
    if (u == null) return 0;
    double ureg = 0;
    switch (regularization) {
      case None:
        return 0;
      case Quadratic:
        for (int i = 0; i < u.length; i++) ureg += u[i] * u[i];
        return ureg;
      case L2:
        for (int i = 0; i < u.length; i++) ureg += u[i] * u[i];
        return Math.sqrt(ureg);
      case L1:
        for (int i = 0; i < u.length; i++) ureg += Math.abs(u[i]);
        return ureg;
      case NonNegative:
        for (int i = 0; i < u.length; i++) {
          if (u[i] < 0) return Double.POSITIVE_INFINITY;
        }
        return 0;
      case OneSparse:
        int card = 0;
        for (int i = 0; i < u.length; i++) {
          if (u[i] < 0) return Double.POSITIVE_INFINITY;
          else if (u[i] > 0) card++;
        }
        return card == 1 ? 0 : Double.POSITIVE_INFINITY;
      case UnitOneSparse:
        int ones = 0, zeros = 0;
        for (int i = 0; i < u.length; i++) {
          if (u[i] == 1) ones++;
          else if (u[i] == 0) zeros++;
          else return Double.POSITIVE_INFINITY;
        }
        return ones == 1 && zeros == u.length - 1 ? 0 : Double.POSITIVE_INFINITY;
      case Simplex:
        double sum = 0, absum = 0;
        for (int i = 0; i < u.length; i++) {
          if (u[i] < 0) return Double.POSITIVE_INFINITY;
          else {
            sum += u[i];
            absum += Math.abs(u[i]);
          }
        }
        return MathUtils.equalsWithinRecSumErr(sum, 1.0, u.length, absum) ? 0 : Double.POSITIVE_INFINITY;
      default:
        throw new RuntimeException("Unknown regularization function " + regularization);
    }
  }

  // \sum_i r_i(x_i): Sum of regularization function for all entries of X
  public final double regularize_x(double[][] u) { return regularize(u, _regularization_x); }
  public final double regularize_y(double[][] u) { return regularize(u, _regularization_y); }

  public final double regularize(double[][] u, Regularizer regularization) {
    if (u == null || regularization == Regularizer.None) return 0;
    double ureg = 0;
    for (int i = 0; i < u.length; i++) {
      ureg += regularize(u[i], regularization);
      if (Double.isInfinite(ureg)) return ureg;
    }
    return ureg;
  }

  // \prox_{\alpha_k*r}(u): Proximal gradient of (step size) * (regularization function) evaluated at vector u
  public final double[] rproxgrad_x(double[] u, double alpha, Random rand) { return rproxgrad(u, alpha, _gamma_x, _regularization_x, rand); }
  public final double[] rproxgrad_y(double[] u, double alpha, Random rand) { return rproxgrad(u, alpha, _gamma_y, _regularization_y, rand); }
  // public final double[] rproxgrad_x(double[] u, double alpha) { return rproxgrad(u, alpha, _gamma_x, _regularization_x, RandomUtils.getRNG(_seed)); }
  // public final double[] rproxgrad_y(double[] u, double alpha) { return rproxgrad(u, alpha, _gamma_y, _regularization_y, RandomUtils.getRNG(_seed)); }

  static double[] rproxgrad(double[] u, double alpha, double gamma, Regularizer regularization, Random rand) {
    if (u == null || alpha == 0 || gamma == 0) return u;
    double[] v = new double[u.length];
    switch (regularization) {
      case None:
        return u;
      case Quadratic:
        for (int i = 0; i < u.length; i++) v[i] = u[i] / (1 + 2 * alpha * gamma);
        return v;
      case L2:
        // Proof uses Moreau decomposition; see section 6.5.1 of Parikh and Boyd
        // https://web.stanford.edu/~boyd/papers/pdf/prox_algs.pdf
        double weight = 1 - alpha * gamma / ArrayUtils.l2norm(u);
        if (weight < 0) return v; // Zero vector
        for (int i = 0; i < u.length; i++) v[i] = weight * u[i];
        return v;
      case L1:
        for (int i = 0; i < u.length; i++)
          v[i] = Math.max(u[i] - alpha * gamma, 0) + Math.min(u[i] + alpha * gamma, 0);
        return v;
      case NonNegative:
        for (int i = 0; i < u.length; i++) v[i] = Math.max(u[i], 0);
        return v;
      case OneSparse:
        int idx = ArrayUtils.maxIndex(u, rand);
        v[idx] = u[idx] > 0 ? u[idx] : 1e-6;
        return v;
      case UnitOneSparse:
        idx = ArrayUtils.maxIndex(u, rand);
        v[idx] = 1;
        return v;
      case Simplex:
        // Proximal gradient algorithm by Chen and Ye in http://arxiv.org/pdf/1101.6081v2.pdf
        // 1) Sort input vector u in ascending order: u[1] <= ... <= u[n]
        int n = u.length;
        int[] idxs = new int[n];
        for (int i = 0; i < n; i++) idxs[i] = i;
        ArrayUtils.sort(idxs, u);

        // 2) Calculate cumulative sum of u in descending order
        // cumsum(u) = (..., u[n-2]+u[n-1]+u[n], u[n-1]+u[n], u[n])
        double[] ucsum = new double[n];
        ucsum[n - 1] = u[idxs[n - 1]];
        for (int i = n - 2; i >= 0; i--) ucsum[i] = ucsum[i + 1] + u[idxs[i]];

        // 3) Let t_i = (\sum_{j=i+1}^n u[j] - 1)/(n - i)
        //    For i = n-1,...,1, set optimal t* to first t_i >= u[i]
        double t = (ucsum[0] - 1) / n; // Default t* = (\sum_{j=1}^n u[j] - 1)/n
        for (int i = n - 1; i >= 1; i--) {
          double tmp = (ucsum[i] - 1) / (n - i);
          if (tmp >= u[idxs[i - 1]]) {
            t = tmp;
            break;
          }
        }

        // 4) Return max(u - t*, 0) as projection of u onto simplex
        double[] x = new double[u.length];
        for (int i = 0; i < u.length; i++) x[i] = Math.max(u[i] - t, 0);
        return x;
      default:
        throw new RuntimeException("Unknown regularization function " + regularization);
    }
  }

  // Project X,Y matrices into appropriate subspace so regularizer is finite. Used during initialization.
  public final double[] project_x(double[] u, Random rand) { return project(u, _regularization_x, rand); }
  public final double[] project_y(double[] u, Random rand) { return project(u, _regularization_y, rand); }

  public final double[] project(double[] u, Regularizer regularization, Random rand) {
    if (u == null) return u;
    switch (regularization) {
      // Domain is all real numbers
      case None:
      case Quadratic:
      case L2:
      case L1:
        return u;
      // Proximal operator of indicator function for a set C is (Euclidean) projection onto C
      case NonNegative:
      case OneSparse:
      case UnitOneSparse:
        return rproxgrad(u, 1, 1, regularization, rand);
      case Simplex:
        double reg = regularize(u, regularization); // Check if inside simplex before projecting since algo is complicated
        if (reg == 0) return u;
        return rproxgrad(u, 1, 1, regularization, rand);
      default:
        throw new RuntimeException("Unknown regularization function " + regularization);
    }
  }

  // \hat A_{i,j} = \argmin_a L_{i,j}(x_iy_j, a): Data imputation for real numeric values
  public final double impute(double u) { return impute(u, _loss); }

  public static double impute(double u, Loss loss) {
    assert loss.isForNumeric() : "Loss function " + loss + " not applicable to numerics";
    switch (loss) {
      case Quadratic:
      case Absolute:
      case Huber:
      case Periodic:
        return u;
      case Poisson:
        return Math.exp(u) - 1;
      case Hinge:
      case Logistic:
        return u > 0 ? 1 : 0; // Booleans are coded as {0,1} instead of {-1,1}
      default:
        throw new RuntimeException("Unknown loss function " + loss);
    }
  }

  // \hat A_{i,j} = \argmin_a L_{i,j}(x_iy_j, a): Data imputation for categorical values {0,1,2,...}
  // TODO: Is there a faster way to find the loss minimizer?
  public final int mimpute(double[] u) { return mimpute(u, _multi_loss); }

  public static int mimpute(double[] u, Loss multi_loss) {
    assert multi_loss.isForCategorical() : "Loss function " + multi_loss + " not applicable to categoricals";
    switch (multi_loss) {
      case Categorical:
      case Ordinal:
        double[] cand = new double[u.length];
        for (int a = 0; a < cand.length; a++) cand[a] = mloss(u, a, multi_loss);
        return ArrayUtils.minIndex(cand);
      default:
        throw new RuntimeException("Unknown multidimensional loss function " + multi_loss);
    }
  }
}
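// Worked example (hypothetical values, not H2O code) of the L1 branch of rproxgrad above:
// elementwise soft-thresholding with threshold alpha * gamma shrinks every entry toward
// zero and zeroes out anything whose magnitude is below the threshold.
final class SoftThresholdSketch {
  public static void main(String[] args) {
    double alpha = 0.5, gamma = 0.4;                  // threshold = alpha * gamma = 0.2
    double[] u = {0.7, -0.15, 0.1, -1.0};
    double[] v = new double[u.length];
    for (int i = 0; i < u.length; i++)
      v[i] = Math.max(u[i] - alpha * gamma, 0) + Math.min(u[i] + alpha * gamma, 0);
    // Approximately [0.5, 0.0, 0.0, -0.8] (up to floating-point rounding):
    // entries within +/-0.2 collapse to 0, the rest shrink toward 0 by 0.2.
    System.out.println(java.util.Arrays.toString(v));
  }
}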
@Override
protected void compute2() {
  CoxPHModel model = null;
  try {
    Scope.enter();
    _parms.read_lock_frames(CoxPH.this);
    init(true);
    applyScoringFrameSideEffects();

    // The model to be built
    model = new CoxPHModel(dest(), _parms, new CoxPHModel.CoxPHOutput(CoxPH.this));
    model.delete_and_lock(_key);

    applyTrainingFrameSideEffects();

    int nResponses = 1;
    boolean useAllFactorLevels = false;
    final DataInfo dinfo = new DataInfo(Key.make(), _modelBuilderTrain, null, nResponses, useAllFactorLevels,
        DataInfo.TransformType.DEMEAN, TransformType.NONE, true, false, false, false, false, false);
    initStats(model, dinfo);

    final int n_offsets = (model._parms.offset_columns == null) ? 0 : model._parms.offset_columns.length;
    final int n_coef = dinfo.fullN() - n_offsets;
    final double[] step    = MemoryManager.malloc8d(n_coef);
    final double[] oldCoef = MemoryManager.malloc8d(n_coef);
    final double[] newCoef = MemoryManager.malloc8d(n_coef);
    Arrays.fill(step, Double.NaN);
    Arrays.fill(oldCoef, Double.NaN);
    for (int j = 0; j < n_coef; ++j) newCoef[j] = model._parms.init;
    double oldLoglik = -Double.MAX_VALUE;
    final int n_time = (int) (model._output.max_time - model._output.min_time + 1);
    final boolean has_start_column   = (model._parms.start_column != null);
    final boolean has_weights_column = (model._parms.weights_column != null);

    for (int i = 0; i <= model._parms.iter_max; ++i) {
      model._output.iter = i;

      final CoxPHTask coxMR = new CoxPHTask(self(), dinfo, newCoef, model._output.min_time, n_time,
          n_offsets, has_start_column, has_weights_column).doAll(dinfo._adaptedFrame);

      final double newLoglik = calcLoglik(model, coxMR);
      if (newLoglik > oldLoglik) {
        if (i == 0) calcCounts(model, coxMR);

        calcModelStats(model, newCoef, newLoglik);
        calcCumhaz_0(model, coxMR);

        if (newLoglik == 0)
          model._output.lre = -Math.log10(Math.abs(oldLoglik - newLoglik));
        else
          model._output.lre = -Math.log10(Math.abs((oldLoglik - newLoglik) / newLoglik));
        if (model._output.lre >= model._parms.lre_min) break;

        Arrays.fill(step, 0);
        for (int j = 0; j < n_coef; ++j)
          for (int k = 0; k < n_coef; ++k)
            step[j] -= model._output.var_coef[j][k] * model._output.gradient[k];
        for (int j = 0; j < n_coef; ++j)
          if (Double.isNaN(step[j]) || Double.isInfinite(step[j])) break;

        oldLoglik = newLoglik;
        System.arraycopy(newCoef, 0, oldCoef, 0, oldCoef.length);
      } else {
        for (int j = 0; j < n_coef; ++j) step[j] /= 2;
      }

      for (int j = 0; j < n_coef; ++j) newCoef[j] = oldCoef[j] - step[j];
    }

    model.update(_key);
  } catch (Throwable t) {
    Job thisJob = DKV.getGet(_key);
    if (thisJob._state == JobState.CANCELLED) {
      Log.info("Job cancelled by user.");
    } else {
      t.printStackTrace();
      failed(t);
      throw t;
    }
  } finally {
    updateModelOutput();
    _parms.read_unlock_frames(CoxPH.this);
    Scope.exit();
    done(); // Job done!
  }
  tryComplete();
}
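// Minimal one-dimensional sketch (not H2O code) of the fitting loop above: Newton-Raphson
// on a concave log-likelihood with step-halving when the objective fails to improve, and a
// stopping rule on the log relative error (lre) of successive log-likelihoods. The toy
// log-likelihood f(x) = a*x - exp(x) is hypothetical and stands in for the Cox partial
// likelihood; its maximizer is x = log(a).
final class NewtonStepHalvingSketch {
  public static void main(String[] args) {
    final double a = 5.0;
    final double lre_min = 9.0;               // require ~9 matching significant digits
    double oldCoef = Double.NaN, newCoef = 0.0, step = Double.NaN;
    double oldLoglik = -Double.MAX_VALUE;
    for (int i = 0; i <= 20; ++i) {
      double newLoglik = a * newCoef - Math.exp(newCoef);
      if (newLoglik > oldLoglik) {            // step improved the objective: accept it
        double lre = -Math.log10(Math.abs((oldLoglik - newLoglik) / newLoglik));
        if (lre >= lre_min) break;
        double gradient = a - Math.exp(newCoef);
        double hessian = -Math.exp(newCoef);
        step = gradient / hessian;            // full Newton step, subtracted below
        oldLoglik = newLoglik;
        oldCoef = newCoef;
      } else {
        step /= 2;                            // objective got worse: retry with half the step
      }
      newCoef = oldCoef - step;
    }
    System.out.println("estimate = " + newCoef + ", log(a) = " + Math.log(a));
  }
}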