private static void run(Callable c, boolean read, int size) {
  // Count all i/o time from here, including all retry overheads
  long start_io_ms = System.currentTimeMillis();
  while (true) {
    try {
      long start_ns = System.nanoTime(); // Blocking i/o call timing - without counting repeats
      c.call();
      TimeLine.record_IOclose(start_ns, start_io_ms, read ? 1 : 0, size, Value.HDFS);
      break;
      // Explicitly ignore the following exceptions but fail on the rest of the IOExceptions
    } catch (EOFException e) {
      ignoreAndWait(e, false);
    } catch (SocketTimeoutException e) {
      ignoreAndWait(e, false);
    } catch (S3Exception e) {
      // Preserve S3Exception before IOException.
      // This is tricky code - we support different HDFS versions:
      // new versions declare S3Exception as an IOException,
      // but old versions (0.20.xxx) declare it as a RuntimeException,
      // so we have to catch it before IOException!
      ignoreAndWait(e, false);
    } catch (IOException e) {
      ignoreAndWait(e, true);
    } catch (Exception e) {
      throw Log.errRTExcept(e);
    }
  }
}
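// Usage sketch (hypothetical helper, not part of the original class): wraps a blocking
// HDFS read in the retry loop above, so transient EOFs and socket timeouts are retried
// while unexpected exceptions surface via Log.errRTExcept. Assumes a raw Callable, as in
// run()'s signature, and Hadoop's FSDataInputStream.
private static byte[] readChunkSketch(final FSDataInputStream is, final long offset, final int len) {
  final byte[] buf = new byte[len];
  run(
      new Callable() {
        @Override
        public Object call() throws Exception {
          is.readFully(offset, buf, 0, len); // blocking i/o; retried by run() on EOF/timeout
          return null;
        }
      },
      true /*read*/,
      len);
  return buf;
}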
protected void calcCounts(CoxPHModel model, final CoxPHTask coxMR) {
  CoxPHModel.CoxPHParameters p = model._parms;
  CoxPHModel.CoxPHOutput o = model._output;
  o.n_missing = o.n - coxMR.n;
  o.n = coxMR.n;
  for (int j = 0; j < o.x_mean_cat.length; j++)
    o.x_mean_cat[j] = coxMR.sumWeightedCatX[j] / coxMR.sumWeights;
  for (int j = 0; j < o.x_mean_num.length; j++)
    o.x_mean_num[j] = coxMR.dinfo()._normSub[j] + coxMR.sumWeightedNumX[j] / coxMR.sumWeights;
  System.arraycopy(
      coxMR.dinfo()._normSub, o.x_mean_num.length, o.mean_offset, 0, o.mean_offset.length);
  int nz = 0;
  for (int t = 0; t < coxMR.countEvents.length; ++t) {
    o.total_event += coxMR.countEvents[t];
    if (coxMR.sizeEvents[t] > 0 || coxMR.sizeCensored[t] > 0) {
      o.time[nz] = o.min_time + t;
      o.n_risk[nz] = coxMR.sizeRiskSet[t];
      o.n_event[nz] = coxMR.sizeEvents[t];
      o.n_censor[nz] = coxMR.sizeCensored[t];
      nz++;
    }
  }
  if (p.start_column == null)
    for (int t = o.n_risk.length - 2; t >= 0; --t) o.n_risk[t] += o.n_risk[t + 1];
}
// Stopping criteria
boolean isDone(KMeansModel model, double[][] newCenters, double[][] oldCenters) {
  if (!isRunning()) return true; // Stopped/cancelled
  // Stopped for running out of iterations
  if (model._output._iterations >= _parms._max_iterations) return true;
  // Compute average change in standardized cluster centers
  if (oldCenters == null) return false; // No prior iteration, not stopping
  double average_change = 0;
  for (int clu = 0; clu < _parms._k; clu++)
    average_change +=
        hex.genmodel.GenModel.KMeans_distance(
            oldCenters[clu], newCenters[clu], _isCats, null, null);
  average_change /= _parms._k; // Average change per cluster
  model._output._avg_centroids_chg =
      ArrayUtils.copyAndFillOf(
          model._output._avg_centroids_chg,
          model._output._avg_centroids_chg.length + 1,
          average_change);
  model._output._training_time_ms =
      ArrayUtils.copyAndFillOf(
          model._output._training_time_ms,
          model._output._training_time_ms.length + 1,
          System.currentTimeMillis());
  return average_change < TOLERANCE;
}
public NanoHTTPD.Response serve(NanoHTTPD server, Properties args, RequestType type) {
  // Needs to run even for help, in order to initialize the argument records
  String query = checkArguments(args, type);
  switch (type) {
    case help:
      return wrap(server, build(Response.done(serveHelp())));
    case json:
    case www:
      if (log()) {
        String log = getClass().getSimpleName();
        for (Object arg : args.keySet()) {
          String value = args.getProperty((String) arg);
          if (value != null && value.length() != 0) log += " " + arg + "=" + value;
        }
        Log.debug(Sys.HTTPD, log);
      }
      if (query != null) return wrap(server, query, type);
      long time = System.currentTimeMillis();
      Response response = serve();
      response.setTimeStart(time);
      if (type == RequestType.json) return wrap(server, response.toJson());
      return wrap(server, build(response));
    case debug:
      response = serve_debug();
      return wrap(server, build(response));
    case query:
      return wrap(server, query);
    default:
      throw new RuntimeException("Invalid request type " + type.toString());
  }
}
protected void initStats(final CoxPHModel model, final DataInfo dinfo) {
  CoxPHModel.CoxPHParameters p = model._parms;
  CoxPHModel.CoxPHOutput o = model._output;
  o.n = p.stop_column.length();
  o.data_info = dinfo;
  final int n_offsets = (p.offset_columns == null) ? 0 : p.offset_columns.length;
  final int n_coef = o.data_info.fullN() - n_offsets;
  final String[] coefNames = o.data_info.coefNames();
  o.coef_names = new String[n_coef];
  System.arraycopy(coefNames, 0, o.coef_names, 0, n_coef);
  o.coef = MemoryManager.malloc8d(n_coef);
  o.exp_coef = MemoryManager.malloc8d(n_coef);
  o.exp_neg_coef = MemoryManager.malloc8d(n_coef);
  o.se_coef = MemoryManager.malloc8d(n_coef);
  o.z_coef = MemoryManager.malloc8d(n_coef);
  o.gradient = MemoryManager.malloc8d(n_coef);
  o.hessian = malloc2DArray(n_coef, n_coef);
  o.var_coef = malloc2DArray(n_coef, n_coef);
  o.x_mean_cat = MemoryManager.malloc8d(n_coef - (o.data_info._nums - n_offsets));
  o.x_mean_num = MemoryManager.malloc8d(o.data_info._nums - n_offsets);
  o.mean_offset = MemoryManager.malloc8d(n_offsets);
  o.offset_names = new String[n_offsets];
  System.arraycopy(coefNames, n_coef, o.offset_names, 0, n_offsets);
  final Vec start_column = p.start_column;
  final Vec stop_column = p.stop_column;
  o.min_time =
      p.start_column == null ? (long) stop_column.min() : (long) start_column.min() + 1;
  o.max_time = (long) stop_column.max();
  final int n_time = new Vec.CollectDomain().doAll(stop_column).domain().length;
  o.time = MemoryManager.malloc8(n_time);
  o.n_risk = MemoryManager.malloc8d(n_time);
  o.n_event = MemoryManager.malloc8d(n_time);
  o.n_censor = MemoryManager.malloc8d(n_time);
  o.cumhaz_0 = MemoryManager.malloc8d(n_time);
  o.var_cumhaz_1 = MemoryManager.malloc8d(n_time);
  o.var_cumhaz_2 = malloc2DArray(n_time, n_coef);
}
private static double[][] destandardize(
    double[][] centers, String[][] isCats, double[] means, double[] mults) {
  int K = centers.length;
  int N = centers[0].length;
  double[][] value = new double[K][N];
  for (int clu = 0; clu < K; clu++) {
    System.arraycopy(centers[clu], 0, value[clu], 0, N);
    if (mults != null) { // Reverse standardization
      for (int col = 0; col < N; col++)
        if (isCats[col] == null) value[clu][col] = value[clu][col] / mults[col] + means[col];
    }
  }
  return value;
}
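// Worked example (illustrative numbers): standardization stores x' = (x - mean) * mult
// with mult = 1/sd, so destandardize reverses it as x = x' / mult + mean. With mean = 10
// and sd = 2 (mult = 0.5), a standardized center coordinate of 1.5 maps back to
// 1.5 / 0.5 + 10 = 13. Categorical columns (isCats[col] != null) are copied unchanged.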
private void updateClusters(
    int[] clusters, int count, long chunk, long numrows, int rpc, long updatedRow) {
  final int offset = (int) (updatedRow - (rpc * chunk));
  final Key chunkKey = ValueArray.getChunkKey(chunk, _job.dest());
  final int[] message;
  if (count == clusters.length) message = clusters;
  else {
    message = new int[count];
    System.arraycopy(clusters, 0, message, 0, message.length);
  }
  final int rows = ValueArray.rpc(chunk, rpc, numrows);
  new Atomic() {
    @Override
    public Value atomic(Value val) {
      assert val == null || val._key.equals(chunkKey);
      AutoBuffer b = new AutoBuffer(rows * ROW_SIZE);
      if (val != null) b._bb.put(val.memOrLoad());
      for (int i = 0; i < message.length; i++) b.put4((offset + i) * 4, message[i]);
      b.position(b.limit());
      return new Value(chunkKey, b.buf());
    }
  }.invoke(chunkKey);
}
/**
 * Start this task based on given top-level fork-join task representing job computation.
 *
 * @param fjtask top-level job computation task.
 * @return this job in {@link JobState#RUNNING} state
 * @see JobState
 * @see H2OCountedCompleter
 */
public /* FIXME: should be final or at least protected */ Job start(final H2OCountedCompleter fjtask) {
  assert state == JobState.CREATED : "Trying to run job which was already run?";
  assert fjtask != null : "Starting a job with null working task is not permitted! Fix your API";
  _fjtask = fjtask;
  start_time = System.currentTimeMillis();
  state = JobState.RUNNING;
  // Save the full state of the job
  UKV.put(self(), this);
  // Update job list
  new TAtomic<List>() {
    @Override
    public List atomic(List old) {
      if (old == null) old = new List();
      Key[] jobs = old._jobs;
      old._jobs = Arrays.copyOf(jobs, jobs.length + 1);
      old._jobs[jobs.length] = job_key;
      return old;
    }
  }.invoke(LIST);
  return this;
}
/**
 * Main constructor
 *
 * @param params Model parameters
 * @param dinfo Data Info
 * @param nClasses number of classes (1 for regression, 0 for autoencoder)
 * @param train User-given training data frame, prepared by AdaptTestTrain
 * @param valid User-specified validation data frame, prepared by AdaptTestTrain
 */
public DeepLearningModelInfo(
    final DeepLearningParameters params,
    final DataInfo dinfo,
    int nClasses,
    Frame train,
    Frame valid) {
  _classification = nClasses > 1;
  _train = train;
  _valid = valid;
  data_info = dinfo;
  parameters = (DeepLearningParameters) params.clone(); // make a copy, don't change model's parameters
  DeepLearningParameters.Sanity.modifyParms(
      parameters, parameters, nClasses); // sanitize the model_info's parameters
  final int num_input = dinfo.fullN();
  final int num_output =
      get_params()._autoencoder
          ? num_input
          : (_classification ? train.lastVec().cardinality() : 1);
  if (!get_params()._autoencoder) assert (num_output == nClasses);
  _saw_missing_cats = dinfo._cats > 0 ? new boolean[data_info._cats] : null;
  assert (num_input > 0);
  assert (num_output > 0);
  if (has_momenta() && adaDelta())
    throw new IllegalArgumentException(
        "Cannot have non-zero momentum and adaptive rate at the same time.");
  final int layers = get_params()._hidden.length;
  // units (# neurons for each layer)
  units = new int[layers + 2];
  if (get_params()._max_categorical_features <= Integer.MAX_VALUE - dinfo._nums)
    units[0] = Math.min(dinfo._nums + get_params()._max_categorical_features, num_input);
  else units[0] = num_input;
  System.arraycopy(get_params()._hidden, 0, units, 1, layers);
  units[layers + 1] = num_output;
  boolean printLevels = units[0] > 1000L;
  boolean warn = units[0] > 100000L;
  if (printLevels) {
    final String[][] domains = dinfo._adaptedFrame.domains();
    int[] levels = new int[domains.length];
    for (int i = 0; i < levels.length; ++i) {
      levels[i] = domains[i] != null ? domains[i].length : 0;
    }
    Arrays.sort(levels);
    if (warn) {
      Log.warn(
          "===================================================================================================================================");
      Log.warn(
          num_input
              + " input features"
              + (dinfo._cats > 0 ? " (after categorical one-hot encoding)" : "")
              + ". Can be slow and require a lot of memory.");
    }
    if (levels[levels.length - 1] > 0) {
      int levelcutoff = levels[levels.length - 1 - Math.min(10, levels.length - 1)];
      int count = 0;
      for (int i = 0;
          i < dinfo._adaptedFrame.numCols() - (get_params()._autoencoder ? 0 : 1) && count < 10;
          ++i) {
        if (dinfo._adaptedFrame.domains()[i] != null
            && dinfo._adaptedFrame.domains()[i].length >= levelcutoff) {
          if (warn) {
            Log.warn(
                "Categorical feature '"
                    + dinfo._adaptedFrame._names[i]
                    + "' has cardinality "
                    + dinfo._adaptedFrame.domains()[i].length
                    + ".");
          } else {
            Log.info(
                "Categorical feature '"
                    + dinfo._adaptedFrame._names[i]
                    + "' has cardinality "
                    + dinfo._adaptedFrame.domains()[i].length
                    + ".");
          }
        }
        count++;
      }
    }
    if (warn) {
      Log.warn("Suggestions:");
      Log.warn(" *) Limit the size of the first hidden layer");
      if (dinfo._cats > 0) {
        Log.warn(
            " *) Limit the total number of one-hot encoded features with the parameter 'max_categorical_features'");
        Log.warn(
            " *) Run h2o.interaction(...,pairwise=F) on high-cardinality categorical columns to limit the factor count, see http://learn.h2o.ai");
      }
      Log.warn(
          "===================================================================================================================================");
    }
  }
  // weights (to connect layers)
  dense_row_weights = new Storage.DenseRowMatrix[layers + 1];
  dense_col_weights = new Storage.DenseColMatrix[layers + 1];
  // decide format of weight matrices: row-major or col-major
  if (get_params()._col_major)
    dense_col_weights[0] = new Storage.DenseColMatrix(units[1], units[0]);
  else dense_row_weights[0] = new Storage.DenseRowMatrix(units[1], units[0]);
  for (int i = 1; i <= layers; ++i)
    dense_row_weights[i] = new Storage.DenseRowMatrix(units[i + 1] /*rows*/, units[i] /*cols*/);
  // biases (only for hidden layers and output layer)
  biases = new Storage.DenseVector[layers + 1];
  for (int i = 0; i <= layers; ++i) biases[i] = new Storage.DenseVector(units[i + 1]);
  // average activation (only for hidden layers)
  if (get_params()._autoencoder && get_params()._sparsity_beta > 0) {
    avg_activations = new Storage.DenseVector[layers];
    mean_a = new float[layers];
    for (int i = 0; i < layers; ++i) avg_activations[i] = new Storage.DenseVector(units[i + 1]);
  }
  allocateHelperArrays();
  // for diagnostics
  mean_rate = new float[units.length];
  rms_rate = new float[units.length];
  mean_bias = new float[units.length];
  rms_bias = new float[units.length];
  mean_weight = new float[units.length];
  rms_weight = new float[units.length];
}
protected double doScoringAndSaveModel(
    boolean finalScoring, boolean oob, boolean build_tree_one_node) {
  double training_r2 = Double.NaN; // Training R^2 value, if computed
  long now = System.currentTimeMillis();
  if (_firstScore == 0) _firstScore = now;
  long sinceLastScore = now - _timeLastScoreStart;
  boolean updated = false;
  new ProgressUpdate(
          "Built " + _model._output._ntrees + " trees so far (out of " + _parms._ntrees + ").")
      .fork(_progressKey);
  // Now model already contains tid-trees in serialized form
  if (_parms._score_each_iteration
      || finalScoring
      || (now - _firstScore < 4000) // Score every time for 4 secs
      // Throttle scoring to keep the cost sane; limit to a 10% duty cycle & every 4 secs
      || (sinceLastScore > 4000 // Limit scoring updates to every 4sec
          && (double) (_timeLastScoreEnd - _timeLastScoreStart) / sinceLastScore
              < 0.1)) { // 10% duty cycle
    checkMemoryFootPrint();
    // If validation is specified we use a model for scoring, so we need to
    // update it! First we save model with trees (i.e., make them available
    // for scoring) and then update it with resulting error
    _model.update(_key);
    updated = true;
    Log.info("============================================================== ");
    SharedTreeModel.SharedTreeOutput out = _model._output;
    _timeLastScoreStart = now;
    // Score on training data
    new ProgressUpdate("Scoring the model.").fork(_progressKey);
    Score sc =
        new Score(this, true, oob, _model._output.getModelCategory())
            .doAll(train(), build_tree_one_node);
    ModelMetrics mm = sc.makeModelMetrics(_model, _parms.train());
    out._training_metrics = mm;
    if (oob) out._training_metrics._description = "Metrics reported on Out-Of-Bag training samples";
    out._scored_train[out._ntrees].fillFrom(mm);
    if (out._ntrees > 0) Log.info("Training " + out._scored_train[out._ntrees].toString());
    // Score again on validation data
    if (_parms._valid != null) {
      Score scv =
          new Score(this, false, false, _model._output.getModelCategory())
              .doAll(valid(), build_tree_one_node);
      ModelMetrics mmv = scv.makeModelMetrics(_model, _parms.valid());
      out._validation_metrics = mmv;
      out._scored_valid[out._ntrees].fillFrom(mmv);
      if (out._ntrees > 0) Log.info("Validation " + out._scored_valid[out._ntrees].toString());
    }
    if (out._ntrees > 0) {
      // Compute variable importances
      out._model_summary = createModelSummaryTable(out);
      out._scoring_history = createScoringHistoryTable(out);
      out._varimp = new hex.VarImp(_improvPerVar, out._names);
      out._variable_importances = hex.ModelMetrics.calcVarImp(out._varimp);
      Log.info(out._model_summary.toString());
      // For Debugging:
      // Log.info(out._scoring_history.toString());
      // Log.info(out._variable_importances.toString());
    }
    ConfusionMatrix cm = mm.cm();
    if (cm != null) {
      if (cm._cm.length <= _parms._max_confusion_matrix_size) {
        Log.info(cm.toASCII());
      } else {
        Log.info(
            "Confusion Matrix is too large (max_confusion_matrix_size="
                + _parms._max_confusion_matrix_size
                + "): "
                + _nclass
                + " classes.");
      }
    }
    _timeLastScoreEnd = System.currentTimeMillis();
  }
  // Double update - after either scoring or variable importance
  if (updated) _model.update(_key);
  return training_r2;
}
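// Worked duty-cycle example (illustrative numbers): if the previous scoring pass took
// _timeLastScoreEnd - _timeLastScoreStart = 1s and sinceLastScore = 12s have elapsed since
// it started, the scoring fraction is 1/12 ~ 8% < 10% and more than 4s have passed, so the
// model is scored again; at 1s of scoring per 8s elapsed (12.5%) scoring is skipped until
// the duty cycle drops back below 10%.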
public static class GLRMParameters extends Model.Parameters {
  public String algoName() {
    return "GLRM";
  }

  public String fullName() {
    return "Generalized Low Rank Modeling";
  }

  public String javaName() {
    return GLRMModel.class.getName();
  }

  public DataInfo.TransformType _transform =
      DataInfo.TransformType.NONE; // Data transformation (demean to compare with PCA)
  public int _k = 1; // Rank of resulting XY matrix
  public GLRM.Initialization _init = GLRM.Initialization.PlusPlus; // Initialization of Y matrix
  public SVDParameters.Method _svd_method =
      SVDParameters.Method.Randomized; // SVD initialization method (for _init = SVD)
  public Key<Frame> _user_y; // User-specified Y matrix (for _init = User)
  public Key<Frame> _user_x; // User-specified X matrix (for _init = User)
  public boolean _expand_user_y =
      true; // Should categorical columns in _user_y be expanded via one-hot encoding? (for _init = User)

  // Loss functions
  public Loss _loss = Loss.Quadratic; // Default loss function for numeric cols
  public Loss _multi_loss = Loss.Categorical; // Default loss function for categorical cols
  public int _period = 1; // Length of the period when _loss = Periodic
  public Loss[] _loss_by_col; // Override default loss function for specific columns
  public int[] _loss_by_col_idx;

  // Regularization functions
  public Regularizer _regularization_x = Regularizer.None; // Regularization function for X matrix
  public Regularizer _regularization_y = Regularizer.None; // Regularization function for Y matrix
  public double _gamma_x = 0; // Regularization weight on X matrix
  public double _gamma_y = 0; // Regularization weight on Y matrix

  // Optional parameters
  public int _max_iterations = 1000; // Max iterations
  public int _max_updates = 2 * _max_iterations; // Max number of updates (X or Y)
  public double _init_step_size = 1.0; // Initial step size (decrease until we hit min_step_size)
  public double _min_step_size = 1e-4; // Min step size
  public long _seed = System.nanoTime(); // RNG seed

  @Override
  protected long nFoldSeed() {
    return _seed;
  }

  // public Key<Frame> _representation_key; // Key to save X matrix
  public String _representation_name;
  public boolean _recover_svd = false; // Recover singular values and eigenvectors of XY at the end?
  public boolean _impute_original = false; // Reconstruct original training data by reversing _transform?
  public boolean _verbose = true; // Log when objective increases each iteration?
  // Quadratic -> Gaussian distribution ~ exp(-(a-u)^2)
  // Absolute -> Laplace distribution ~ exp(-|a-u|)
  public enum Loss {
    Quadratic(true),
    Absolute(true),
    Huber(true),
    Poisson(true),
    Periodic(true), // One-dimensional loss (numeric)
    Logistic(true, true),
    Hinge(true, true), // Boolean loss (categorical)
    Categorical(false),
    Ordinal(false); // Multi-dimensional loss (categorical)

    private boolean forNumeric;
    private boolean forBinary;

    Loss(boolean forNumeric) {
      this(forNumeric, false);
    }

    Loss(boolean forNumeric, boolean forBinary) {
      this.forNumeric = forNumeric;
      this.forBinary = forBinary;
    }

    public boolean isForNumeric() {
      return forNumeric;
    }

    public boolean isForCategorical() {
      return !forNumeric;
    }

    public boolean isForBinary() {
      return forBinary;
    }
  }

  // Non-negative matrix factorization (NNMF): r_x = r_y = NonNegative
  // Orthogonal NNMF: r_x = OneSparse, r_y = NonNegative
  // K-means clustering: r_x = UnitOneSparse, r_y = 0 (\gamma_y = 0)
  // Quadratic mixture: r_x = Simplex, r_y = 0 (\gamma_y = 0)
  public enum Regularizer {
    None,
    Quadratic,
    L2,
    L1,
    NonNegative,
    OneSparse,
    UnitOneSparse,
    Simplex
  }

  // Check if all elements of _loss_by_col are equal to a specific loss function
  private final boolean allLossEquals(Loss loss) {
    if (null == _loss_by_col) return false;
    boolean res = true;
    for (int i = 0; i < _loss_by_col.length; i++) {
      if (_loss_by_col[i] != loss) {
        res = false;
        break;
      }
    }
    return res;
  }

  // Closed form solution only if quadratic loss, no regularization or quadratic regularization
  // (same for X and Y), and no missing values
  public final boolean hasClosedForm() {
    long na_cnt = 0;
    Frame train = _train.get();
    for (int i = 0; i < train.numCols(); i++) na_cnt += train.vec(i).naCnt();
    return hasClosedForm(na_cnt);
  }

  public final boolean hasClosedForm(long na_cnt) {
    boolean loss_quad =
        (null == _loss_by_col && _loss == Quadratic)
            || (null != _loss_by_col
                && allLossEquals(Quadratic)
                && (_loss_by_col.length == _train.get().numCols() || _loss == Quadratic));
    return na_cnt == 0
        && ((loss_quad
            && (_gamma_x == 0
                || _regularization_x == Regularizer.None
                || _regularization_x == GLRMParameters.Regularizer.Quadratic)
            && (_gamma_y == 0
                || _regularization_y == Regularizer.None
                || _regularization_y == GLRMParameters.Regularizer.Quadratic)));
  }

  // L(u,a): Loss function
  public final double loss(double u, double a) {
    return loss(u, a, _loss);
  }

  public final double loss(double u, double a, Loss loss) {
    assert loss.isForNumeric() : "Loss function " + loss + " not applicable to numerics";
    switch (loss) {
      case Quadratic:
        return (u - a) * (u - a);
      case Absolute:
        return Math.abs(u - a);
      case Huber:
        return Math.abs(u - a) <= 1 ? 0.5 * (u - a) * (u - a) : Math.abs(u - a) - 0.5;
      case Poisson:
        assert a >= 0 : "Poisson loss L(u,a) requires variable a >= 0";
        return Math.exp(u)
            + (a == 0 ? 0 : -a * u + a * Math.log(a) - a); // Since \lim_{a->0} a*log(a) = 0
      case Hinge:
        // return Math.max(1-a*u,0);
        return Math.max(1 - (a == 0 ? -u : u), 0); // Booleans are coded {0,1} instead of {-1,1}
      case Logistic:
        // return Math.log(1 + Math.exp(-a * u));
        return Math.log(1 + Math.exp(a == 0 ? u : -u)); // Booleans are coded {0,1} instead of {-1,1}
      case Periodic:
        return 1 - Math.cos((a - u) * (2 * Math.PI) / _period);
      default:
        throw new RuntimeException("Unknown loss function " + loss);
    }
  }

  // \grad_u L(u,a): Gradient of loss function with respect to u
  public final double lgrad(double u, double a) {
    return lgrad(u, a, _loss);
  }

  public final double lgrad(double u, double a, Loss loss) {
    assert loss.isForNumeric() : "Loss function " + loss + " not applicable to numerics";
    switch (loss) {
      case Quadratic:
        return 2 * (u - a);
      case Absolute:
        return Math.signum(u - a);
      case Huber:
        return Math.abs(u - a) <= 1 ? u - a : Math.signum(u - a);
      case Poisson:
        assert a >= 0 : "Poisson loss L(u,a) requires variable a >= 0";
        return Math.exp(u) - a;
      case Hinge:
        // return a*u <= 1 ? -a : 0;
        return a == 0
            ? (-u <= 1 ? 1 : 0)
            : (u <= 1 ? -1 : 0); // Booleans are coded as {0,1} instead of {-1,1}
      case Logistic:
        // return -a/(1+Math.exp(a*u));
        return a == 0
            ? 1 / (1 + Math.exp(-u))
            : -1 / (1 + Math.exp(u)); // Booleans are coded as {0,1} instead of {-1,1}
      case Periodic:
        return ((2 * Math.PI) / _period) * Math.sin((a - u) * (2 * Math.PI) / _period);
      default:
        throw new RuntimeException("Unknown loss function " + loss);
    }
  }

  // L(u,a): Multidimensional loss function
  public final double mloss(double[] u, int a) {
    return mloss(u, a, _multi_loss);
  }

  public static double mloss(double[] u, int a, Loss multi_loss) {
    assert multi_loss.isForCategorical()
        : "Loss function " + multi_loss + " not applicable to categoricals";
    if (a < 0 || a > u.length - 1)
      throw new IllegalArgumentException(
          "Index must be between 0 and " + String.valueOf(u.length - 1));
    double sum = 0;
    switch (multi_loss) {
      case Categorical:
        for (int i = 0; i < u.length; i++) sum += Math.max(1 + u[i], 0);
        sum += Math.max(1 - u[a], 0) - Math.max(1 + u[a], 0);
        return sum;
      case Ordinal:
        for (int i = 0; i < u.length - 1; i++) sum += Math.max(a > i ? 1 - u[i] : 1, 0);
        return sum;
      default:
        throw new RuntimeException("Unknown multidimensional loss function " + multi_loss);
    }
  }

  // \grad_u L(u,a): Gradient of multidimensional loss function with respect to u
  public final double[] mlgrad(double[] u, int a) {
    return mlgrad(u, a, _multi_loss);
  }

  public static double[] mlgrad(double[] u, int a, Loss multi_loss) {
    assert multi_loss.isForCategorical()
        : "Loss function " + multi_loss + " not applicable to categoricals";
    if (a < 0 || a > u.length - 1)
      throw new IllegalArgumentException(
          "Index must be between 0 and " + String.valueOf(u.length - 1));
    double[] grad = new double[u.length];
    switch (multi_loss) {
      case Categorical:
        for (int i = 0; i < u.length; i++) grad[i] = (1 + u[i] > 0) ? 1 : 0;
        grad[a] = (1 - u[a] > 0) ? -1 : 0;
        return grad;
      case Ordinal:
        for (int i = 0; i < u.length - 1; i++) grad[i] = (a > i && 1 - u[i] > 0) ? -1 : 0;
        return grad;
      default:
        throw new RuntimeException("Unknown multidimensional loss function " + multi_loss);
    }
  }

  // r_i(x_i), r_j(y_j): Regularization function for single row x_i or column y_j
  public final double regularize_x(double[] u) {
    return regularize(u, _regularization_x);
  }

  public final double regularize_y(double[] u) {
    return regularize(u, _regularization_y);
  }

  public final double regularize(double[] u, Regularizer regularization) {
    if (u == null) return 0;
    double ureg = 0;
    switch (regularization) {
      case None:
        return 0;
      case Quadratic:
        for (int i = 0; i < u.length; i++) ureg += u[i] * u[i];
        return ureg;
      case L2:
        for (int i = 0; i < u.length; i++) ureg += u[i] * u[i];
        return Math.sqrt(ureg);
      case L1:
        for (int i = 0; i < u.length; i++) ureg += Math.abs(u[i]);
        return ureg;
      case NonNegative:
        for (int i = 0; i < u.length; i++) {
          if (u[i] < 0) return Double.POSITIVE_INFINITY;
        }
        return 0;
      case OneSparse:
        int card = 0;
        for (int i = 0; i < u.length; i++) {
          if (u[i] < 0) return Double.POSITIVE_INFINITY;
          else if (u[i] > 0) card++;
        }
        return card == 1 ? 0 : Double.POSITIVE_INFINITY;
      case UnitOneSparse:
        int ones = 0, zeros = 0;
        for (int i = 0; i < u.length; i++) {
          if (u[i] == 1) ones++;
          else if (u[i] == 0) zeros++;
          else return Double.POSITIVE_INFINITY;
        }
        return ones == 1 && zeros == u.length - 1 ? 0 : Double.POSITIVE_INFINITY;
      case Simplex:
        double sum = 0, absum = 0;
        for (int i = 0; i < u.length; i++) {
          if (u[i] < 0) return Double.POSITIVE_INFINITY;
          else {
            sum += u[i];
            absum += Math.abs(u[i]);
          }
        }
        return MathUtils.equalsWithinRecSumErr(sum, 1.0, u.length, absum)
            ? 0
            : Double.POSITIVE_INFINITY;
      default:
        throw new RuntimeException("Unknown regularization function " + regularization);
    }
  }

  // \sum_i r_i(x_i): Sum of regularization function for all entries of X
  public final double regularize_x(double[][] u) {
    return regularize(u, _regularization_x);
  }

  public final double regularize_y(double[][] u) {
    return regularize(u, _regularization_y);
  }

  public final double regularize(double[][] u, Regularizer regularization) {
    if (u == null || regularization == Regularizer.None) return 0;
    double ureg = 0;
    for (int i = 0; i < u.length; i++) {
      ureg += regularize(u[i], regularization);
      if (Double.isInfinite(ureg)) return ureg;
    }
    return ureg;
  }

  // \prox_{\alpha_k*r}(u): Proximal gradient of (step size) * (regularization function)
  // evaluated at vector u
  public final double[] rproxgrad_x(double[] u, double alpha, Random rand) {
    return rproxgrad(u, alpha, _gamma_x, _regularization_x, rand);
  }

  public final double[] rproxgrad_y(double[] u, double alpha, Random rand) {
    return rproxgrad(u, alpha, _gamma_y, _regularization_y, rand);
  }

  // public final double[] rproxgrad_x(double[] u, double alpha) { return rproxgrad(u, alpha,
  // _gamma_x, _regularization_x, RandomUtils.getRNG(_seed)); }
  // public final double[] rproxgrad_y(double[] u, double alpha) { return rproxgrad(u, alpha,
  // _gamma_y, _regularization_y, RandomUtils.getRNG(_seed)); }

  static double[] rproxgrad(
      double[] u, double alpha, double gamma, Regularizer regularization, Random rand) {
    if (u == null || alpha == 0 || gamma == 0) return u;
    double[] v = new double[u.length];
    switch (regularization) {
      case None:
        return u;
      case Quadratic:
        for (int i = 0; i < u.length; i++) v[i] = u[i] / (1 + 2 * alpha * gamma);
        return v;
      case L2:
        // Proof uses Moreau decomposition; see section 6.5.1 of Parikh and Boyd
        // https://web.stanford.edu/~boyd/papers/pdf/prox_algs.pdf
        double weight = 1 - alpha * gamma / ArrayUtils.l2norm(u);
        if (weight < 0) return v; // Zero vector
        for (int i = 0; i < u.length; i++) v[i] = weight * u[i];
        return v;
      case L1:
        for (int i = 0; i < u.length; i++)
          v[i] = Math.max(u[i] - alpha * gamma, 0) + Math.min(u[i] + alpha * gamma, 0);
        return v;
      case NonNegative:
        for (int i = 0; i < u.length; i++) v[i] = Math.max(u[i], 0);
        return v;
      case OneSparse:
        int idx = ArrayUtils.maxIndex(u, rand);
        v[idx] = u[idx] > 0 ? u[idx] : 1e-6;
        return v;
      case UnitOneSparse:
        idx = ArrayUtils.maxIndex(u, rand);
        v[idx] = 1;
        return v;
      case Simplex:
        // Proximal gradient algorithm by Chen and Ye in http://arxiv.org/pdf/1101.6081v2.pdf
        // 1) Sort input vector u in ascending order: u[1] <= ... <= u[n]
        int n = u.length;
        int[] idxs = new int[n];
        for (int i = 0; i < n; i++) idxs[i] = i;
        ArrayUtils.sort(idxs, u);
        // 2) Calculate cumulative sum of u in descending order
        // cumsum(u) = (..., u[n-2]+u[n-1]+u[n], u[n-1]+u[n], u[n])
        double[] ucsum = new double[n];
        ucsum[n - 1] = u[idxs[n - 1]];
        for (int i = n - 2; i >= 0; i--) ucsum[i] = ucsum[i + 1] + u[idxs[i]];
        // 3) Let t_i = (\sum_{j=i+1}^n u[j] - 1)/(n - i)
        // For i = n-1,...,1, set optimal t* to first t_i >= u[i]
        double t = (ucsum[0] - 1) / n; // Default t* = (\sum_{j=1}^n u[j] - 1)/n
        for (int i = n - 1; i >= 1; i--) {
          double tmp = (ucsum[i] - 1) / (n - i);
          if (tmp >= u[idxs[i - 1]]) {
            t = tmp;
            break;
          }
        }
        // 4) Return max(u - t*, 0) as projection of u onto simplex
        double[] x = new double[u.length];
        for (int i = 0; i < u.length; i++) x[i] = Math.max(u[i] - t, 0);
        return x;
      default:
        throw new RuntimeException("Unknown regularization function " + regularization);
    }
  }

  // Project X,Y matrices into appropriate subspace so regularizer is finite. Used during
  // initialization.
  public final double[] project_x(double[] u, Random rand) {
    return project(u, _regularization_x, rand);
  }

  public final double[] project_y(double[] u, Random rand) {
    return project(u, _regularization_y, rand);
  }

  public final double[] project(double[] u, Regularizer regularization, Random rand) {
    if (u == null) return u;
    switch (regularization) {
      // Domain is all real numbers
      case None:
      case Quadratic:
      case L2:
      case L1:
        return u;
      // Proximal operator of indicator function for a set C is (Euclidean) projection onto C
      case NonNegative:
      case OneSparse:
      case UnitOneSparse:
        return rproxgrad(u, 1, 1, regularization, rand);
      case Simplex:
        double reg =
            regularize(u, regularization); // Check if inside simplex before projecting since algo is complicated
        if (reg == 0) return u;
        return rproxgrad(u, 1, 1, regularization, rand);
      default:
        throw new RuntimeException("Unknown regularization function " + regularization);
    }
  }

  // \hat A_{i,j} = \argmin_a L_{i,j}(x_iy_j, a): Data imputation for real numeric values
  public final double impute(double u) {
    return impute(u, _loss);
  }

  public static double impute(double u, Loss loss) {
    assert loss.isForNumeric() : "Loss function " + loss + " not applicable to numerics";
    switch (loss) {
      case Quadratic:
      case Absolute:
      case Huber:
      case Periodic:
        return u;
      case Poisson:
        return Math.exp(u) - 1;
      case Hinge:
      case Logistic:
        return u > 0 ? 1 : 0; // Booleans are coded as {0,1} instead of {-1,1}
      default:
        throw new RuntimeException("Unknown loss function " + loss);
    }
  }

  // \hat A_{i,j} = \argmin_a L_{i,j}(x_iy_j, a): Data imputation for categorical values {0,1,2,...}
  // TODO: Is there a faster way to find the loss minimizer?
  public final int mimpute(double[] u) {
    return mimpute(u, _multi_loss);
  }

  public static int mimpute(double[] u, Loss multi_loss) {
    assert multi_loss.isForCategorical()
        : "Loss function " + multi_loss + " not applicable to categoricals";
    switch (multi_loss) {
      case Categorical:
      case Ordinal:
        double[] cand = new double[u.length];
        for (int a = 0; a < cand.length; a++) cand[a] = mloss(u, a, multi_loss);
        return ArrayUtils.minIndex(cand);
      default:
        throw new RuntimeException("Unknown multidimensional loss function " + multi_loss);
    }
  }
}
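// A self-contained sketch of the simplex projection used by rproxgrad's Simplex case above
// (Chen and Ye, http://arxiv.org/pdf/1101.6081v2.pdf), written against plain arrays so it
// can be checked independently of ArrayUtils. Illustrative only, not part of GLRMParameters.
static double[] projectToSimplexSketch(double[] u) {
  int n = u.length;
  double[] s = u.clone();
  java.util.Arrays.sort(s); // ascending: s[0] <= ... <= s[n-1]
  // Cumulative sums of the largest suffixes: csum[i] = s[i] + ... + s[n-1]
  double[] csum = new double[n];
  csum[n - 1] = s[n - 1];
  for (int i = n - 2; i >= 0; i--) csum[i] = csum[i + 1] + s[i];
  // t_i = (sum of the n-i largest entries - 1)/(n - i); take the first t_i >= s[i-1]
  double t = (csum[0] - 1) / n;
  for (int i = n - 1; i >= 1; i--) {
    double tmp = (csum[i] - 1) / (n - i);
    if (tmp >= s[i - 1]) {
      t = tmp;
      break;
    }
  }
  // Shift and clip: the result is the Euclidean projection onto the probability simplex,
  // e.g. projecting (0.5, 1.5) gives t = 0.5 and x = (0, 1), whose entries sum to 1.
  double[] x = new double[n];
  for (int i = 0; i < n; i++) x[i] = Math.max(u[i] - t, 0);
  return x;
}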
@Override
protected void compute2() {
  CoxPHModel model = null;
  try {
    Scope.enter();
    _parms.read_lock_frames(CoxPH.this);
    init(true);
    applyScoringFrameSideEffects();
    // The model to be built
    model = new CoxPHModel(dest(), _parms, new CoxPHModel.CoxPHOutput(CoxPH.this));
    model.delete_and_lock(_key);
    applyTrainingFrameSideEffects();
    int nResponses = 1;
    boolean useAllFactorLevels = false;
    final DataInfo dinfo =
        new DataInfo(
            Key.make(),
            _modelBuilderTrain,
            null,
            nResponses,
            useAllFactorLevels,
            DataInfo.TransformType.DEMEAN,
            TransformType.NONE,
            true,
            false,
            false,
            false,
            false,
            false);
    initStats(model, dinfo);
    final int n_offsets =
        (model._parms.offset_columns == null) ? 0 : model._parms.offset_columns.length;
    final int n_coef = dinfo.fullN() - n_offsets;
    final double[] step = MemoryManager.malloc8d(n_coef);
    final double[] oldCoef = MemoryManager.malloc8d(n_coef);
    final double[] newCoef = MemoryManager.malloc8d(n_coef);
    Arrays.fill(step, Double.NaN);
    Arrays.fill(oldCoef, Double.NaN);
    for (int j = 0; j < n_coef; ++j) newCoef[j] = model._parms.init;
    double oldLoglik = -Double.MAX_VALUE;
    final int n_time = (int) (model._output.max_time - model._output.min_time + 1);
    final boolean has_start_column = (model._parms.start_column != null);
    final boolean has_weights_column = (model._parms.weights_column != null);
    for (int i = 0; i <= model._parms.iter_max; ++i) {
      model._output.iter = i;
      final CoxPHTask coxMR =
          new CoxPHTask(
                  self(),
                  dinfo,
                  newCoef,
                  model._output.min_time,
                  n_time,
                  n_offsets,
                  has_start_column,
                  has_weights_column)
              .doAll(dinfo._adaptedFrame);
      final double newLoglik = calcLoglik(model, coxMR);
      if (newLoglik > oldLoglik) {
        if (i == 0) calcCounts(model, coxMR);
        calcModelStats(model, newCoef, newLoglik);
        calcCumhaz_0(model, coxMR);
        if (newLoglik == 0) model._output.lre = -Math.log10(Math.abs(oldLoglik - newLoglik));
        else model._output.lre = -Math.log10(Math.abs((oldLoglik - newLoglik) / newLoglik));
        if (model._output.lre >= model._parms.lre_min) break;
        Arrays.fill(step, 0);
        for (int j = 0; j < n_coef; ++j)
          for (int k = 0; k < n_coef; ++k)
            step[j] -= model._output.var_coef[j][k] * model._output.gradient[k];
        for (int j = 0; j < n_coef; ++j)
          if (Double.isNaN(step[j]) || Double.isInfinite(step[j])) break;
        oldLoglik = newLoglik;
        System.arraycopy(newCoef, 0, oldCoef, 0, oldCoef.length);
      } else {
        for (int j = 0; j < n_coef; ++j) step[j] /= 2;
      }
      for (int j = 0; j < n_coef; ++j) newCoef[j] = oldCoef[j] - step[j];
    }
    model.update(_key);
  } catch (Throwable t) {
    Job thisJob = DKV.getGet(_key);
    if (thisJob._state == JobState.CANCELLED) {
      Log.info("Job cancelled by user.");
    } else {
      t.printStackTrace();
      failed(t);
      throw t;
    }
  } finally {
    updateModelOutput();
    _parms.read_unlock_frames(CoxPH.this);
    Scope.exit();
    done(); // Job done!
  }
  tryComplete();
}
/**
 * Returns job execution time in milliseconds. If the job is still running, returns the elapsed
 * time since it started.
 */
public final long runTimeMs() {
  long until = end_time != 0 ? end_time : System.currentTimeMillis();
  return until - start_time;
}
/** Marks job as finished and records job end time. */
public void remove() {
  end_time = System.currentTimeMillis();
  if (state == JobState.RUNNING) state = JobState.DONE;
  // Overwrite handle - copy end_time, state, msg
  replaceByJobHandle();
}