@Override
public float progress() {
  if (DKV.get(dest()) == null) return 0;
  GLMModel m = DKV.get(dest()).get();
  float progress = (float) m.iteration() / (float) max_iter; // TODO, do something smarter here
  return progress;
}
public void nextLambda(final GLMIterationTask glmt) {
  // We're done with this lambda, launch validation
  H2OCallback fin = new H2OCallback<GLMValidationTask>() {
    @Override
    public void callback(GLMValidationTask tsk) {
      boolean improved = _model.setAndTestValidation(_lambdaIdx, tsk._res);
      _model.clone().update(self());
      if ((improved || _runAllLambdas) && _lambdaIdx < (lambda.length - 1)) { // continue with next lambda value?
        glmt._val = null;
        ++_lambdaIdx;
        new Iteration().callback(glmt);
      } else // nope, we're done
        GLM2.this.complete(); // signal we're done to anyone waiting for the job
    }
    @Override
    public boolean onExceptionalCompletion(Throwable ex, CountedCompleter cc) {
      GLM2.this.cancel(ex);
      return true;
    }
  };
  if (GLM2.this.n_folds >= 2) xvalidate(_model.clone(), _lambdaIdx, fin);
  else new GLMValidationTask(_model.clone(), _lambdaIdx, fin).asyncExec(_dinfo._adaptedFrame);
}
private double[] setNewBeta(final double[] newBeta) {
  final double[] fullBeta;
  if (_activeCols != null) {
    fullBeta = MemoryManager.malloc8d(_dinfo.fullN() + 1);
    int j = 0;
    for (int i : _activeCols) fullBeta[i] = newBeta[j++];
    assert j == newBeta.length - 1;
    fullBeta[fullBeta.length - 1] = newBeta[j];
  } else {
    assert newBeta.length == _dinfo.fullN() + 1;
    fullBeta = newBeta;
  }
  final double[] newBetaDeNorm;
  if (_dinfo._standardize) {
    newBetaDeNorm = fullBeta.clone();
    double norm = 0.0; // reverse any normalization on the intercept
    // denormalize only the numeric coefs (categoricals are not normalized)
    final int numoff = _dinfo.numStart();
    for (int i = numoff; i < fullBeta.length - 1; i++) {
      double b = newBetaDeNorm[i] * _dinfo._normMul[i - numoff];
      norm += b * _dinfo._normSub[i - numoff]; // also accumulate the intercept adjustment
      newBetaDeNorm[i] = b;
    }
    newBetaDeNorm[newBetaDeNorm.length - 1] -= norm;
  } else
    newBetaDeNorm = null;
  _model.setLambdaSubmodel(
      _lambdaIdx,
      newBetaDeNorm == null ? fullBeta : newBetaDeNorm,
      newBetaDeNorm == null ? null : fullBeta,
      (_iter + 1));
  _model.clone().update(self());
  return fullBeta;
}
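// A minimal, self-contained sketch of the de-normalization step above. It assumes, as
// in DataInfo, that each numeric predictor was standardized as x_std = (x - sub) * mul
// with mul = 1/sd and sub = mean; the class and array names here are hypothetical and
// only illustrate the coefficient rescaling, not the actual H2O API.
public final class DenormalizeDemo {
  // Convert coefficients fit on standardized predictors back to the original scale.
  // betaStd holds one coefficient per numeric predictor plus the intercept in the last slot.
  static double[] denormalize(double[] betaStd, double[] sub, double[] mul) {
    double[] beta = betaStd.clone();
    double interceptAdjust = 0.0;
    for (int i = 0; i < sub.length; i++) {
      beta[i] = betaStd[i] * mul[i];         // rescale the slope
      interceptAdjust += beta[i] * sub[i];   // shift absorbed by the intercept
    }
    beta[beta.length - 1] -= interceptAdjust; // adjust the intercept
    return beta;
  }

  public static void main(String[] args) {
    // One predictor with mean 10 and sd 2: x_std = (x - 10) * 0.5
    double[] betaStd = {1.0, 3.0}; // slope on standardized x, then intercept
    double[] beta = denormalize(betaStd, new double[] {10.0}, new double[] {0.5});
    // Expect slope 0.5 and intercept 3 - 0.5 * 10 = -2
    System.out.println(java.util.Arrays.toString(beta));
  }
}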
@Override
public void map(GLMModel m) {
  _res = (GLMModel) m.clone();
  _res._output = (GLMOutput) _res._output.clone();
  Submodel sm = Double.isNaN(_lambda)
      ? _res._output._submodels[_res._output._best_lambda_idx]
      : _res._output.submodelForLambda(_lambda);
  assert sm != null : "GLM[" + m._key + "]: missing submodel for lambda " + _lambda;
  sm = (Submodel) sm.clone();
  _res._output._submodels = new Submodel[] {sm};
  _res._output.setSubmodelIdx(0);
}
protected void complete() {
  if (_addedL2 > 0) {
    String warn = "Added L2 penalty (rho = " + _addedL2 + ") due to non-spd matrix. ";
    if (_model.warnings == null || _model.warnings.length == 0)
      _model.warnings = new String[] {warn};
    else {
      _model.warnings = Arrays.copyOf(_model.warnings, _model.warnings.length + 1);
      _model.warnings[_model.warnings.length - 1] = warn;
    }
    _model.update(self());
  }
  _model.unlock(self());
  if (_dinfo._nfolds == 0) remove(); // remove/complete the job only for the top-level GLM2, not xval GLM2s
  if (_fjtask != null) _fjtask.tryComplete();
}
@Override
public void cancel(Throwable ex) {
  if (isCancelledOrCrashed()) return;
  if (_model != null) _model.unlock(self());
  if (ex instanceof JobCancelledException) {
    if (!isCancelledOrCrashed()) cancel();
  } else
    super.cancel(ex);
}
public GLMModelV3 make_model(int version, MakeGLMModelV3 args) {
  GLMModel model = DKV.getGet(args.model.key());
  if (model == null) throw new IllegalArgumentException("missing source model " + args.model);
  String[] names = model._output.coefficientNames();
  Map<String, Double> coefs = model.coefficients();
  for (int i = 0; i < args.names.length; ++i) coefs.put(args.names[i], args.beta[i]);
  double[] beta = model.beta().clone();
  for (int i = 0; i < beta.length; ++i) beta[i] = coefs.get(names[i]);
  GLMModel m = new GLMModel(
      args.dest != null ? args.dest.key() : Key.make(),
      model._parms, null, new double[] {.5}, Double.NaN, Double.NaN, -1);
  DataInfo dinfo = model.dinfo();
  dinfo.setPredictorTransform(TransformType.NONE);
  // GLMOutput(DataInfo dinfo, String[] column_names, String[][] domains, String[] coefficient_names, boolean binomial)
  m._output = new GLMOutput(
      model.dinfo(), model._output._names, model._output._domains,
      model._output.coefficientNames(), model._output._binomial, beta);
  DKV.put(m._key, m);
  GLMModelV3 res = new GLMModelV3();
  res.fillFromImpl(m);
  return res;
}
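// Minimal sketch of the coefficient-override step in make_model above: user-supplied
// (name, value) pairs are overlaid on the model's existing coefficients and then read
// back out in the model's own coefficient order. The class, method, and data here are
// hypothetical and stand in for the GLMModel/coefficients() calls used by the real code.
import java.util.HashMap;
import java.util.Map;

public final class OverrideCoefsDemo {
  static double[] overrideBeta(String[] modelNames, double[] modelBeta,
                               String[] userNames, double[] userBeta) {
    Map<String, Double> coefs = new HashMap<>();
    for (int i = 0; i < modelNames.length; ++i) coefs.put(modelNames[i], modelBeta[i]);
    for (int i = 0; i < userNames.length; ++i) coefs.put(userNames[i], userBeta[i]);
    double[] beta = new double[modelNames.length];
    for (int i = 0; i < beta.length; ++i) beta[i] = coefs.get(modelNames[i]);
    return beta;
  }

  public static void main(String[] args) {
    String[] names = {"x1", "x2", "Intercept"};
    double[] beta  = {0.2, -1.5, 0.7};
    // Override only x2; x1 and the intercept keep their fitted values.
    double[] out = overrideBeta(names, beta, new String[] {"x2"}, new double[] {0.0});
    System.out.println(java.util.Arrays.toString(out)); // [0.2, 0.0, 0.7]
  }
}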
public GLMXValidation(GLMModel mainModel, GLMModel[] xvalModels, int lambdaIdx, long nobs) {
  super(mainModel._dataKey, mainModel.ymu, mainModel.glm, mainModel.rank(lambdaIdx));
  xval_models = new Key[xvalModels.length];
  for (int i = 0; i < xvalModels.length; ++i) {
    add(xvalModels[i].validation());
    xval_models[i] = xvalModels[i]._key;
  }
  this.nobs = nobs;
  finalize_AIC_AUC();
}
protected void nextLambda(final GLMIterationTask glmt, GLMValidation val) {
  currentLambdaIter = 0;
  boolean improved = _model.setAndTestValidation(_lambdaIdx, val);
  _model.clone().update(self());
  boolean done = false; // _iter < max_iter && (improved || _runAllLambdas) && _lambdaIdx < (lambda.length-1);
  if (_iter == max_iter) {
    Log.info("GLM2 reached max #iterations.");
    done = true;
  } else if (!improved && !_runAllLambdas) {
    Log.info("GLM2 converged as solution stopped improving with decreasing lambda.");
    done = true;
  } else if (_lambdaIdx == lambda.length - 1) {
    Log.info("GLM2 done with all given lambdas.");
    done = true;
  } else if (_activeCols != null && _activeCols.length + 1 >= MAX_PREDICTORS) {
    Log.info("GLM2 reached maximum allowed number of predictors at lambda = " + lambda[_lambdaIdx]);
    done = true;
  }
  if (!done) { // continue with the next lambda value?
    ++_lambdaIdx;
    glmt._val = null;
    if (glmt._gram == null) { // assume we had lambda search with strong rules
      // with strong rules we can't reuse this gram for the next lambda computation
      // (different sets of coefficients)
      // I expect that:
      //   1) beta has been expanded to match the current set of active cols
      //   2) it is a new GLMIteration ready to be launched
      // the caller (nextLambda(glmt, beta)) is expected to ensure this...
      assert _activeCols == null || (glmt._beta.length == _activeCols.length + 1);
      assert !glmt.isDone();
      glmt.asyncExec(_activeData._adaptedFrame);
    } else // we have the right gram, just solve with the next lambda
      new Iteration().callback(glmt);
  } else // nope, we're done
    GLM2.this.complete(); // signal we're done to anyone waiting for the job
}
private void xvalidate(final GLMModel model, int lambdaIxd, final H2OCountedCompleter cmp) {
  final Key[] keys = new Key[n_folds];
  H2OCallback callback = new H2OCallback() {
    @Override
    public void callback(H2OCountedCompleter t) {
      try {
        GLMModel[] models = new GLMModel[keys.length];
        // we got the xval models, now compute their validations...
        for (int i = 0; i < models.length; ++i) models[i] = DKV.get(keys[i]).get();
        new GLMXValidationTask(model, _lambdaIdx, models, cmp).asyncExec(_dinfo._adaptedFrame);
      } catch (Throwable ex) {
        cmp.completeExceptionally(ex);
      }
    }
    @Override
    public boolean onExceptionalCompletion(Throwable ex, CountedCompleter caller) {
      cmp.completeExceptionally(ex);
      return true;
    }
  };
  callback.addToPendingCount(n_folds - 1);
  double proximal_penalty = 0;
  for (int i = 0; i < n_folds; ++i)
    new GLM2(
        this.description + "xval " + i, self(),
        keys[i] = Key.make(destination_key + "_" + _lambdaIdx + "_xval" + i),
        _dinfo.getFold(i, n_folds), _glm,
        new double[] {lambda[_lambdaIdx]}, model.alpha, 0, model.beta_eps, self(),
        model.norm_beta(lambdaIxd), proximal_penalty)
      .run(callback);
}
private void xvalidate(final GLMModel model, int lambdaIxd, final H2OCountedCompleter cmp) {
  final Key[] keys = new Key[n_folds];
  GLM2[] glms = new GLM2[n_folds];
  for (int i = 0; i < n_folds; ++i)
    glms[i] = new GLM2(
        this.description + "xval " + i, self(),
        keys[i] = Key.make(destination_key + "_" + _lambdaIdx + "_xval" + i),
        _dinfo.getFold(i, n_folds), _glm,
        new double[] {lambda[_lambdaIdx]}, model.alpha, 0, model.beta_eps, self(),
        model.norm_beta(lambdaIxd), higher_accuracy, prior, 0);
  H2O.submitTask(
      new ParallelGLMs(
          GLM2.this, glms, H2O.CLOUD.size(),
          new H2OCallback(GLM2.this) {
            @Override
            public void callback(H2OCountedCompleter t) {
              GLMModel[] models = new GLMModel[keys.length];
              // we got the xval models, now compute their validations...
              for (int i = 0; i < models.length; ++i) models[i] = DKV.get(keys[i]).get();
              new GLMXValidationTask(model, _lambdaIdx, models, cmp).asyncExec(_dinfo._adaptedFrame);
            }
          }));
}
public GLMModel clone2() {
  GLMModel res = clone();
  res._output = (GLMOutput) res._output.clone();
  return res;
}
@Override
protected void map(GLMModel glmModel) {
  glmModel._output.pickBestModel(false);
  glmModel.update(_jobKey);
  glmModel.unlock(_jobKey);
}
@Override
public void cancel(Throwable ex) {
  if (_model != null) _model.unlock(self());
  if (ex instanceof JobCancelledException) cancel();
  else super.cancel(ex);
}
private void run(final double ymu, final long nobs, LMAXTask lmaxt) {
  String[] warns = null;
  if ((!lambda_search || !strong_rules_enabled) && (_dinfo.fullN() > MAX_PREDICTORS))
    throw new IllegalArgumentException(
        "Too many predictors! GLM can only handle " + MAX_PREDICTORS
            + " predictors, got " + _dinfo.fullN() + ", try to run with strong_rules enabled.");
  if (lambda_search) {
    max_iter = Math.max(300, max_iter);
    assert lmaxt != null : "running lambda search, but don't know what the lambda max is!";
    final double lmax = lmaxt.lmax();
    final double lambda_min_ratio = _dinfo._adaptedFrame.numRows() > _dinfo.fullN() ? 0.0001 : 0.01;
    final double d = Math.pow(lambda_min_ratio, 0.01);
    lambda = new double[100];
    lambda[0] = lmax;
    for (int i = 1; i < lambda.length; ++i) lambda[i] = lambda[i - 1] * d;
    _runAllLambdas = false;
  } else if (alpha[0] > 0 && lmaxt != null) { // make sure we start with lambda max (and discard all lambda > lambda max)
    final double lmax = lmaxt.lmax();
    int i = 0;
    while (i < lambda.length && lambda[i] > lmax) ++i;
    if (i != 0) {
      Log.info("GLM: removing " + i + " lambdas > lambda_max: " + Arrays.toString(Arrays.copyOf(lambda, i)));
      warns = i == lambda.length
          ? new String[] {
              "Removed " + i + " lambdas > lambda_max",
              "No lambdas < lambda_max, returning null model."}
          : new String[] {"Removed " + i + " lambdas > lambda_max"};
    }
    lambda = i == lambda.length ? new double[] {lambda_max} : Arrays.copyOfRange(lambda, i, lambda.length);
  }
  _model = new GLMModel(GLM2.this, dest(), _dinfo, _glm, beta_epsilon, alpha[0], lambda_max, lambda, ymu, prior);
  _model.warnings = warns;
  _model.clone().delete_and_lock(self());
  if (lambda[0] == lambda_max && alpha[0] > 0) { // fill in the trivial solution for lambda max
    _beta = MemoryManager.malloc8d(_dinfo.fullN() + 1);
    _beta[_beta.length - 1] = _glm.link(ymu) + _iceptAdjust;
    _model.setLambdaSubmodel(0, _beta, _beta, 0);
    if (lmaxt != null) _model.setAndTestValidation(0, lmaxt._val);
    _lambdaIdx = 1;
  }
  if (_lambdaIdx == lambda.length) // ran only with one lambda > lambda_max => return the null model
    GLM2.this.complete(); // signal we're done to anyone waiting for the job
  else {
    ++_iter;
    if (lmaxt != null && strong_rules_enabled)
      activeCols(lambda[_lambdaIdx], lmaxt.lmax(), lmaxt.gradient(l2pen()));
    Log.info("GLM2 starting GLM after " + (System.currentTimeMillis() - start)
        + "ms of preprocessing (mean/lmax/strong rules computation)");
    new GLMIterationTask(
        GLM2.this, _activeData, _glm, true, false, false, null,
        _ymu = ymu, _reg = 1.0 / nobs, new Iteration())
      .asyncExec(_activeData._adaptedFrame);
  }
}
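// A self-contained sketch of the lambda-search path built in run() above: 100 values
// starting at lambda_max and decaying geometrically by d = lambda_min_ratio^0.01, so the
// last value is lambda_max * lambda_min_ratio^0.99, i.e. close to lambda_max *
// lambda_min_ratio. The class and method names here are illustrative, not H2O API.
public final class LambdaPathDemo {
  static double[] lambdaPath(double lambdaMax, double lambdaMinRatio) {
    double d = Math.pow(lambdaMinRatio, 0.01); // same per-step decay factor as run()
    double[] lambda = new double[100];
    lambda[0] = lambdaMax;
    for (int i = 1; i < lambda.length; ++i) lambda[i] = lambda[i - 1] * d;
    return lambda;
  }

  public static void main(String[] args) {
    // lambda_min_ratio = 1e-4 corresponds to the "more rows than columns" case above.
    double[] path = lambdaPath(1.0, 1e-4);
    // First entry is lambda_max; the last is roughly lambda_max * lambda_min_ratio.
    System.out.println(path[0] + " ... " + path[path.length - 1]);
  }
}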