public GLMGridSearch(int maxP, Key jobKey, Key dstKey, DataInfo dinfo, GLMParams glm,
                     double[] lambdas, double[] alphas, int nfolds, double betaEpsilon) {
  super(jobKey, dstKey);
  description = "GLM Grid with params " + glm.toString() + " on data " + dinfo.toString();
  _maxParallelism = maxP;
  // one GLM2 job per alpha value; all jobs share the same lambda sequence
  _jobs = new GLM2[alphas.length];
  _idx = new AtomicInteger(_maxParallelism);
  for (int i = 0; i < _jobs.length; ++i)
    _jobs[i] = new GLM2("GLM grid(" + i + ")", self(), Key.make(dstKey.toString() + "_" + i),
                        dinfo, glm, lambdas, alphas[i], nfolds, betaEpsilon, self());
}
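// NOTE: _idx is seeded with _maxParallelism, which suggests the usual bounded fan-out pattern:
// the first maxP jobs start immediately and each finishing job claims _idx.getAndIncrement()
// as the next index to launch. The sketch below is a generic, self-contained illustration of
// that pattern (class, field, and method names are ours), not the actual GLMGridSearch
// scheduling code.

import java.util.concurrent.atomic.AtomicInteger;

public class BoundedFanOutDemo {
  private final Runnable[] jobs;
  private final AtomicInteger idx; // next job index to claim; seeded with the parallelism bound

  BoundedFanOutDemo(Runnable[] jobs, int maxParallelism) {
    this.jobs = jobs;
    this.idx = new AtomicInteger(maxParallelism);
    // launch the first maxParallelism jobs up front
    for (int i = 0; i < Math.min(maxParallelism, jobs.length); ++i) start(i);
  }

  private void start(int i) {
    new Thread(() -> { jobs[i].run(); onJobDone(); }).start();
  }

  // called when any job finishes: claim the next unstarted index, if any remain
  private void onJobDone() {
    int next = idx.getAndIncrement();
    if (next < jobs.length) start(next);
  }

  public static void main(String[] args) {
    Runnable[] jobs = new Runnable[6];
    for (int i = 0; i < jobs.length; ++i) {
      final int id = i;
      jobs[i] = () -> System.out.println("job " + id + " ran on " + Thread.currentThread().getName());
    }
    new BoundedFanOutDemo(jobs, 2); // at most 2 jobs in flight at a time
  }
}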
public void add(double yreal, double ymodel) {
  null_deviance += _glm.deviance(yreal, _ymu);
  if (_glm.family == Family.binomial) // classification -> update the confusion matrix at every threshold
    for (int i = 0; i < DEFAULT_THRESHOLDS.length; ++i)
      _cms[i].add((int) yreal, (ymodel >= DEFAULT_THRESHOLDS[i]) ? 1 : 0);
  final double dev = _glm.deviance(yreal, ymodel);
  if (Double.isNaN(dev))
    System.out.println("NaN from yreal=" + yreal + ", ymodel=" + ymodel);
  residual_deviance += dev;
  ++nobs;
  avg_err += (ymodel - yreal) * (ymodel - yreal); // accumulate squared error (averaged later)
  if (_glm.family == Family.poisson) { // aic for poisson
    long y = Math.round(yreal);
    double logfactorial = 0;
    for (long i = 2; i <= y; ++i) logfactorial += Math.log(i); // log(y!) as a sum of logs
    _aic2 += (yreal * Math.log(ymodel) - logfactorial - ymodel);
  }
}
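// For the Poisson branch above, log(y!) is accumulated as a running sum of logarithms rather
// than computing y! directly, which overflows a 64-bit long already at 21!. A minimal
// standalone sketch of that idea (logFactorial and the demo class are illustrative names,
// not part of the source):

public class LogFactorialDemo {
  // log(y!) computed as sum_{i=2..y} log(i); accurate enough for the AIC term and immune to overflow
  static double logFactorial(long y) {
    double lf = 0;
    for (long i = 2; i <= y; ++i) lf += Math.log(i);
    return lf;
  }

  public static void main(String[] args) {
    System.out.println(logFactorial(20));   // ~42.3356 (ln of 20!, the last factorial that fits in a long)
    System.out.println(logFactorial(1000)); // ~5912.13, still well-behaved in log space
  }
}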
private void run(final double ymu, final long nobs, LMAXTask lmaxt) {
  String[] warns = null;
  if ((!lambda_search || !strong_rules_enabled) && (_dinfo.fullN() > MAX_PREDICTORS))
    throw new IllegalArgumentException("Too many predictors! GLM can only handle " + MAX_PREDICTORS
        + " predictors, got " + _dinfo.fullN() + ", try running with strong_rules enabled.");
  if (lambda_search) {
    // build a geometric sequence of 100 lambdas, from lambda_max down towards lambda_max * lambda_min_ratio
    max_iter = Math.max(300, max_iter);
    assert lmaxt != null : "running lambda search, but don't know what the lambda max is!";
    final double lmax = lmaxt.lmax();
    final double lambda_min_ratio = _dinfo._adaptedFrame.numRows() > _dinfo.fullN() ? 0.0001 : 0.01;
    final double d = Math.pow(lambda_min_ratio, 0.01);
    lambda = new double[100];
    lambda[0] = lmax;
    for (int i = 1; i < lambda.length; ++i) lambda[i] = lambda[i - 1] * d;
    _runAllLambdas = false;
  } else if (alpha[0] > 0 && lmaxt != null) { // make sure we start with lambda max (and discard all lambda > lambda max)
    final double lmax = lmaxt.lmax();
    int i = 0;
    while (i < lambda.length && lambda[i] > lmax) ++i;
    if (i != 0) {
      Log.info("GLM: removing " + i + " lambdas > lambda_max: " + Arrays.toString(Arrays.copyOf(lambda, i)));
      warns = i == lambda.length
          ? new String[] {"Removed " + i + " lambdas > lambda_max", "No lambdas < lambda_max, returning null model."}
          : new String[] {"Removed " + i + " lambdas > lambda_max"};
    }
    lambda = i == lambda.length ? new double[] {lambda_max} : Arrays.copyOfRange(lambda, i, lambda.length);
  }
  _model = new GLMModel(GLM2.this, dest(), _dinfo, _glm, beta_epsilon, alpha[0], lambda_max, lambda, ymu, prior);
  _model.warnings = warns;
  _model.clone().delete_and_lock(self());
  if (lambda[0] == lambda_max && alpha[0] > 0) { // fill in the trivial (intercept-only) solution for lambda max
    _beta = MemoryManager.malloc8d(_dinfo.fullN() + 1);
    _beta[_beta.length - 1] = _glm.link(ymu) + _iceptAdjust;
    _model.setLambdaSubmodel(0, _beta, _beta, 0);
    if (lmaxt != null) _model.setAndTestValidation(0, lmaxt._val);
    _lambdaIdx = 1;
  }
  if (_lambdaIdx == lambda.length) // ran only with one lambda > lambda_max => return null model
    GLM2.this.complete(); // signal we're done to anyone waiting for the job
  else {
    ++_iter;
    if (lmaxt != null && strong_rules_enabled)
      activeCols(lambda[_lambdaIdx], lmaxt.lmax(), lmaxt.gradient(l2pen()));
    Log.info("GLM2 starting GLM after " + (System.currentTimeMillis() - start)
        + "ms of preprocessing (mean/lmax/strong rules computation)");
    new GLMIterationTask(GLM2.this, _activeData, _glm, true, false, false, null, _ymu = ymu,
        _reg = 1.0 / nobs, new Iteration()).asyncExec(_activeData._adaptedFrame);
  }
}
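// The lambda_search branch builds a fixed geometric grid: lambda[0] = lambda_max and each
// subsequent value is scaled by d = lambda_min_ratio^(1/100), so lambda[99] lands just above
// lambda_max * lambda_min_ratio. A standalone sketch of that construction under the same
// assumptions (lambdaSequence and the demo class are illustrative names, not from the source):

import java.util.Arrays;

public class LambdaSequenceDemo {
  // Geometric grid of n lambdas starting at lmax; mirrors the construction in run():
  // ratio = 1e-4 when rows > predictors (tall data), 1e-2 otherwise, n = 100.
  static double[] lambdaSequence(double lmax, double ratio, int n) {
    final double d = Math.pow(ratio, 1.0 / n);
    double[] lambda = new double[n];
    lambda[0] = lmax;
    for (int i = 1; i < n; ++i) lambda[i] = lambda[i - 1] * d;
    return lambda;
  }

  public static void main(String[] args) {
    double[] seq = lambdaSequence(1.0, 1e-4, 100);
    // first few values and the last one: 1.0, ~0.912, ~0.832, ..., ~1.1e-4
    System.out.println(Arrays.toString(Arrays.copyOf(seq, 3)) + " ... " + seq[99]);
  }
}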