Beispiel #1
0
 /**
  * Builds a grid-search job holding one {@code GLM2} job per entry of {@code alphas}.
  *
  * <p>Every child job receives the same data info, GLM parameters, lambdas, fold count and beta
  * epsilon. Only the alpha value, the job name ({@code "GLM grid(i)"}) and the destination key
  * ({@code dstKey + "_" + i}) differ between children.
  *
  * @param maxP stored in {@code _maxParallelism}; also used as the initial value of {@code _idx}
  *     (presumably the index of the next job to launch once the first {@code maxP} are running —
  *     confirm against the code that consumes {@code _idx})
  * @param jobKey key of this job, forwarded to the superclass constructor
  * @param dstKey destination key of this job; also the prefix of each child's destination key
  * @param dinfo data description shared by all child jobs
  * @param glm GLM parameters shared by all child jobs
  * @param lambdas lambda values passed unchanged to every child job
  * @param alphas one child job is created per element
  * @param nfolds fold count passed unchanged to every child job
  * @param betaEpsilon beta epsilon passed unchanged to every child job
  */
 public GLMGridSearch(
     int maxP,
     Key jobKey,
     Key dstKey,
     DataInfo dinfo,
     GLMParams glm,
     double[] lambdas,
     double[] alphas,
     int nfolds,
     double betaEpsilon) {
   super(jobKey, dstKey);
   // The separator before "on data" was missing, gluing it to the parameter dump.
   description = "GLM Grid with params " + glm.toString() + " on data " + dinfo.toString();
   _maxParallelism = maxP;
   _jobs = new GLM2[alphas.length];
   _idx = new AtomicInteger(_maxParallelism);
   for (int i = 0; i < _jobs.length; ++i) {
     _jobs[i] =
         new GLM2(
             "GLM grid(" + i + ")",
             self(),
             Key.make(dstKey.toString() + "_" + i),
             dinfo,
             glm,
             lambdas,
             alphas[i],
             nfolds,
             betaEpsilon,
             self());
   }
 }
Beispiel #2
0
 /**
  * Folds a single observation into the running validation statistics.
  *
  * <p>Updates, in order: the null deviance (deviance of {@code yreal} against {@code _ymu}), the
  * per-threshold confusion matrices (binomial family only), the residual deviance, the
  * observation count, the running sum of squared errors in {@code avg_err}, and the {@code _aic2}
  * accumulator (poisson family only).
  *
  * @param yreal observed response value
  * @param ymodel value predicted by the model for the same observation
  */
 public void add(double yreal, double ymodel) {
   null_deviance += _glm.deviance(yreal, _ymu);
   if (_glm.family == Family.binomial) { // classification -> update confusion matrix too
     for (int i = 0; i < DEFAULT_THRESHOLDS.length; ++i) {
       _cms[i].add((int) yreal, (ymodel >= DEFAULT_THRESHOLDS[i]) ? 1 : 0);
     }
   }
   // Computed once and reused for both the NaN diagnostic and the accumulation.
   final double dev = _glm.deviance(yreal, ymodel);
   if (Double.isNaN(dev)) {
     System.out.println("NaN from yreal=" + yreal + ", ymodel=" + ymodel);
   }
   residual_deviance += dev;
   ++nobs;
   final double diff = ymodel - yreal;
   avg_err += diff * diff;
   if (_glm.family == Family.poisson) { // aic for poisson
     // log(y!) summed term by term, with y being yreal rounded to the nearest long.
     long y = Math.round(yreal);
     double logfactorial = 0;
     for (long i = 2; i <= y; ++i) {
       logfactorial += Math.log(i);
     }
     _aic2 += (yreal * Math.log(ymodel) - logfactorial - ymodel);
   }
 }
Beispiel #3
0
 /**
  * Prepares the lambda sequence, creates and locks the model, and starts the first GLM iteration.
  *
  * <p>Steps, in order:
  *
  * <ol>
  *   <li>Rejects the run when the predictor count exceeds {@code MAX_PREDICTORS}, unless both
  *       lambda search and strong rules are enabled.
  *   <li>With lambda search: raises {@code max_iter} to at least 300 and replaces {@code lambda}
  *       with 100 values starting at lambda max, each one the previous value multiplied by
  *       {@code lambda_min_ratio^0.01}.
  *   <li>Otherwise, when {@code alpha[0] > 0} and {@code lmaxt} is available: drops the leading
  *       lambdas that are greater than lambda max and records a warning about it. If every lambda
  *       was dropped, {@code lambda} becomes the single value {@code lambda_max}.
  *   <li>Creates {@code _model}, attaches the warnings, and calls {@code delete_and_lock} on a
  *       clone of it.
  *   <li>When the first lambda equals {@code lambda_max} and {@code alpha[0] > 0}: stores a
  *       submodel at index 0 whose only explicitly set coefficient is the last one (the intercept
  *       slot, set to {@code link(ymu) + _iceptAdjust}), and sets {@code _lambdaIdx} to 1.
  *   <li>If no lambda is left to fit, completes the job; otherwise launches an asynchronous
  *       {@code GLMIterationTask}.
  * </ol>
  *
  * @param ymu response mean; used for the model, the trivial-solution intercept, and stored in
  *     {@code _ymu}
  * @param nobs number of observations; {@code _reg} is set to {@code 1.0 / nobs}
  * @param lmaxt source of lambda max, its validation and the gradient; may be {@code null} when
  *     lambda search is off (lambda search asserts that it is non-null)
  * @throws IllegalArgumentException if there are more than {@code MAX_PREDICTORS} predictors and
  *     lambda search or strong rules are disabled
  */
 private void run(final double ymu, final long nobs, LMAXTask lmaxt) {
   String[] warns = null;
   if ((!lambda_search || !strong_rules_enabled) && (_dinfo.fullN() > MAX_PREDICTORS)) {
     throw new IllegalArgumentException(
         "Too many predictors! GLM can only handle "
             + MAX_PREDICTORS
             + " predictors, got "
             + _dinfo.fullN()
             + ", try to run with strong_rules enabled.");
   }
   if (lambda_search) {
     max_iter = Math.max(300, max_iter);
     assert lmaxt != null : "running lambda search, but don't know what is the lambda max!";
     final double lmax = lmaxt.lmax();
     // Smaller ratio when there are more rows than predictors.
     final double lambda_min_ratio =
         _dinfo._adaptedFrame.numRows() > _dinfo.fullN() ? 0.0001 : 0.01;
     // Constant factor between consecutive lambdas.
     final double d = Math.pow(lambda_min_ratio, 0.01);
     lambda = new double[100];
     lambda[0] = lmax;
     for (int i = 1; i < lambda.length; ++i) {
       lambda[i] = lambda[i - 1] * d;
     }
     _runAllLambdas = false;
   } else if (alpha[0] > 0 && lmaxt != null) {
     // make sure we start with lambda max (and discard all lambda > lambda max)
     final double lmax = lmaxt.lmax();
     int i = 0;
     while (i < lambda.length && lambda[i] > lmax) {
       ++i;
     }
     if (i != 0) {
       Log.info(
           "GLM: removing "
               + i
               + " lambdas > lambda_max: "
               + Arrays.toString(Arrays.copyOf(lambda, i)));
       warns =
           i == lambda.length
               ? new String[] {
                 "Removed " + i + " lambdas > lambda_max",
                 "No lambdas < lambda_max, returning null model."
               }
               : new String[] {"Removed " + i + " lambdas > lambda_max"};
     }
     lambda =
         i == lambda.length
             ? new double[] {lambda_max}
             : Arrays.copyOfRange(lambda, i, lambda.length);
   }
   _model =
       new GLMModel(
           GLM2.this,
           dest(),
           _dinfo,
           _glm,
           beta_epsilon,
           alpha[0],
           lambda_max,
           lambda,
           ymu,
           prior);
   _model.warnings = warns;
   _model.clone().delete_and_lock(self());
   if (lambda[0] == lambda_max && alpha[0] > 0) { // fill-in trivial solution for lambda max
     // NOTE(review): relies on malloc8d returning a zero-filled array — confirm.
     _beta = MemoryManager.malloc8d(_dinfo.fullN() + 1);
     _beta[_beta.length - 1] = _glm.link(ymu) + _iceptAdjust;
     _model.setLambdaSubmodel(0, _beta, _beta, 0);
     if (lmaxt != null) {
       _model.setAndTestValidation(0, lmaxt._val);
     }
     _lambdaIdx = 1;
   }
   if (_lambdaIdx == lambda.length) {
     // ran only with one lambda > lambda_max => return null model
     GLM2.this.complete(); // signal we're done to anyone waiting for the job
   } else {
     ++_iter;
     if (lmaxt != null && strong_rules_enabled) {
       activeCols(lambda[_lambdaIdx], lmaxt.lmax(), lmaxt.gradient(l2pen()));
     }
     Log.info(
         "GLM2 starting GLM after "
             + (System.currentTimeMillis() - start)
             + "ms of preprocessing (mean/lmax/strong rules computation)");
     // Previously assigned inline inside the constructor argument list; set here, still before
     // the Iteration callback is created.
     _ymu = ymu;
     _reg = 1.0 / nobs;
     new GLMIterationTask(
             GLM2.this,
             _activeData,
             _glm,
             true,
             false,
             false,
             null,
             _ymu,
             _reg,
             new Iteration())
         .asyncExec(_activeData._adaptedFrame);
   }
 }