示例#1
0
文件: GLM2.java 项目: rohit2412/h2o
 /**
  * Stores {@code newBeta} as the submodel for the current lambda index and returns the
  * coefficients laid out over all {@code _dinfo.fullN() + 1} slots (the last slot is the
  * intercept).
  *
  * <p>When an active column set is in effect, {@code newBeta} holds one value per active column
  * followed by the intercept and is scattered into a freshly allocated full-length vector.
  * Otherwise {@code newBeta} must already be full-length and is used as is (not copied).
  *
  * @param newBeta coefficients for the active columns (or all columns), intercept last
  * @return the full-length coefficient vector, in the same scale as {@code newBeta}
  */
 private double[] setNewBeta(final double[] newBeta) {
   final double[] expanded;
   if (_activeCols == null) {
     assert newBeta.length == _dinfo.fullN() + 1;
     expanded = newBeta;
   } else {
     expanded = MemoryManager.malloc8d(_dinfo.fullN() + 1);
     int src = 0;
     for (int col : _activeCols) {
       expanded[col] = newBeta[src];
       src++;
     }
     // Everything but the trailing intercept must have been consumed by now.
     assert src == newBeta.length - 1;
     expanded[expanded.length - 1] = newBeta[src];
   }

   double[] denormalized = null;
   if (_dinfo._standardize) {
     denormalized = expanded.clone();
     // Only the numeric coefficients are denormalized (categoricals are not normalized).
     final int numStart = _dinfo.numStart();
     final int interceptIdx = expanded.length - 1;
     double interceptShift = 0.0; // accumulated correction that undoes normalization on the intercept
     for (int i = numStart; i < interceptIdx; i++) {
       final double scaled = denormalized[i] * _dinfo._normMul[i - numStart];
       interceptShift += scaled * _dinfo._normSub[i - numStart];
       denormalized[i] = scaled;
     }
     denormalized[denormalized.length - 1] -= interceptShift;
   }

   // With standardization the model receives the denormalized vector plus the normalized one;
   // without it, only the full vector (and null for the second argument).
   final boolean standardized = denormalized != null;
   _model.setLambdaSubmodel(
       _lambdaIdx,
       standardized ? denormalized : expanded,
       standardized ? expanded : null,
       (_iter + 1));
   _model.clone().update(self());
   return expanded;
 }
示例#2
0
文件: GLM2.java 项目: jayfans3/h2o
 /**
  * Creates a GLM job over an already prepared {@code DataInfo}.
  *
  * @param desc job description
  * @param jobKey key stored as this job's {@code job_key}
  * @param dest key stored as {@code destination_key}; also used to build the job name
  * @param dinfo data layout; its adapted frame becomes {@code source} and that frame's last vec
  *     becomes {@code response}
  * @param glm GLM parameters
  * @param lambda lambda values, stored as given (the array is not copied)
  * @param alpha single alpha value, wrapped into a one-element array
  * @param nfolds number of folds; when greater than 1 the fold id is appended to the job name
  * @param betaEpsilon stored as {@code beta_epsilon}
  * @param parentJob not read by this constructor body
  * @param beta optional starting coefficients; when non-null it must have {@code dinfo.fullN() +
  *     1} entries
  * @param proximalPenalty when non-zero, {@code beta} is additionally stored as {@code _wgiven}
  */
 public GLM2(
     String desc,
     Key jobKey,
     Key dest,
     DataInfo dinfo,
     GLMParams glm,
     double[] lambda,
     double alpha,
     int nfolds,
     double betaEpsilon,
     Key parentJob,
     double[] beta,
     double proximalPenalty) {
   // The message reports the length the check actually requires (fullN() + 1).
   assert beta == null || beta.length == (dinfo.fullN() + 1)
       : "unexpected size of beta, got length "
           + beta.length
           + ", expected "
           + (dinfo.fullN() + 1);
   job_key = jobKey;
   description = desc;
   destination_key = dest;
   beta_epsilon = betaEpsilon;
   _beta = beta;
   _dinfo = dinfo;
   _glm = glm;
   this.lambda = lambda;
   _proximalPenalty = proximalPenalty;
   if (proximalPenalty != 0) {
     _wgiven = beta;
   }
   this.alpha = new double[] {alpha};
   n_folds = nfolds;
   source = dinfo._adaptedFrame;
   response = dinfo._adaptedFrame.lastVec();
   _jobName = dest.toString() + ((nfolds > 1) ? ("[" + dinfo._foldId + "]") : "");
 }
示例#3
0
 /**
  * Scores a single row: sums the coefficients selected by the categorical columns, adds the
  * numeric columns weighted by their coefficients and the intercept (last coefficient), then
  * applies the inverse link.
  *
  * <p>{@code preds[0]} receives the inverse-link value. For the binomial family it is instead the
  * 0/1 label obtained by comparing against {@code _output._threshold}, with {@code preds[1]} and
  * {@code preds[2]} holding the class-0 and class-1 values; all three are NaN when the
  * inverse-link value is NaN.
  *
  * @param chks chunks of the row's columns; the categorical columns come first
  * @param row_in_chunk row index within the chunks
  * @param tmp not read by this method
  * @param preds output array, filled in place
  * @return {@code preds}
  */
 @Override
 protected float[] score0(Chunk[] chks, int row_in_chunk, double[] tmp, float[] preds) {
   double linear = 0.0;
   final double[] coefs = beta();
   if (_parms.useAllFactorLvls) {
     // Every factor level has its own coefficient.
     for (int c = 0; c < _dinfo._catOffsets.length - 1; ++c) {
       linear += coefs[_dinfo._catOffsets[c] + (int) chks[c].at0(row_in_chunk)];
     }
   } else {
     // Level 0 of each factor is skipped, so the remaining levels are shifted down by one.
     for (int c = 0; c < _dinfo._catOffsets.length - 1; ++c) {
       if (chks[c].at0(row_in_chunk) != 0) {
         linear += coefs[_dinfo._catOffsets[c] + (int) (chks[c].at0(row_in_chunk) - 1)];
       }
     }
   }

   // Offset that maps a numeric column index onto its coefficient index.
   final int numShift = _dinfo.numStart() - _dinfo._cats;
   for (int c = _dinfo._cats; c < coefs.length - 1 - numShift; ++c) {
     linear += coefs[numShift + c] * chks[c].at0(row_in_chunk);
   }
   linear += coefs[coefs.length - 1]; // intercept

   final double mu = _parms.linkInv(linear);
   preds[0] = (float) mu;
   if (_parms.family != Family.binomial) {
     return preds;
   }

   if (Double.isNaN(mu)) {
     preds[0] = Float.NaN;
     preds[1] = Float.NaN;
     preds[2] = Float.NaN;
   } else {
     preds[0] = (mu >= _output._threshold ? 1 : 0); // thresholded label
     preds[1] = 1.0f - (float) mu; // class 0
     preds[2] = (float) mu; // class 1
   }
   return preds;
 }
示例#4
0
 /**
  * Builds the output holder from the data layout.
  *
  * <p>The coefficient names are {@code dinfo.coefNames()} followed by a trailing {@code
  * "Intercept"} entry.
  *
  * @param dinfo data layout supplying the coefficient names
  * @param binomial stored as {@code _binomial}
  */
 public GLMOutput(DataInfo dinfo, boolean binomial) {
   final String[] cnames = dinfo.coefNames();
   // One extra trailing slot holds the intercept's name.
   _coefficient_names = Arrays.copyOf(cnames, cnames.length + 1);
   _coefficient_names[cnames.length] = "Intercept";
   _binomial = binomial;
 }
示例#5
0
文件: GLM2.java 项目: jayfans3/h2o
 /**
  * Creates a grid-search job that holds one {@code GLM2} job per alpha value.
  *
  * <p>Each sub-job is keyed {@code dstKey + "_" + i}, shares the same lambdas, and receives this
  * job's {@code self()} as both its job key and its parent job.
  *
  * @param maxP stored as {@code _maxParallelism}; also the initial value of {@code _idx}
  *     (presumably the index of the next job to start once the first {@code maxP} are running —
  *     confirm against the code that consumes {@code _idx})
  * @param jobKey key of this grid job
  * @param dstKey destination key of this grid job and prefix of the sub-job keys
  * @param dinfo data layout shared by all sub-jobs
  * @param glm GLM parameters shared by all sub-jobs
  * @param lambdas lambda values handed to every sub-job
  * @param alphas one sub-job is created per entry
  * @param nfolds number of folds handed to every sub-job
  * @param betaEpsilon beta epsilon handed to every sub-job
  */
 public GLMGridSearch(
     int maxP,
     Key jobKey,
     Key dstKey,
     DataInfo dinfo,
     GLMParams glm,
     double[] lambdas,
     double[] alphas,
     int nfolds,
     double betaEpsilon) {
   super(jobKey, dstKey);
   // A space before "on data" keeps the two descriptions from running together.
   description = "GLM Grid with params " + glm.toString() + " on data " + dinfo.toString();
   _maxParallelism = maxP;
   _jobs = new GLM2[alphas.length];
   _idx = new AtomicInteger(_maxParallelism);
   for (int i = 0; i < _jobs.length; ++i) {
     _jobs[i] =
         new GLM2(
             "GLM grid(" + i + ")",
             self(),
             Key.make(dstKey.toString() + "_" + i),
             dinfo,
             glm,
             lambdas,
             alphas[i],
             nfolds,
             betaEpsilon,
             self());
   }
 }
示例#6
0
文件: GLM2.java 项目: rohit2412/h2o
 /**
  * Scatters a vector defined over {@code activeCols} (plus a trailing intercept) into a vector
  * over all {@code _dinfo.fullN() + 1} slots.
  *
  * @param beta values for the active columns in order, with the intercept as the last element
  * @param activeCols full-layout index of each active column, or {@code null} when {@code beta}
  *     needs no expansion
  * @return {@code beta} itself when {@code activeCols} is {@code null}; otherwise a newly
  *     allocated full-length vector
  */
 private final double[] expandVec(double[] beta, final int[] activeCols) {
   if (activeCols == null) {
     return beta;
   }
   final double[] expanded = MemoryManager.malloc8d(_dinfo.fullN() + 1);
   for (int k = 0; k < activeCols.length; k++) {
     expanded[activeCols[k]] = beta[k];
   }
   // The intercept always lives in the last slot of both vectors.
   expanded[expanded.length - 1] = beta[beta.length - 1];
   return expanded;
 }
示例#7
0
文件: GLM2.java 项目: rohit2412/h2o
 /**
  * Re-expresses a coefficient vector defined over {@code oldActiveCols} in terms of {@code
  * activeCols}.
  *
  * @param beta values for the old active columns in order, intercept last
  * @param activeCols the new active columns; when {@code null}, {@code beta} is returned unchanged
  * @param oldActiveCols the columns {@code beta} is currently defined over; {@code null} is
  *     treated as "beta is already in the full layout"
  * @return {@code beta} itself when no resizing is needed, otherwise the result of {@code
  *     contractVec} applied to the full-length vector
  */
 private final double[] resizeVec(
     double[] beta, final int[] activeCols, final int[] oldActiveCols) {
   if (activeCols == null || Arrays.equals(activeCols, oldActiveCols)) {
     return beta;
   }
   // Without a previous active set there is nothing to scatter; iterating over the null array
   // would throw a NullPointerException, so contract the vector directly.
   if (oldActiveCols == null) {
     return contractVec(beta, activeCols);
   }
   final double[] full = MemoryManager.malloc8d(_dinfo.fullN() + 1);
   int i = 0;
   for (int c : oldActiveCols) {
     full[c] = beta[i++];
   }
   // Everything but the trailing intercept must have been consumed by now.
   assert i == beta.length - 1;
   full[full.length - 1] = beta[i];
   return contractVec(full, activeCols);
 }
示例#8
0
文件: GLM2.java 项目: rohit2412/h2o
 // Filters the columns using the strong rules and updates the active column set and the active
 // data accordingly.
 // Note: the strong rules are applied so that they keep all previously active coefficients in,
 // to prevent issues with line-search.
 //
 // A column is selected when it was already active or when its gradient entry lies outside
 // [-rhs, rhs] with rhs = alpha[0] * (2 * l1 - l2). Returns the new active columns, or null
 // when strong rules are disabled or every column was selected.
 private int[] activeCols(final double l1, final double l2, final double[] grad) {
   final double threshold = alpha[0] * (2 * l1 - l2);
   final int total = _dinfo.fullN();
   final int[] picked = MemoryManager.malloc4(total);
   int count = 0;
   int prev = 0; // cursor into the previously active columns
   if (_activeCols == null) {
     // Placeholder that never matches a real column index, so the loop needs no null check.
     _activeCols = new int[] {-1};
   }
   for (int col = 0; col < total; ++col) {
     final boolean wasActive = prev < _activeCols.length && col == _activeCols[prev];
     if (wasActive || grad[col] > threshold || grad[col] < -threshold) {
       picked[count++] = col;
       if (wasActive) {
         ++prev;
       }
     }
   }
   if (strong_rules_enabled && count != total) {
     _activeCols = Arrays.copyOf(picked, count);
     _activeData = _dinfo.filterExpandedColumns(_activeCols);
   } else {
     _activeCols = null;
     // NOTE(review): this writes the full frame into the outgoing _activeData object right
     // before _activeData is replaced by _dinfo — confirm the write is intended.
     _activeData._adaptedFrame = _dinfo._adaptedFrame;
     _activeData = _dinfo;
   }
   final String summary =
       "GLM2 strong rule at lambda="
           + l1
           + ", got "
           + count
           + " active cols out of "
           + _dinfo.fullN()
           + " total.";
   Log.info(summary);
   return _activeCols;
 }
示例#9
0
文件: GLM2.java 项目: jayfans3/h2o
  /**
   * Starts one cross-validation GLM per fold and registers a callback that loads the fold models
   * and runs a {@code GLMXValidationTask} over them.
   *
   * <p>The callback's pending count is raised by {@code n_folds - 1} before the fold jobs are
   * started — presumably so that it fires once, after the last fold completes (confirm against
   * the completer semantics). Any failure is forwarded to {@code cmp}.
   *
   * @param model the model being validated; supplies alpha, beta epsilon and the starting
   *     coefficients for the fold jobs
   * @param lambdaIxd index used to pick the starting coefficients via {@code model.norm_beta};
   *     NOTE(review): the rest of the method uses the field {@code _lambdaIdx} instead — confirm
   *     the two always agree
   * @param cmp completer handed to the validation task and notified on failure
   */
  private void xvalidate(final GLMModel model, int lambdaIxd, final H2OCountedCompleter cmp) {
    final Key[] keys = new Key[n_folds];
    H2OCallback onFoldsDone =
        new H2OCallback() {
          @Override
          public void callback(H2OCountedCompleter t) {
            try {
              // The fold models are available now; load them and compute their validations.
              final GLMModel[] foldModels = new GLMModel[keys.length];
              int k = 0;
              while (k < foldModels.length) {
                foldModels[k] = DKV.get(keys[k]).get();
                ++k;
              }
              final GLMXValidationTask validation =
                  new GLMXValidationTask(model, _lambdaIdx, foldModels, cmp);
              validation.asyncExec(_dinfo._adaptedFrame);
            } catch (Throwable ex) {
              cmp.completeExceptionally(ex);
            }
          }

          @Override
          public boolean onExceptionalCompletion(Throwable ex, CountedCompleter caller) {
            // Forward the failure to the completer supplied by the caller.
            cmp.completeExceptionally(ex);
            return true;
          }
        };
    onFoldsDone.addToPendingCount(n_folds - 1);
    final double noProximalPenalty = 0;
    for (int fold = 0; fold < n_folds; ++fold) {
      final GLM2 foldJob =
          new GLM2(
              this.description + "xval " + fold,
              self(),
              keys[fold] = Key.make(destination_key + "_" + _lambdaIdx + "_xval" + fold),
              _dinfo.getFold(fold, n_folds),
              _glm,
              new double[] {lambda[_lambdaIdx]},
              model.alpha,
              0,
              model.beta_eps,
              self(),
              model.norm_beta(lambdaIxd),
              noProximalPenalty);
      foldJob.run(onFoldsDone);
    }
  }
示例#10
0
文件: GLM2.java 项目: jayfans3/h2o
 @Override
 protected Response serve() {
   init();
   link = family.defaultLink; // TODO
   tweedie_link_power = 1 - tweedie_variance_power; // TODO
   Frame fr =
       DataInfo.prepareFrame(source, response, ignored_cols, family == Family.binomial, true);
   _dinfo = new DataInfo(fr, 1, standardize);
   _glm = new GLMParams(family, tweedie_variance_power, link, tweedie_link_power);
   if (alpha.length > 1) { // grid search
     if (destination_key == null) destination_key = Key.make("GLMGridModel_" + Key.make());
     if (job_key == null) job_key = Key.make("GLMGridJob_" + Key.make());
     Job j = gridSearch(self(), destination_key, _dinfo, _glm, lambda, alpha, n_folds);
     return GLMGridView.redirect(this, j.dest());
   } else {
     if (destination_key == null) destination_key = Key.make("GLMModel_" + Key.make());
     if (job_key == null) job_key = Key.make("GLM2Job_" + Key.make());
     fork();
     return GLMProgress.redirect(this, job_key, dest());
   }
 }
示例#11
0
文件: GLM2.java 项目: rohit2412/h2o
  /**
   * Entry point of the computation: resets the active data to the full data, records the start
   * time, obtains the response mean and observation count, and continues with {@code run(ymu,
   * nobs)}.
   *
   * <p>In high-accuracy mode or with lambda search, the mean and count come from an asynchronous
   * {@code YMUTask}; otherwise they are read directly from the response vec and the frame's row
   * count.
   *
   * @param doLog when true, {@code logStart()} is called first
   */
  public void run(final boolean doLog) {
    if (doLog) {
      logStart();
    }
    System.out.println("running with " + _dinfo.fullN() + " predictors");
    _activeData = _dinfo;
    assert alpha.length == 1;
    start = System.currentTimeMillis();

    if (!highAccuracy() && !lambda_search) {
      // Shortcut for the quick & simple mode: no extra pass over the data.
      final double ymu = _dinfo._adaptedFrame.lastVec().mean();
      run(ymu, _dinfo._adaptedFrame.numRows());
      return;
    }

    // Compute the mean and observation count with a task, then continue from its callback.
    final YMUTask meanTask =
        new YMUTask(
            GLM2.this,
            _dinfo,
            new H2OCallback<YMUTask>(GLM2.this) {
              @Override
              public void callback(final YMUTask ymut) {
                run(ymut.ymu(), ymut.nobs());
              }
            });
    meanTask.asyncExec(_dinfo._adaptedFrame);
  }
示例#12
0
文件: GLM2.java 项目: rohit2412/h2o
 @Override
 public void init() {
   super.init();
   if (lambda_search && lambda.length > 1)
     throw new IllegalArgumentException(
         "Can not supply both lambda_search and multiple lambdas. If lambda_search is on, GLM expects only one value of lambda, representing the lambda min (smallest lambda in the lambda search).");
   // check the response
   if (response.isEnum() && family != Family.binomial)
     throw new IllegalArgumentException(
         "Invalid response variable, trying to run regression with categorical response!");
   switch (family) {
     case poisson:
     case tweedie:
       if (response.min() < 0)
         throw new IllegalArgumentException(
             "Illegal response column for family='" + family + "', response must be >= 0.");
       break;
     case gamma:
       if (response.min() <= 0)
         throw new IllegalArgumentException(
             "Invalid response for family='Gamma', response must be > 0!");
       break;
     case binomial:
       if (response.min() < 0 || response.max() > 1)
         throw new IllegalArgumentException(
             "Illegal response column for family='Binomial', response must in <0,1> range!");
       break;
     default:
       // pass
   }
   Frame fr =
       DataInfo.prepareFrame(
           source, response, ignored_cols, family == Family.binomial, true, true);
   _dinfo = new DataInfo(fr, 1, use_all_factor_levels || lambda_search, standardize, false);
   if (higher_accuracy) setHighAccuracy();
 }
示例#13
0
文件: GLM2.java 项目: rohit2412/h2o
 /**
  * Builds one cross-validation GLM per fold, submits them as a {@code ParallelGLMs} task, and
  * from that task's callback loads the fold models and runs a {@code GLMXValidationTask} over
  * them.
  *
  * @param model the model being validated; supplies alpha, beta epsilon and the starting
  *     coefficients for the fold jobs
  * @param lambdaIxd index used to pick the starting coefficients via {@code model.norm_beta};
  *     NOTE(review): the rest of the method uses the field {@code _lambdaIdx} instead — confirm
  *     the two always agree
  * @param cmp completer handed to the validation task
  */
 private void xvalidate(final GLMModel model, int lambdaIxd, final H2OCountedCompleter cmp) {
   final Key[] keys = new Key[n_folds];
   final GLM2[] foldJobs = new GLM2[n_folds];
   for (int fold = 0; fold < n_folds; ++fold) {
     foldJobs[fold] =
         new GLM2(
             this.description + "xval " + fold,
             self(),
             keys[fold] = Key.make(destination_key + "_" + _lambdaIdx + "_xval" + fold),
             _dinfo.getFold(fold, n_folds),
             _glm,
             new double[] {lambda[_lambdaIdx]},
             model.alpha,
             0,
             model.beta_eps,
             self(),
             model.norm_beta(lambdaIxd),
             higher_accuracy,
             prior,
             0);
   }
   final ParallelGLMs foldRunner =
       new ParallelGLMs(
           GLM2.this,
           foldJobs,
           H2O.CLOUD.size(),
           new H2OCallback(GLM2.this) {
             @Override
             public void callback(H2OCountedCompleter t) {
               // The fold models are available now; load them and compute their validations.
               final GLMModel[] foldModels = new GLMModel[keys.length];
               int k = 0;
               while (k < foldModels.length) {
                 foldModels[k] = DKV.get(keys[k]).get();
                 ++k;
               }
               final GLMXValidationTask validation =
                   new GLMXValidationTask(model, _lambdaIdx, foldModels, cmp);
               validation.asyncExec(_dinfo._adaptedFrame);
             }
           });
   H2O.submitTask(foldRunner);
 }
示例#14
0
文件: GLM2.java 项目: rohit2412/h2o
 /**
  * Finalizes the lambda sequence, creates and locks the model, fills in the trivial solution for
  * lambda max when applicable, and launches the first {@code GLMIterationTask}.
  *
  * @param ymu response mean; passed to the model and the iteration task, and used (through the
  *     link function) as the intercept of the trivial lambda-max solution
  * @param nobs number of observations; the iteration task is given {@code 1.0 / nobs}
  * @param lmaxt result of the lambda-max computation; may be {@code null}, except that lambda
  *     search asserts it is present
  * @throws IllegalArgumentException if there are more than {@code MAX_PREDICTORS} predictors and
  *     lambda search with strong rules is not in use
  */
 private void run(final double ymu, final long nobs, LMAXTask lmaxt) {
   String[] warns = null;
   // The predictor limit is only waived when lambda search AND strong rules are both enabled.
   if ((!lambda_search || !strong_rules_enabled) && (_dinfo.fullN() > MAX_PREDICTORS))
     throw new IllegalArgumentException(
         "Too many predictors! GLM can only handle "
             + MAX_PREDICTORS
             + " predictors, got "
             + _dinfo.fullN()
             + ", try to run with strong_rules enabled.");
   if (lambda_search) {
     // Lambda search runs with at least 300 iterations.
     max_iter = Math.max(300, max_iter);
     assert lmaxt != null : "running lambda search, but don't know what is the lambda max!";
     final double lmax = lmaxt.lmax();
     // A smaller min ratio is used when there are more rows than predictors.
     final double lambda_min_ratio =
         _dinfo._adaptedFrame.numRows() > _dinfo.fullN() ? 0.0001 : 0.01;
     // Build 100 geometrically decreasing lambdas starting at lmax, each d times the previous.
     // NOTE(review): with d = ratio^0.01 the last value is lmax * ratio^0.99, i.e. slightly
     // above lmax * ratio — confirm this is intended.
     final double d = Math.pow(lambda_min_ratio, 0.01);
     lambda = new double[100];
     lambda[0] = lmax;
     for (int i = 1; i < lambda.length; ++i) lambda[i] = lambda[i - 1] * d;
     _runAllLambdas = false;
   } else if (alpha[0] > 0
       && lmaxt
           != null) { // make sure we start with lambda max (and discard all lambda > lambda max)
     final double lmax = lmaxt.lmax();
     // Count the leading lambdas that exceed lambda max (only the leading run is examined).
     int i = 0;
     while (i < lambda.length && lambda[i] > lmax) ++i;
     if (i != 0) {
       Log.info(
           "GLM: removing "
               + i
               + " lambdas > lambda_max: "
               + Arrays.toString(Arrays.copyOf(lambda, i)));
       // Record warnings for the model; a second one is added when no lambda survives.
       warns =
           i == lambda.length
               ? new String[] {
                 "Removed " + i + " lambdas > lambda_max",
                 "No lambdas < lambda_max, returning null model."
               }
               : new String[] {"Removed " + i + " lambdas > lambda_max"};
     }
     // Keep the remaining lambdas, or fall back to the single value lambda_max if none remain.
     lambda =
         i == lambda.length
             ? new double[] {lambda_max}
             : Arrays.copyOfRange(lambda, i, lambda.length);
   }
   _model =
       new GLMModel(
           GLM2.this,
           dest(),
           _dinfo,
           _glm,
           beta_epsilon,
           alpha[0],
           lambda_max,
           lambda,
           ymu,
           prior);
   _model.warnings = warns;
   _model.clone().delete_and_lock(self());
   if (lambda[0] == lambda_max && alpha[0] > 0) { // fill-in trivial solution for lambda max
     // All coefficients stay at their freshly allocated values; only the intercept is set.
     _beta = MemoryManager.malloc8d(_dinfo.fullN() + 1);
     _beta[_beta.length - 1] = _glm.link(ymu) + _iceptAdjust;
     _model.setLambdaSubmodel(0, _beta, _beta, 0);
     if (lmaxt != null) _model.setAndTestValidation(0, lmaxt._val);
     // The first lambda is solved; continue with the next one.
     _lambdaIdx = 1;
   }
   if (_lambdaIdx == lambda.length) // ran only with one lambda > lambda_max => return null model
   GLM2.this.complete(); // signal we're done to anyone waiting for the job
   else {
     ++_iter;
     // Narrow the active columns with the strong rules before the first iteration.
     if (lmaxt != null && strong_rules_enabled)
       activeCols(lambda[_lambdaIdx], lmaxt.lmax(), lmaxt.gradient(l2pen()));
     // NOTE(review): "staring" in the message below looks like a typo for "starting".
     Log.info(
         "GLM2 staring GLM after "
             + (System.currentTimeMillis() - start)
             + "ms of preprocessing (mean/lmax/strong rules computation)");
     // Start the first iteration over the active data; _ymu and _reg are stored as a side
     // effect of building the argument list.
     new GLMIterationTask(
             GLM2.this,
             _activeData,
             _glm,
             true,
             false,
             false,
             null,
             _ymu = ymu,
             _reg = 1.0 / nobs,
             new Iteration())
         .asyncExec(_activeData._adaptedFrame);
   }
 }