예제 #1
0
파일: GLM2.java 프로젝트: jayfans3/h2o
 /**
  * Estimates how far this job has advanced.
  *
  * <p>The estimate is the iteration count of the model stored under {@code dest()} divided by
  * {@code max_iter}, clamped to the range {@code [0, 1]}.
  *
  * @return 0 when no model is stored under {@code dest()} yet or when {@code max_iter} is not
  *     positive; otherwise the clamped iteration ratio
  */
 @Override
 public float progress() {
   // Look the destination up exactly once: with two separate lookups the entry can be removed
   // between the null check and the dereference, which ends in a NullPointerException.
   final water.Value stored = DKV.get(dest());
   if (stored == null) return 0;
   // A non-positive iteration budget would turn the division below into NaN or Infinity.
   if (max_iter <= 0) return 0;
   final GLMModel m = stored.get();
   final float ratio =
       (float) m.iteration() / (float) max_iter; // TODO, do something smarter here
   // Never report a value outside [0, 1], even if the iteration counter overshoots max_iter.
   return Math.max(0f, Math.min(1f, ratio));
 }
예제 #2
0
파일: GLM2.java 프로젝트: jayfans3/h2o
  /**
   * Wraps up the current lambda: launches validation of the model and, once the validation is
   * available, either moves on to the next lambda or completes the job.
   *
   * @param glmt iteration task that is handed to a new {@code Iteration} when the search
   *     continues with the next lambda
   */
  public void nextLambda(final GLMIterationTask glmt) {
    // Invoked once the validation for the current lambda has been computed.
    H2OCallback onValidated =
        new H2OCallback<GLMValidationTask>() {
          @Override
          public void callback(GLMValidationTask validation) {
            final boolean better = _model.setAndTestValidation(_lambdaIdx, validation._res);
            _model.clone().update(self());
            final boolean lastLambda = _lambdaIdx >= (lambda.length - 1);
            if (lastLambda || !(better || _runAllLambdas)) {
              // nope, we're done
              GLM2.this.complete(); // signal we're done to anyone waiting for the job
              return;
            }
            // continue with next lambda value
            glmt._val = null;
            ++_lambdaIdx;
            new Iteration().callback(glmt);
          }

          @Override
          public boolean onExceptionalCompletion(Throwable ex, CountedCompleter cc) {
            GLM2.this.cancel(ex);
            return true;
          }
        };
    if (GLM2.this.n_folds < 2) {
      // no cross-validation requested: validate on the adapted frame
      new GLMValidationTask(_model.clone(), _lambdaIdx, onValidated)
          .asyncExec(_dinfo._adaptedFrame);
    } else {
      xvalidate(_model.clone(), _lambdaIdx, onValidated);
    }
  }
예제 #3
0
파일: GLM2.java 프로젝트: rohit2412/h2o
 /**
  * Stores a freshly computed coefficient vector as the submodel of the current lambda.
  *
  * <p>When only a subset of columns is active, the vector is first expanded to full width
  * ({@code _dinfo.fullN() + 1}, the last slot being the intercept). When the data is
  * standardized, a de-normalized copy is stored as the submodel's coefficients and the expanded
  * vector is passed alongside it.
  *
  * @param newBeta coefficients over the active columns followed by the intercept, or over all
  *     columns when no active-column set is in use
  * @return the full-width coefficient vector (this is {@code newBeta} itself when no
  *     active-column set is in use)
  */
 private double[] setNewBeta(final double[] newBeta) {
   final double[] fullBeta;
   if (_activeCols == null) {
     assert newBeta.length == _dinfo.fullN() + 1;
     fullBeta = newBeta;
   } else {
     // Scatter the active coefficients into their full-width positions.
     fullBeta = MemoryManager.malloc8d(_dinfo.fullN() + 1);
     int src = 0;
     for (int col : _activeCols) {
       fullBeta[col] = newBeta[src++];
     }
     assert src == newBeta.length - 1;
     fullBeta[fullBeta.length - 1] = newBeta[src]; // intercept goes last
   }

   final double[] submodelBeta; // coefficients stored as the submodel's beta
   final double[] normalizedBeta; // full-width vector kept alongside, or null
   if (_dinfo._standardize) {
     final double[] denorm = fullBeta.clone();
     final int numoff = _dinfo.numStart();
     final int intercept = denorm.length - 1;
     double interceptShift = 0.0; // Reverse any normalization on the intercept
     // denormalize only the numeric coefs (categoricals are not normalized)
     for (int i = numoff; i < intercept; i++) {
       final int k = i - numoff;
       final double scaled = denorm[i] * _dinfo._normMul[k];
       denorm[i] = scaled;
       interceptShift += scaled * _dinfo._normSub[k]; // accumulate the intercept adjustment
     }
     denorm[intercept] -= interceptShift;
     submodelBeta = denorm;
     normalizedBeta = fullBeta;
   } else {
     submodelBeta = fullBeta;
     normalizedBeta = null;
   }
   _model.setLambdaSubmodel(_lambdaIdx, submodelBeta, normalizedBeta, (_iter + 1));
   _model.clone().update(self());
   return fullBeta;
 }
예제 #4
0
 /**
  * Produces in {@code _res} a copy of the given model that holds exactly one submodel.
  *
  * <p>The submodel is the one at the output's best-lambda index when {@code _lambda} is NaN,
  * otherwise the one returned by {@code submodelForLambda(_lambda)}.
  *
  * @param m source model; it is cloned, together with its output and the chosen submodel
  */
 @Override
 public void map(GLMModel m) {
   _res = (GLMModel) m.clone();
   final GLMOutput out = (GLMOutput) _res._output.clone();
   _res._output = out;
   Submodel picked;
   if (Double.isNaN(_lambda)) {
     picked = out._submodels[out._best_lambda_idx];
   } else {
     picked = out.submodelForLambda(_lambda);
   }
   assert picked != null : "GLM[" + m._key + "]: missing submodel for lambda " + _lambda;
   final Submodel only = (Submodel) picked.clone();
   out._submodels = new Submodel[] {only};
   out.setSubmodelIdx(0);
 }
예제 #5
0
파일: GLM2.java 프로젝트: jayfans3/h2o
 /**
  * Finishes the job: records a warning when an extra L2 penalty was added, unlocks the model and
  * signals completion.
  */
 protected void complete() {
   if (_addedL2 > 0) {
     final String warn = "Added L2 penalty (rho = " + _addedL2 + ")  due to non-spd matrix. ";
     final String[] existing = _model.warnings;
     final boolean noWarningsYet = existing == null || existing.length == 0;
     if (noWarningsYet) {
       _model.warnings = new String[] {warn};
     } else {
       // append the new warning after the ones already recorded
       final String[] extended = Arrays.copyOf(existing, existing.length + 1);
       extended[existing.length] = warn;
       _model.warnings = extended;
     }
     _model.update(self());
   }
   _model.unlock(self());
   // Remove/complete job only for top-level, not xval GLM2s
   if (_dinfo._nfolds == 0) {
     remove();
   }
   if (_fjtask != null) {
     _fjtask.tryComplete();
   }
 }
예제 #6
0
파일: GLM2.java 프로젝트: rohit2412/h2o
 /**
  * Cancels this job because of the given throwable, unlocking the model first.
  *
  * <p>Does nothing when the job is already cancelled or crashed. A
  * {@code JobCancelledException} leads to a plain {@code cancel()}; any other throwable is
  * forwarded to the superclass.
  *
  * @param ex the reason for the cancellation
  */
 @Override
 public void cancel(Throwable ex) {
   if (isCancelledOrCrashed()) {
     return;
   }
   if (_model != null) {
     _model.unlock(self());
   }
   if (!(ex instanceof JobCancelledException)) {
     super.cancel(ex);
     return;
   }
   // re-check the state before the plain cancel
   if (!isCancelledOrCrashed()) {
     cancel();
   }
 }
예제 #7
0
 /**
  * Creates a new GLM model from an existing one, with selected coefficients overridden.
  *
  * <p>The source model's coefficients are read into a map, the entries named in
  * {@code args.names} are replaced by the matching values from {@code args.beta}, and the
  * resulting vector (ordered by the source model's coefficient names) is placed in a new model
  * that is stored in the DKV.
  *
  * @param version not used by this method
  * @param args request holding the source model, the coefficient names and values to override,
  *     and an optional destination key (a fresh key is made when {@code args.dest} is null)
  * @return a schema filled from the newly created model
  * @throws IllegalArgumentException if the source model cannot be found, or if
  *     {@code args.names} and {@code args.beta} differ in length
  */
 public GLMModelV3 make_model(int version, MakeGLMModelV3 args) {
   GLMModel model = DKV.getGet(args.model.key());
   if (model == null) throw new IllegalArgumentException("missing source model " + args.model);
   // Names and values are paired by index. A mismatch used to surface either as an
   // ArrayIndexOutOfBoundsException or as silently ignored trailing values.
   if (args.names.length != args.beta.length)
     throw new IllegalArgumentException(
         "got "
             + args.names.length
             + " coefficient names but "
             + args.beta.length
             + " beta values");
   String[] names = model._output.coefficientNames();
   Map<String, Double> coefs = model.coefficients();
   for (int i = 0; i < args.names.length; ++i) coefs.put(args.names[i], args.beta[i]);
   // Rebuild the coefficient vector in the source model's coefficient order.
   double[] beta = model.beta().clone();
   for (int i = 0; i < beta.length; ++i) beta[i] = coefs.get(names[i]);
   GLMModel m =
       new GLMModel(
           args.dest != null ? args.dest.key() : Key.make(),
           model._parms,
           null,
           new double[] {.5},
           Double.NaN,
           Double.NaN,
           -1);
   // NOTE(review): this changes the predictor transform on the DataInfo returned by the source
   // model; confirm dinfo() hands out an object that is safe to modify here.
   DataInfo dinfo = model.dinfo();
   dinfo.setPredictorTransform(TransformType.NONE);
   // GLMOutput(DataInfo dinfo, String[] column_names, String[][] domains, String[]
   // coefficient_names, boolean binomial) {
   m._output =
       new GLMOutput(
           model.dinfo(),
           model._output._names,
           model._output._domains,
           model._output.coefficientNames(),
           model._output._binomial,
           beta);
   DKV.put(m._key, m);
   GLMModelV3 res = new GLMModelV3();
   res.fillFromImpl(m);
   return res;
 }
예제 #8
0
 public GLMXValidation(GLMModel mainModel, GLMModel[] xvalModels, int lambdaIdx, long nobs) {
   super(mainModel._dataKey, mainModel.ymu, mainModel.glm, mainModel.rank(lambdaIdx));
   xval_models = new Key[xvalModels.length];
   for (int i = 0; i < xvalModels.length; ++i) {
     add(xvalModels[i].validation());
     xval_models[i] = xvalModels[i]._key;
   }
   this.nobs = nobs;
   finalize_AIC_AUC();
 }
예제 #9
0
파일: GLM2.java 프로젝트: rohit2412/h2o
 /**
  * Records the validation of the current lambda and decides whether the lambda search goes on.
  *
  * <p>The search stops (and the job completes) on the first of these conditions that holds: the
  * iteration limit is reached, the solution did not improve and not all lambdas are forced, the
  * last lambda has been processed, or the active-column count reaches {@code MAX_PREDICTORS}.
  * Otherwise the lambda index is advanced and the next computation is started.
  *
  * @param glmt iteration task carried over to the next lambda
  * @param val validation of the model at the current lambda
  */
 protected void nextLambda(final GLMIterationTask glmt, GLMValidation val) {
   currentLambdaIter = 0;
   final boolean improved = _model.setAndTestValidation(_lambdaIdx, val);
   _model.clone().update(self());

   // Pick the first stopping reason that applies; the order decides which message is logged.
   final String stopReason;
   if (_iter == max_iter) {
     stopReason = "GLM2 reached max #iterations.";
   } else if (!(improved || _runAllLambdas)) {
     stopReason = "GLM2 converged as solution stopped improving with decreasing lambda.";
   } else if (_lambdaIdx == lambda.length - 1) {
     stopReason = "GLM2 done with all given lambdas.";
   } else if (_activeCols != null && _activeCols.length + 1 >= MAX_PREDICTORS) {
     stopReason =
         "GLM2 reached maximum allowed number of predictors at lambda = " + lambda[_lambdaIdx];
   } else {
     stopReason = null;
   }

   if (stopReason != null) {
     Log.info(stopReason);
     // nope, we're done
     GLM2.this.complete(); // signal we're done to anyone waiting for the job
     return;
   }

   // continue with next lambda value
   ++_lambdaIdx;
   glmt._val = null;
   if (glmt._gram != null) {
     // we have the right gram, just solve with the next lambda
     new Iteration().callback(glmt);
     return;
   }
   // No gram: assume we had lambda search with strong rules, so the previous gram cannot be
   // reused for the next lambda (different sets of coefficients). Expected state:
   //  1) beta has been expanded to match the current set of active cols
   //  2) glmt is a new iteration task, ready to be launched
   // The caller is expected to ensure this.
   assert _activeCols == null || (glmt._beta.length == _activeCols.length + 1);
   assert !glmt.isDone();
   glmt.asyncExec(_activeData._adaptedFrame);
 }
예제 #10
0
파일: GLM2.java 프로젝트: jayfans3/h2o
  /**
   * Cross-validates the given model at the current lambda.
   *
   * <p>One GLM2 job per fold is started. When all of them have finished, the fold models are
   * fetched from the DKV and a {@code GLMXValidationTask} is launched with {@code cmp} as its
   * completer. Any failure along the way is reported to {@code cmp}.
   *
   * @param model model to cross-validate
   * @param lambdaIxd lambda index used to obtain the model's normalized beta for the fold jobs
   * @param cmp completer notified about the validation result or about a failure
   */
  private void xvalidate(final GLMModel model, int lambdaIxd, final H2OCountedCompleter cmp) {
    final Key[] foldKeys = new Key[n_folds];
    H2OCallback onFoldsDone =
        new H2OCallback() {
          @Override
          public void callback(H2OCountedCompleter t) {
            try {
              // we got the xval models, now compute their validations...
              final GLMModel[] foldModels = new GLMModel[foldKeys.length];
              for (int f = 0; f < foldModels.length; ++f) {
                foldModels[f] = DKV.get(foldKeys[f]).get();
              }
              new GLMXValidationTask(model, _lambdaIdx, foldModels, cmp)
                  .asyncExec(_dinfo._adaptedFrame);
            } catch (Throwable ex) {
              cmp.completeExceptionally(ex);
            }
          }

          @Override
          public boolean onExceptionalCompletion(Throwable ex, CountedCompleter caller) {
            cmp.completeExceptionally(ex);
            return true;
          }
        };
    // the callback is shared by all fold jobs
    onFoldsDone.addToPendingCount(n_folds - 1);
    final double noProximalPenalty = 0;
    for (int fold = 0; fold < n_folds; ++fold) {
      final GLM2 foldJob =
          new GLM2(
              this.description + "xval " + fold,
              self(),
              foldKeys[fold] = Key.make(destination_key + "_" + _lambdaIdx + "_xval" + fold),
              _dinfo.getFold(fold, n_folds),
              _glm,
              new double[] {lambda[_lambdaIdx]},
              model.alpha,
              0,
              model.beta_eps,
              self(),
              model.norm_beta(lambdaIxd),
              noProximalPenalty);
      foldJob.run(onFoldsDone);
    }
  }
예제 #11
0
파일: GLM2.java 프로젝트: rohit2412/h2o
 /**
  * Cross-validates the given model at the current lambda.
  *
  * <p>One GLM2 job per fold is created and the whole set is submitted as a {@code ParallelGLMs}
  * task. Its callback fetches the fold models from the DKV and launches a
  * {@code GLMXValidationTask} with {@code cmp} as its completer.
  *
  * @param model model to cross-validate
  * @param lambdaIxd lambda index used to obtain the model's normalized beta for the fold jobs
  * @param cmp completer handed to the validation task
  */
 private void xvalidate(final GLMModel model, int lambdaIxd, final H2OCountedCompleter cmp) {
   final Key[] foldKeys = new Key[n_folds];
   final GLM2[] foldJobs = new GLM2[n_folds];
   for (int fold = 0; fold < n_folds; ++fold) {
     foldJobs[fold] =
         new GLM2(
             this.description + "xval " + fold,
             self(),
             foldKeys[fold] = Key.make(destination_key + "_" + _lambdaIdx + "_xval" + fold),
             _dinfo.getFold(fold, n_folds),
             _glm,
             new double[] {lambda[_lambdaIdx]},
             model.alpha,
             0,
             model.beta_eps,
             self(),
             model.norm_beta(lambdaIxd),
             higher_accuracy,
             prior,
             0);
   }
   H2O.submitTask(
       new ParallelGLMs(
           GLM2.this,
           foldJobs,
           H2O.CLOUD.size(),
           new H2OCallback(GLM2.this) {
             @Override
             public void callback(H2OCountedCompleter t) {
               // we got the xval models, now compute their validations...
               final GLMModel[] foldModels = new GLMModel[foldKeys.length];
               for (int f = 0; f < foldModels.length; ++f) {
                 foldModels[f] = DKV.get(foldKeys[f]).get();
               }
               new GLMXValidationTask(model, _lambdaIdx, foldModels, cmp)
                   .asyncExec(_dinfo._adaptedFrame);
             }
           }));
 }
예제 #12
0
파일: GLMModel.java 프로젝트: vsynych/h2o-3
 /**
  * Clones this model and, in addition, gives the clone its own clone of the output object.
  *
  * @return the cloned model, whose {@code _output} is a separate clone of this model's output
  */
 public GLMModel clone2() {
   final GLMModel copy = clone();
   // replace the output reference on the copy with a clone of it
   copy._output = (GLMOutput) copy._output.clone();
   return copy;
 }
예제 #13
0
 /**
  * Finalizes the given model on behalf of the job identified by {@code _jobKey}.
  *
  * <p>Calls {@code pickBestModel(false)} on the model's output, then {@code update} and
  * {@code unlock} on the model, both with {@code _jobKey}.
  *
  * @param glmModel the model to finalize
  */
 @Override
 protected void map(GLMModel glmModel) {
   glmModel._output.pickBestModel(false);
   // update before unlock: presumably the change has to be stored while the job still holds
   // the lock (verify against the update/unlock contract)
   glmModel.update(_jobKey);
   glmModel.unlock(_jobKey);
 }
예제 #14
0
파일: GLM2.java 프로젝트: jayfans3/h2o
 /**
  * Cancels this job because of the given throwable, unlocking the model first when there is one.
  *
  * <p>A {@code JobCancelledException} leads to a plain {@code cancel()}; any other throwable is
  * forwarded to the superclass.
  *
  * @param ex the reason for the cancellation
  */
 @Override
 public void cancel(Throwable ex) {
   if (_model != null) {
     _model.unlock(self());
   }
   if (!(ex instanceof JobCancelledException)) {
     super.cancel(ex);
     return;
   }
   cancel();
 }
예제 #15
0
파일: GLM2.java 프로젝트: rohit2412/h2o
 /**
  * Prepares the lambda sequence and the model, then either completes immediately or launches the
  * first GLM iteration task.
  *
  * <p>Steps, in order:
  *
  * <ol>
  *   <li>Rejects the run when there are more than {@code MAX_PREDICTORS} predictors and lambda
  *       search with strong rules is not in use.
  *   <li>With lambda search: raises {@code max_iter} to at least 300 and builds 100 lambdas that
  *       start at lambda max and shrink by a constant factor.
  *   <li>Without lambda search, when {@code alpha[0] > 0} and lambda max is known: drops the
  *       leading lambdas that exceed lambda max and records warnings about it.
  *   <li>Creates the model, attaches the warnings and locks it for this job.
  *   <li>When the first lambda equals {@code lambda_max} (and {@code alpha[0] > 0}), fills in
  *       the submodel for index 0 directly and starts from lambda index 1.
  *   <li>Completes the job when no lambda is left, otherwise starts a
  *       {@code GLMIterationTask}.
  * </ol>
  *
  * @param ymu value passed to the model and to {@code _glm.link} for the initial intercept
  * @param nobs number of observations; {@code _reg} is set to {@code 1.0 / nobs}
  * @param lmaxt task providing lambda max (and its validation and gradient); may be null when
  *     lambda search is off
  * @throws IllegalArgumentException if there are too many predictors and lambda search with
  *     strong rules is not enabled
  */
 private void run(final double ymu, final long nobs, LMAXTask lmaxt) {
   String[] warns = null;
   if ((!lambda_search || !strong_rules_enabled) && (_dinfo.fullN() > MAX_PREDICTORS))
     throw new IllegalArgumentException(
         "Too many predictors! GLM can only handle "
             + MAX_PREDICTORS
             + " predictors, got "
             + _dinfo.fullN()
             + ", try to run with strong_rules enabled.");
   if (lambda_search) {
     max_iter = Math.max(300, max_iter);
     assert lmaxt != null : "running lambda search, but don't know what is the lambda max!";
     final double lmax = lmaxt.lmax();
     // smaller ratio when there are more rows than predictors
     final double lambda_min_ratio =
         _dinfo._adaptedFrame.numRows() > _dinfo.fullN() ? 0.0001 : 0.01;
     // per-step shrink factor for the 100 lambdas generated below
     final double d = Math.pow(lambda_min_ratio, 0.01);
     lambda = new double[100];
     lambda[0] = lmax;
     for (int i = 1; i < lambda.length; ++i) lambda[i] = lambda[i - 1] * d;
     _runAllLambdas = false;
   } else if (alpha[0] > 0
       && lmaxt
           != null) { // make sure we start with lambda max (and discard all lambda > lambda max)
     final double lmax = lmaxt.lmax();
     // i = number of leading lambdas that are greater than lambda max
     int i = 0;
     while (i < lambda.length && lambda[i] > lmax) ++i;
     if (i != 0) {
       Log.info(
           "GLM: removing "
               + i
               + " lambdas > lambda_max: "
               + Arrays.toString(Arrays.copyOf(lambda, i)));
       warns =
           i == lambda.length
               ? new String[] {
                 "Removed " + i + " lambdas > lambda_max",
                 "No lambdas < lambda_max, returning null model."
               }
               : new String[] {"Removed " + i + " lambdas > lambda_max"};
     }
     // when every lambda was removed, fall back to a single lambda equal to lambda_max
     lambda =
         i == lambda.length
             ? new double[] {lambda_max}
             : Arrays.copyOfRange(lambda, i, lambda.length);
   }
   _model =
       new GLMModel(
           GLM2.this,
           dest(),
           _dinfo,
           _glm,
           beta_epsilon,
           alpha[0],
           lambda_max,
           lambda,
           ymu,
           prior);
   _model.warnings = warns;
   _model.clone().delete_and_lock(self());
   if (lambda[0] == lambda_max && alpha[0] > 0) { // fill-in trivial solution for lambda max
     _beta = MemoryManager.malloc8d(_dinfo.fullN() + 1);
     // only the last slot is set explicitly: link(ymu) plus the intercept adjustment
     _beta[_beta.length - 1] = _glm.link(ymu) + _iceptAdjust;
     _model.setLambdaSubmodel(0, _beta, _beta, 0);
     if (lmaxt != null) _model.setAndTestValidation(0, lmaxt._val);
     _lambdaIdx = 1;
   }
   if (_lambdaIdx == lambda.length) // ran only with one lambda > lambda_max => return null model
   GLM2.this.complete(); // signal we're done to anyone waiting for the job
   else {
     ++_iter;
     // with strong rules, choose the active columns for the first lambda to be solved
     if (lmaxt != null && strong_rules_enabled)
       activeCols(lambda[_lambdaIdx], lmaxt.lmax(), lmaxt.gradient(l2pen()));
     Log.info(
         "GLM2 staring GLM after "
             + (System.currentTimeMillis() - start)
             + "ms of preprocessing (mean/lmax/strong rules computation)");
     // launch the first iteration; _ymu and _reg are assigned as part of the argument list
     new GLMIterationTask(
             GLM2.this,
             _activeData,
             _glm,
             true,
             false,
             false,
             null,
             _ymu = ymu,
             _reg = 1.0 / nobs,
             new Iteration())
         .asyncExec(_activeData._adaptedFrame);
   }
 }