コード例 #1
0
ファイル: PCAModelView.java プロジェクト: Jfeng3/h2o
 @Override
 public boolean toHTML(StringBuilder sb) {
   pca_model.generateHTML("", sb);
   return true;
 }
コード例 #2
0
ファイル: PCA.java プロジェクト: liangexiang/h2o-3
    // Main worker thread
    @Override
    protected void compute2() {
      PCAModel model = null;
      DataInfo dinfo = null;
      DataInfo xinfo = null;
      Frame x = null;

      try {
        init(true); // Initialize parameters
        _parms.read_lock_frames(PCA.this); // Fetch & read-lock input frames
        if (error_count() > 0)
          throw new IllegalArgumentException("Found validation errors: " + validationErrors());

        // The model to be built
        model = new PCAModel(dest(), _parms, new PCAModel.PCAOutput(PCA.this));
        model.delete_and_lock(_key);

        if (_parms._pca_method == PCAParameters.Method.GramSVD) {
          dinfo =
              new DataInfo(
                  Key.make(),
                  _train,
                  null,
                  0,
                  _parms._use_all_factor_levels,
                  _parms._transform,
                  DataInfo.TransformType.NONE,
                  /* skipMissing */ true, /* missingBucket */
                  false, /* weights */
                  false, /* offset */
                  false, /* intercept */
                  false);
          DKV.put(dinfo._key, dinfo);

          // Calculate and save Gram matrix of training data
          // NOTE: Gram computes A'A/n where n = nrow(A) = number of rows in training set (excluding
          // rows with NAs)
          GramTask gtsk = new Gram.GramTask(self(), dinfo).doAll(dinfo._adaptedFrame);
          Gram gram =
              gtsk._gram; // TODO: This ends up with all NaNs if training data has too many missing
          // values
          assert gram.fullN() == _ncolExp;

          // Compute SVD of Gram A'A/n using JAMA library
          // Note: Singular values ordered in weakly descending order by algorithm
          Matrix gramJ = new Matrix(gtsk._gram.getXX());
          SingularValueDecomposition svdJ = gramJ.svd();
          computeStatsFillModel(model, dinfo, svdJ, gram, gtsk._nobs);

        } else if (_parms._pca_method == PCAParameters.Method.Power) {
          SVDModel.SVDParameters parms = new SVDModel.SVDParameters();
          parms._train = _parms._train;
          parms._ignored_columns = _parms._ignored_columns;
          parms._ignore_const_cols = _parms._ignore_const_cols;
          parms._score_each_iteration = _parms._score_each_iteration;
          parms._use_all_factor_levels = _parms._use_all_factor_levels;
          parms._transform = _parms._transform;
          parms._nv = _parms._k;
          parms._max_iterations = _parms._max_iterations;
          parms._seed = _parms._seed;

          // Calculate standard deviation and projection as well
          parms._only_v = false;
          parms._u_name = _parms._loading_name;
          parms._keep_u = _parms._keep_loading;

          SVDModel svd = null;
          SVD job = null;
          try {
            job = new EmbeddedSVD(_key, _progressKey, parms);
            svd = job.trainModel().get();
            if (job.isCancelledOrCrashed()) PCA.this.cancel();
          } finally {
            if (job != null) job.remove();
            if (svd != null) svd.remove();
          }
          // Recover PCA results from SVD model
          computeStatsFillModel(model, svd);

        } else if (_parms._pca_method == PCAParameters.Method.GLRM) {
          GLRMModel.GLRMParameters parms = new GLRMModel.GLRMParameters();
          parms._train = _parms._train;
          parms._ignored_columns = _parms._ignored_columns;
          parms._ignore_const_cols = _parms._ignore_const_cols;
          parms._score_each_iteration = _parms._score_each_iteration;
          parms._transform = _parms._transform;
          parms._k = _parms._k;
          parms._max_iterations = _parms._max_iterations;
          parms._seed = _parms._seed;

          parms._recover_svd = true;
          parms._loss = GLRMModel.GLRMParameters.Loss.L2;
          parms._gamma_x = 0;
          parms._gamma_y = 0;

          GLRMModel glrm = null;
          GLRM job = null;
          try {
            job = new EmbeddedGLRM(_key, _progressKey, parms);
            glrm = job.trainModel().get();
            if (job.isCancelledOrCrashed()) PCA.this.cancel();
          } finally {
            if (job != null) job.remove();
            if (glrm != null) {
              glrm._parms._loading_key.get().delete();
              glrm.remove();
            }
          }
          // Recover PCA results from GLRM model
          computeStatsFillModel(model, glrm);
        }

        model.update(self());
        update(1);
        done();
      } catch (Throwable t) {
        Job thisJob = DKV.getGet(_key);
        if (thisJob._state == JobState.CANCELLED) {
          Log.info("Job cancelled by user.");
        } else {
          t.printStackTrace();
          failed(t);
          throw t;
        }
      } finally {
        _parms.read_unlock_frames(PCA.this);
        if (model != null) model.unlock(_key);
        if (dinfo != null) dinfo.remove();
        if (xinfo != null) xinfo.remove();
        if (x != null && !_parms._keep_loading) x.delete();
      }
      tryComplete();
    }