Пример #1
0
    // Stopping criteria
    boolean isDone(KMeansModel model, double[][] newCenters, double[][] oldCenters) {
      if (!isRunning()) return true; // Stopped/cancelled
      // Stopped for running out iterations
      if (model._output._iterations >= _parms._max_iterations) return true;

      // Compute average change in standardized cluster centers
      if (oldCenters == null) return false; // No prior iteration, not stopping
      double average_change = 0;
      for (int clu = 0; clu < _parms._k; clu++)
        average_change +=
            hex.genmodel.GenModel.KMeans_distance(
                oldCenters[clu], newCenters[clu], _isCats, null, null);
      average_change /= _parms._k; // Average change per cluster
      model._output._avg_centroids_chg =
          ArrayUtils.copyAndFillOf(
              model._output._avg_centroids_chg,
              model._output._avg_centroids_chg.length + 1,
              average_change);
      model._output._training_time_ms =
          ArrayUtils.copyAndFillOf(
              model._output._training_time_ms,
              model._output._training_time_ms.length + 1,
              System.currentTimeMillis());
      return average_change < TOLERANCE;
    }
    protected void calcCounts(CoxPHModel model, final CoxPHTask coxMR) {
      CoxPHModel.CoxPHParameters p = model._parms;
      CoxPHModel.CoxPHOutput o = model._output;

      o.n_missing = o.n - coxMR.n;
      o.n = coxMR.n;
      for (int j = 0; j < o.x_mean_cat.length; j++)
        o.x_mean_cat[j] = coxMR.sumWeightedCatX[j] / coxMR.sumWeights;
      for (int j = 0; j < o.x_mean_num.length; j++)
        o.x_mean_num[j] = coxMR.dinfo()._normSub[j] + coxMR.sumWeightedNumX[j] / coxMR.sumWeights;
      System.arraycopy(
          coxMR.dinfo()._normSub, o.x_mean_num.length, o.mean_offset, 0, o.mean_offset.length);
      int nz = 0;
      for (int t = 0; t < coxMR.countEvents.length; ++t) {
        o.total_event += coxMR.countEvents[t];
        if (coxMR.sizeEvents[t] > 0 || coxMR.sizeCensored[t] > 0) {
          o.time[nz] = o.min_time + t;
          o.n_risk[nz] = coxMR.sizeRiskSet[t];
          o.n_event[nz] = coxMR.sizeEvents[t];
          o.n_censor[nz] = coxMR.sizeCensored[t];
          nz++;
        }
      }
      if (p.start_column == null)
        for (int t = o.n_risk.length - 2; t >= 0; --t) o.n_risk[t] += o.n_risk[t + 1];
    }
    protected void initStats(final CoxPHModel model, final DataInfo dinfo) {
      CoxPHModel.CoxPHParameters p = model._parms;
      CoxPHModel.CoxPHOutput o = model._output;

      o.n = p.stop_column.length();
      o.data_info = dinfo;
      final int n_offsets = (p.offset_columns == null) ? 0 : p.offset_columns.length;
      final int n_coef = o.data_info.fullN() - n_offsets;
      final String[] coefNames = o.data_info.coefNames();
      o.coef_names = new String[n_coef];
      System.arraycopy(coefNames, 0, o.coef_names, 0, n_coef);
      o.coef = MemoryManager.malloc8d(n_coef);
      o.exp_coef = MemoryManager.malloc8d(n_coef);
      o.exp_neg_coef = MemoryManager.malloc8d(n_coef);
      o.se_coef = MemoryManager.malloc8d(n_coef);
      o.z_coef = MemoryManager.malloc8d(n_coef);
      o.gradient = MemoryManager.malloc8d(n_coef);
      o.hessian = malloc2DArray(n_coef, n_coef);
      o.var_coef = malloc2DArray(n_coef, n_coef);
      o.x_mean_cat = MemoryManager.malloc8d(n_coef - (o.data_info._nums - n_offsets));
      o.x_mean_num = MemoryManager.malloc8d(o.data_info._nums - n_offsets);
      o.mean_offset = MemoryManager.malloc8d(n_offsets);
      o.offset_names = new String[n_offsets];
      System.arraycopy(coefNames, n_coef, o.offset_names, 0, n_offsets);

      final Vec start_column = p.start_column;
      final Vec stop_column = p.stop_column;
      o.min_time =
          p.start_column == null ? (long) stop_column.min() : (long) start_column.min() + 1;
      o.max_time = (long) stop_column.max();

      final int n_time = new Vec.CollectDomain().doAll(stop_column).domain().length;
      o.time = MemoryManager.malloc8(n_time);
      o.n_risk = MemoryManager.malloc8d(n_time);
      o.n_event = MemoryManager.malloc8d(n_time);
      o.n_censor = MemoryManager.malloc8d(n_time);
      o.cumhaz_0 = MemoryManager.malloc8d(n_time);
      o.var_cumhaz_1 = MemoryManager.malloc8d(n_time);
      o.var_cumhaz_2 = malloc2DArray(n_time, n_coef);
    }
Пример #4
0
 private static double[][] destandardize(
     double[][] centers, String[][] isCats, double[] means, double[] mults) {
   int K = centers.length;
   int N = centers[0].length;
   double[][] value = new double[K][N];
   for (int clu = 0; clu < K; clu++) {
     System.arraycopy(centers[clu], 0, value[clu], 0, N);
     if (mults != null) { // Reverse standardization
       for (int col = 0; col < N; col++)
         if (isCats[col] == null) value[clu][col] = value[clu][col] / mults[col] + means[col];
     }
   }
   return value;
 }
Пример #5
0
  public static class PCAParameters extends Model.Parameters {
    public DataInfo.TransformType _transform = DataInfo.TransformType.NONE; // Data transformation
    public Method _pca_method = Method.GramSVD; // Method for computing PCA
    public int _k = 1; // Number of principal components
    public int _max_iterations = 1000; // Max iterations
    public long _seed = System.nanoTime(); // RNG seed
    // public Key<Frame> _loading_key;
    public String _loading_name; // Loading only generated if pca_method = Power
    public boolean _keep_loading = true;
    public boolean _use_all_factor_levels =
        false; // When expanding categoricals, should first level be kept or dropped?
    public boolean _compute_metrics =
        true; // Should a second pass be made through data to compute metrics?

    public enum Method {
      GramSVD,
      Power,
      GLRM
    }
  }
Пример #6
0
    /**
     * Train a Deep Learning neural net model
     *
     * @param model Input model (e.g., from initModel(), or from a previous training run)
     * @return Trained model
     */
    public final DeepLearningModel trainModel(DeepLearningModel model) {
      Frame validScoreFrame = null;
      Frame train, trainScoreFrame;
      try {
        //      if (checkpoint == null && !quiet_mode) logStart(); //if checkpoint is given, some
        // Job's params might be uninitialized (but the restarted model's parameters are correct)
        if (model == null) {
          model = DKV.get(dest()).get();
        }
        Log.info(
            "Model category: "
                + (_parms._autoencoder
                    ? "Auto-Encoder"
                    : isClassifier() ? "Classification" : "Regression"));
        final long model_size = model.model_info().size();
        Log.info(
            "Number of model parameters (weights/biases): " + String.format("%,d", model_size));
        model.write_lock(_job);
        _job.update(0, "Setting up training data...");
        final DeepLearningParameters mp = model.model_info().get_params();

        // temporary frames of the same "name" as the orig _train/_valid (asking the parameter's
        // Key, not the actual frame)
        // Note: don't put into DKV or they would overwrite the _train/_valid frames!
        Frame tra_fr = new Frame(mp._train, _train.names(), _train.vecs());
        Frame val_fr = _valid != null ? new Frame(mp._valid, _valid.names(), _valid.vecs()) : null;

        train = tra_fr;
        if (model._output.isClassifier() && mp._balance_classes) {
          _job.update(0, "Balancing class distribution of training data...");
          float[] trainSamplingFactors =
              new float
                  [train
                      .lastVec()
                      .domain()
                      .length]; // leave initialized to 0 -> will be filled up below
          if (mp._class_sampling_factors != null) {
            if (mp._class_sampling_factors.length != train.lastVec().domain().length)
              throw new IllegalArgumentException(
                  "class_sampling_factors must have "
                      + train.lastVec().domain().length
                      + " elements");
            trainSamplingFactors =
                mp._class_sampling_factors.clone(); // clone: don't modify the original
          }
          train =
              sampleFrameStratified(
                  train,
                  train.lastVec(),
                  train.vec(model._output.weightsName()),
                  trainSamplingFactors,
                  (long) (mp._max_after_balance_size * train.numRows()),
                  mp._seed,
                  true,
                  false);
          Vec l = train.lastVec();
          Vec w = train.vec(model._output.weightsName());
          MRUtils.ClassDist cd = new MRUtils.ClassDist(l);
          model._output._modelClassDist =
              _weights != null ? cd.doAll(l, w).rel_dist() : cd.doAll(l).rel_dist();
        }
        model.training_rows = train.numRows();
        if (_weights != null && _weights.min() == 0 && _weights.max() == 1 && _weights.isInt()) {
          model.training_rows = Math.round(train.numRows() * _weights.mean());
          Log.warn(
              "Not counting "
                  + (train.numRows() - model.training_rows)
                  + " rows with weight=0 towards an epoch.");
        }
        Log.info("One epoch corresponds to " + model.training_rows + " training data rows.");
        trainScoreFrame =
            sampleFrame(
                train,
                mp._score_training_samples,
                mp._seed); // training scoring dataset is always sampled uniformly from the training
                           // dataset
        if (trainScoreFrame != train) Scope.track(trainScoreFrame);

        if (!_parms._quiet_mode)
          Log.info("Number of chunks of the training data: " + train.anyVec().nChunks());
        if (val_fr != null) {
          model.validation_rows = val_fr.numRows();
          // validation scoring dataset can be sampled in multiple ways from the given validation
          // dataset
          if (model._output.isClassifier()
              && mp._balance_classes
              && mp._score_validation_sampling
                  == DeepLearningParameters.ClassSamplingMethod.Stratified) {
            _job.update(0, "Sampling validation data (stratified)...");
            validScoreFrame =
                sampleFrameStratified(
                    val_fr,
                    val_fr.lastVec(),
                    val_fr.vec(model._output.weightsName()),
                    null,
                    mp._score_validation_samples > 0
                        ? mp._score_validation_samples
                        : val_fr.numRows(),
                    mp._seed + 1,
                    false /* no oversampling */,
                    false);
          } else {
            _job.update(0, "Sampling validation data...");
            validScoreFrame = sampleFrame(val_fr, mp._score_validation_samples, mp._seed + 1);
            if (validScoreFrame != val_fr) Scope.track(validScoreFrame);
          }
          if (!_parms._quiet_mode)
            Log.info(
                "Number of chunks of the validation data: " + validScoreFrame.anyVec().nChunks());
        }

        // Set train_samples_per_iteration size (cannot be done earlier since this depends on
        // whether stratified sampling is done)
        model.actual_train_samples_per_iteration =
            computeTrainSamplesPerIteration(mp, model.training_rows, model);
        // Determine whether shuffling is enforced
        if (mp._replicate_training_data
            && (model.actual_train_samples_per_iteration
                == model.training_rows * (mp._single_node_mode ? 1 : H2O.CLOUD.size()))
            && !mp._shuffle_training_data
            && H2O.CLOUD.size() > 1
            && !mp._reproducible) {
          if (!mp._quiet_mode)
            Log.info(
                "Enabling training data shuffling, because all nodes train on the full dataset (replicated training data).");
          mp._shuffle_training_data = true;
        }
        if (!mp._shuffle_training_data
            && model.actual_train_samples_per_iteration == model.training_rows
            && train.anyVec().nChunks() == 1) {
          if (!mp._quiet_mode)
            Log.info(
                "Enabling training data shuffling to avoid training rows in the same order over and over (no Hogwild since there's only 1 chunk).");
          mp._shuffle_training_data = true;
        }

        //        if (!mp._quiet_mode) Log.info("Initial model:\n" + model.model_info());
        long now = System.currentTimeMillis();
        model._timeLastIterationEnter = now;
        if (_parms._autoencoder) {
          _job.update(0, "Scoring null model of autoencoder...");
          if (!mp._quiet_mode) Log.info("Scoring the null model of the autoencoder.");
          model.doScoring(
              trainScoreFrame,
              validScoreFrame,
              _job._key,
              0,
              false); // get the null model reconstruction error
        }
        // put the initial version of the model into DKV
        model.update(_job);
        model.total_setup_time_ms += now - _job.start_time();
        Log.info("Total setup time: " + PrettyPrint.msecs(model.total_setup_time_ms, true));
        Log.info("Starting to train the Deep Learning model.");
        _job.update(0, "Training...");

        // main loop
        for (; ; ) {
          model.iterations++;
          model.set_model_info(
              mp._epochs == 0
                  ? model.model_info()
                  : H2O.CLOUD.size() > 1 && mp._replicate_training_data
                      ? (mp._single_node_mode
                          ? new DeepLearningTask2(
                                  _job._key,
                                  train,
                                  model.model_info(),
                                  rowFraction(train, mp, model),
                                  model.iterations)
                              .doAll(Key.make(H2O.SELF))
                              .model_info()
                          : // replicated data + single node mode
                          new DeepLearningTask2(
                                  _job._key,
                                  train,
                                  model.model_info(),
                                  rowFraction(train, mp, model),
                                  model.iterations)
                              .doAllNodes()
                              .model_info())
                      : // replicated data + multi-node mode
                      new DeepLearningTask(
                              _job._key,
                              model.model_info(),
                              rowFraction(train, mp, model),
                              model.iterations)
                          .doAll(train)
                          .model_info()); // distributed data (always in multi-node mode)
          if (stop_requested() && !timeout()) break; // cancellation
          if (!model.doScoring(
              trainScoreFrame, validScoreFrame, _job._key, model.iterations, false))
            break; // finished training (or early stopping or convergence)
          if (timeout()) break; // stop after scoring
        }

        // replace the model with the best model so far (if it's better)
        if (!stop_requested()
            && _parms._overwrite_with_best_model
            && model.actual_best_model_key != null
            && _parms._nfolds == 0) {
          DeepLearningModel best_model = DKV.getGet(model.actual_best_model_key);
          if (best_model != null
              && best_model.loss() < model.loss()
              && Arrays.equals(best_model.model_info().units, model.model_info().units)) {
            if (!_parms._quiet_mode)
              Log.info("Setting the model to be the best model so far (based on scoring history).");
            DeepLearningModelInfo mi = best_model.model_info().deep_clone();
            // Don't cheat - count full amount of training samples, since that's the amount of
            // training it took to train (without finding anything better)
            mi.set_processed_global(model.model_info().get_processed_global());
            mi.set_processed_local(model.model_info().get_processed_local());
            model.set_model_info(mi);
            model.update(_job);
            model.doScoring(trainScoreFrame, validScoreFrame, _job._key, model.iterations, true);
            assert (best_model.loss() == model.loss());
          }
        }
        // store coefficient names for future use
        // possibly change
        model.model_info().data_info().coefNames();
        if (!_parms._quiet_mode) {
          Log.info(
              "==============================================================================================================================================================================");
          if (stop_requested()) {
            Log.info("Deep Learning model training was interrupted.");
          } else {
            Log.info("Finished training the Deep Learning model.");
            Log.info(model);
          }
          Log.info(
              "==============================================================================================================================================================================");
        }
      } finally {
        if (model != null) {
          model.deleteElasticAverageModels();
          model.unlock(_job);
          if (model.actual_best_model_key != null) {
            assert (model.actual_best_model_key != model._key);
            DKV.remove(model.actual_best_model_key);
          }
        }
      }
      return model;
    }
Пример #7
0
  protected double doScoringAndSaveModel(
      boolean finalScoring, boolean oob, boolean build_tree_one_node) {
    double training_r2 = Double.NaN; // Training R^2 value, if computed
    long now = System.currentTimeMillis();
    if (_firstScore == 0) _firstScore = now;
    long sinceLastScore = now - _timeLastScoreStart;
    boolean updated = false;
    new ProgressUpdate(
            "Built " + _model._output._ntrees + " trees so far (out of " + _parms._ntrees + ").")
        .fork(_progressKey);
    // Now model already contains tid-trees in serialized form
    if (_parms._score_each_iteration
        || finalScoring
        || (now - _firstScore < 4000)
        || // Score every time for 4 secs
        // Throttle scoring to keep the cost sane; limit to a 10% duty cycle & every 4 secs
        (sinceLastScore > 4000
            && // Limit scoring updates to every 4sec
            (double) (_timeLastScoreEnd - _timeLastScoreStart) / sinceLastScore
                < 0.1)) { // 10% duty cycle

      checkMemoryFootPrint();

      // If validation is specified we use a model for scoring, so we need to
      // update it!  First we save model with trees (i.e., make them available
      // for scoring) and then update it with resulting error
      _model.update(_key);
      updated = true;

      Log.info("============================================================== ");
      SharedTreeModel.SharedTreeOutput out = _model._output;
      _timeLastScoreStart = now;
      // Score on training data
      new ProgressUpdate("Scoring the model.").fork(_progressKey);
      Score sc =
          new Score(this, true, oob, _model._output.getModelCategory())
              .doAll(train(), build_tree_one_node);
      ModelMetrics mm = sc.makeModelMetrics(_model, _parms.train());
      out._training_metrics = mm;
      if (oob)
        out._training_metrics._description = "Metrics reported on Out-Of-Bag training samples";
      out._scored_train[out._ntrees].fillFrom(mm);
      if (out._ntrees > 0) Log.info("Training " + out._scored_train[out._ntrees].toString());

      // Score again on validation data
      if (_parms._valid != null) {
        Score scv =
            new Score(this, false, false, _model._output.getModelCategory())
                .doAll(valid(), build_tree_one_node);
        ModelMetrics mmv = scv.makeModelMetrics(_model, _parms.valid());
        out._validation_metrics = mmv;
        out._scored_valid[out._ntrees].fillFrom(mmv);
        if (out._ntrees > 0) Log.info("Validation " + out._scored_valid[out._ntrees].toString());
      }

      if (out._ntrees > 0) { // Compute variable importances
        out._model_summary = createModelSummaryTable(out);
        out._scoring_history = createScoringHistoryTable(out);
        out._varimp = new hex.VarImp(_improvPerVar, out._names);
        out._variable_importances = hex.ModelMetrics.calcVarImp(out._varimp);
        Log.info(out._model_summary.toString());
        // For Debugging:
        //        Log.info(out._scoring_history.toString());
        //        Log.info(out._variable_importances.toString());
      }

      ConfusionMatrix cm = mm.cm();
      if (cm != null) {
        if (cm._cm.length <= _parms._max_confusion_matrix_size) {
          Log.info(cm.toASCII());
        } else {
          Log.info(
              "Confusion Matrix is too large (max_confusion_matrix_size="
                  + _parms._max_confusion_matrix_size
                  + "): "
                  + _nclass
                  + " classes.");
        }
      }
      _timeLastScoreEnd = System.currentTimeMillis();
    }

    // Double update - after either scoring or variable importance
    if (updated) _model.update(_key);
    return training_r2;
  }
Пример #8
0
    public static void userMain(String[] args) {
      H2O.main(args);
      TestUtil.stall_till_cloudsize(NODES);

      List<Class> tests = new ArrayList<Class>();

      // Classes to test:
      // tests = JUnitRunner.all();

      // Neural Net - deprecated
      //      tests.add(NeuralNetSpiralsTest.class); //compare NeuralNet vs reference
      //      tests.add(NeuralNetIrisTest.class); //compare NeuralNet vs reference

      // Chunk tests
      //      tests.add(C0LChunkTest.class);
      //      tests.add(C0DChunkTest.class);
      //      tests.add(C1ChunkTest.class);
      //      tests.add(C1NChunkTest.class);
      //      tests.add(C1SChunkTest.class);
      //      tests.add(C2ChunkTest.class);
      //      tests.add(C2SChunkTest.class);
      //      tests.add(C4ChunkTest.class);
      //      tests.add(C4FChunkTest.class);
      //      tests.add(C4SChunkTest.class);
      //      tests.add(C8ChunkTest.class);
      //      tests.add(C8DChunkTest.class);
      //      tests.add(C16ChunkTest.class);
      //      tests.add(CBSChunkTest.class);
      //      tests.add(CX0ChunkTest.class);
      //      tests.add(CXIChunkTest.class);
      //      tests.add(CXDChunkTest.class);
      //      tests.add(VecTest.class);

      // Deep Learning tests
      //      tests.add(DeepLearningVsNeuralNet.class); //only passes for NODES=1, not clear why
      //      tests.add(DeepLearningAutoEncoderTest.class); //test Deep Learning convergence
      //      tests.add(DeepLearningAutoEncoderCategoricalTest.class); //test Deep Learning
      // convergence
      //      tests.add(DeepLearningSpiralsTest.class); //test Deep Learning convergence
      //      tests.add(DeepLearningIrisTest.Short.class); //compare Deep Learning vs reference
      //      tests.add(DeepLearningIrisTest.Long.class); //compare Deep Learning vs reference
      tests.add(DeepLearningProstateTest.Short.class); // test Deep Learning
      //      tests.add(DeepLearningMissingTest.class); //test Deep Learning
      //      tests.add(DeepLearningProstateTest.Long.class); //test Deep Learning
      //      tests.add(NeuronsTest.class); //test Deep Learning
      //      tests.add(MRUtilsTest.class); //test MR sampling/rebalancing
      //      tests.add(DropoutTest.class); //test NN Dropput

      //      tests.add(ParserTest2.class);
      //      tests.add(ParserTest2.ParseAllSmalldata.class);
      //      tests.add(KMeans2Test.class);
      //      tests.add(KMeans2RandomTest.class);
      //      tests.add(GLMRandomTest.Short.class);
      //      tests.add(SpeeDRFTest.class);
      //      tests.add(SpeeDRFTest2.class);
      ////      tests.add(GLMTest2.class);
      //      tests.add(DRFTest.class);
      //      tests.add(DRFTest2.class);
      //      tests.add(GBMTest.class);
      //      tests.add(KMeans2Test.class);
      //      tests.add(PCATest.class);
      //      tests.add(NetworkTestTest.class);

      // Uncomment this to sleep here and use the browser.
      // try { Thread.sleep(10000000); } catch (Exception _) {}

      JUnitCore junit = new JUnitCore();
      junit.addListener(new LogListener());
      Result result = junit.run(tests.toArray(new Class[0]));
      if (result.getFailures().size() == 0) {
        Log.info("SUCCESS!");
        System.exit(0);
      } else {
        Log.info("FAIL!");
        System.exit(1);
      }
    }
Пример #9
0
  public static class GLRMParameters extends Model.Parameters {
    public String algoName() {
      return "GLRM";
    }

    public String fullName() {
      return "Generalized Low Rank Modeling";
    }

    public String javaName() {
      return GLRMModel.class.getName();
    }

    public DataInfo.TransformType _transform =
        DataInfo.TransformType.NONE; // Data transformation (demean to compare with PCA)
    public int _k = 1; // Rank of resulting XY matrix
    public GLRM.Initialization _init = GLRM.Initialization.PlusPlus; // Initialization of Y matrix
    public SVDParameters.Method _svd_method =
        SVDParameters.Method.Randomized; // SVD initialization method (for _init = SVD)
    public Key<Frame> _user_y; // User-specified Y matrix (for _init = User)
    public Key<Frame> _user_x; // User-specified X matrix (for _init = User)
    public boolean _expand_user_y =
        true; // Should categorical columns in _user_y be expanded via one-hot encoding? (for _init
              // = User)

    // Loss functions
    public Loss _loss = Loss.Quadratic; // Default loss function for numeric cols
    public Loss _multi_loss = Loss.Categorical; // Default loss function for categorical cols
    public int _period = 1; // Length of the period when _loss = Periodic
    public Loss[] _loss_by_col; // Override default loss function for specific columns
    public int[] _loss_by_col_idx;

    // Regularization functions
    public Regularizer _regularization_x = Regularizer.None; // Regularization function for X matrix
    public Regularizer _regularization_y = Regularizer.None; // Regularization function for Y matrix
    public double _gamma_x = 0; // Regularization weight on X matrix
    public double _gamma_y = 0; // Regularization weight on Y matrix

    // Optional parameters
    public int _max_iterations = 1000; // Max iterations
    public int _max_updates = 2 * _max_iterations; // Max number of updates (X or Y)
    public double _init_step_size = 1.0; // Initial step size (decrease until we hit min_step_size)
    public double _min_step_size = 1e-4; // Min step size
    public long _seed = System.nanoTime(); // RNG seed

    @Override
    protected long nFoldSeed() {
      return _seed;
    }

    // public Key<Frame> _representation_key;     // Key to save X matrix
    public String _representation_name;
    public boolean _recover_svd =
        false; // Recover singular values and eigenvectors of XY at the end?
    public boolean _impute_original =
        false; // Reconstruct original training data by reversing _transform?
    public boolean _verbose = true; // Log when objective increases each iteration?

    // Quadratic -> Gaussian distribution ~ exp(-(a-u)^2)
    // Absolute -> Laplace distribution ~ exp(-|a-u|)
    public enum Loss {
      Quadratic(true),
      Absolute(true),
      Huber(true),
      Poisson(true),
      Periodic(true), // One-dimensional loss (numeric)
      Logistic(true, true),
      Hinge(true, true), // Boolean loss (categorical)
      Categorical(false),
      Ordinal(false); // Multi-dimensional loss (categorical)

      private boolean forNumeric;
      private boolean forBinary;

      Loss(boolean forNumeric) {
        this(forNumeric, false);
      }

      Loss(boolean forNumeric, boolean forBinary) {
        this.forNumeric = forNumeric;
        this.forBinary = forBinary;
      }

      public boolean isForNumeric() {
        return forNumeric;
      }

      public boolean isForCategorical() {
        return !forNumeric;
      }

      public boolean isForBinary() {
        return forBinary;
      }
    }

    // Non-negative matrix factorization (NNMF): r_x = r_y = NonNegative
    // Orthogonal NNMF: r_x = OneSparse, r_y = NonNegative
    // K-means clustering: r_x = UnitOneSparse, r_y = 0 (\gamma_y = 0)
    // Quadratic mixture: r_x = Simplex, r_y = 0 (\gamma_y = 0)
    public enum Regularizer {
      None,
      Quadratic,
      L2,
      L1,
      NonNegative,
      OneSparse,
      UnitOneSparse,
      Simplex
    }

    // Check if all elements of _loss_by_col are equal to a specific loss function
    private final boolean allLossEquals(Loss loss) {
      if (null == _loss_by_col) return false;

      boolean res = true;
      for (int i = 0; i < _loss_by_col.length; i++) {
        if (_loss_by_col[i] != loss) {
          res = false;
          break;
        }
      }
      return res;
    }

    // Closed form solution only if quadratic loss, no regularization or quadratic regularization
    // (same for X and Y), and no missing values
    public final boolean hasClosedForm() {
      long na_cnt = 0;
      Frame train = _train.get();
      for (int i = 0; i < train.numCols(); i++) na_cnt += train.vec(i).naCnt();
      return hasClosedForm(na_cnt);
    }

    public final boolean hasClosedForm(long na_cnt) {
      boolean loss_quad =
          (null == _loss_by_col && _loss == Quadratic)
              || (null != _loss_by_col
                  && allLossEquals(Quadratic)
                  && (_loss_by_col.length == _train.get().numCols() || _loss == Quadratic));

      return na_cnt == 0
          && ((loss_quad
              && (_gamma_x == 0
                  || _regularization_x == Regularizer.None
                  || _regularization_x == GLRMParameters.Regularizer.Quadratic)
              && (_gamma_y == 0
                  || _regularization_y == Regularizer.None
                  || _regularization_y == GLRMParameters.Regularizer.Quadratic)));
    }

    // L(u,a): Loss function
    public final double loss(double u, double a) {
      return loss(u, a, _loss);
    }

    public final double loss(double u, double a, Loss loss) {
      assert loss.isForNumeric() : "Loss function " + loss + " not applicable to numerics";
      switch (loss) {
        case Quadratic:
          return (u - a) * (u - a);
        case Absolute:
          return Math.abs(u - a);
        case Huber:
          return Math.abs(u - a) <= 1 ? 0.5 * (u - a) * (u - a) : Math.abs(u - a) - 0.5;
        case Poisson:
          assert a >= 0 : "Poisson loss L(u,a) requires variable a >= 0";
          return Math.exp(u)
              + (a == 0 ? 0 : -a * u + a * Math.log(a) - a); // Since \lim_{a->0} a*log(a) = 0
        case Hinge:
          // return Math.max(1-a*u,0);
          return Math.max(1 - (a == 0 ? -u : u), 0); // Booleans are coded {0,1} instead of {-1,1}
        case Logistic:
          // return Math.log(1 + Math.exp(-a * u));
          return Math.log(
              1 + Math.exp(a == 0 ? u : -u)); // Booleans are coded {0,1} instead of {-1,1}
        case Periodic:
          return 1 - Math.cos((a - u) * (2 * Math.PI) / _period);
        default:
          throw new RuntimeException("Unknown loss function " + loss);
      }
    }

    // \grad_u L(u,a): Gradient of loss function with respect to u
    public final double lgrad(double u, double a) {
      return lgrad(u, a, _loss);
    }

    public final double lgrad(double u, double a, Loss loss) {
      assert loss.isForNumeric() : "Loss function " + loss + " not applicable to numerics";
      switch (loss) {
        case Quadratic:
          return 2 * (u - a);
        case Absolute:
          return Math.signum(u - a);
        case Huber:
          return Math.abs(u - a) <= 1 ? u - a : Math.signum(u - a);
        case Poisson:
          assert a >= 0 : "Poisson loss L(u,a) requires variable a >= 0";
          return Math.exp(u) - a;
        case Hinge:
          // return a*u <= 1 ? -a : 0;
          return a == 0
              ? (-u <= 1 ? 1 : 0)
              : (u <= 1 ? -1 : 0); // Booleans are coded as {0,1} instead of {-1,1}
        case Logistic:
          // return -a/(1+Math.exp(a*u));
          return a == 0
              ? 1 / (1 + Math.exp(-u))
              : -1 / (1 + Math.exp(u)); // Booleans are coded as {0,1} instead of {-1,1}
        case Periodic:
          return ((2 * Math.PI) / _period) * Math.sin((a - u) * (2 * Math.PI) / _period);
        default:
          throw new RuntimeException("Unknown loss function " + loss);
      }
    }

    // L(u,a): Multidimensional loss function
    public final double mloss(double[] u, int a) {
      return mloss(u, a, _multi_loss);
    }

    public static double mloss(double[] u, int a, Loss multi_loss) {
      assert multi_loss.isForCategorical()
          : "Loss function " + multi_loss + " not applicable to categoricals";
      if (a < 0 || a > u.length - 1)
        throw new IllegalArgumentException(
            "Index must be between 0 and " + String.valueOf(u.length - 1));

      double sum = 0;
      switch (multi_loss) {
        case Categorical:
          for (int i = 0; i < u.length; i++) sum += Math.max(1 + u[i], 0);
          sum += Math.max(1 - u[a], 0) - Math.max(1 + u[a], 0);
          return sum;
        case Ordinal:
          for (int i = 0; i < u.length - 1; i++) sum += Math.max(a > i ? 1 - u[i] : 1, 0);
          return sum;
        default:
          throw new RuntimeException("Unknown multidimensional loss function " + multi_loss);
      }
    }

    // \grad_u L(u,a): Gradient of multidimensional loss function with respect to u
    public final double[] mlgrad(double[] u, int a) {
      return mlgrad(u, a, _multi_loss);
    }

    public static double[] mlgrad(double[] u, int a, Loss multi_loss) {
      assert multi_loss.isForCategorical()
          : "Loss function " + multi_loss + " not applicable to categoricals";
      if (a < 0 || a > u.length - 1)
        throw new IllegalArgumentException(
            "Index must be between 0 and " + String.valueOf(u.length - 1));

      double[] grad = new double[u.length];
      switch (multi_loss) {
        case Categorical:
          for (int i = 0; i < u.length; i++) grad[i] = (1 + u[i] > 0) ? 1 : 0;
          grad[a] = (1 - u[a] > 0) ? -1 : 0;
          return grad;
        case Ordinal:
          for (int i = 0; i < u.length - 1; i++) grad[i] = (a > i && 1 - u[i] > 0) ? -1 : 0;
          return grad;
        default:
          throw new RuntimeException("Unknown multidimensional loss function " + multi_loss);
      }
    }

    // r_i(x_i), r_j(y_j): Regularization function for single row x_i or column y_j
    public final double regularize_x(double[] u) {
      return regularize(u, _regularization_x);
    }

    public final double regularize_y(double[] u) {
      return regularize(u, _regularization_y);
    }

    public final double regularize(double[] u, Regularizer regularization) {
      if (u == null) return 0;
      double ureg = 0;

      switch (regularization) {
        case None:
          return 0;
        case Quadratic:
          for (int i = 0; i < u.length; i++) ureg += u[i] * u[i];
          return ureg;
        case L2:
          for (int i = 0; i < u.length; i++) ureg += u[i] * u[i];
          return Math.sqrt(ureg);
        case L1:
          for (int i = 0; i < u.length; i++) ureg += Math.abs(u[i]);
          return ureg;
        case NonNegative:
          for (int i = 0; i < u.length; i++) {
            if (u[i] < 0) return Double.POSITIVE_INFINITY;
          }
          return 0;
        case OneSparse:
          int card = 0;
          for (int i = 0; i < u.length; i++) {
            if (u[i] < 0) return Double.POSITIVE_INFINITY;
            else if (u[i] > 0) card++;
          }
          return card == 1 ? 0 : Double.POSITIVE_INFINITY;
        case UnitOneSparse:
          int ones = 0, zeros = 0;
          for (int i = 0; i < u.length; i++) {
            if (u[i] == 1) ones++;
            else if (u[i] == 0) zeros++;
            else return Double.POSITIVE_INFINITY;
          }
          return ones == 1 && zeros == u.length - 1 ? 0 : Double.POSITIVE_INFINITY;
        case Simplex:
          double sum = 0, absum = 0;
          for (int i = 0; i < u.length; i++) {
            if (u[i] < 0) return Double.POSITIVE_INFINITY;
            else {
              sum += u[i];
              absum += Math.abs(u[i]);
            }
          }
          return MathUtils.equalsWithinRecSumErr(sum, 1.0, u.length, absum)
              ? 0
              : Double.POSITIVE_INFINITY;
        default:
          throw new RuntimeException("Unknown regularization function " + regularization);
      }
    }

    // \sum_i r_i(x_i): Sum of regularization function for all entries of X
    public final double regularize_x(double[][] u) {
      return regularize(u, _regularization_x);
    }

    public final double regularize_y(double[][] u) {
      return regularize(u, _regularization_y);
    }

    public final double regularize(double[][] u, Regularizer regularization) {
      if (u == null || regularization == Regularizer.None) return 0;

      double ureg = 0;
      for (int i = 0; i < u.length; i++) {
        ureg += regularize(u[i], regularization);
        if (Double.isInfinite(ureg)) return ureg;
      }
      return ureg;
    }

    // \prox_{\alpha_k*r}(u): Proximal gradient of (step size) * (regularization function) evaluated
    // at vector u
    public final double[] rproxgrad_x(double[] u, double alpha, Random rand) {
      return rproxgrad(u, alpha, _gamma_x, _regularization_x, rand);
    }

    public final double[] rproxgrad_y(double[] u, double alpha, Random rand) {
      return rproxgrad(u, alpha, _gamma_y, _regularization_y, rand);
    }
    // public final double[] rproxgrad_x(double[] u, double alpha) { return rproxgrad(u, alpha,
    // _gamma_x, _regularization_x, RandomUtils.getRNG(_seed)); }
    // public final double[] rproxgrad_y(double[] u, double alpha) { return rproxgrad(u, alpha,
    // _gamma_y, _regularization_y, RandomUtils.getRNG(_seed)); }
    static double[] rproxgrad(
        double[] u, double alpha, double gamma, Regularizer regularization, Random rand) {
      if (u == null || alpha == 0 || gamma == 0) return u;
      double[] v = new double[u.length];

      switch (regularization) {
        case None:
          return u;
        case Quadratic:
          for (int i = 0; i < u.length; i++) v[i] = u[i] / (1 + 2 * alpha * gamma);
          return v;
        case L2:
          // Proof uses Moreau decomposition; see section 6.5.1 of Parikh and Boyd
          // https://web.stanford.edu/~boyd/papers/pdf/prox_algs.pdf
          double weight = 1 - alpha * gamma / ArrayUtils.l2norm(u);
          if (weight < 0) return v; // Zero vector
          for (int i = 0; i < u.length; i++) v[i] = weight * u[i];
          return v;
        case L1:
          for (int i = 0; i < u.length; i++)
            v[i] = Math.max(u[i] - alpha * gamma, 0) + Math.min(u[i] + alpha * gamma, 0);
          return v;
        case NonNegative:
          for (int i = 0; i < u.length; i++) v[i] = Math.max(u[i], 0);
          return v;
        case OneSparse:
          int idx = ArrayUtils.maxIndex(u, rand);
          v[idx] = u[idx] > 0 ? u[idx] : 1e-6;
          return v;
        case UnitOneSparse:
          idx = ArrayUtils.maxIndex(u, rand);
          v[idx] = 1;
          return v;
        case Simplex:
          // Proximal gradient algorithm by Chen and Ye in http://arxiv.org/pdf/1101.6081v2.pdf
          // 1) Sort input vector u in ascending order: u[1] <= ... <= u[n]
          int n = u.length;
          int[] idxs = new int[n];
          for (int i = 0; i < n; i++) idxs[i] = i;
          ArrayUtils.sort(idxs, u);

          // 2) Calculate cumulative sum of u in descending order
          // cumsum(u) = (..., u[n-2]+u[n-1]+u[n], u[n-1]+u[n], u[n])
          double[] ucsum = new double[n];
          ucsum[n - 1] = u[idxs[n - 1]];
          for (int i = n - 2; i >= 0; i--) ucsum[i] = ucsum[i + 1] + u[idxs[i]];

          // 3) Let t_i = (\sum_{j=i+1}^n u[j] - 1)/(n - i)
          // For i = n-1,...,1, set optimal t* to first t_i >= u[i]
          double t = (ucsum[0] - 1) / n; // Default t* = (\sum_{j=1}^n u[j] - 1)/n
          for (int i = n - 1; i >= 1; i--) {
            double tmp = (ucsum[i] - 1) / (n - i);
            if (tmp >= u[idxs[i - 1]]) {
              t = tmp;
              break;
            }
          }

          // 4) Return max(u - t*, 0) as projection of u onto simplex
          double[] x = new double[u.length];
          for (int i = 0; i < u.length; i++) x[i] = Math.max(u[i] - t, 0);
          return x;
        default:
          throw new RuntimeException("Unknown regularization function " + regularization);
      }
    }

    // Project X,Y matrices into appropriate subspace so regularizer is finite. Used during
    // initialization.
    public final double[] project_x(double[] u, Random rand) {
      return project(u, _regularization_x, rand);
    }

    public final double[] project_y(double[] u, Random rand) {
      return project(u, _regularization_y, rand);
    }

    public final double[] project(double[] u, Regularizer regularization, Random rand) {
      if (u == null) return u;

      switch (regularization) {
          // Domain is all real numbers
        case None:
        case Quadratic:
        case L2:
        case L1:
          return u;
          // Proximal operator of indicator function for a set C is (Euclidean) projection onto C
        case NonNegative:
        case OneSparse:
        case UnitOneSparse:
          return rproxgrad(u, 1, 1, regularization, rand);
        case Simplex:
          double reg =
              regularize(
                  u,
                  regularization); // Check if inside simplex before projecting since algo is
                                   // complicated
          if (reg == 0) return u;
          return rproxgrad(u, 1, 1, regularization, rand);
        default:
          throw new RuntimeException("Unknown regularization function " + regularization);
      }
    }

    // \hat A_{i,j} = \argmin_a L_{i,j}(x_iy_j, a): Data imputation for real numeric values
    public final double impute(double u) {
      return impute(u, _loss);
    }

    public static double impute(double u, Loss loss) {
      assert loss.isForNumeric() : "Loss function " + loss + " not applicable to numerics";
      switch (loss) {
        case Quadratic:
        case Absolute:
        case Huber:
        case Periodic:
          return u;
        case Poisson:
          return Math.exp(u) - 1;
        case Hinge:
        case Logistic:
          return u > 0 ? 1 : 0; // Booleans are coded as {0,1} instead of {-1,1}
        default:
          throw new RuntimeException("Unknown loss function " + loss);
      }
    }

    // \hat A_{i,j} = \argmin_a L_{i,j}(x_iy_j, a): Data imputation for categorical values
    // {0,1,2,...}
    // TODO: Is there a faster way to find the loss minimizer?
    public final int mimpute(double[] u) {
      return mimpute(u, _multi_loss);
    }

    public static int mimpute(double[] u, Loss multi_loss) {
      assert multi_loss.isForCategorical()
          : "Loss function " + multi_loss + " not applicable to categoricals";
      switch (multi_loss) {
        case Categorical:
        case Ordinal:
          double[] cand = new double[u.length];
          for (int a = 0; a < cand.length; a++) cand[a] = mloss(u, a, multi_loss);
          return ArrayUtils.minIndex(cand);
        default:
          throw new RuntimeException("Unknown multidimensional loss function " + multi_loss);
      }
    }
  }
Пример #10
0
    @Override
    protected void compute2() {
      CoxPHModel model = null;
      try {
        Scope.enter();
        _parms.read_lock_frames(CoxPH.this);
        init(true);

        applyScoringFrameSideEffects();

        // The model to be built
        model = new CoxPHModel(dest(), _parms, new CoxPHModel.CoxPHOutput(CoxPH.this));
        model.delete_and_lock(_key);

        applyTrainingFrameSideEffects();

        int nResponses = 1;
        boolean useAllFactorLevels = false;
        final DataInfo dinfo =
            new DataInfo(
                Key.make(),
                _modelBuilderTrain,
                null,
                nResponses,
                useAllFactorLevels,
                DataInfo.TransformType.DEMEAN,
                TransformType.NONE,
                true,
                false,
                false,
                false,
                false,
                false);
        initStats(model, dinfo);

        final int n_offsets =
            (model._parms.offset_columns == null) ? 0 : model._parms.offset_columns.length;
        final int n_coef = dinfo.fullN() - n_offsets;
        final double[] step = MemoryManager.malloc8d(n_coef);
        final double[] oldCoef = MemoryManager.malloc8d(n_coef);
        final double[] newCoef = MemoryManager.malloc8d(n_coef);
        Arrays.fill(step, Double.NaN);
        Arrays.fill(oldCoef, Double.NaN);
        for (int j = 0; j < n_coef; ++j) newCoef[j] = model._parms.init;
        double oldLoglik = -Double.MAX_VALUE;
        final int n_time = (int) (model._output.max_time - model._output.min_time + 1);
        final boolean has_start_column = (model._parms.start_column != null);
        final boolean has_weights_column = (model._parms.weights_column != null);
        for (int i = 0; i <= model._parms.iter_max; ++i) {
          model._output.iter = i;

          final CoxPHTask coxMR =
              new CoxPHTask(
                      self(),
                      dinfo,
                      newCoef,
                      model._output.min_time,
                      n_time,
                      n_offsets,
                      has_start_column,
                      has_weights_column)
                  .doAll(dinfo._adaptedFrame);

          final double newLoglik = calcLoglik(model, coxMR);
          if (newLoglik > oldLoglik) {
            if (i == 0) calcCounts(model, coxMR);

            calcModelStats(model, newCoef, newLoglik);
            calcCumhaz_0(model, coxMR);

            if (newLoglik == 0) model._output.lre = -Math.log10(Math.abs(oldLoglik - newLoglik));
            else model._output.lre = -Math.log10(Math.abs((oldLoglik - newLoglik) / newLoglik));
            if (model._output.lre >= model._parms.lre_min) break;

            Arrays.fill(step, 0);
            for (int j = 0; j < n_coef; ++j)
              for (int k = 0; k < n_coef; ++k)
                step[j] -= model._output.var_coef[j][k] * model._output.gradient[k];
            for (int j = 0; j < n_coef; ++j)
              if (Double.isNaN(step[j]) || Double.isInfinite(step[j])) break;

            oldLoglik = newLoglik;
            System.arraycopy(newCoef, 0, oldCoef, 0, oldCoef.length);
          } else {
            for (int j = 0; j < n_coef; ++j) step[j] /= 2;
          }

          for (int j = 0; j < n_coef; ++j) newCoef[j] = oldCoef[j] - step[j];
        }

        model.update(_key);
      } catch (Throwable t) {
        Job thisJob = DKV.getGet(_key);
        if (thisJob._state == JobState.CANCELLED) {
          Log.info("Job cancelled by user.");
        } else {
          t.printStackTrace();
          failed(t);
          throw t;
        }
      } finally {
        updateModelOutput();
        _parms.read_unlock_frames(CoxPH.this);
        Scope.exit();
        done(); // Job done!
      }
      tryComplete();
    }