예제 #1
0
  /**
   * Decides whether the model should be scored right now and, if so, scores it on the training
   * frame (and on the validation frame when one is configured), stores the resulting metrics in
   * the model output, and pushes the model via {@code _model.update(_key)}.
   *
   * <p>Scoring happens when at least one of these holds:
   *
   * <ul>
   *   <li>{@code _parms._score_each_iteration} is set;
   *   <li>{@code finalScoring} is {@code true};
   *   <li>less than 4000 ms have elapsed since {@code _firstScore} was recorded;
   *   <li>more than 4000 ms have elapsed since the previous scoring started and the previous
   *       scoring took less than 10% of that elapsed time.
   * </ul>
   *
   * <p>A progress update with the number of trees built so far is forked on every call, whether
   * or not scoring takes place.
   *
   * @param finalScoring when {@code true}, scoring is performed regardless of the throttling
   * @param oob passed to the training {@code Score} task; when {@code true} the training metrics
   *     are additionally described as Out-Of-Bag metrics
   * @param build_tree_one_node forwarded as the second argument of {@code Score.doAll} for both
   *     the training and the validation pass
   * @return always {@code Double.NaN}; no training R^2 value is computed by this method
   */
  protected double doScoringAndSaveModel(
      boolean finalScoring, boolean oob, boolean build_tree_one_node) {
    final long currentTime = System.currentTimeMillis();
    if (_firstScore == 0) {
      _firstScore = currentTime;
    }
    final long elapsedSinceScoreStart = currentTime - _timeLastScoreStart;

    final String builtMessage =
        "Built " + _model._output._ntrees + " trees so far (out of " + _parms._ntrees + ").";
    new ProgressUpdate(builtMessage).fork(_progressKey);

    // The model already contains the trees in serialized form at this point.
    // Work out whether this call is allowed to score.
    boolean scoreNow = _parms._score_each_iteration || finalScoring;
    if (!scoreNow) {
      // Score on every call during the first 4 seconds.
      scoreNow = currentTime - _firstScore < 4000;
    }
    if (!scoreNow && elapsedSinceScoreStart > 4000) {
      // Throttle to keep the cost sane: at most every 4 seconds and only while the previous
      // scoring consumed less than 10% of the time elapsed since it started.
      final double dutyCycle =
          (double) (_timeLastScoreEnd - _timeLastScoreStart) / elapsedSinceScoreStart;
      scoreNow = dutyCycle < 0.1;
    }
    if (!scoreNow) {
      // Nothing was scored, so there is nothing to save; the R^2 value is never computed.
      return Double.NaN;
    }

    checkMemoryFootPrint();

    // Scoring goes through the model, so first publish the model with its current trees; the
    // metrics computed below are published by the second update at the end.
    _model.update(_key);

    Log.info("============================================================== ");
    final SharedTreeModel.SharedTreeOutput output = _model._output;
    _timeLastScoreStart = currentTime;

    // Training-frame scoring.
    new ProgressUpdate("Scoring the model.").fork(_progressKey);
    final Score trainTask = new Score(this, true, oob, _model._output.getModelCategory());
    final Score trainScored = trainTask.doAll(train(), build_tree_one_node);
    final ModelMetrics trainMetrics = trainScored.makeModelMetrics(_model, _parms.train());
    output._training_metrics = trainMetrics;
    if (oob) {
      output._training_metrics._description = "Metrics reported on Out-Of-Bag training samples";
    }
    output._scored_train[output._ntrees].fillFrom(trainMetrics);
    if (output._ntrees > 0) {
      Log.info("Training " + output._scored_train[output._ntrees].toString());
    }

    // Validation-frame scoring, only when a validation frame was given.
    if (_parms._valid != null) {
      final Score validTask = new Score(this, false, false, _model._output.getModelCategory());
      final Score validScored = validTask.doAll(valid(), build_tree_one_node);
      final ModelMetrics validMetrics = validScored.makeModelMetrics(_model, _parms.valid());
      output._validation_metrics = validMetrics;
      output._scored_valid[output._ntrees].fillFrom(validMetrics);
      if (output._ntrees > 0) {
        Log.info("Validation " + output._scored_valid[output._ntrees].toString());
      }
    }

    // Summary tables and variable importances are only produced once at least one tree exists.
    if (output._ntrees > 0) {
      output._model_summary = createModelSummaryTable(output);
      output._scoring_history = createScoringHistoryTable(output);
      output._varimp = new hex.VarImp(_improvPerVar, output._names);
      output._variable_importances = hex.ModelMetrics.calcVarImp(output._varimp);
      Log.info(output._model_summary.toString());
    }

    // Log the training confusion matrix, or a short notice when it exceeds the configured size.
    final ConfusionMatrix confusion = trainMetrics.cm();
    if (confusion != null) {
      if (confusion._cm.length > _parms._max_confusion_matrix_size) {
        Log.info(
            "Confusion Matrix is too large (max_confusion_matrix_size="
                + _parms._max_confusion_matrix_size
                + "): "
                + _nclass
                + " classes.");
      } else {
        Log.info(confusion.toASCII());
      }
    }
    _timeLastScoreEnd = System.currentTimeMillis();

    // Second update: publish the metrics and variable importances computed above.
    _model.update(_key);
    return Double.NaN;
  }