Beispiel #1
0
  /**
   * Adjusts the parameters used for the cross-validation main model, based on the already-built
   * cross-validation models.
   *
   * <p>Overwriting with the best model is always switched off. When either convergence-based early
   * stopping or a runtime limit is configured, both are disabled and {@code _epochs} is replaced
   * by the mean {@code epoch_counter} taken from {@code last_scored()} of every cross-validation
   * model. Unless quiet mode is on, a warning is recorded for each of the three changed
   * parameters.
   *
   * @param cvModelBuilders builders of the cross-validation models to average over
   */
  @Override
  public void modifyParmsForCrossValidationMainModel(ModelBuilder[] cvModelBuilders) {
    _parms._overwrite_with_best_model = false;

    final boolean stoppingConfigured =
        _parms._stopping_rounds != 0 || _parms._max_runtime_secs != 0;
    if (!stoppingConfigured) {
      return; // No exciting changes to stopping conditions
    }

    // Switch off both stopping criteria before deriving the epoch count from the CV models.
    _parms._stopping_rounds = 0;
    _parms._max_runtime_secs = 0;

    double epochTotal = 0;
    for (int i = 0; i < cvModelBuilders.length; i++) {
      DeepLearningModel cvModel = (DeepLearningModel) DKV.getGet(cvModelBuilders[i].dest());
      epochTotal += cvModel.last_scored().epoch_counter;
    }
    _parms._epochs = epochTotal / cvModelBuilders.length;

    if (_parms._quiet_mode) {
      return;
    }
    final String epochsMessage =
        "Setting optimal _epochs to "
            + _parms._epochs
            + " for cross-validation main model based on early stopping of cross-validation models.";
    warn("_epochs", epochsMessage);
    warn(
        "_stopping_rounds",
        "Disabling convergence-based early stopping for cross-validation main model.");
    warn(
        "_max_runtime_secs",
        "Disabling maximum allowed runtime for cross-validation main model.");
  }
  /**
   * Validates the builder arguments after delegating to {@code super.init(expensive)}.
   *
   * <p>Checks performed here, each recorded through {@code error(...)}:
   *
   * <ul>
   *   <li>{@code start_column}, when given, must be an integer column;
   *   <li>{@code stop_column} must be an integer column;
   *   <li>{@code event_column} must be an integer or categorical column;
   *   <li>{@code lre_min} must be a positive number (NaN is rejected);
   *   <li>{@code iter_max} must be at least 1;
   *   <li>the span between the smallest time and the largest stop time must contain between 1 and
   *       10000 time bins.
   * </ul>
   *
   * @param expensive forwarded unchanged to {@code super.init}
   */
  @Override
  public void init(boolean expensive) {
    super.init(expensive);

    final boolean hasStartColumn = _parms.start_column != null;
    if (hasStartColumn && !_parms.start_column.isInt()) {
      error("start_column", "start time must be null or of type integer");
    }

    if (!_parms.stop_column.isInt()) {
      error("stop_column", "stop time must be of type integer");
    }

    final boolean eventTypeSupported =
        _parms.event_column.isInt() || _parms.event_column.isCategorical();
    if (!eventTypeSupported) {
      error("event_column", "event must be of type integer or factor");
    }

    // !(x > 0) is true for NaN as well as for zero and negative values.
    if (!(_parms.lre_min > 0)) {
      error("lre_min", "lre_min must be a positive number");
    }

    if (_parms.iter_max < 1) {
      error("iter_max", "iter_max must be a positive integer");
    }

    final int MAX_TIME_BINS = 10000;

    // First time bin: one past the smallest start time, or the smallest stop time when there is
    // no start column.
    final long firstTime;
    if (hasStartColumn) {
      firstTime = (long) _parms.start_column.min() + 1;
    } else {
      firstTime = (long) _parms.stop_column.min();
    }

    final int timeBinCount = (int) (_parms.stop_column.max() - firstTime + 1);
    if (timeBinCount < 1) {
      error("start_column", "start times must be strictly less than stop times");
    }
    if (timeBinCount > MAX_TIME_BINS) {
      final String tooManyBins =
          "number of distinct stop times is "
              + timeBinCount
              + "; maximum number allowed is "
              + MAX_TIME_BINS;
      error("stop_column", tooManyBins);
    }
  }
Beispiel #3
0
  /**
   * Return the metadata for a REST API Route, specified either by number or path.
   *
   * <p>When both {@code docs.path} and {@code docs.http_method} are set the route is looked up
   * through {@code RequestServer.lookup}; otherwise the route at position {@code docs.num} in
   * {@code RequestServer.routes()} is used and {@code docs.routes} is filled from it. The
   * markdown rendered from the route's input and output schemas is stored in {@code
   * docs.routes[0].markdown}.
   *
   * @param version schema version used to fill {@code docs.routes} in the by-number case
   * @param docs request/response object; it is modified in place
   * @return the same {@code docs} instance
   */
  @SuppressWarnings("unused") // called through reflection by RequestServer
  public MetadataV3 fetchRoute(int version, MetadataV3 docs) {
    Route route = null;
    final boolean lookupByPath = docs.path != null && docs.http_method != null;
    if (lookupByPath) {
      // NOTE(review): docs.routes is not assigned on this path but is dereferenced at the end
      // of the method - presumably the caller pre-populates it; confirm.
      route = RequestServer.lookup(docs.http_method, docs.path);
    } else {
      // Linear scan for the route, plus each route is asked for in-order
      // during doc-gen leading to an O(n^2) execution cost.
      int position = 0;
      for (Route candidate : RequestServer.routes()) {
        if (position == docs.num) {
          route = candidate;
          break;
        }
        position++;
      }
      // No explicit not-found handling: route stays null when docs.num matches no position
      // (the old code threw an AIOOBE in that case).
      RouteBase filled = (RouteBase) Schema.schema(version, Route.class).fillFromImpl(route);
      docs.routes = new RouteBase[] {filled};
    }

    Schema sinput;
    Schema soutput;
    if (route._handler_class.equals(water.api.ModelBuilderHandler.class)) {
      // URL pattern layout: {}/{3}/{ModelBuilders}/{gbm}/{parameters}
      String[] urlParts = route._url_pattern_raw.split("/");
      String algoURLName = urlParts[3];
      int urlVersion = Integer.parseInt(urlParts[1]);
      // gbm -> GBM; deeplearning -> DeepLearning
      String algoName = ModelBuilder.algoName(algoURLName);
      String schemaDir = ModelBuilder.schemaDirectory(algoURLName);

      // e.g. hex.schemas.GBMV3
      String inputSchemaName = schemaDir + algoName + "V" + urlVersion;
      sinput = (Schema) TypeMap.theFreezable(TypeMap.onIce(inputSchemaName));
      sinput.init_meta();

      // e.g. hex.schemas.GBMModelV3$GBMModelOutputV3
      String outputSchemaName =
          schemaDir + algoName + "ModelV" + urlVersion + "$" + algoName + "ModelOutputV" + urlVersion;
      soutput = (Schema) TypeMap.theFreezable(TypeMap.onIce(outputSchemaName));
      soutput.init_meta();
    } else {
      sinput = Schema.newInstance(Handler.getHandlerMethodInputSchema(route._handler_method));
      soutput = Schema.newInstance(Handler.getHandlerMethodOutputSchema(route._handler_method));
    }

    docs.routes[0].markdown = route.markdown(sinput, soutput).toString();
    return docs;
  }
Beispiel #4
0
  /**
   * Initialize the ModelBuilder, validating all arguments and preparing the training frame. This
   * call is expected to be overridden in the subclasses and each subclass will start with
   * "super.init();". This call is made by the front-end whenever the GUI is clicked, and needs to
   * be fast; heavy-weight prep needs to wait for the trainModel() call.
   *
   * <p>Validate the requested ntrees; precompute actual ntrees. Validate the number of classes to
   * predict on; validate a checkpoint. Also validates the bin counts, the maximum depth and the
   * minimum rows per leaf, and checks that the (weighted) training set is large enough to be
   * split at least once.
   *
   * @param expensive forwarded unchanged to {@code super.init}
   */
  @Override
  public void init(boolean expensive) {
    super.init(expensive);
    if (H2O.ARGS.client && _parms._build_tree_one_node) {
      error("_build_tree_one_node", "Cannot run on a single node in client mode");
    }
    if (_vresponse != null) {
      _vresponse_key = _vresponse._key;
    }
    if (_response != null) {
      _response_key = _response._key;
    }
    if (_nclass > SharedTreeModel.SharedTreeParameters.MAX_SUPPORTED_LEVELS) {
      error("_nclass", "Too many levels in response column!");
    }

    // A negative min_rows is reported here and again by the "<= 0" check further down.
    if (_parms._min_rows < 0) {
      error("_min_rows", "Requested min_rows must be greater than 0");
    }

    if (_parms._ntrees < 0 || _parms._ntrees > 100000) {
      error("_ntrees", "Requested ntrees must be between 1 and 100000");
    }
    _ntrees = _parms._ntrees; // Total trees in final model
    if (_parms._checkpoint) { // Asking to continue from checkpoint?
      Value cv = DKV.get(_parms._model_id);
      if (cv != null) { // Look for prior model
        M checkpointModel = cv.get();
        if (_parms._ntrees < checkpointModel._output._ntrees + 1) {
          // The parentheses force numeric addition; without them "+ 1" is appended to the
          // message as the character '1' (e.g. 50 prior trees would print "501").
          error(
              "_ntrees",
              "Requested ntrees must be between "
                  + (checkpointModel._output._ntrees + 1)
                  + " and 100000");
        }
        _ntrees = _parms._ntrees - checkpointModel._output._ntrees; // Needed trees
      }
    }

    if (_parms._nbins <= 1) {
      error("_nbins", "_nbins must be > 1.");
    }
    if (_parms._nbins >= 1 << 16) {
      error("_nbins", "_nbins must be < " + (1 << 16));
    }
    if (_parms._nbins_cats <= 1) {
      error("_nbins_cats", "_nbins_cats must be > 1.");
    }
    if (_parms._nbins_cats >= 1 << 16) {
      error("_nbins_cats", "_nbins_cats must be < " + (1 << 16));
    }
    if (_parms._max_depth <= 0) {
      error("_max_depth", "_max_depth must be > 0.");
    }
    if (_parms._min_rows <= 0) {
      error("_min_rows", "_min_rows must be > 0.");
    }
    if (_parms._distribution == Distributions.Family.tweedie) {
      _parms._distribution.tweedie.p = _parms._tweedie_power;
    }

    if (_train != null) {
      // Total weight = row count times the mean weight (or 1 when there is no weights column).
      double sumWeights =
          _train.numRows() * (hasWeightCol() ? _train.vec(_parms._weights_column).mean() : 1);
      // Need at least 2*min_rows weighted rows to split even once
      if (sumWeights < 2 * _parms._min_rows) {
        error(
            "_min_rows",
            "The dataset size is too small to split for min_rows="
                + _parms._min_rows
                + ": must have at least "
                + 2 * _parms._min_rows
                + " (weighted) rows, but have only "
                + sumWeights
                + ".");
      }
      // Column count excluding one column and the special columns.
      _ncols = _train.numCols() - 1 - numSpecialCols();
    }
  }
Beispiel #5
0
  /**
   * Validates the tree-builder arguments after delegating to {@code super.init(expensive)}.
   *
   * <p>Records the response keys, checks {@code min_rows}, {@code ntrees}, the bin counts and
   * {@code max_depth}, and - when a checkpoint is requested and found in the DKV - validates the
   * parameters against the checkpointed model and reduces {@code _ntrees} to the number of trees
   * still to be built. When a training frame is present it also checks that the (weighted) row
   * count allows at least one split and computes {@code _ncols}.
   *
   * @param expensive forwarded unchanged to {@code super.init}
   */
  @Override
  public void init(boolean expensive) {
    super.init(expensive);
    if (H2O.ARGS.client && _parms._build_tree_one_node) {
      error("_build_tree_one_node", "Cannot run on a single node in client mode");
    }
    if (_vresponse != null) {
      _vresponse_key = _vresponse._key;
    }
    if (_response != null) {
      _response_key = _response._key;
    }

    if (_parms._min_rows < 0) {
      error("_min_rows", "Requested min_rows must be greater than 0");
    }

    final boolean ntreesOutOfRange = _parms._ntrees < 0 || _parms._ntrees > MAX_NTREES;
    if (ntreesOutOfRange) {
      error("_ntrees", "Requested ntrees must be between 1 and " + MAX_NTREES);
    }
    _ntrees = _parms._ntrees; // Total trees in final model

    if (_parms.hasCheckpoint()) { // Asking to continue from checkpoint?
      final Value priorValue = DKV.get(_parms._checkpoint);
      if (priorValue != null) { // Look for prior model
        final M priorModel = priorValue.get();
        try {
          _parms.validateWithCheckpoint(priorModel._parms);
        } catch (H2OIllegalArgumentException e) {
          // Turn the exception into a recorded validation error instead of propagating it.
          error(e.values.get("argument").toString(), e.values.get("value").toString());
        }
        if (_parms._ntrees < priorModel._output._ntrees + 1) {
          final String tooFewTrees =
              "If checkpoint is specified then requested ntrees must be higher than "
                  + (priorModel._output._ntrees + 1);
          error("_ntrees", tooFewTrees);
        }

        // Only the trees beyond those already in the checkpoint need to be built.
        _ntrees = _parms._ntrees - priorModel._output._ntrees; // Needed trees
      }
    }

    final int binLimit = 1 << 16;
    if (_parms._nbins <= 1) {
      error("_nbins", "_nbins must be > 1.");
    }
    if (_parms._nbins >= binLimit) {
      error("_nbins", "_nbins must be < " + binLimit);
    }
    if (_parms._nbins_cats <= 1) {
      error("_nbins_cats", "_nbins_cats must be > 1.");
    }
    if (_parms._nbins_cats >= binLimit) {
      error("_nbins_cats", "_nbins_cats must be < " + binLimit);
    }
    if (_parms._max_depth <= 0) {
      error("_max_depth", "_max_depth must be > 0.");
    }
    if (_parms._min_rows <= 0) {
      error("_min_rows", "_min_rows must be > 0.");
    }

    if (_train != null) {
      // Total weight = row count times the mean weight (or 1 when there is no weights column).
      final double rowCount = _train.numRows();
      double meanWeight = 1;
      if (hasWeightCol()) {
        meanWeight = _train.vec(_parms._weights_column).mean();
      }
      final double totalWeight = rowCount * meanWeight;

      // Need at least 2*min_rows weighted rows to split even once
      if (totalWeight < 2 * _parms._min_rows) {
        error(
            "_min_rows",
            "The dataset size is too small to split for min_rows="
                + _parms._min_rows
                + ": must have at least "
                + 2 * _parms._min_rows
                + " (weighted) rows, but have only "
                + totalWeight
                + ".");
      }

      _ncols = _train.numCols() - 1 - numSpecialCols();
    }
  }
Beispiel #6
0
 /**
  * Validates the builder arguments after delegating to {@code super.init(expensive)}.
  *
  * <p>Every entry of {@code _probs} below 0 or above 1 is recorded as a validation error.
  * {@code _ncols} is set to the training frame's column count minus the number of special
  * columns. The only special column accepted is a single observation-weights column.
  *
  * @param expensive forwarded unchanged to {@code super.init}
  * @throws IllegalArgumentException if there is more than one special column, or exactly one
  *     while {@code _weights} is null
  */
 @Override
 public void init(boolean expensive) {
   super.init(expensive);

   for (double prob : _parms._probs) {
     if (prob > 1.0 || prob < 0.0) {
       error("_probs", "Probabilities must be between 0 and 1");
     }
   }

   // offset/weights/nfold - should only ever be weights
   final int specialCols = numSpecialCols();
   _ncols = train().numCols() - specialCols;

   if (specialCols > 1) {
     throw new IllegalArgumentException("Cannot handle more than 1 special vec (weights)");
   }
   if (specialCols == 1 && _weights == null) {
     throw new IllegalArgumentException(
         "The only special Vec that is supported for Quantiles is observation weights.");
   }
 }
  /**
   * Builds an exception describing the validation failures recorded on a model builder.
   *
   * <p>The same text (algorithm name, destination key and the builder's validation errors) is
   * passed for both constructor arguments. The exception's {@code values} map carries the
   * entries {@code algo}, {@code parameters}, {@code error_count} and {@code messages}.
   *
   * @param builder the builder whose parameters and messages are reported
   * @return a newly created exception; it is returned, not thrown
   */
  public static H2OModelBuilderIllegalArgumentException makeFromBuilder(ModelBuilder builder) {
    final Model.Parameters parameters = builder._parms;
    final String algo = builder._parms.algoName();

    final String subject = "Illegal argument(s) for " + algo + " model: " + builder.dest();
    final String msg = subject + ".  Details: " + builder.validationErrors();

    final H2OModelBuilderIllegalArgumentException exception =
        new H2OModelBuilderIllegalArgumentException(msg, msg);

    // Fill the details map first, then attach it to the exception.
    final IcedHashMap.IcedHashMapStringObject details = new IcedHashMap.IcedHashMapStringObject();
    details.put("algo", algo);
    details.put("parameters", parameters);
    details.put("error_count", builder.error_count());
    details.put("messages", builder._messages);
    exception.values = details;

    return exception;
  }
Beispiel #8
0
  /**
   * Validates the builder arguments after delegating to {@code super.init(expensive)}.
   *
   * <p>Assigns a generated loading name when none is given and checks {@code _max_iterations}.
   * When a training frame is present it additionally checks the column count, computes the
   * expanded column count, validates {@code _k} against the smaller of that count and the row
   * count, and rejects the GLRM method without {@code _use_all_factor_levels}. The memory
   * footprint is checked only for an expensive init with no recorded errors.
   *
   * @param expensive forwarded to {@code super.init}; also gates the memory footprint check
   */
  @Override
  public void init(boolean expensive) {
    super.init(expensive);
    // if (_parms._loading_key == null) _parms._loading_key = Key.make("PCALoading_" + Key.rand());
    final boolean loadingNameMissing =
        _parms._loading_name == null || _parms._loading_name.length() == 0;
    if (loadingNameMissing) {
      _parms._loading_name = "PCALoading_" + Key.rand();
    }
    if (_parms._max_iterations < 1 || _parms._max_iterations > 1e6) {
      error("_max_iterations", "max_iterations must be between 1 and 1e6 inclusive");
    }

    // Everything below needs the training frame.
    if (_train == null) {
      return;
    }
    if (_train.numCols() < 2) {
      error("_train", "_train must have more than one column");
    }
    _ncolExp = _train.numColsExp(_parms._use_all_factor_levels, false);

    // TODO: Initialize _parms._k = min(ncolExp(_train), nrow(_train)) if not set
    final int largestK = (int) Math.min(_ncolExp, _train.numRows());
    if (_parms._k < 1 || _parms._k > largestK) {
      error("_k", "_k must be between 1 and " + largestK);
    }
    if (_parms._pca_method == PCAParameters.Method.GLRM && !_parms._use_all_factor_levels) {
      error("_use_all_factor_levels", "GLRM only implemented for _use_all_factor_levels = true");
    }

    if (!expensive) {
      return;
    }
    if (error_count() == 0) {
      checkMemoryFootPrint();
    }
  }
Beispiel #9
0
 /**
  * Validates the builder arguments after delegating to {@code super.init(expensive)}.
  *
  * <p>The parameter checks themselves are delegated to {@code _parms.validate(this, expensive)}.
  * The memory footprint is checked only for an expensive init with no recorded errors.
  *
  * @param expensive forwarded to {@code super.init} and {@code _parms.validate}; also gates the
  *     memory footprint check
  */
 @Override
 public void init(boolean expensive) {
   super.init(expensive);
   _parms.validate(this, expensive);

   if (!expensive) {
     return;
   }
   if (error_count() == 0) {
     checkMemoryFootPrint();
   }
 }
Beispiel #10
0
 /**
  * Create the corresponding impl object, as well as its parameters object.
  *
  * @return the builder produced by {@code ModelBuilder.make} for this schema's type; the two
  *     remaining arguments are passed as {@code null}
  */
 @Override
 public final B createImpl() {
   final B impl = ModelBuilder.make(getSchemaType(), null, null);
   return impl;
 }