@Override public void modifyParmsForCrossValidationMainModel(ModelBuilder[] cvModelBuilders) { _parms._overwrite_with_best_model = false; if (_parms._stopping_rounds == 0 && _parms._max_runtime_secs == 0) return; // No exciting changes to stopping conditions // Extract stopping conditions from each CV model, and compute the best stopping answer _parms._stopping_rounds = 0; _parms._max_runtime_secs = 0; double sum = 0; for (ModelBuilder cvmb : cvModelBuilders) sum += ((DeepLearningModel) DKV.getGet(cvmb.dest())).last_scored().epoch_counter; _parms._epochs = sum / cvModelBuilders.length; if (!_parms._quiet_mode) { warn( "_epochs", "Setting optimal _epochs to " + _parms._epochs + " for cross-validation main model based on early stopping of cross-validation models."); warn( "_stopping_rounds", "Disabling convergence-based early stopping for cross-validation main model."); warn( "_max_runtime_secs", "Disabling maximum allowed runtime for cross-validation main model."); } }
/**
 * Initializes the builder and validates the time/event columns and iteration settings.
 *
 * <p>Runs {@code super.init(expensive)} first, then records an error for each of the following:
 * a non-integer start column (a {@code null} start column is accepted), a non-integer stop
 * column, an event column that is neither integer nor categorical, an {@code lre_min} that is
 * NaN or not positive, and an {@code iter_max} below 1. Finally the number of time bins spanned
 * by the start/stop columns is computed and checked against a fixed upper limit of 10000.
 *
 * <p>As with every {@code init} override, this is expected to be cheap; heavy-weight preparation
 * belongs in the training call.
 *
 * @param expensive forwarded unchanged to {@code super.init}
 */
@Override
public void init(boolean expensive) {
  super.init(expensive);

  final boolean hasStartColumn = _parms.start_column != null;
  if (hasStartColumn && !_parms.start_column.isInt()) {
    error("start_column", "start time must be null or of type integer");
  }
  if (!_parms.stop_column.isInt()) {
    error("stop_column", "stop time must be of type integer");
  }
  if (!(_parms.event_column.isInt() || _parms.event_column.isCategorical())) {
    error("event_column", "event must be of type integer or factor");
  }
  // Negated comparison: rejects NaN together with zero and negative values.
  if (!(_parms.lre_min > 0)) {
    error("lre_min", "lre_min must be a positive number");
  }
  if (_parms.iter_max < 1) {
    error("iter_max", "iter_max must be a positive integer");
  }

  final int maxTimeBins = 10000;

  // Without a start column the first bin is the smallest stop time; with one, it is the
  // smallest start time plus one.
  final long firstTime;
  if (hasStartColumn) {
    firstTime = ((long) _parms.start_column.min()) + 1;
  } else {
    firstTime = (long) _parms.stop_column.min();
  }
  final int timeBinCount = (int) (_parms.stop_column.max() - firstTime + 1);

  if (timeBinCount < 1) {
    error("start_column", "start times must be strictly less than stop times");
  }
  if (timeBinCount > maxTimeBins) {
    error(
        "stop_column",
        "number of distinct stop times is "
            + timeBinCount
            + "; maximum number allowed is "
            + maxTimeBins);
  }
}
@SuppressWarnings("unused") // called through reflection by RequestServer /** Return the metadata for a REST API Route, specified either by number or path. */ public MetadataV3 fetchRoute(int version, MetadataV3 docs) { Route route = null; if (null != docs.path && null != docs.http_method) { route = RequestServer.lookup(docs.http_method, docs.path); } else { // Linear scan for the route, plus each route is asked for in-order // during doc-gen leading to an O(n^2) execution cost. int i = 0; for (Route r : RequestServer.routes()) if (i++ == docs.num) { route = r; break; } // Crash-n-burn if route not found (old code thru an AIOOBE), so we // something similarly bad. docs.routes = new RouteBase[] {(RouteBase) Schema.schema(version, Route.class).fillFromImpl(route)}; } Schema sinput, soutput; if (route._handler_class.equals(water.api.ModelBuilderHandler.class)) { String ss[] = route._url_pattern_raw.split("/"); String algoURLName = ss[3]; // {}/{3}/{ModelBuilders}/{gbm}/{parameters} int version2 = Integer.valueOf(ss[1]); String algoName = ModelBuilder.algoName(algoURLName); // gbm -> GBM; deeplearning -> DeepLearning String schemaDir = ModelBuilder.schemaDirectory(algoURLName); String inputSchemaName = schemaDir + algoName + "V" + version2; // hex.schemas.GBMV3 sinput = (Schema) TypeMap.theFreezable(TypeMap.onIce(inputSchemaName)); sinput.init_meta(); // hex.schemas.GBMModelV3$GBMModelOutputV3 String outputSchemaName = schemaDir + algoName + "ModelV" + version2 + "$" + algoName + "ModelOutputV" + version2; soutput = (Schema) TypeMap.theFreezable(TypeMap.onIce(outputSchemaName)); soutput.init_meta(); } else { sinput = Schema.newInstance(Handler.getHandlerMethodInputSchema(route._handler_method)); soutput = Schema.newInstance(Handler.getHandlerMethodOutputSchema(route._handler_method)); } docs.routes[0].markdown = route.markdown(sinput, soutput).toString(); return docs; }
/** * Initialize the ModelBuilder, validating all arguments and preparing the training frame. This * call is expected to be overridden in the subclasses and each subclass will start with * "super.init();". This call is made by the front-end whenever the GUI is clicked, and needs to * be fast; heavy-weight prep needs to wait for the trainModel() call. * * <p>Validate the requested ntrees; precompute actual ntrees. Validate the number of classes to * predict on; validate a checkpoint. */ @Override public void init(boolean expensive) { super.init(expensive); if (H2O.ARGS.client && _parms._build_tree_one_node) error("_build_tree_one_node", "Cannot run on a single node in client mode"); if (_vresponse != null) _vresponse_key = _vresponse._key; if (_response != null) _response_key = _response._key; if (_nclass > SharedTreeModel.SharedTreeParameters.MAX_SUPPORTED_LEVELS) error("_nclass", "Too many levels in response column!"); if (_parms._min_rows < 0) error("_min_rows", "Requested min_rows must be greater than 0"); if (_parms._ntrees < 0 || _parms._ntrees > 100000) error("_ntrees", "Requested ntrees must be between 1 and 100000"); _ntrees = _parms._ntrees; // Total trees in final model if (_parms._checkpoint) { // Asking to continue from checkpoint? 
Value cv = DKV.get(_parms._model_id); if (cv != null) { // Look for prior model M checkpointModel = cv.get(); if (_parms._ntrees < checkpointModel._output._ntrees + 1) error( "_ntrees", "Requested ntrees must be between " + checkpointModel._output._ntrees + 1 + " and 100000"); _ntrees = _parms._ntrees - checkpointModel._output._ntrees; // Needed trees } } if (_parms._nbins <= 1) error("_nbins", "_nbins must be > 1."); if (_parms._nbins >= 1 << 16) error("_nbins", "_nbins must be < " + (1 << 16)); if (_parms._nbins_cats <= 1) error("_nbins_cats", "_nbins_cats must be > 1."); if (_parms._nbins_cats >= 1 << 16) error("_nbins_cats", "_nbins_cats must be < " + (1 << 16)); if (_parms._max_depth <= 0) error("_max_depth", "_max_depth must be > 0."); if (_parms._min_rows <= 0) error("_min_rows", "_min_rows must be > 0."); if (_parms._distribution == Distributions.Family.tweedie) { _parms._distribution.tweedie.p = _parms._tweedie_power; } if (_train != null) { double sumWeights = _train.numRows() * (hasWeightCol() ? _train.vec(_parms._weights_column).mean() : 1); if (sumWeights < 2 * _parms._min_rows) // Need at least 2*min_rows weighted rows to split even once error( "_min_rows", "The dataset size is too small to split for min_rows=" + _parms._min_rows + ": must have at least " + 2 * _parms._min_rows + " (weighted) rows, but have only " + sumWeights + "."); } if (_train != null) _ncols = _train.numCols() - 1 - numSpecialCols(); }
/** * Initialize the ModelBuilder, validating all arguments and preparing the training frame. This * call is expected to be overridden in the subclasses and each subclass will start with * "super.init();". This call is made by the front-end whenever the GUI is clicked, and needs to * be fast; heavy-weight prep needs to wait for the trainModel() call. * * <p>Validate the requested ntrees; precompute actual ntrees. Validate the number of classes to * predict on; validate a checkpoint. */ @Override public void init(boolean expensive) { super.init(expensive); if (H2O.ARGS.client && _parms._build_tree_one_node) error("_build_tree_one_node", "Cannot run on a single node in client mode"); if (_vresponse != null) _vresponse_key = _vresponse._key; if (_response != null) _response_key = _response._key; if (_parms._min_rows < 0) error("_min_rows", "Requested min_rows must be greater than 0"); if (_parms._ntrees < 0 || _parms._ntrees > MAX_NTREES) error("_ntrees", "Requested ntrees must be between 1 and " + MAX_NTREES); _ntrees = _parms._ntrees; // Total trees in final model if (_parms.hasCheckpoint()) { // Asking to continue from checkpoint? 
Value cv = DKV.get(_parms._checkpoint); if (cv != null) { // Look for prior model M checkpointModel = cv.get(); try { _parms.validateWithCheckpoint(checkpointModel._parms); } catch (H2OIllegalArgumentException e) { error(e.values.get("argument").toString(), e.values.get("value").toString()); } if (_parms._ntrees < checkpointModel._output._ntrees + 1) error( "_ntrees", "If checkpoint is specified then requested ntrees must be higher than " + (checkpointModel._output._ntrees + 1)); // Compute number of trees to build for this checkpoint _ntrees = _parms._ntrees - checkpointModel._output._ntrees; // Needed trees } } if (_parms._nbins <= 1) error("_nbins", "_nbins must be > 1."); if (_parms._nbins >= 1 << 16) error("_nbins", "_nbins must be < " + (1 << 16)); if (_parms._nbins_cats <= 1) error("_nbins_cats", "_nbins_cats must be > 1."); if (_parms._nbins_cats >= 1 << 16) error("_nbins_cats", "_nbins_cats must be < " + (1 << 16)); if (_parms._max_depth <= 0) error("_max_depth", "_max_depth must be > 0."); if (_parms._min_rows <= 0) error("_min_rows", "_min_rows must be > 0."); if (_train != null) { double sumWeights = _train.numRows() * (hasWeightCol() ? _train.vec(_parms._weights_column).mean() : 1); if (sumWeights < 2 * _parms._min_rows) // Need at least 2*min_rows weighted rows to split even once error( "_min_rows", "The dataset size is too small to split for min_rows=" + _parms._min_rows + ": must have at least " + 2 * _parms._min_rows + " (weighted) rows, but have only " + sumWeights + "."); } if (_train != null) _ncols = _train.numCols() - 1 - numSpecialCols(); }
/** * Initialize the ModelBuilder, validating all arguments and preparing the training frame. This * call is expected to be overridden in the subclasses and each subclass will start with * "super.init();". This call is made by the front-end whenever the GUI is clicked, and needs to * be fast; heavy-weight prep needs to wait for the trainModel() call. * * <p>Validate the probs. */ @Override public void init(boolean expensive) { super.init(expensive); for (double p : _parms._probs) if (p < 0.0 || p > 1.0) error("_probs", "Probabilities must be between 0 and 1"); _ncols = train().numCols() - numSpecialCols(); // offset/weights/nfold - should only ever be weights if (numSpecialCols() == 1 && _weights == null) throw new IllegalArgumentException( "The only special Vec that is supported for Quantiles is observation weights."); if (numSpecialCols() > 1) throw new IllegalArgumentException("Cannot handle more than 1 special vec (weights)"); }
/**
 * Builds an exception describing the validation failures collected by a model builder.
 *
 * <p>The same text is passed as both constructor arguments of the exception. It names the
 * algorithm, the builder's destination and the builder's validation errors. The exception's
 * {@code values} map is replaced by a fresh map holding the entries {@code algo}, {@code
 * parameters}, {@code error_count} and {@code messages}.
 *
 * @param builder the builder whose parameters and messages are reported
 * @return the populated exception; it is returned, not thrown
 */
public static H2OModelBuilderIllegalArgumentException makeFromBuilder(ModelBuilder builder) {
  final Model.Parameters parms = builder._parms;
  final String algoName = parms.algoName();

  final StringBuilder text = new StringBuilder();
  text.append("Illegal argument(s) for ").append(algoName);
  text.append(" model: ").append(builder.dest());
  text.append(". Details: ").append(builder.validationErrors());
  final String message = text.toString();

  final H2OModelBuilderIllegalArgumentException failure =
      new H2OModelBuilderIllegalArgumentException(message, message);

  final IcedHashMap.IcedHashMapStringObject details = new IcedHashMap.IcedHashMapStringObject();
  details.put("algo", algoName);
  details.put("parameters", parms);
  details.put("error_count", builder.error_count());
  details.put("messages", builder._messages);
  failure.values = details;

  return failure;
}
@Override public void init(boolean expensive) { super.init(expensive); // if (_parms._loading_key == null) _parms._loading_key = Key.make("PCALoading_" + Key.rand()); if (_parms._loading_name == null || _parms._loading_name.length() == 0) _parms._loading_name = "PCALoading_" + Key.rand(); if (_parms._max_iterations < 1 || _parms._max_iterations > 1e6) error("_max_iterations", "max_iterations must be between 1 and 1e6 inclusive"); if (_train == null) return; if (_train.numCols() < 2) error("_train", "_train must have more than one column"); _ncolExp = _train.numColsExp(_parms._use_all_factor_levels, false); // TODO: Initialize _parms._k = min(ncolExp(_train), nrow(_train)) if not set int k_min = (int) Math.min(_ncolExp, _train.numRows()); if (_parms._k < 1 || _parms._k > k_min) error("_k", "_k must be between 1 and " + k_min); if (!_parms._use_all_factor_levels && _parms._pca_method == PCAParameters.Method.GLRM) error("_use_all_factor_levels", "GLRM only implemented for _use_all_factor_levels = true"); if (expensive && error_count() == 0) checkMemoryFootPrint(); }
/**
 * Initializes the builder and validates its parameters.
 *
 * <p>Runs {@code super.init(expensive)}, then hands validation of the (very large) parameter set
 * to {@code _parms.validate}. On an expensive pass with no errors recorded, {@code
 * checkMemoryFootPrint()} is run as well.
 *
 * <p>As with every {@code init} override, the cheap pass is expected to be fast; heavy-weight
 * preparation belongs in the training call.
 *
 * @param expensive forwarded to {@code super.init} and {@code _parms.validate}; also gates the
 *     footprint check
 */
@Override
public void init(boolean expensive) {
  super.init(expensive);
  _parms.validate(this, expensive);

  // The footprint check runs only on the expensive pass, and only while no errors are recorded.
  final boolean runFootprintCheck = expensive && error_count() == 0;
  if (runFootprintCheck) {
    checkMemoryFootPrint();
  }
}
/**
 * Creates the implementation object that corresponds to this schema, as well as its parameters
 * object.
 *
 * <p>Delegates to {@code ModelBuilder.make}, passing {@code getSchemaType()} as the first
 * argument and {@code null} for the other two.
 *
 * @return the builder produced by {@code ModelBuilder.make}
 */
@Override
public final B createImpl() {
  final B impl = ModelBuilder.make(getSchemaType(), null, null);
  return impl;
}