@Override public void onCompletion(CountedCompleter caller) { ScoreBuildHistogram sbh = (ScoreBuildHistogram) caller; // System.out.println(sbh.profString()); final int leafk = _leafs[_k]; int tmax = _tree.len(); // Number of total splits in tree K for (int leaf = leafk; leaf < tmax; leaf++) { // Visit all the new splits (leaves) DTree.UndecidedNode udn = _tree.undecided(leaf); // System.out.println((_st._nclass==1?"Regression":("Class // "+_fr2.vecs()[_st._ncols].domain()[_k]))+",\n Undecided node:"+udn); // Replace the Undecided with the Split decision DTree.DecidedNode dn = _st.makeDecided(udn, sbh._hcs[leaf - leafk]); // System.out.println(dn + // " > Split: " + dn._split + " L/R:" + dn._split._n0+" + // "+dn._split._n1); if (dn._split._col == -1) udn.do_not_split(); else { _did_split = true; DTree.Split s = dn._split; // Accumulate squared error improvements per variable AtomicUtils.FloatArray.add(_improvPerVar, s.col(), (float) (s.pre_split_se() - s.se())); } } _leafs[_k] = tmax; // Setup leafs for next tree level int new_leafs = _tree.len() - tmax; _hcs[_k] = new DHistogram[new_leafs][ /*ncol*/]; for (int nl = tmax; nl < _tree.len(); nl++) _hcs[_k][nl - tmax] = _tree.undecided(nl)._hs; if (_did_split) _tree._depth++; }
/** * Initialize the ModelBuilder, validating all arguments and preparing the training frame. This * call is expected to be overridden in the subclasses and each subclass will start with * "super.init();". This call is made by the front-end whenever the GUI is clicked, and needs to * be fast; heavy-weight prep needs to wait for the trainModel() call. */ @Override public void init(boolean expensive) { super.init(expensive); // Initialize local variables if (!(0.0 < _parms._sample_rate && _parms._sample_rate <= 1.0)) throw new IllegalArgumentException( "Sample rate should be interval (0,1> but it is " + _parms._sample_rate); if (_parms._mtries < 1 && _parms._mtries != -1) error( "_mtries", "mtries must be -1 (converted to sqrt(features)), or >= 1 but it is " + _parms._mtries); if (_train != null) { int ncols = _train.numCols(); if (_parms._mtries != -1 && !(1 <= _parms._mtries && _parms._mtries < ncols)) error( "_mtries", "Computed mtries should be -1 or in interval <1,#cols> but it is " + _parms._mtries); } if (_parms._sample_rate == 1f && _valid == null) error( "_sample_rate", "Sample rate is 100% and no validation dataset is specified. There are no OOB data to compute out-of-bag error estimation!"); if (hasOffset()) error("_offset_column", "Offsets are not yet supported for DRF."); if (hasOffset() && isClassifier()) { error("_offset_column", "Offset is only supported for regression."); } }
/** * Initialize the ModelBuilder, validating all arguments and preparing the training frame. This * call is expected to be overridden in the subclasses and each subclass will start with * "super.init();". This call is made by the front-end whenever the GUI is clicked, and needs to * be fast; heavy-weight prep needs to wait for the trainModel() call. */ @Override public void init(boolean expensive) { super.init(expensive); // Initialize local variables if (!(0.0 < _parms._sample_rate && _parms._sample_rate <= 1.0)) throw new IllegalArgumentException( "Sample rate should be interval [0,1] but it is " + _parms._sample_rate); if (_parms._mtries < 1 && _parms._mtries != -1) error( "_mtries", "mtries must be -1 (converted to sqrt(features)), or >= 1 but it is " + _parms._mtries); if (_train != null) { int ncols = _train.numCols(); if (_parms._mtries != -1 && !(1 <= _parms._mtries && _parms._mtries < ncols)) error( "_mtries", "Computed mtries should be -1 or in interval [1," + ncols + "] but it is " + _parms._mtries); } if (_parms._distribution == Distribution.Family.AUTO) { if (_nclass == 1) _parms._distribution = Distribution.Family.gaussian; if (_nclass >= 2) _parms._distribution = Distribution.Family.multinomial; } if (expensive) { _initialPrediction = isClassifier() ? 0 : getInitialValue(); } if (_parms._sample_rate == 1f && _valid == null) error( "_sample_rate", "Sample rate is 100% and no validation dataset is specified. There are no OOB data to compute out-of-bag error estimation!"); if (hasOffsetCol()) error("_offset_column", "Offsets are not yet supported for DRF."); if (hasOffsetCol() && isClassifier()) { error("_offset_column", "Offset is only supported for regression."); } }