Exemplo n.º 1
0
    /**
     * Completion callback: turns every undecided leaf of tree {@code _k} that belongs to the
     * current level into a decided node, then records the leaves that were added in the process
     * so the next level can work on them.
     *
     * @param caller the finished task; it is cast to {@code ScoreBuildHistogram} and its
     *     {@code _hcs} histograms are read, one entry per leaf of the current level
     */
    @Override
    public void onCompletion(CountedCompleter caller) {
      final ScoreBuildHistogram histoTask = (ScoreBuildHistogram) caller;

      // The current level spans the leaf indices [firstLeaf, levelEnd).
      final int firstLeaf = _leafs[_k];
      final int levelEnd = _tree.len();

      int leaf = firstLeaf;
      while (leaf < levelEnd) {
        final DTree.UndecidedNode pending = _tree.undecided(leaf);
        // Swap the undecided node for the split decision derived from its histograms.
        final DTree.DecidedNode decided = _st.makeDecided(pending, histoTask._hcs[leaf - firstLeaf]);
        final DTree.Split chosen = decided._split;
        if (chosen._col != -1) {
          _did_split = true;
          // Accumulate the squared-error improvement on the variable that was split on.
          final int splitCol = chosen.col();
          final float gain = (float) (chosen.pre_split_se() - chosen.se());
          AtomicUtils.FloatArray.add(_improvPerVar, splitCol, gain);
        } else {
          // A column of -1 means no split was chosen for this leaf.
          pending.do_not_split();
        }
        leaf++;
      }

      // Everything from levelEnd onwards was appended while deciding; it forms the next level.
      _leafs[_k] = levelEnd;
      final int addedLeaves = _tree.len() - levelEnd;
      final DHistogram[][] nextLevel = new DHistogram[addedLeaves][ /*ncol*/];
      _hcs[_k] = nextLevel;
      for (int offset = 0; levelEnd + offset < _tree.len(); offset++) {
        nextLevel[offset] = _tree.undecided(levelEnd + offset)._hs;
      }

      if (_did_split) {
        _tree._depth++;
      }
    }
Exemplo n.º 2
0
 /**
  * Validates the parameters specific to this builder after delegating to
  * {@code super.init(expensive)}.
  *
  * <p>Checks performed, in order:
  *
  * <ul>
  *   <li>{@code _sample_rate} must lie in (0,1]; anything else (including NaN) throws.
  *   <li>{@code _mtries} must be -1 or at least 1 and, when a training frame is available,
  *       strictly smaller than that frame's column count.
  *   <li>A sample rate of exactly 1 with no validation frame is reported as an error.
  *   <li>Any offset column is reported as an error.
  * </ul>
  *
  * <p>Apart from the sample-rate check, problems are reported through {@code error(...)} rather
  * than thrown.
  *
  * @param expensive forwarded unchanged to {@code super.init}; not otherwise used here
  * @throws IllegalArgumentException if {@code _sample_rate} is not in (0,1]
  */
 @Override
 public void init(boolean expensive) {
   super.init(expensive);

   // Written as a negated conjunction so that NaN is rejected as well.
   if (!(0.0 < _parms._sample_rate && _parms._sample_rate <= 1.0)) {
     throw new IllegalArgumentException(
         "Sample rate should be interval (0,1> but it is " + _parms._sample_rate);
   }

   if (_parms._mtries < 1 && _parms._mtries != -1) {
     error(
         "_mtries",
         "mtries must be -1 (converted to sqrt(features)), or >= 1 but it is " + _parms._mtries);
   }

   if (_train != null) {
     int ncols = _train.numCols();
     // The upper bound is exclusive, so the message reports a half-open interval.
     // NOTE(review): presumably exclusive because the frame also holds the response column -
     // confirm.
     if (_parms._mtries != -1 && !(1 <= _parms._mtries && _parms._mtries < ncols)) {
       error(
           "_mtries",
           "Computed mtries should be -1 or in interval [1,"
               + ncols
               + ") but it is "
               + _parms._mtries);
     }
   }

   if (_parms._sample_rate == 1f && _valid == null) {
     error(
         "_sample_rate",
         "Sample rate is 100% and no validation dataset is specified.  There are no OOB data to compute out-of-bag error estimation!");
   }

   // Offsets are rejected for every problem type, so no additional classifier-only check is
   // needed (it could only repeat this error with a contradictory message).
   if (hasOffset()) {
     error("_offset_column", "Offsets are not yet supported for DRF.");
   }
 }
Exemplo n.º 3
0
 /**
  * Validates the parameters specific to this builder after delegating to
  * {@code super.init(expensive)}, and fills in defaults that depend on the problem type.
  *
  * <p>Steps performed, in order:
  *
  * <ul>
  *   <li>{@code _sample_rate} must lie in (0,1]; anything else (including NaN) throws.
  *   <li>{@code _mtries} must be -1 or at least 1 and, when a training frame is available,
  *       strictly smaller than that frame's column count.
  *   <li>An {@code AUTO} distribution is replaced by gaussian when {@code _nclass == 1} and by
  *       multinomial when {@code _nclass >= 2}.
  *   <li>On an expensive pass the initial prediction is set: 0 for classifiers, otherwise the
  *       value returned by {@code getInitialValue()}.
  *   <li>A sample rate of exactly 1 with no validation frame is reported as an error.
  *   <li>Any offset column is reported as an error.
  * </ul>
  *
  * <p>Apart from the sample-rate check, problems are reported through {@code error(...)} rather
  * than thrown.
  *
  * @param expensive forwarded to {@code super.init}; when {@code true} the initial prediction is
  *     also computed
  * @throws IllegalArgumentException if {@code _sample_rate} is not in (0,1]
  */
 @Override
 public void init(boolean expensive) {
   super.init(expensive);

   // Written as a negated conjunction so that NaN is rejected as well. Zero is excluded, so
   // the message reports the interval as open on the left.
   if (!(0.0 < _parms._sample_rate && _parms._sample_rate <= 1.0)) {
     throw new IllegalArgumentException(
         "Sample rate should be interval (0,1] but it is " + _parms._sample_rate);
   }

   if (_parms._mtries < 1 && _parms._mtries != -1) {
     error(
         "_mtries",
         "mtries must be -1 (converted to sqrt(features)), or >= 1 but it is " + _parms._mtries);
   }

   if (_train != null) {
     int ncols = _train.numCols();
     // The upper bound is exclusive, so the message reports a half-open interval.
     // NOTE(review): presumably exclusive because the frame also holds the response column -
     // confirm.
     if (_parms._mtries != -1 && !(1 <= _parms._mtries && _parms._mtries < ncols)) {
       error(
           "_mtries",
           "Computed mtries should be -1 or in interval [1,"
               + ncols
               + ") but it is "
               + _parms._mtries);
     }
   }

   // Resolve AUTO from the number of classes; any other _nclass value leaves it untouched.
   if (_parms._distribution == Distribution.Family.AUTO) {
     if (_nclass == 1) {
       _parms._distribution = Distribution.Family.gaussian;
     } else if (_nclass >= 2) {
       _parms._distribution = Distribution.Family.multinomial;
     }
   }

   // The initial value is only computed on the expensive pass.
   if (expensive) {
     _initialPrediction = isClassifier() ? 0 : getInitialValue();
   }

   if (_parms._sample_rate == 1f && _valid == null) {
     error(
         "_sample_rate",
         "Sample rate is 100% and no validation dataset is specified.  There are no OOB data to compute out-of-bag error estimation!");
   }

   // Offsets are rejected for every problem type, so no additional classifier-only check is
   // needed (it could only repeat this error with a contradictory message).
   if (hasOffsetCol()) {
     error("_offset_column", "Offsets are not yet supported for DRF.");
   }
 }