Beispiel #1
0
 /**
  * Flags sampled-out rows of this chunk as out-of-bag.
  *
  * <p>A row is flagged when its random draw is at or above {@code _rate}, or when its response
  * value is NaN. The random generator is obtained from {@code _tree} for this chunk's index.
  *
  * @param nids per-row node ids; flagged rows are overwritten with {@code OUT_OF_BAG}
  * @param ys response values, read only to detect NaN
  */
 @Override
 public void map(Chunk nids, Chunk ys) {
   final Random rng = _tree.rngForChunk(nids.cidx());
   for (int r = 0; r < nids._len; r++) {
     // Exactly one draw is consumed per row, before the response is looked at.
     final boolean sampledOut = rng.nextFloat() >= _rate;
     if (!sampledOut && !Double.isNaN(ys.at0(r))) {
       continue; // Row stays in the bag
     }
     nids.set0(r, OUT_OF_BAG); // Row is ignored by sampling
   }
 }
Beispiel #2
0
    /**
     * Overwrites the working column(s) with residuals: actual value minus the value currently
     * stored in the working column.
     *
     * <p>Classification ({@code _nclass > 1}): for every row with a non-missing response and every
     * class whose {@code _distribution} entry is non-zero, the actual value is 1 for the row's
     * class and 0 otherwise. Regression: the single working column receives
     * {@code response - work}.
     *
     * @param chks chunks for this task; response and working columns are located through
     *     {@code chk_resp} and {@code chk_work}
     */
    @Override
    public void map(Chunk chks[]) {
      final Chunk resp = chk_resp(chks);

      if (_nclass <= 1) { // Regression: prediction ==> residual
        final Chunk work = chk_work(chks, 0);
        for (int r = 0; r < resp._len; r++) {
          final double residual = resp.at0(r) - work.at0(r);
          work.set0(r, (float) residual);
        }
        return;
      }

      // Classification
      for (int r = 0; r < resp._len; r++) {
        if (resp.isNA0(r)) {
          continue; // No response, nothing to compare against
        }
        final int actual = (int) resp.at80(r); // zero-based response class
        for (int cls = 0; cls < _nclass; cls++) {
          if (_distribution[cls] == 0) {
            continue; // Class skipped when its distribution entry is zero
          }
          final Chunk work = chk_work(chks, cls);
          // Indicator is '1' for the row's own class and '0' for every other class
          final float indicator = (actual == cls) ? 1f : 0f;
          work.set0(r, indicator - (float) work.at0(r));
        }
      }
    }
Beispiel #3
0
 /**
  * Fills the working column(s) with the current prediction for every row.
  *
  * <p>Classification ({@code _nclass > 1}): {@code score1} fills a per-class score array and
  * returns its sum; each working column receives {@code score / sum}. When the sum is infinite,
  * classes with an infinite score receive 1 and all others receive 0. Regression: the working
  * column receives the value of the tree column for class 0.
  *
  * @param chks chunks for this task; columns are located through {@code chk_resp},
  *     {@code chk_work} and {@code chk_tree}
  */
 @Override
 public void map(Chunk chks[]) {
   final Chunk resp = chk_resp(chks);

   if (_nclass > 1) { // Classification
     final float[] scores = new float[_nclass + 1];
     for (int r = 0; r < resp._len; r++) {
       final float total = score1(chks, scores, r);
       // Overflow (happens for constant responses)
       final boolean overflow = Float.isInfinite(total);
       for (int cls = 0; cls < _nclass; cls++) {
         final float prob;
         if (overflow) {
           prob = Float.isInfinite(scores[cls + 1]) ? 1.0f : 0.0f;
         } else {
           prob = scores[cls + 1] / total; // Save as a probability distribution
         }
         chk_work(chks, cls).set0(r, prob);
       }
     }
   } else { // Regression
     final Chunk treeSums = chk_tree(chks, 0); // Prior tree sums
     final Chunk preds = chk_work(chks, 0); // Predictions
     for (int r = 0; r < resp._len; r++) {
       preds.set0(r, (float) treeSums.at0(r));
     }
   }
 }
Beispiel #4
0
 /**
  * Sends each row down to its leaf in every class tree and accumulates per-leaf residual
  * statistics into {@code _gss} and {@code _rss}.
  *
  * <p>Both per-class arrays are indexed by {@code leafnid - _leafs[k]}. A class whose tree is
  * {@code null} keeps a {@code null} entry; a class whose root is already a leaf keeps
  * zero-filled arrays. Rows with a negative node id are skipped. For every processed row the
  * node-id column is overwritten with the id of the leaf reached.
  *
  * @param chks chunks for this task; node-id and residual columns are located through
  *     {@code chk_nids} and {@code chk_work}
  */
 @Override
 public void map(Chunk[] chks) {
   _gss = new double[_nclass][];
   _rss = new double[_nclass][];

   // One pass per tree/class
   for (int k = 0; k < _nclass; k++) {
     final DTree tree = _trees[k];
     final int leaf = _leafs[k];
     if (tree == null) {
       continue; // Empty class is ignored
     }

     // Leaf-biased arrays covering all active leaves of this tree.
     final double[] gs = new double[tree._len - leaf];
     final double[] rs = new double[tree._len - leaf];
     _gss[k] = gs;
     _rss[k] = rs;

     final Chunk nids = chk_nids(chks, k); // Node-ids  for this tree/class
     final Chunk ress = chk_work(chks, k); // Residuals for this tree/class

     // With all-constant responses even the root is not split, and the
     // residuals should be zero.
     if (tree.root() instanceof LeafNode) {
       continue;
     }

     for (int row = 0; row < nids._len; row++) {
       int nid = (int) nids.at80(row); // Node to decide from
       if (nid < 0) {
         continue; // Missing response
       }
       if (tree.node(nid) instanceof UndecidedNode) {
         // The tree bottomed out here: fall back to the parent's decision
         nid = tree.node(nid)._pid;
       }
       DecidedNode dn = tree.decided(nid); // Must have a decision point
       if (dn._split._col == -1) {
         // Unable to decide: fall back to the parent's decision
         nid = dn._pid;
         dn = tree.decided(nid);
       }

       final int leafnid = dn.ns(chks, row); // Decide down to a leaf node
       assert leaf <= leafnid && leafnid < tree._len;
       assert tree.node(leafnid) instanceof LeafNode;

       // Note: I can tell which leaf/region the row ends up in, but the
       // prediction presented by the tree is not used. For GBM, the
       // sum-of-residuals (and sum/abs/mult residuals) is computed over all
       // rows in the leaf, and the prediction is derived from that.
       nids.set0(row, leafnid);
       assert !ress.isNA0(row);

       final double res = ress.at0(row);
       final double ares = Math.abs(res);
       final int slot = leafnid - leaf;
       final double weight;
       if (_nclass > 1) {
         weight = ares * (1 - ares);
       } else {
         weight = 1;
       }
       gs[slot] += weight;
       rs[slot] += res;
     }
   }
 }
Beispiel #5
0
 /**
  * Collects out-of-bag (OOB) predictions of the current k-class trees for every row.
  *
  * <p>For each row and each class tree that is non-null and whose root is not a leaf, the row's
  * node id is read; if {@code isOOBRow} accepts it, the row is routed to a leaf, the leaf's
  * prediction is added to the per-class tree column, and the OOB counter column is updated
  * (set to 1 for classification, incremented for regression). The node-id column is then reset
  * to 0 for that row and class.
  *
  * <p>When {@code importance} is set, OOB rows with a non-missing response also update
  * {@code rightVotes} (classification) or {@code sse} (regression), and {@code allRows}.
  *
  * @param chks chunks for this task; columns are located through {@code chk_resp},
  *     {@code chk_oobt}, {@code chk_nids} and {@code chk_tree}
  */
 @Override
 public void map(Chunk[] chks) {
   // Scratch state below is only needed when variable importance is being collected.
   Chunk y = null; // Response
   float[] rpred = null; // Row prediction
   double[] rowdata = null; // Pre-allocated row data
   if (importance) {
     y = chk_resp(chks);
     rpred = new float[1 + _nclass];
     rowdata = new double[_ncols];
   }
   final Chunk oobt = chk_oobt(chks); // Out-of-bag rows counter over all trees

   for (int row = 0; row < oobt._len; row++) {
     boolean wasOOBRow = false;

     // Visit every tree (i.e., one per k-class)
     for (int k = 0; k < _nclass; k++) {
       final DTree tree = _trees[k];
       // An empty class is ignored; so is a tree whose root was never split
       // (all-constant responses, residuals should be zero).
       if (tree == null || tree.root() instanceof LeafNode) {
         continue;
       }
       final Chunk nids = chk_nids(chks, k); // Node-ids  for this tree/class
       final Chunk ct = chk_tree(chks, k); // k-tree working column holding votes for given row
       int nid = (int) nids.at80(row); // Node to decide from

       // Only out-of-bag rows are updated, tracking the on-the-fly prediction for the row.
       if (isOOBRow(nid)) { // The row should be OOB for all k-trees !!!
         assert k == 0 || wasOOBRow
             : "Something is wrong: k-class trees oob row computing is broken! All k-trees should agree on oob row!";
         wasOOBRow = true;
         nid = oob2Nid(nid);
         if (tree.node(nid) instanceof UndecidedNode) {
           // The tree bottomed out here: fall back to the parent's decision
           nid = tree.node(nid).pid();
         }
         DecidedNode dn = tree.decided(nid); // Must have a decision point
         if (dn._split.col() == -1) {
           // Unable to decide: fall back to the parent's decision
           dn = tree.decided(tree.node(nid).pid());
         }
         final int leafnid = dn.ns(chks, row); // Decide down to a leaf node

         // On-the-fly prediction of this tree for this row:
         //   - classification: cumulative number of votes for this row
         //   - regression: cumulative sum of predictions of each tree, which has to be
         //     normalized by the number of trees
         final LeafNode leafNode = (LeafNode) tree.node(leafnid);
         final double prediction = leafNode.pred(); // Prediction for this k-class and this row
         if (importance) {
           rpred[1 + k] = (float) prediction; // for both regression and classification
         }
         ct.set0(row, (float) (ct.at0(row) + prediction));

         // This tree voted for this row. Regression tracks the number of trees;
         // for classification a boolean flag is enough.
         oobt.set0(row, _nclass > 1 ? 1 : oobt.at0(row) + 1);
       }
       // Reset the help column for this row and this k-class
       nids.set0(row, 0);
     } /* end of k-trees iteration */

     if (importance && wasOOBRow && !y.isNA0(row)) {
       if (!classification) { // regression
         float treePred = rpred[1];
         float actuPred = (float) y.at0(row);
         sse += (actuPred - treePred) * (actuPred - treePred);
       } else {
         int treePred = ModelUtils.getPrediction(rpred, data_row(chks, row, rowdata));
         int actuPred = (int) y.at80(row);
         if (treePred == actuPred) {
           rightVotes++; // No miss !
         }
       }
       allRows++;
     }
   }
 }