@Override public void map(Chunk strata, Chunk newW) {
  // Zero out the weight of every row whose stratum is not the one we keep
  for (int i = 0; i < strata._len; ++i) {
    // Log.info("NID:" + ((int) strata.at8(i)));
    if ((int) strata.at8(i) != stratumToKeep) newW.set(i, 0);
  }
}
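// --- Hedged sketch (not from the original source): how a per-chunk task like the
// map(Chunk strata, Chunk newW) above is typically wrapped and launched in H2O-3.
// The class name KeepStratumOnly and the field _stratumToKeep are illustrative.
class KeepStratumOnly extends water.MRTask<KeepStratumOnly> {
  private final int _stratumToKeep;
  KeepStratumOnly(int stratumToKeep) { _stratumToKeep = stratumToKeep; }
  @Override public void map(water.fvec.Chunk strata, water.fvec.Chunk newW) {
    for (int i = 0; i < strata._len; ++i)
      if ((int) strata.at8(i) != _stratumToKeep)
        newW.set(i, 0); // zero the weight of rows outside the kept stratum
  }
}
// Usage (assuming strataVec and weightVec are aligned Vecs):
//   new KeepStratumOnly(nodeId).doAll(strataVec, weightVec);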
@Override public void map(Chunk chks[]) {
  Chunk cy = chk_resp(chks);
  for (int i = 0; i < cy._len; i++) {
    if (cy.isNA(i)) continue;             // skip rows with a missing response
    if (isClassifier()) {
      int cls = (int) cy.at8(i);
      chk_work(chks, cls).set(i, 1L);     // classification: mark the response class in its work column
    } else {
      float pred = (float) cy.atd(i);
      chk_work(chks, 0).set(i, pred);     // regression: seed the single work column with the response
    }
  }
}
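// --- Hedged sketch (illustrative only, not H2O code): per row, the classification branch
// above is effectively a one-hot encoding of the response class across the per-class
// "work" columns, while the regression branch copies the raw response.
static double[] oneHotRow(int cls, int nclass) {
  double[] w = new double[nclass]; // all-zero vote/work row
  w[cls] = 1.0;                    // mark the observed class
  return w;
}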
@Override public void map(Chunk cs) {
  int idx = _chunkOffset + cs.cidx();
  Key ckey = Vec.chunkKey(_v._key, idx);
  if (_cmap != null) {
    assert !cs.hasFloat() : "Input chunk (" + cs.getClass() + ") has float, but is expected to be categorical";
    NewChunk nc = new NewChunk(_v, idx);
    // Loop over rows and remap the categorical levels to the new domain, according to vecs[c].domain()
    for (int r = 0; r < cs._len; ++r) {
      if (cs.isNA(r)) nc.addNA();
      else nc.addNum(_cmap[(int) cs.at8(r)], 0);
    }
    nc.close(_fs);
  } else {
    // No domain remapping needed: deep-copy the chunk directly into the DKV under the target key
    DKV.put(ckey, cs.deepCopy(), _fs, true);
  }
}
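// --- Hedged sketch (helper name and signature are illustrative, not H2O API): one way a
// domain-remapping array like _cmap above can be built, mapping each level of the source
// categorical domain to its index in the target domain.
static int[] buildCMap(String[] srcDomain, String[] dstDomain) {
  java.util.Map<String, Integer> dstIdx = new java.util.HashMap<>();
  for (int j = 0; j < dstDomain.length; j++) dstIdx.put(dstDomain[j], j);
  int[] cmap = new int[srcDomain.length];
  for (int i = 0; i < srcDomain.length; i++)
    cmap[i] = dstIdx.getOrDefault(srcDomain[i], -1); // -1 if the level is absent from the target domain
  return cmap;
}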
@Override public void map(Chunk[] chks) {
  final Chunk y = importance ? chk_resp(chks) : null;                 // Response
  final double[] rpred = importance ? new double[1 + _nclass] : null; // Row prediction
  final double[] rowdata = importance ? new double[_ncols] : null;    // Pre-allocated row data
  final Chunk oobt = chk_oobt(chks);                                  // Out-of-bag row counter over all trees
  // Iterate over all rows
  for (int row = 0; row < oobt._len; row++) {
    final boolean wasOOBRow = ScoreBuildHistogram.isOOBRow((int) chk_nids(chks, 0).at8(row));
    // For all trees (i.e., all k classes)
    for (int k = 0; k < _nclass; k++) {
      final DTree tree = _trees[k];
      if (tree == null) continue;               // Empty class is ignored
      final Chunk nids = chk_nids(chks, k);     // Node-ids for this tree/class
      int nid = (int) nids.at8(row);            // Get node to decide from
      // Update only out-of-bag rows: for an OOB row we also track the on-the-fly prediction
      if (wasOOBRow) {
        final Chunk ct = chk_tree(chks, k);     // k-tree working column holding votes for the given row
        nid = ScoreBuildHistogram.oob2Nid(nid);
        if (tree.node(nid) instanceof UndecidedNode) // If we bottomed out the tree
          nid = tree.node(nid).pid();                // Then take the parent's decision
        int leafnid;
        if (tree.root() instanceof LeafNode) {
          leafnid = 0;
        } else {
          DecidedNode dn = tree.decided(nid);        // Must have a decision point
          if (dn._split.col() == -1)                 // Unable to decide?
            dn = tree.decided(tree.node(nid).pid()); // Then take the parent's decision
          leafnid = dn.ns(chks, row);                // Decide down to a leaf node
        }
        // Set up Tree(i): on-the-fly prediction of the i-th tree for this row
        //  - for classification: cumulative number of votes for this row
        //  - for regression: cumulative sum of each tree's prediction, which has to be
        //    normalized by the number of trees
        double prediction = ((LeafNode) tree.node(leafnid)).pred(); // Prediction for this k-class and this row
        if (importance) rpred[1 + k] = (float) prediction;          // for both regression and classification
        ct.set(row, (float) (ct.atd(row) + prediction));
      }
      // Reset the helper column for this row and this k-class
      nids.set(row, 0);
    } /* end of k-trees iteration */
    // This row was out-of-bag for this round, i.e., a tree voted for it
    if (wasOOBRow) oobt.set(row, oobt.atd(row) + 1); // track the number of trees
    if (importance) {
      if (wasOOBRow && !y.isNA(row)) {
        if (isClassifier()) {
          int treePred = getPrediction(rpred, data_row(chks, row, rowdata), _threshold);
          int actuPred = (int) y.at8(row);
          if (treePred == actuPred) rightVotes++;    // No miss!
        } else { // regression
          double treePred = rpred[1];
          double actuPred = y.atd(row);
          sse += (actuPred - treePred) * (actuPred - treePred);
        }
        allRows++;
      }
    }
  }
}
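// --- Hedged sketch (variable names are illustrative): once the task above completes, the
// accumulated counters typically roll up into an out-of-bag error estimate along these lines.
double oobError = isClassifier()
    ? 1.0 - (double) rightVotes / allRows  // OOB misclassification rate
    : sse / allRows;                       // OOB mean squared error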