@Override public void map(Chunk ca, Chunk cp) { // classification if (_c_len > 1) { _cm = new long[_c_len + 1][_c_len + 1]; int len = Math.min( ca._len, cp._len); // handle different lenghts, but the vectors should have been rejected // already for (int i = 0; i < len; i++) { int a = ca.isNA0(i) ? _c_len : (int) ca.at80(i); int p = cp.isNA0(i) ? _c_len : (int) cp.at80(i); _cm[a][p]++; } if (len < ca._len) for (int i = len; i < ca._len; i++) _cm[ca.isNA0(i) ? _c_len : (int) ca.at80(i)][_c_len]++; if (len < cp._len) for (int i = len; i < cp._len; i++) _cm[_c_len][cp.isNA0(i) ? _c_len : (int) cp.at80(i)]++; } else { _cm = null; _mse = 0; assert (ca._len == cp._len); int len = ca._len; for (int i = 0; i < len; i++) { if (ca.isNA0(i) || cp.isNA0(i)) continue; // TODO: Improve final double a = ca.at0(i); final double p = cp.at0(i); _mse += (p - a) * (p - a); _count++; } } }
/**
 * Seeds the work columns from the response column.
 *
 * <p>Rows whose response is NA are left untouched. In classification mode the work column
 * selected by the row's class value receives {@code 1}; otherwise work column 0 receives the
 * response narrowed to {@code float}.
 *
 * @param chks chunks of the frame being processed
 */
@Override
public void map(Chunk chks[]) {
  final Chunk response = chk_resp(chks);
  for (int row = 0; row < response._len; row++) {
    if (!response.isNA0(row)) {
      if (classification) {
        // Mark the column that belongs to the observed class.
        final int label = (int) response.at80(row);
        chk_work(chks, label).set0(row, 1L);
      } else {
        // Regression: copy the response itself.
        final float value = (float) response.at0(row);
        chk_work(chks, 0).set0(row, value);
      }
    }
  }
}
@Override public void map(Chunk[] chks) { _gss = new double[_nclass][]; _rss = new double[_nclass][]; // For all tree/klasses for (int k = 0; k < _nclass; k++) { final DTree tree = _trees[k]; final int leaf = _leafs[k]; if (tree == null) continue; // Empty class is ignored // A leaf-biased array of all active Tree leaves. final double gs[] = _gss[k] = new double[tree._len - leaf]; final double rs[] = _rss[k] = new double[tree._len - leaf]; final Chunk nids = chk_nids(chks, k); // Node-ids for this tree/class final Chunk ress = chk_work(chks, k); // Residuals for this tree/class // If we have all constant responses, then we do not split even the // root and the residuals should be zero. if (tree.root() instanceof LeafNode) continue; for (int row = 0; row < nids._len; row++) { // For all rows int nid = (int) nids.at80(row); // Get Node to decide from if (nid < 0) continue; // Missing response if (tree.node(nid) instanceof UndecidedNode) // If we bottomed out the tree nid = tree.node(nid)._pid; // Then take parent's decision DecidedNode dn = tree.decided(nid); // Must have a decision point if (dn._split._col == -1) // Unable to decide? dn = tree.decided(nid = dn._pid); // Then take parent's decision int leafnid = dn.ns(chks, row); // Decide down to a leafnode assert leaf <= leafnid && leafnid < tree._len; assert tree.node(leafnid) instanceof LeafNode; // Note: I can which leaf/region I end up in, but I do not care for // the prediction presented by the tree. For GBM, we compute the // sum-of-residuals (and sum/abs/mult residuals) for all rows in the // leaf, and get our prediction from that. nids.set0(row, leafnid); assert !ress.isNA0(row); double res = ress.at0(row); double ares = Math.abs(res); gs[leafnid - leaf] += _nclass > 1 ? ares * (1 - ares) : 1; rs[leafnid - leaf] += res; } } }
@Override public void map(Chunk chks[]) { Chunk ys = chk_resp(chks); if (_nclass > 1) { // Classification for (int row = 0; row < ys._len; row++) { if (ys.isNA0(row)) continue; int y = (int) ys.at80(row); // zero-based response variable // Actual is '1' for class 'y' and '0' for all other classes for (int k = 0; k < _nclass; k++) { if (_distribution[k] != 0) { Chunk wk = chk_work(chks, k); wk.set0(row, (y == k ? 1f : 0f) - (float) wk.at0(row)); } } } } else { // Regression Chunk wk = chk_work(chks, 0); // Prediction==>Residuals for (int row = 0; row < ys._len; row++) wk.set0(row, (float) (ys.at0(row) - wk.at0(row))); } }
@Override public void map(Chunk ca, Chunk cp) { _cms = new hex.ConfusionMatrix[_thresh.length]; for (int i = 0; i < _cms.length; ++i) _cms[i] = new hex.ConfusionMatrix(2); final int len = Math.min(ca._len, cp._len); for (int i = 0; i < len; i++) { if (ca.isNA0(i)) continue; // throw new UnsupportedOperationException("Actual class label cannot be a missing // value!"); final int a = (int) ca.at80(i); // would be a 0 if double was NaN assert (a == 0 || a == 1) : "Invalid values in vactual: must be binary (0 or 1)."; if (cp.isNA0(i)) { // Log.warn("Skipping predicted NaN."); //some models predict NaN! continue; } final double pr = cp.at0(i); for (int t = 0; t < _cms.length; t++) { final int p = pr >= _thresh[t] ? 1 : 0; _cms[t].add(a, p); } } }
/**
 * Counts how often each class value occurs in the given chunk.
 *
 * <p>A fresh array of {@code _nclass} counters is stored in {@code _ys}; every non-NA row
 * increments the counter at the index equal to the row's value.
 *
 * @param ys chunk of class values
 */
@Override
public void map(Chunk ys) {
  final long[] counts = new long[_nclass];
  _ys = counts;
  for (int row = 0; row < ys._len; row++) {
    if (ys.isNA0(row)) {
      continue; // Missing values are not counted
    }
    final int cls = (int) ys.at80(row);
    counts[cls]++;
  }
}
@Override public void map(Chunk[] chks) { final Chunk y = importance ? chk_resp(chks) : null; // Response final float[] rpred = importance ? new float[1 + _nclass] : null; // Row prediction final double[] rowdata = importance ? new double[_ncols] : null; // Pre-allocated row data final Chunk oobt = chk_oobt(chks); // Out-of-bag rows counter over all trees // Iterate over all rows for (int row = 0; row < oobt._len; row++) { boolean wasOOBRow = false; // For all tree (i.e., k-classes) for (int k = 0; k < _nclass; k++) { final DTree tree = _trees[k]; if (tree == null) continue; // Empty class is ignored // If we have all constant responses, then we do not split even the // root and the residuals should be zero. if (tree.root() instanceof LeafNode) continue; final Chunk nids = chk_nids(chks, k); // Node-ids for this tree/class final Chunk ct = chk_tree(chks, k); // k-tree working column holding votes for given row int nid = (int) nids.at80(row); // Get Node to decide from // Update only out-of-bag rows // This is out-of-bag row - but we would like to track on-the-fly prediction for the row if (isOOBRow(nid)) { // The row should be OOB for all k-trees !!! assert k == 0 || wasOOBRow : "Something is wrong: k-class trees oob row computing is broken! All k-trees should agree on oob row!"; wasOOBRow = true; nid = oob2Nid(nid); if (tree.node(nid) instanceof UndecidedNode) // If we bottomed out the tree nid = tree.node(nid).pid(); // Then take parent's decision DecidedNode dn = tree.decided(nid); // Must have a decision point if (dn._split.col() == -1) // Unable to decide? 
dn = tree.decided(tree.node(nid).pid()); // Then take parent's decision int leafnid = dn.ns(chks, row); // Decide down to a leafnode // Setup Tree(i) - on the fly prediction of i-tree for row-th row // - for classification: cumulative number of votes for this row // - for regression: cumulative sum of prediction of each tree - has to be normalized // by number of trees double prediction = ((LeafNode) tree.node(leafnid)).pred(); // Prediction for this k-class and this row if (importance) rpred[1 + k] = (float) prediction; // for both regression and classification ct.set0(row, (float) (ct.at0(row) + prediction)); // For this tree this row is out-of-bag - i.e., a tree voted for this row oobt.set0( row, _nclass > 1 ? 1 : oobt.at0(row) + 1); // for regression track number of trees, for classification boolean // flag is enough } // reset help column for this row and this k-class nids.set0(row, 0); } /* end of k-trees iteration */ if (importance) { if (wasOOBRow && !y.isNA0(row)) { if (classification) { int treePred = ModelUtils.getPrediction(rpred, data_row(chks, row, rowdata)); int actuPred = (int) y.at80(row); if (treePred == actuPred) rightVotes++; // No miss ! } else { // regression float treePred = rpred[1]; float actuPred = (float) y.at0(row); sse += (actuPred - treePred) * (actuPred - treePred); } allRows++; } } } }