예제 #1
0
 /**
  * Accumulates error statistics for one pair of chunks.
  *
  * <p>When {@code _c_len > 1}, a fresh {@code (_c_len + 1) x (_c_len + 1)} count matrix is stored
  * in {@code _cm} and indexed as {@code [value from ca][value from cp]}; index {@code _c_len} is
  * the slot used for missing (NA) entries. Otherwise {@code _cm} is set to {@code null},
  * {@code _mse} is reset to zero, and the squared difference of every pair in which neither value
  * is NA is added to {@code _mse} while {@code _count} is incremented ({@code _count} is not reset
  * here).
  *
  * @param ca first chunk (presumably the actual values; confirm against the caller)
  * @param cp second chunk (presumably the predicted values; confirm against the caller)
  */
 @Override
 public void map(Chunk ca, Chunk cp) {
   if (_c_len <= 1) {
     // Regression: sum of squared differences over rows where both values are present.
     _cm = null;
     _mse = 0;
     assert (ca._len == cp._len);
     final int rows = ca._len;
     for (int row = 0; row < rows; row++) {
       final boolean missing = ca.isNA0(row) || cp.isNA0(row);
       if (!missing) { // TODO: Improve
         final double first = ca.at0(row);
         final double second = cp.at0(row);
         final double diff = second - first;
         _mse += diff * diff;
         _count++;
       }
     }
     return;
   }

   // Classification: count matrix with one extra row/column for NAs.
   _cm = new long[_c_len + 1][_c_len + 1];
   // Chunks of different lengths are tolerated here, although such vectors are expected to have
   // been rejected before this point.
   final int shared = Math.min(ca._len, cp._len);
   for (int row = 0; row < shared; row++) {
     final int rowIdx = ca.isNA0(row) ? _c_len : (int) ca.at80(row);
     final int colIdx = cp.isNA0(row) ? _c_len : (int) cp.at80(row);
     _cm[rowIdx][colIdx]++;
   }
   // Rows present only in ca are paired with the NA column.
   for (int row = shared; row < ca._len; row++) {
     final int rowIdx = ca.isNA0(row) ? _c_len : (int) ca.at80(row);
     _cm[rowIdx][_c_len]++;
   }
   // Rows present only in cp are paired with the NA row.
   for (int row = shared; row < cp._len; row++) {
     final int colIdx = cp.isNA0(row) ? _c_len : (int) cp.at80(row);
     _cm[_c_len][colIdx]++;
   }
 }
예제 #2
0
파일: DRF.java 프로젝트: rohit2412/h2o
 /**
  * Seeds the working columns from the response chunk.
  *
  * <p>Rows whose response is NA are left untouched. For classification the working chunk of the
  * row's class receives {@code 1}; otherwise working chunk {@code 0} receives the response value
  * narrowed to {@code float}.
  *
  * @param chks chunks of the frame being processed; the response and working chunks are looked up
  *     through {@code chk_resp} and {@code chk_work}
  */
 @Override
 public void map(Chunk chks[]) {
   final Chunk response = chk_resp(chks);
   for (int row = 0; row < response._len; row++) {
     if (!response.isNA0(row)) {
       if (!classification) {
         final float value = (float) response.at0(row);
         chk_work(chks, 0).set0(row, value);
       } else {
         final int classIdx = (int) response.at80(row);
         chk_work(chks, classIdx).set0(row, 1L);
       }
     }
   }
 }
예제 #3
0
파일: GBM.java 프로젝트: shjgiser/h2o
 /**
  * Routes every row to a leaf of each class tree and accumulates per-leaf sums.
  *
  * <p>For each class {@code k} with a non-null tree, {@code _gss[k]} and {@code _rss[k]} are
  * allocated with one slot per node id in {@code [_leafs[k], tree._len)}. For every row with a
  * non-negative node id, the row's node-id chunk entry is overwritten with the leaf id it lands
  * in, and the value read from the class's working chunk is folded in: {@code _rss} receives the
  * value itself, {@code _gss} receives {@code |v| * (1 - |v|)} when {@code _nclass > 1} and
  * {@code 1} otherwise.
  *
  * @param chks chunks of the frame being processed
  */
 @Override
 public void map(Chunk[] chks) {
   _gss = new double[_nclass][];
   _rss = new double[_nclass][];
   // One pass per tree/class.
   for (int k = 0; k < _nclass; k++) {
     final DTree tree = _trees[k];
     final int firstLeaf = _leafs[k];
     if (tree == null) {
       continue; // No tree for this class.
     }
     // Arrays are biased by firstLeaf so that slot 0 corresponds to node id firstLeaf.
     final double[] gSums = new double[tree._len - firstLeaf];
     _gss[k] = gSums;
     final double[] rSums = new double[tree._len - firstLeaf];
     _rss[k] = rSums;
     final Chunk nodeIds = chk_nids(chks, k); // Node ids for this tree/class
     final Chunk residuals = chk_work(chks, k); // Residuals for this tree/class
     // With all-constant responses not even the root is split; the sums then stay at zero.
     if (tree.root() instanceof LeafNode) {
       continue;
     }
     for (int row = 0; row < nodeIds._len; row++) {
       int nodeId = (int) nodeIds.at80(row);
       if (nodeId < 0) {
         continue; // Missing response
       }
       // An undecided node means the tree bottomed out: fall back to its parent.
       if (tree.node(nodeId) instanceof UndecidedNode) {
         nodeId = tree.node(nodeId)._pid;
       }
       DecidedNode decision = tree.decided(nodeId);
       // A decided node without a usable split column also defers to its parent.
       if (decision._split._col == -1) {
         nodeId = decision._pid;
         decision = tree.decided(nodeId);
       }
       final int leafId = decision.ns(chks, row); // Walk down to a leaf node
       assert firstLeaf <= leafId && leafId < tree._len;
       assert tree.node(leafId) instanceof LeafNode;
       // Only the leaf/region the row falls into matters here, not the prediction stored in the
       // tree: for GBM the per-leaf sums gathered below are what the prediction is derived from.
       nodeIds.set0(row, leafId);
       assert !residuals.isNA0(row);
       final double residual = residuals.at0(row);
       final double magnitude = Math.abs(residual);
       final int slot = leafId - firstLeaf;
       final double increment;
       if (_nclass > 1) {
         increment = magnitude * (1 - magnitude);
       } else {
         increment = 1;
       }
       gSums[slot] += increment;
       rSums[slot] += residual;
     }
   }
 }
예제 #4
0
파일: GBM.java 프로젝트: shjgiser/h2o
    /**
     * Replaces the values held in the working chunks with residuals against the response.
     *
     * <p>With more than one class, every row with a non-NA response is processed: for each class
     * {@code k} whose {@code _distribution[k]} is non-zero, the working value becomes
     * {@code (response == k ? 1 : 0) - workingValue}. With a single class, working chunk
     * {@code 0} becomes {@code response - workingValue} for every row; NA responses are not
     * filtered in that branch.
     *
     * @param chks chunks of the frame being processed
     */
    @Override
    public void map(Chunk chks[]) {
      final Chunk ys = chk_resp(chks);

      if (_nclass <= 1) {
        // Regression: the working column turns from prediction into residual.
        final Chunk work = chk_work(chks, 0);
        for (int row = 0; row < ys._len; row++) {
          final double residual = ys.at0(row) - work.at0(row);
          work.set0(row, (float) residual);
        }
        return;
      }

      // Classification
      for (int row = 0; row < ys._len; row++) {
        if (ys.isNA0(row)) {
          continue;
        }
        final int actualClass = (int) ys.at80(row); // zero-based response value
        for (int k = 0; k < _nclass; k++) {
          if (_distribution[k] == 0) {
            continue;
          }
          final Chunk work = chk_work(chks, k);
          // The target is 1 for the row's own class and 0 for every other class.
          final float target = actualClass == k ? 1f : 0f;
          work.set0(row, target - (float) work.at0(row));
        }
      }
    }
예제 #5
0
파일: AUC.java 프로젝트: Jrobinso09/h2o
 /**
  * Builds one 2x2 confusion matrix per threshold for this pair of chunks.
  *
  * <p>{@code _cms} is re-created with one {@code hex.ConfusionMatrix(2)} per entry of
  * {@code _thresh}. Only the first {@code min(ca._len, cp._len)} rows are visited, and a row is
  * skipped when either its {@code ca} or its {@code cp} value is NA. For every remaining row and
  * every threshold {@code t}, the pair {@code (a, p)} is added to {@code _cms[t]}, where
  * {@code p} is 1 if the {@code cp} value is at least {@code _thresh[t]} and 0 otherwise.
  *
  * @param ca chunk of class labels; asserted to hold only 0 or 1
  * @param cp chunk of numeric values compared against the thresholds
  */
 @Override
 public void map(Chunk ca, Chunk cp) {
   _cms = new hex.ConfusionMatrix[_thresh.length];
   int slot = 0;
   while (slot < _cms.length) {
     _cms[slot] = new hex.ConfusionMatrix(2);
     slot++;
   }

   final int rows = Math.min(ca._len, cp._len);
   for (int row = 0; row < rows; row++) {
     // A missing label is skipped silently rather than treated as an error.
     if (ca.isNA0(row)) {
       continue;
     }
     final int label = (int) ca.at80(row);
     assert (label == 0 || label == 1) : "Invalid values in vactual: must be binary (0 or 1).";
     // Some models predict NaN; such rows are skipped without logging.
     if (!cp.isNA0(row)) {
       final double score = cp.at0(row);
       for (int t = 0; t < _cms.length; t++) {
         if (score >= _thresh[t]) {
           _cms[t].add(label, 1);
         } else {
           _cms[t].add(label, 0);
         }
       }
     }
   }
 }
예제 #6
0
파일: MRUtils.java 프로젝트: Jrobinso09/h2o
 /**
  * Counts how often each class value occurs in the given chunk.
  *
  * <p>{@code _ys} is re-created with {@code _nclass} slots; every non-NA entry of the chunk is
  * truncated to {@code int} and used as the index of the slot to increment.
  *
  * @param ys chunk holding the class values
  */
 @Override
 public void map(Chunk ys) {
   _ys = new long[_nclass];
   for (int row = 0; row < ys._len; row++) {
     if (ys.isNA0(row)) {
       continue;
     }
     final int classIdx = (int) ys.at80(row);
     _ys[classIdx]++;
   }
 }
예제 #7
0
파일: DRF.java 프로젝트: rohit2412/h2o
 /**
  * Folds the predictions of the current per-class trees into the working columns for every
  * out-of-bag (OOB) row, and optionally gathers accuracy statistics.
  *
  * <p>For each row and each class {@code k} whose tree is non-null and whose root is not a
  * {@code LeafNode}:
  *
  * <ul>
  *   <li>if the row's node id satisfies {@code isOOBRow}, the row is routed to a leaf, the leaf's
  *       {@code pred()} is added to the class's tree column, and the OOB counter column is updated
  *       (set to 1 when {@code _nclass > 1}, incremented otherwise);
  *   <li>the row's node-id entry is then reset to 0, whether or not the row was OOB.
  * </ul>
  *
  * <p>When {@code importance} is set, every OOB row with a non-NA response also increments
  * {@code allRows}, and either {@code rightVotes} (classification, when the predicted class
  * equals the response) or {@code sse} (regression, squared error) is updated.
  *
  * @param chks chunks of the frame being processed
  */
 @Override
 public void map(Chunk[] chks) {
   // The next three are only needed (and only non-null) when importance is requested.
   final Chunk y = importance ? chk_resp(chks) : null; // Response
   // Row prediction; allocated once and reused for every row. Slots of trees skipped below are
   // never written and keep the default 0.
   final float[] rpred = importance ? new float[1 + _nclass] : null;
   final double[] rowdata = importance ? new double[_ncols] : null; // Pre-allocated row data
   final Chunk oobt = chk_oobt(chks); // Out-of-bag rows counter over all trees
   // Iterate over all rows
   for (int row = 0; row < oobt._len; row++) {
     boolean wasOOBRow = false;
     // For all trees (i.e., one per class k)
     for (int k = 0; k < _nclass; k++) {
       final DTree tree = _trees[k];
       if (tree == null) continue; // Empty class is ignored
       // If we have all constant responses, then we do not split even the
       // root and the residuals should be zero.
       if (tree.root() instanceof LeafNode) continue;
       final Chunk nids = chk_nids(chks, k); // Node-ids  for this tree/class
       final Chunk ct = chk_tree(chks, k); // k-tree working column holding votes for given row
       int nid = (int) nids.at80(row); // Get Node to decide from
       // Only out-of-bag rows are updated: for those we track the on-the-fly prediction.
       if (isOOBRow(nid)) { // The row should be OOB for all k-trees !!!
         assert k == 0 || wasOOBRow
             : "Something is wrong: k-class trees oob row computing is broken! All k-trees should agree on oob row!";
         wasOOBRow = true;
         nid = oob2Nid(nid); // Convert the OOB-encoded id into a node id usable with the tree
         if (tree.node(nid) instanceof UndecidedNode) // If we bottomed out the tree
         nid = tree.node(nid).pid(); // Then take parent's decision
         DecidedNode dn = tree.decided(nid); // Must have a decision point
         if (dn._split.col() == -1) // Unable to decide?
         dn = tree.decided(tree.node(nid).pid()); // Then take parent's decision
         int leafnid = dn.ns(chks, row); // Decide down to a leafnode
         // Setup Tree(i) - on the fly prediction of i-tree for row-th row
         //   - for classification: cumulative number of votes for this row
         //   - for regression: cumulative sum of prediction of each tree - has to be normalized
         // by number of trees
         double prediction =
             ((LeafNode) tree.node(leafnid)).pred(); // Prediction for this k-class and this row
         if (importance)
           rpred[1 + k] = (float) prediction; // for both regression and classification
         ct.set0(row, (float) (ct.at0(row) + prediction));
         // For this tree this row is out-of-bag - i.e., a tree voted for this row.
         // Both branches of the conditional share one numeric type, so the literal 1 is promoted
         // to the type of oobt.at0(row) + 1 before being passed to set0.
         oobt.set0(
             row,
             _nclass > 1
                 ? 1
                 : oobt.at0(row)
                     + 1); // for regression track number of trees, for classification boolean
                           // flag is enough
       }
       // Reset the helper node-id column for this row and this k-class
       nids.set0(row, 0);
     } /* end of k-trees iteration */
     if (importance) {
       // Statistics are gathered only for OOB rows that have a response value.
       if (wasOOBRow && !y.isNA0(row)) {
         if (classification) {
           int treePred = ModelUtils.getPrediction(rpred, data_row(chks, row, rowdata));
           int actuPred = (int) y.at80(row);
           if (treePred == actuPred) rightVotes++; // No miss !
         } else { // regression
           float treePred = rpred[1];
           float actuPred = (float) y.at0(row);
           sse += (actuPred - treePred) * (actuPred - treePred);
         }
         allRows++;
       }
     }
   }
 }