Exemple #1
0
 /**
  * Writes 0 into {@code newW} for every row whose value in {@code strata} (read with
  * {@code at8} and truncated to {@code int}) differs from {@code stratumToKeep}. Rows whose
  * stratum matches are left untouched.
  *
  * @param strata chunk holding the per-row stratum ids
  * @param newW chunk updated in place
  */
 @Override
 public void map(Chunk strata, Chunk newW) {
   int row = 0;
   while (row < strata._len) {
     final int stratum = (int) strata.at8(row);
     if (stratum != stratumToKeep) {
       newW.set(row, 0);
     }
     ++row;
   }
 }
Exemple #2
0
 /**
  * Initializes the working columns from the response column, row by row.
  *
  * <p>Rows with a missing response are skipped. For a classifier, the working column selected by
  * the row's response class gets the value 1. Otherwise working column 0 receives the response
  * value narrowed to {@code float}.
  *
  * @param chks chunks of the frame being processed; the response and working columns are
  *     looked up through {@code chk_resp} and {@code chk_work}
  */
 @Override
 public void map(Chunk chks[]) {
   final Chunk response = chk_resp(chks);
   for (int row = 0; row < response._len; row++) {
     if (!response.isNA(row)) {
       if (!isClassifier()) {
         // Regression: copy the response (at float precision) into the single working column.
         final float target = (float) response.atd(row);
         chk_work(chks, 0).set(row, target);
       } else {
         // Classification: flag the working column that belongs to the observed class.
         final int classIdx = (int) response.at8(row);
         chk_work(chks, classIdx).set(row, 1L);
       }
     }
   }
 }
Exemple #3
0
 /**
  * Produces the chunk at index {@code _chunkOffset + cs.cidx()} of the vector {@code _v}.
  *
  * <p>Without a mapping ({@code _cmap == null}) a deep copy of the input chunk is stored under
  * the target chunk key. With a mapping, every non-missing value {@code v} of the input is
  * replaced by {@code _cmap[v]} in a freshly built chunk, and missing values stay missing.
  *
  * @param cs input chunk; must not contain floating-point data when a mapping is applied
  */
 @Override
 public void map(Chunk cs) {
   final int targetIdx = _chunkOffset + cs.cidx();
   final Key targetKey = Vec.chunkKey(_v._key, targetIdx);

   // No mapping requested: store a plain copy of the chunk and finish.
   if (_cmap == null) {
     DKV.put(targetKey, cs.deepCopy(), _fs, true);
     return;
   }

   assert !cs.hasFloat()
       : "Input chunk (" + cs.getClass() + ") has float, but is expected to be categorical";

   // Re-encode each row through the mapping table, carrying missing values over unchanged.
   final NewChunk remapped = new NewChunk(_v, targetIdx);
   for (int row = 0; row < cs._len; row++) {
     if (!cs.isNA(row)) {
       remapped.addNum(_cmap[(int) cs.at8(row)], 0);
     } else {
       remapped.addNA();
     }
   }
   remapped.close(_fs);
 }
Exemple #4
0
      /**
       * Folds the predictions of the current per-class trees into the per-row working columns
       * and, when {@code importance} is set, accumulates accuracy statistics.
       *
       * <p>A row is handled as out-of-bag when {@code ScoreBuildHistogram.isOOBRow} says so for
       * its node id in the node-id column of tree 0. For such a row each non-null tree is walked
       * down to a leaf, the leaf prediction is added to that tree's working column, and the
       * out-of-bag counter column is incremented once. For every non-null tree the row's node id
       * is reset to 0, whether or not the row is out-of-bag.
       *
       * <p>With {@code importance} enabled, out-of-bag rows with a non-missing response also
       * update {@code rightVotes} (classification) or {@code sse} (regression), and
       * {@code allRows}. Those accumulators are declared outside this block.
       *
       * @param chks chunks of the frame being processed; individual columns are looked up
       *     through the {@code chk_*} helpers
       */
      @Override
      public void map(Chunk[] chks) {
        // The three values below are only needed, and only allocated, when importance is set.
        final Chunk y = importance ? chk_resp(chks) : null; // Response
        final double[] rpred = importance ? new double[1 + _nclass] : null; // Row prediction
        final double[] rowdata = importance ? new double[_ncols] : null; // Pre-allocated row data
        final Chunk oobt = chk_oobt(chks); // Out-of-bag rows counter over all trees
        // Iterate over all rows
        for (int row = 0; row < oobt._len; row++) {
          // The out-of-bag flag is read from tree 0's node-id column and applied to all k trees.
          final boolean wasOOBRow = ScoreBuildHistogram.isOOBRow((int) chk_nids(chks, 0).at8(row));

          // For all tree (i.e., k-classes)
          for (int k = 0; k < _nclass; k++) {
            final DTree tree = _trees[k];
            // Empty class is ignored. Note this also skips the node-id reset at the loop's end.
            if (tree == null) continue;
            final Chunk nids = chk_nids(chks, k); // Node-ids  for this tree/class
            int nid = (int) nids.at8(row); // Get Node to decide from
            // Only out-of-bag rows get their running prediction updated; in-bag rows fall
            // through to the node-id reset below.
            if (wasOOBRow) {
              final Chunk ct =
                  chk_tree(chks, k); // k-tree working column holding votes for given row
              // Convert the out-of-bag encoded id back into a regular node id.
              nid = ScoreBuildHistogram.oob2Nid(nid);
              // NOTE: the brace-less if below guards only the single assignment that follows
              // it; the indentation does not reflect that.
              if (tree.node(nid) instanceof UndecidedNode) // If we bottomed out the tree
              nid = tree.node(nid).pid(); // Then take parent's decision
              int leafnid;
              if (tree.root() instanceof LeafNode) {
                // The whole tree is a single leaf: node 0 is used as the prediction node.
                leafnid = 0;
              } else {
                DecidedNode dn = tree.decided(nid); // Must have a decision point
                // NOTE: as above, this brace-less if guards only the next statement.
                if (dn._split.col() == -1) // Unable to decide?
                dn = tree.decided(tree.node(nid).pid()); // Then take parent's decision
                leafnid = dn.ns(chks, row); // Decide down to a leafnode
              }
              // Setup Tree(i) - on the fly prediction of i-tree for row-th row
              //   - for classification: cumulative number of votes for this row
              //   - for regression: cumulative sum of prediction of each tree - has to be
              // normalized by number of trees
              double prediction =
                  ((LeafNode) tree.node(leafnid))
                      .pred(); // Prediction for this k-class and this row
              // NOTE(review): the value is narrowed to float before being stored in the double
              // array, so rpred carries float precision only - confirm this is intended.
              if (importance)
                rpred[1 + k] = (float) prediction; // for both regression and classification
              // Add this tree's prediction to the running total, again at float precision.
              ct.set(row, (float) (ct.atd(row) + prediction));
            }
            // reset help column for this row and this k-class
            nids.set(row, 0);
          } /* end of k-trees iteration */
          // For this tree this row is out-of-bag - i.e., a tree voted for this row
          if (wasOOBRow) oobt.set(row, oobt.atd(row) + 1); // track number of trees
          if (importance) {
            // Only out-of-bag rows with a known response contribute to the statistics.
            if (wasOOBRow && !y.isNA(row)) {
              if (isClassifier()) {
                // Count the row as correct when the predicted class equals the actual class.
                int treePred = getPrediction(rpred, data_row(chks, row, rowdata), _threshold);
                int actuPred = (int) y.at8(row);
                if (treePred == actuPred) rightVotes++; // No miss !
              } else { // regression
                // The prediction of tree 0 sits in slot 1; accumulate its squared error.
                double treePred = rpred[1];
                double actuPred = y.atd(row);
                sse += (actuPred - treePred) * (actuPred - treePred);
              }
              allRows++;
            }
          }
        }
      }