示例#1
0
 // helper for debugging
 @SuppressWarnings("unused")
 protected static void printGenerateTrees(DTree[] trees) {
   for (DTree dtree : trees)
     if (dtree != null) {
       try {
         PrintWriter writer = new PrintWriter("/tmp/h2o-3.tree" + ++counter + ".txt", "UTF-8");
         writer.println(dtree.root().toString2(new StringBuilder(), 0));
         writer.close();
       } catch (FileNotFoundException | UnsupportedEncodingException e) {
         e.printStackTrace();
       }
       System.out.println(dtree.root().toString2(new StringBuilder(), 0));
     }
 }
示例#2
0
文件: GBM.java 项目: shjgiser/h2o
 @Override
 public void map(Chunk[] chks) {
   _gss = new double[_nclass][];
   _rss = new double[_nclass][];
   // For all tree/klasses
   for (int k = 0; k < _nclass; k++) {
     final DTree tree = _trees[k];
     final int leaf = _leafs[k];
     if (tree == null) continue; // Empty class is ignored
     // A leaf-biased array of all active Tree leaves.
     final double gs[] = _gss[k] = new double[tree._len - leaf];
     final double rs[] = _rss[k] = new double[tree._len - leaf];
     final Chunk nids = chk_nids(chks, k); // Node-ids  for this tree/class
     final Chunk ress = chk_work(chks, k); // Residuals for this tree/class
     // If we have all constant responses, then we do not split even the
     // root and the residuals should be zero.
     if (tree.root() instanceof LeafNode) continue;
     for (int row = 0; row < nids._len; row++) { // For all rows
       int nid = (int) nids.at80(row); // Get Node to decide from
       if (nid < 0) continue; // Missing response
       if (tree.node(nid) instanceof UndecidedNode) // If we bottomed out the tree
       nid = tree.node(nid)._pid; // Then take parent's decision
       DecidedNode dn = tree.decided(nid); // Must have a decision point
       if (dn._split._col == -1) // Unable to decide?
       dn = tree.decided(nid = dn._pid); // Then take parent's decision
       int leafnid = dn.ns(chks, row); // Decide down to a leafnode
       assert leaf <= leafnid && leafnid < tree._len;
       assert tree.node(leafnid) instanceof LeafNode;
       // Note: I can which leaf/region I end up in, but I do not care for
       // the prediction presented by the tree.  For GBM, we compute the
       // sum-of-residuals (and sum/abs/mult residuals) for all rows in the
       // leaf, and get our prediction from that.
       nids.set0(row, leafnid);
       assert !ress.isNA0(row);
       double res = ress.at0(row);
       double ares = Math.abs(res);
       gs[leafnid - leaf] += _nclass > 1 ? ares * (1 - ares) : 1;
       rs[leafnid - leaf] += res;
     }
   }
 }
示例#3
0
      @Override
      public void map(Chunk[] chks) {
        final Chunk y = importance ? chk_resp(chks) : null; // Response
        final double[] rpred = importance ? new double[1 + _nclass] : null; // Row prediction
        final double[] rowdata = importance ? new double[_ncols] : null; // Pre-allocated row data
        final Chunk oobt = chk_oobt(chks); // Out-of-bag rows counter over all trees
        // Iterate over all rows
        for (int row = 0; row < oobt._len; row++) {
          final boolean wasOOBRow = ScoreBuildHistogram.isOOBRow((int) chk_nids(chks, 0).at8(row));

          // For all tree (i.e., k-classes)
          for (int k = 0; k < _nclass; k++) {
            final DTree tree = _trees[k];
            if (tree == null) continue; // Empty class is ignored
            final Chunk nids = chk_nids(chks, k); // Node-ids  for this tree/class
            int nid = (int) nids.at8(row); // Get Node to decide from
            // Update only out-of-bag rows
            // This is out-of-bag row - but we would like to track on-the-fly prediction for the row
            if (wasOOBRow) {
              final Chunk ct =
                  chk_tree(chks, k); // k-tree working column holding votes for given row
              nid = ScoreBuildHistogram.oob2Nid(nid);
              if (tree.node(nid) instanceof UndecidedNode) // If we bottomed out the tree
              nid = tree.node(nid).pid(); // Then take parent's decision
              int leafnid;
              if (tree.root() instanceof LeafNode) {
                leafnid = 0;
              } else {
                DecidedNode dn = tree.decided(nid); // Must have a decision point
                if (dn._split.col() == -1) // Unable to decide?
                dn = tree.decided(tree.node(nid).pid()); // Then take parent's decision
                leafnid = dn.ns(chks, row); // Decide down to a leafnode
              }
              // Setup Tree(i) - on the fly prediction of i-tree for row-th row
              //   - for classification: cumulative number of votes for this row
              //   - for regression: cumulative sum of prediction of each tree - has to be
              // normalized by number of trees
              double prediction =
                  ((LeafNode) tree.node(leafnid))
                      .pred(); // Prediction for this k-class and this row
              if (importance)
                rpred[1 + k] = (float) prediction; // for both regression and classification
              ct.set(row, (float) (ct.atd(row) + prediction));
            }
            // reset help column for this row and this k-class
            nids.set(row, 0);
          } /* end of k-trees iteration */
          // For this tree this row is out-of-bag - i.e., a tree voted for this row
          if (wasOOBRow) oobt.set(row, oobt.atd(row) + 1); // track number of trees
          if (importance) {
            if (wasOOBRow && !y.isNA(row)) {
              if (isClassifier()) {
                int treePred =
                    getPrediction(
                        rpred,
                        _model._output._priorClassDist,
                        data_row(chks, row, rowdata),
                        _threshold);
                int actuPred = (int) y.at8(row);
                if (treePred == actuPred) rightVotes++; // No miss !
              } else { // regression
                double treePred = rpred[1];
                double actuPred = y.atd(row);
                sse += (actuPred - treePred) * (actuPred - treePred);
              }
              allRows++;
            }
          }
        }
      }