Ejemplos de DTree.decided en Java

Lenguaje de programación: Java

Namespace/Package Name: water.util

Clase / Tipo: DTree

Método / Función: decided

Ejemplos en hotexamples.com: 3

Java DTree.decided - 3 ejemplos encontrados. Estos son los ejemplos en Java del mundo real mejor valorados de water.util.DTree.decided extraídos de proyectos de código abierto. Puedes valorar ejemplos para ayudarnos a mejorar la calidad de los ejemplos.

Métodos usados con frecuencia

Mostrar Ocultar

decided(3)

len(3)

node(3)

root(2)

undecided(1)

Métodos usados con frecuencia

decided (3)

len (3)

node (3)

root (2)

undecided (1)

Ejemplo n.º 1

Mostrar archivo

Archivo: GBM.java Proyecto: shjgiser/h2o

  /**
   * Compute relative variable importance for GBM model.
   *
   * <p>See (45), (35) formulas in Friedman: Greedy Function Approximation: A Gradient boosting
   * machine. Algo used here can be used for computation individual importance of features per
   * output class.
   */
  @Override
  protected VarImp doVarImpCalc(
      GBMModel model, DTree[] ktrees, int tid, Frame validationFrame, boolean scale) {
    assert model.ntrees() - 1 == tid
        : "varimp computation expect model with already serialized trees: tid=" + tid;
    // Iterates over k-tree
    for (DTree t : ktrees) { // Iterate over trees
      if (t != null) {
        for (int n = 0; n < t.len() - t.leaves; n++)
          if (t.node(n) instanceof DecidedNode) { // it is split node
            Split split = t.decided(n)._split;
            _improvPerVar[split._col] += split.improvement(); // least squares improvement
          }
      }
    }
    // Compute variable importance for all trees in model
    float[] varimp = new float[model.nfeatures()];

    int ntreesTotal = model.ntrees() * model.nclasses();
    int maxVar = 0;
    for (int var = 0; var < _improvPerVar.length; var++) {
      varimp[var] = _improvPerVar[var] / ntreesTotal;
      if (varimp[var] > varimp[maxVar]) maxVar = var;
    }
    // GBM scale varimp to scale 0..100
    if (scale) {
      float maxVal = varimp[maxVar];
      for (int var = 0; var < varimp.length; var++) varimp[var] /= maxVal;
    }

    return new VarImp(varimp);
  }

Ejemplo n.º 2

Mostrar archivo

Archivo: GBM.java Proyecto: shjgiser/h2o

 @Override
 public void map(Chunk[] chks) {
   _gss = new double[_nclass][];
   _rss = new double[_nclass][];
   // For all tree/klasses
   for (int k = 0; k < _nclass; k++) {
     final DTree tree = _trees[k];
     final int leaf = _leafs[k];
     if (tree == null) continue; // Empty class is ignored
     // A leaf-biased array of all active Tree leaves.
     final double gs[] = _gss[k] = new double[tree._len - leaf];
     final double rs[] = _rss[k] = new double[tree._len - leaf];
     final Chunk nids = chk_nids(chks, k); // Node-ids  for this tree/class
     final Chunk ress = chk_work(chks, k); // Residuals for this tree/class
     // If we have all constant responses, then we do not split even the
     // root and the residuals should be zero.
     if (tree.root() instanceof LeafNode) continue;
     for (int row = 0; row < nids._len; row++) { // For all rows
       int nid = (int) nids.at80(row); // Get Node to decide from
       if (nid < 0) continue; // Missing response
       if (tree.node(nid) instanceof UndecidedNode) // If we bottomed out the tree
       nid = tree.node(nid)._pid; // Then take parent's decision
       DecidedNode dn = tree.decided(nid); // Must have a decision point
       if (dn._split._col == -1) // Unable to decide?
       dn = tree.decided(nid = dn._pid); // Then take parent's decision
       int leafnid = dn.ns(chks, row); // Decide down to a leafnode
       assert leaf <= leafnid && leafnid < tree._len;
       assert tree.node(leafnid) instanceof LeafNode;
       // Note: I can which leaf/region I end up in, but I do not care for
       // the prediction presented by the tree.  For GBM, we compute the
       // sum-of-residuals (and sum/abs/mult residuals) for all rows in the
       // leaf, and get our prediction from that.
       nids.set0(row, leafnid);
       assert !ress.isNA0(row);
       double res = ress.at0(row);
       double ares = Math.abs(res);
       gs[leafnid - leaf] += _nclass > 1 ? ares * (1 - ares) : 1;
       rs[leafnid - leaf] += res;
     }
   }
 }

Ejemplo n.º 3

Mostrar archivo

Archivo: GBM.java Proyecto: shjgiser/h2o

  // --------------------------------------------------------------------------
  // Build the next k-trees, which is trying to correct the residual error from
  // the prior trees.  From LSE2, page 387.  Step 2b ii, iii.
  private DTree[] buildNextKTrees(Frame fr) {
    // We're going to build K (nclass) trees - each focused on correcting
    // errors for a single class.
    final DTree[] ktrees = new DTree[_nclass];

    // Initial set of histograms.  All trees; one leaf per tree (the root
    // leaf); all columns
    DHistogram hcs[][][] = new DHistogram[_nclass][1 /*just root leaf*/][_ncols];

    for (int k = 0; k < _nclass; k++) {
      // Initially setup as-if an empty-split had just happened
      if (_distribution == null || _distribution[k] != 0) {
        // The Boolean Optimization
        // This optimization assumes the 2nd tree of a 2-class system is the
        // inverse of the first.  This is false for DRF (and true for GBM) -
        // DRF picks a random different set of columns for the 2nd tree.
        if (k == 1 && _nclass == 2) continue;
        ktrees[k] = new DTree(fr._names, _ncols, (char) nbins, (char) _nclass, min_rows);
        new GBMUndecidedNode(
            ktrees[k],
            -1,
            DHistogram.initialHist(fr, _ncols, nbins, hcs[k][0], false)); // The "root" node
      }
    }
    int[] leafs = new int[_nclass]; // Define a "working set" of leaf splits, from here to tree._len

    // ----
    // ESL2, page 387.  Step 2b ii.
    // One Big Loop till the ktrees are of proper depth.
    // Adds a layer to the trees each pass.
    int depth = 0;
    for (; depth < max_depth; depth++) {
      if (!Job.isRunning(self())) return null;

      hcs = buildLayer(fr, ktrees, leafs, hcs, false, false);

      // If we did not make any new splits, then the tree is split-to-death
      if (hcs == null) break;
    }

    // Each tree bottomed-out in a DecidedNode; go 1 more level and insert
    // LeafNodes to hold predictions.
    for (int k = 0; k < _nclass; k++) {
      DTree tree = ktrees[k];
      if (tree == null) continue;
      int leaf = leafs[k] = tree.len();
      for (int nid = 0; nid < leaf; nid++) {
        if (tree.node(nid) instanceof DecidedNode) {
          DecidedNode dn = tree.decided(nid);
          for (int i = 0; i < dn._nids.length; i++) {
            int cnid = dn._nids[i];
            if (cnid == -1
                || // Bottomed out (predictors or responses known constant)
                tree.node(cnid) instanceof UndecidedNode
                || // Or chopped off for depth
                (tree.node(cnid) instanceof DecidedNode
                    && // Or not possible to split
                    ((DecidedNode) tree.node(cnid))._split.col() == -1))
              dn._nids[i] = new GBMLeafNode(tree, nid).nid(); // Mark a leaf here
          }
          // Handle the trivial non-splitting tree
          if (nid == 0 && dn._split.col() == -1) new GBMLeafNode(tree, -1, 0);
        }
      }
    } // -- k-trees are done

    // ----
    // ESL2, page 387.  Step 2b iii.  Compute the gammas, and store them back
    // into the tree leaves.  Includes learn_rate.
    //    gamma_i_k = (nclass-1)/nclass * (sum res_i / sum (|res_i|*(1-|res_i|)))
    // For regression:
    //    gamma_i_k = sum res_i / count(res_i)
    GammaPass gp = new GammaPass(ktrees, leafs).doAll(fr);
    double m1class = _nclass > 1 ? (double) (_nclass - 1) / _nclass : 1.0; // K-1/K
    for (int k = 0; k < _nclass; k++) {
      final DTree tree = ktrees[k];
      if (tree == null) continue;
      for (int i = 0; i < tree._len - leafs[k]; i++) {
        double g =
            gp._gss[k][i] == 0 // Constant response?
                ? (gp._rss[k][i] == 0
                    ? 0
                    : 1000) // Cap (exponential) learn, instead of dealing with Inf
                : learn_rate * m1class * gp._rss[k][i] / gp._gss[k][i];
        assert !Double.isNaN(g);
        ((LeafNode) tree.node(leafs[k] + i))._pred = g;
      }
    }

    // ----
    // ESL2, page 387.  Step 2b iv.  Cache the sum of all the trees, plus the
    // new tree, in the 'tree' columns.  Also, zap the NIDs for next pass.
    // Tree <== f(Tree)
    // Nids <== 0
    new MRTask2() {
      @Override
      public void map(Chunk chks[]) {
        // For all tree/klasses
        for (int k = 0; k < _nclass; k++) {
          final DTree tree = ktrees[k];
          if (tree == null) continue;
          final Chunk nids = chk_nids(chks, k);
          final Chunk ct = chk_tree(chks, k);
          for (int row = 0; row < nids._len; row++) {
            int nid = (int) nids.at80(row);
            if (nid < 0) continue;
            ct.set0(row, (float) (ct.at0(row) + ((LeafNode) tree.node(nid))._pred));
            nids.set0(row, 0);
          }
        }
      }
    }.doAll(fr);

    // Collect leaves stats
    for (int i = 0; i < ktrees.length; i++)
      if (ktrees[i] != null) ktrees[i].leaves = ktrees[i].len() - leafs[i];
    // DEBUG: Print the generated K trees
    // printGenerateTrees(ktrees);

    return ktrees;
  }