Java Job примеры использования

Язык программирования: Java

Пространство имен/Пакет: hex.gbm.DTree

Класс/Тип: Job

Примеров на hotexamples.com: 2

Java Job - 2 примера найдено. Это лучшие примеры Java кода для hex.gbm.DTree.Job, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

isRunning(2)

Пример #1

Показать файл

Файл: GBM.java Проект: shjgiser/h2o

  // Start by splitting all the data according to some criteria (minimize
  // variance at the leaves).  Record on each row which split it goes to, and
  // assign a split number to it (for next pass).  On *this* pass, use the
  // split-number to build a per-split histogram, with a per-histogram-bucket
  // variance.
  @Override
  protected GBMModel buildModel(
      GBMModel model, final Frame fr, String names[], String domains[][], Timer t_build) {
    // Tag out rows missing the response column
    new ExcludeNAResponse().doAll(fr);

    // Build trees until we hit the limit
    int tid;
    DTree[] ktrees = null; // Trees
    TreeStats tstats = new TreeStats(); // Tree stats
    for (tid = 0; tid < ntrees; tid++) {
      // During first iteration model contains 0 trees, then 0-trees, then 1-tree,...
      // BUT if validation is not specified model does not participate in voting
      // but on-the-fly computed data are used
      model = doScoring(model, fr, ktrees, tid, tstats, false, false, false);
      // ESL2, page 387
      // Step 2a: Compute prediction (prob distribution) from prior tree results:
      //   Work <== f(Tree)
      new ComputeProb().doAll(fr);

      // ESL2, page 387
      // Step 2b i: Compute residuals from the prediction (probability distribution)
      //   Work <== f(Work)
      new ComputeRes().doAll(fr);

      // ESL2, page 387, Step 2b ii, iii, iv
      Timer kb_timer = new Timer();
      ktrees = buildNextKTrees(fr);
      Log.info(Sys.GBM__, (tid + 1) + ". tree was built in " + kb_timer.toString());
      if (!Job.isRunning(self())) break; // If canceled during building, do not bulkscore

      // Check latest predictions
      tstats.updateBy(ktrees);
    }
    // Final scoring
    model = doScoring(model, fr, ktrees, tid, tstats, true, false, false);

    return model;
  }

Пример #2

Показать файл

Файл: GBM.java Проект: shjgiser/h2o

  // --------------------------------------------------------------------------
  // Build the next k-trees, which is trying to correct the residual error from
  // the prior trees.  From LSE2, page 387.  Step 2b ii, iii.
  private DTree[] buildNextKTrees(Frame fr) {
    // We're going to build K (nclass) trees - each focused on correcting
    // errors for a single class.
    final DTree[] ktrees = new DTree[_nclass];

    // Initial set of histograms.  All trees; one leaf per tree (the root
    // leaf); all columns
    DHistogram hcs[][][] = new DHistogram[_nclass][1 /*just root leaf*/][_ncols];

    for (int k = 0; k < _nclass; k++) {
      // Initially setup as-if an empty-split had just happened
      if (_distribution == null || _distribution[k] != 0) {
        // The Boolean Optimization
        // This optimization assumes the 2nd tree of a 2-class system is the
        // inverse of the first.  This is false for DRF (and true for GBM) -
        // DRF picks a random different set of columns for the 2nd tree.
        if (k == 1 && _nclass == 2) continue;
        ktrees[k] = new DTree(fr._names, _ncols, (char) nbins, (char) _nclass, min_rows);
        new GBMUndecidedNode(
            ktrees[k],
            -1,
            DHistogram.initialHist(fr, _ncols, nbins, hcs[k][0], false)); // The "root" node
      }
    }
    int[] leafs = new int[_nclass]; // Define a "working set" of leaf splits, from here to tree._len

    // ----
    // ESL2, page 387.  Step 2b ii.
    // One Big Loop till the ktrees are of proper depth.
    // Adds a layer to the trees each pass.
    int depth = 0;
    for (; depth < max_depth; depth++) {
      if (!Job.isRunning(self())) return null;

      hcs = buildLayer(fr, ktrees, leafs, hcs, false, false);

      // If we did not make any new splits, then the tree is split-to-death
      if (hcs == null) break;
    }

    // Each tree bottomed-out in a DecidedNode; go 1 more level and insert
    // LeafNodes to hold predictions.
    for (int k = 0; k < _nclass; k++) {
      DTree tree = ktrees[k];
      if (tree == null) continue;
      int leaf = leafs[k] = tree.len();
      for (int nid = 0; nid < leaf; nid++) {
        if (tree.node(nid) instanceof DecidedNode) {
          DecidedNode dn = tree.decided(nid);
          for (int i = 0; i < dn._nids.length; i++) {
            int cnid = dn._nids[i];
            if (cnid == -1
                || // Bottomed out (predictors or responses known constant)
                tree.node(cnid) instanceof UndecidedNode
                || // Or chopped off for depth
                (tree.node(cnid) instanceof DecidedNode
                    && // Or not possible to split
                    ((DecidedNode) tree.node(cnid))._split.col() == -1))
              dn._nids[i] = new GBMLeafNode(tree, nid).nid(); // Mark a leaf here
          }
          // Handle the trivial non-splitting tree
          if (nid == 0 && dn._split.col() == -1) new GBMLeafNode(tree, -1, 0);
        }
      }
    } // -- k-trees are done

    // ----
    // ESL2, page 387.  Step 2b iii.  Compute the gammas, and store them back
    // into the tree leaves.  Includes learn_rate.
    //    gamma_i_k = (nclass-1)/nclass * (sum res_i / sum (|res_i|*(1-|res_i|)))
    // For regression:
    //    gamma_i_k = sum res_i / count(res_i)
    GammaPass gp = new GammaPass(ktrees, leafs).doAll(fr);
    double m1class = _nclass > 1 ? (double) (_nclass - 1) / _nclass : 1.0; // K-1/K
    for (int k = 0; k < _nclass; k++) {
      final DTree tree = ktrees[k];
      if (tree == null) continue;
      for (int i = 0; i < tree._len - leafs[k]; i++) {
        double g =
            gp._gss[k][i] == 0 // Constant response?
                ? (gp._rss[k][i] == 0
                    ? 0
                    : 1000) // Cap (exponential) learn, instead of dealing with Inf
                : learn_rate * m1class * gp._rss[k][i] / gp._gss[k][i];
        assert !Double.isNaN(g);
        ((LeafNode) tree.node(leafs[k] + i))._pred = g;
      }
    }

    // ----
    // ESL2, page 387.  Step 2b iv.  Cache the sum of all the trees, plus the
    // new tree, in the 'tree' columns.  Also, zap the NIDs for next pass.
    // Tree <== f(Tree)
    // Nids <== 0
    new MRTask2() {
      @Override
      public void map(Chunk chks[]) {
        // For all tree/klasses
        for (int k = 0; k < _nclass; k++) {
          final DTree tree = ktrees[k];
          if (tree == null) continue;
          final Chunk nids = chk_nids(chks, k);
          final Chunk ct = chk_tree(chks, k);
          for (int row = 0; row < nids._len; row++) {
            int nid = (int) nids.at80(row);
            if (nid < 0) continue;
            ct.set0(row, (float) (ct.at0(row) + ((LeafNode) tree.node(nid))._pred));
            nids.set0(row, 0);
          }
        }
      }
    }.doAll(fr);

    // Collect leaves stats
    for (int i = 0; i < ktrees.length; i++)
      if (ktrees[i] != null) ktrees[i].leaves = ktrees[i].len() - leafs[i];
    // DEBUG: Print the generated K trees
    // printGenerateTrees(ktrees);

    return ktrees;
  }