Exemple #1
0
  public final Vec[] vecs() {
    if (_vecs != null) return _vecs;
    // Load all Vec headers; load them all in parallel by spawning F/J tasks.
    final Vec[] vecs = new Vec[_keys.length];
    Futures fs = new Futures();
    for (int i = 0; i < _keys.length; i++) {
      final int ii = i;
      final Key k = _keys[i];
      H2OCountedCompleter t =
          new H2OCountedCompleter() {
            // We need higher priority here as there is a danger of deadlock in
            // case of many calls from MRTask2 at once (e.g. frame with many
            // vectors invokes rollup tasks for all vectors in parallel).  Should
            // probably be done in CPS style in the future
            @Override
            public byte priority() {
              return H2O.MIN_HI_PRIORITY;
            }

            @Override
            public void compute2() {
              vecs[ii] = DKV.get(k).get();
              tryComplete();
            }
          };
      H2O.submitTask(t);
      fs.add(t);
    }
    fs.blockForPending();
    return _vecs = vecs;
  }
 static void invalidate(H2ONode h2o, Key key, Value newval, Futures fs) {
   assert newval._key != null && key.home();
   // Prevent the new Value from being overwritten by Yet Another PUT by
   // read-locking it.  It's safe to read, but not to over-write, until this
   // invalidate completes on the *prior* value.
   newval.read_lock(); // block further writes until all invalidates complete
   fs.add(RPC.call(h2o, new TaskInvalidateKey(key, newval)));
 }
Exemple #3
0
 /**
  * On-the-fly version for varimp. After generation a new tree, its tree votes are collected on
  * shuffled OOB rows and variable importance is recomputed.
  *
  * <p>The <a
  * href="http://www.stat.berkeley.edu/~breiman/RandomForests/cc_home.htm#varimp">page</a> says:
  * <cite> "In every tree grown in the forest, put down the oob cases and count the number of votes
  * cast for the correct class. Now randomly permute the values of variable m in the oob cases and
  * put these cases down the tree. Subtract the number of votes for the correct class in the
  * variable-m-permuted oob data from the number of votes for the correct class in the untouched
  * oob data. The average of this number over all trees in the forest is the raw importance score
  * for variable m." </cite>
  */
 @Override
 protected VarImp doVarImpCalc(
     final DRFModel model, DTree[] ktrees, final int tid, final Frame fTrain, boolean scale) {
   // Check if we have already serialized 'ktrees'-trees in the model
   assert model.ntrees() - 1 == tid
       : "Cannot compute DRF varimp since 'ktrees' are not serialized in the model! tid=" + tid;
   assert _treeMeasuresOnOOB.npredictors() - 1 == tid
       : "Tree votes over OOB rows for this tree (var ktrees) were not found!";
   // Compute tree votes over shuffled data
   final CompressedTree[ /*nclass*/] theTree =
       model.ctree(tid); // get the last tree FIXME we should pass only keys
   final int nclasses = model.nclasses();
   Futures fs = new Futures();
   for (int var = 0; var < _ncols; var++) {
     final int variable = var;
     H2OCountedCompleter task4var =
         classification
             ? new H2OCountedCompleter() {
               @Override
               public void compute2() {
                 // Compute this tree votes over all data over given variable
                 TreeVotes cd =
                     TreeMeasuresCollector.collectVotes(
                         theTree, nclasses, fTrain, _ncols, sample_rate, variable);
                 assert cd.npredictors() == 1;
                 asVotes(_treeMeasuresOnSOOB[variable]).append(cd);
                 tryComplete();
               }
             }
             : /* regression */ new H2OCountedCompleter() {
               @Override
               public void compute2() {
                 // Compute this tree votes over all data over given variable
                 TreeSSE cd =
                     TreeMeasuresCollector.collectSSE(
                         theTree, nclasses, fTrain, _ncols, sample_rate, variable);
                 assert cd.npredictors() == 1;
                 asSSE(_treeMeasuresOnSOOB[variable]).append(cd);
                 tryComplete();
               }
             };
     H2O.submitTask(task4var); // Fork computation
     fs.add(task4var);
   }
   fs.blockForPending(); // Wait for results
   // Compute varimp for individual features (_ncols)
   final float[] varimp = new float[_ncols]; // output variable importance
   final float[] varimpSD = new float[_ncols]; // output variable importance sd
   for (int var = 0; var < _ncols; var++) {
     double[ /*2*/] imp =
         classification
             ? asVotes(_treeMeasuresOnSOOB[var]).imp(asVotes(_treeMeasuresOnOOB))
             : asSSE(_treeMeasuresOnSOOB[var]).imp(asSSE(_treeMeasuresOnOOB));
     varimp[var] = (float) imp[0];
     varimpSD[var] = (float) imp[1];
   }
   return new VarImp.VarImpMDA(varimp, varimpSD, model.ntrees());
 }
Exemple #4
0
 static void put(H2ONode h2o, Key key, Value val, Futures fs, boolean dontCache) {
   fs.add(RPC.call(h2o, new TaskPutKey(key, val, dontCache)));
 }