Пример #1
0
  /**
   * Compute relative variable importance for GBM model.
   *
   * <p>See (45), (35) formulas in Friedman: Greedy Function Approximation: A Gradient boosting
   * machine. The algorithm used here can also be used to compute the individual importance of
   * features per output class.
   *
   * <p>The improvement of every split node found in {@code ktrees} is added to the per-column
   * totals kept in {@code _improvPerVar}. Each total is then divided by {@code model.ntrees() *
   * model.nclasses()} to obtain the importance of the corresponding variable.
   *
   * @param model model whose tree, class and feature counts drive the computation
   * @param ktrees trees to collect split improvements from; {@code null} entries are skipped
   * @param tid index of the current tree; asserted to equal {@code model.ntrees() - 1}
   * @param validationFrame not used by this implementation
   * @param scale if {@code true}, divide every importance by the largest one so that the largest
   *     value becomes 1; the values are left unscaled when the largest importance is zero, because
   *     dividing by it would turn every entry into NaN
   * @return variable importances wrapped in a {@code VarImp}
   */
  @Override
  protected VarImp doVarImpCalc(
      GBMModel model, DTree[] ktrees, int tid, Frame validationFrame, boolean scale) {
    assert model.ntrees() - 1 == tid
        : "varimp computation expect model with already serialized trees: tid=" + tid;
    // Accumulate the improvement of every split node of the given k-trees.
    for (DTree t : ktrees) {
      if (t == null) {
        continue;
      }
      for (int n = 0; n < t.len() - t.leaves; n++) {
        if (t.node(n) instanceof DecidedNode) { // it is a split node
          Split split = t.decided(n)._split;
          _improvPerVar[split._col] += split.improvement(); // least squares improvement
        }
      }
    }
    // Compute variable importance for all trees in model
    float[] varimp = new float[model.nfeatures()];

    int ntreesTotal = model.ntrees() * model.nclasses();
    int maxVar = 0;
    for (int var = 0; var < _improvPerVar.length; var++) {
      varimp[var] = _improvPerVar[var] / ntreesTotal;
      if (varimp[var] > varimp[maxVar]) {
        maxVar = var;
      }
    }
    // Scale relative to the most important variable (largest value becomes 1).
    if (scale && varimp.length > 0) {
      float maxVal = varimp[maxVar];
      // A zero maximum means no split improved anything; 0f / 0f would yield NaN for every entry.
      if (maxVal != 0f) {
        for (int var = 0; var < varimp.length; var++) {
          varimp[var] /= maxVal;
        }
      }
    }

    return new VarImp(varimp);
  }
Пример #2
0
 /**
  * Initializes the GBM-specific data structures.
  *
  * <p>When {@code importance} is set, allocates the per-variable improvement accumulator with one
  * slot per feature reported by {@code initialModel}; otherwise does nothing.
  *
  * @param initialModel model whose feature count sizes the accumulator
  */
 @Override
 protected void initAlgo(GBMModel initialModel) {
   if (!importance) {
     return; // nothing to set up when variable importance is not requested
   }
   final int featureCount = initialModel.nfeatures();
   _improvPerVar = new float[featureCount];
 }