/** * Compute relative variable importance for GBM model. * * <p>See (45), (35) formulas in Friedman: Greedy Function Approximation: A Gradient boosting * machine. Algo used here can be used for computation individual importance of features per * output class. */ @Override protected VarImp doVarImpCalc( GBMModel model, DTree[] ktrees, int tid, Frame validationFrame, boolean scale) { assert model.ntrees() - 1 == tid : "varimp computation expect model with already serialized trees: tid=" + tid; // Iterates over k-tree for (DTree t : ktrees) { // Iterate over trees if (t != null) { for (int n = 0; n < t.len() - t.leaves; n++) if (t.node(n) instanceof DecidedNode) { // it is split node Split split = t.decided(n)._split; _improvPerVar[split._col] += split.improvement(); // least squares improvement } } } // Compute variable importance for all trees in model float[] varimp = new float[model.nfeatures()]; int ntreesTotal = model.ntrees() * model.nclasses(); int maxVar = 0; for (int var = 0; var < _improvPerVar.length; var++) { varimp[var] = _improvPerVar[var] / ntreesTotal; if (varimp[var] > varimp[maxVar]) maxVar = var; } // GBM scale varimp to scale 0..100 if (scale) { float maxVal = varimp[maxVar]; for (int var = 0; var < varimp.length; var++) varimp[var] /= maxVal; } return new VarImp(varimp); }
@Override protected void initAlgo(GBMModel initialModel) { // Initialize gbm-specific data structures if (importance) _improvPerVar = new float[initialModel.nfeatures()]; }