/** * Computes class distribution of an instance using the FastRandomTree. * * <p>In Weka's RandomTree, the distributions were normalized so that all probabilities sum to 1; * this would abolish the effect of instance weights on voting. In FastRandomForest 0.97 onwards, * the distributions are normalized by dividing with the number of instances going into a leaf. * * <p> * * @param instance the instance to compute the distribution for * @return the computed class distribution * @throws Exception if computation fails */ @Override public double[] distributionForInstance(Instance instance) throws Exception { double[] returnedDist = null; if (m_Attribute > -1) { // ============================ node is not a leaf if (instance.isMissing(m_Attribute)) { // ---------------- missing value returnedDist = new double[m_MotherForest.getM_Info().numClasses()]; // split instance up for (int i = 0; i < m_Successors.length; i++) { double[] help = m_Successors[i].distributionForInstance(instance); if (help != null) { for (int j = 0; j < help.length; j++) { returnedDist[j] += m_Prop[i] * help[j]; } } } } else if (m_MotherForest.getM_Info().attribute(m_Attribute).isNominal()) { // ------ nominal // returnedDist = m_Successors[(int) instance.value(m_Attribute)] // .distributionForInstance(instance); // 0.99: new - binary splits (also) for nominal attributes if (instance.value(m_Attribute) == m_SplitPoint) { returnedDist = m_Successors[0].distributionForInstance(instance); } else { returnedDist = m_Successors[1].distributionForInstance(instance); } } else { // ------------------------------------------ numeric attributes if (instance.value(m_Attribute) < m_SplitPoint) { returnedDist = m_Successors[0].distributionForInstance(instance); } else { returnedDist = m_Successors[1].distributionForInstance(instance); } } return returnedDist; } else { // =============================================== node is a leaf return m_ClassProbs; } }
/** * Computes class distribution of an instance using the FastRandomTree. * * <p>Works correctly only if the DataCache has the same attributes as the one used to train the * FastRandomTree - but this function does not check for that! * * <p>Main use of this is to compute out-of-bag error (also when finding feature importances). * * @param instance the instance to compute the distribution for * @return the computed class distribution * @throws Exception if computation fails */ public double[] distributionForInstanceInDataCache(DataCache data, int instIdx) { double[] returnedDist = null; if (m_Attribute > -1) { // ============================ node is not a leaf if (data.isValueMissing(m_Attribute, instIdx)) { // ---------------- missing value returnedDist = new double[m_MotherForest.getM_Info().numClasses()]; // split instance up for (int i = 0; i < m_Successors.length; i++) { double[] help = m_Successors[i].distributionForInstanceInDataCache(data, instIdx); if (help != null) { for (int j = 0; j < help.length; j++) { returnedDist[j] += m_Prop[i] * help[j]; } } } } else if (data.isAttrNominal(m_Attribute)) { // ------ nominal // returnedDist = m_Successors[(int) instance.value(m_Attribute)] // .distributionForInstance(instance); // 0.99: new - binary splits (also) for nominal attributes if (data.vals[m_Attribute][instIdx] == m_SplitPoint) { returnedDist = m_Successors[0].distributionForInstanceInDataCache(data, instIdx); } else { returnedDist = m_Successors[1].distributionForInstanceInDataCache(data, instIdx); } } else { // ------------------------------------------ numeric attributes if (data.vals[m_Attribute][instIdx] < m_SplitPoint) { returnedDist = m_Successors[0].distributionForInstanceInDataCache(data, instIdx); } else { returnedDist = m_Successors[1].distributionForInstanceInDataCache(data, instIdx); } } return returnedDist; } else { // =============================================== node is a leaf return m_ClassProbs; } }