/** * Replace the class values of the instances from the current iteration with residuals ater * predicting with the supplied classifier. * * @param data the instances to predict * @param c the classifier to use * @param useShrinkage whether shrinkage is to be applied to the model's output * @return a new set of instances with class values replaced by residuals * @throws Exception if something goes wrong */ private Instances residualReplace(Instances data, Classifier c, boolean useShrinkage) throws Exception { double pred, residual; Instances newInst = new Instances(data); for (int i = 0; i < newInst.numInstances(); i++) { pred = c.classifyInstance(newInst.instance(i)); if (useShrinkage) { pred *= getShrinkage(); } // +++ LEF +++ // should this be squared here??? residual = newInst.instance(i).classValue() - pred; newInst.instance(i).setClassValue(residual); } // System.err.print(newInst); return newInst; }
/** * Carry out the bias-variance decomposition * * @throws Exception if the decomposition couldn't be carried out */ public void decompose() throws Exception { Reader dataReader = new BufferedReader(new FileReader(m_DataFileName)); Instances data = new Instances(dataReader); if (m_ClassIndex < 0) { data.setClassIndex(data.numAttributes() - 1); } else { data.setClassIndex(m_ClassIndex); } if (data.classAttribute().type() != Attribute.NOMINAL) { throw new Exception("Class attribute must be nominal"); } int numClasses = data.numClasses(); data.deleteWithMissingClass(); if (data.checkForStringAttributes()) { throw new Exception("Can't handle string attributes!"); } if (data.numInstances() < 2 * m_TrainPoolSize) { throw new Exception( "The dataset must contain at least " + (2 * m_TrainPoolSize) + " instances"); } Random random = new Random(m_Seed); data.randomize(random); Instances trainPool = new Instances(data, 0, m_TrainPoolSize); Instances test = new Instances(data, m_TrainPoolSize, data.numInstances() - m_TrainPoolSize); int numTest = test.numInstances(); double[][] instanceProbs = new double[numTest][numClasses]; m_Error = 0; for (int i = 0; i < m_TrainIterations; i++) { if (m_Debug) { System.err.println("Iteration " + (i + 1)); } trainPool.randomize(random); Instances train = new Instances(trainPool, 0, m_TrainPoolSize / 2); Classifier current = AbstractClassifier.makeCopy(m_Classifier); current.buildClassifier(train); //// Evaluate the classifier on test, updating BVD stats for (int j = 0; j < numTest; j++) { int pred = (int) current.classifyInstance(test.instance(j)); if (pred != test.instance(j).classValue()) { m_Error++; } instanceProbs[j][pred]++; } } m_Error /= (m_TrainIterations * numTest); // Average the BV over each instance in test. m_Bias = 0; m_Variance = 0; m_Sigma = 0; for (int i = 0; i < numTest; i++) { Instance current = test.instance(i); double[] predProbs = instanceProbs[i]; double pActual, pPred; double bsum = 0, vsum = 0, ssum = 0; for (int j = 0; j < numClasses; j++) { pActual = (current.classValue() == j) ? 1 : 0; // Or via 1NN from test data? pPred = predProbs[j] / m_TrainIterations; bsum += (pActual - pPred) * (pActual - pPred) - pPred * (1 - pPred) / (m_TrainIterations - 1); vsum += pPred * pPred; ssum += pActual * pActual; } m_Bias += bsum; m_Variance += (1 - vsum); m_Sigma += (1 - ssum); } m_Bias /= (2 * numTest); m_Variance /= (2 * numTest); m_Sigma /= (2 * numTest); if (m_Debug) { System.err.println("Decomposition finished"); } }