/**
 * Runs {@link #test()} and converts the resulting Weka {@link Evaluation}
 * into a pikater ontology Evaluation message.
 *
 * <p>Kappa and relative absolute error are undefined for some class/dataset
 * combinations; when Weka throws while computing them, the corresponding
 * field is set to the sentinel value -1 instead.
 *
 * @return the populated pikater evaluation message
 */
@Override protected pikater.ontology.messages.Evaluation evaluateCA() {
    Evaluation wekaEval = test();
    pikater.ontology.messages.Evaluation ontoEval =
        new pikater.ontology.messages.Evaluation();

    ontoEval.setError_rate((float) wekaEval.errorRate());
    try {
        ontoEval.setKappa_statistic((float) wekaEval.kappa());
    } catch (Exception e) {
        // kappa statistic not defined for this evaluation -- use -1 sentinel
        ontoEval.setKappa_statistic(-1);
    }
    ontoEval.setMean_absolute_error((float) wekaEval.meanAbsoluteError());
    try {
        ontoEval.setRelative_absolute_error((float) wekaEval.relativeAbsoluteError());
    } catch (Exception e) {
        // relative absolute error not defined for this evaluation -- use -1 sentinel
        ontoEval.setRelative_absolute_error(-1);
    }
    ontoEval.setRoot_mean_squared_error((float) wekaEval.rootMeanSquaredError());
    ontoEval.setRoot_relative_squared_error((float) wekaEval.rootRelativeSquaredError());
    return ontoEval;
}
/** * Evaluates a feature subset by cross validation * * @param feature_set the subset to be evaluated * @param num_atts the number of attributes in the subset * @return the estimated accuracy * @throws Exception if subset can't be evaluated */ protected double estimatePerformance(BitSet feature_set, int num_atts) throws Exception { m_evaluation = new Evaluation(m_theInstances); int i; int[] fs = new int[num_atts]; double[] instA = new double[num_atts]; int classI = m_theInstances.classIndex(); int index = 0; for (i = 0; i < m_numAttributes; i++) { if (feature_set.get(i)) { fs[index++] = i; } } // create new hash table m_entries = new Hashtable((int) (m_theInstances.numInstances() * 1.5)); // insert instances into the hash table for (i = 0; i < m_numInstances; i++) { Instance inst = m_theInstances.instance(i); for (int j = 0; j < fs.length; j++) { if (fs[j] == classI) { instA[j] = Double.MAX_VALUE; // missing for the class } else if (inst.isMissing(fs[j])) { instA[j] = Double.MAX_VALUE; } else { instA[j] = inst.value(fs[j]); } } insertIntoTable(inst, instA); } if (m_CVFolds == 1) { // calculate leave one out error for (i = 0; i < m_numInstances; i++) { Instance inst = m_theInstances.instance(i); for (int j = 0; j < fs.length; j++) { if (fs[j] == classI) { instA[j] = Double.MAX_VALUE; // missing for the class } else if (inst.isMissing(fs[j])) { instA[j] = Double.MAX_VALUE; } else { instA[j] = inst.value(fs[j]); } } evaluateInstanceLeaveOneOut(inst, instA); } } else { m_theInstances.randomize(m_rr); m_theInstances.stratify(m_CVFolds); // calculate 10 fold cross validation error for (i = 0; i < m_CVFolds; i++) { Instances insts = m_theInstances.testCV(m_CVFolds, i); evaluateFoldCV(insts, fs); } } switch (m_evaluationMeasure) { case EVAL_DEFAULT: if (m_classIsNominal) { return m_evaluation.pctCorrect(); } return -m_evaluation.rootMeanSquaredError(); case EVAL_ACCURACY: return m_evaluation.pctCorrect(); case EVAL_RMSE: return -m_evaluation.rootMeanSquaredError(); case 
EVAL_MAE: return -m_evaluation.meanAbsoluteError(); case EVAL_AUC: double[] classPriors = m_evaluation.getClassPriors(); Utils.normalize(classPriors); double weightedAUC = 0; for (i = 0; i < m_theInstances.classAttribute().numValues(); i++) { double tempAUC = m_evaluation.areaUnderROC(i); if (!Utils.isMissingValue(tempAUC)) { weightedAUC += (classPriors[i] * tempAUC); } else { System.err.println("Undefined AUC!!"); } } return weightedAUC; } // shouldn't get here return 0.0; }
/** * Gets the results for the supplied train and test datasets. Now performs a deep copy of the * classifier before it is built and evaluated (just in case the classifier is not initialized * properly in buildClassifier()). * * @param train the training Instances. * @param test the testing Instances. * @return the results stored in an array. The objects stored in the array may be Strings, * Doubles, or null (for the missing value). * @throws Exception if a problem occurs while getting the results */ public Object[] getResult(Instances train, Instances test) throws Exception { if (train.classAttribute().type() != Attribute.NUMERIC) { throw new Exception("Class attribute is not numeric!"); } if (m_Template == null) { throw new Exception("No classifier has been specified"); } ThreadMXBean thMonitor = ManagementFactory.getThreadMXBean(); boolean canMeasureCPUTime = thMonitor.isThreadCpuTimeSupported(); if (canMeasureCPUTime && !thMonitor.isThreadCpuTimeEnabled()) thMonitor.setThreadCpuTimeEnabled(true); int addm = (m_AdditionalMeasures != null) ? 
m_AdditionalMeasures.length : 0; Object[] result = new Object[RESULT_SIZE + addm + m_numPluginStatistics]; long thID = Thread.currentThread().getId(); long CPUStartTime = -1, trainCPUTimeElapsed = -1, testCPUTimeElapsed = -1, trainTimeStart, trainTimeElapsed, testTimeStart, testTimeElapsed; Evaluation eval = new Evaluation(train); m_Classifier = AbstractClassifier.makeCopy(m_Template); trainTimeStart = System.currentTimeMillis(); if (canMeasureCPUTime) CPUStartTime = thMonitor.getThreadUserTime(thID); m_Classifier.buildClassifier(train); if (canMeasureCPUTime) trainCPUTimeElapsed = thMonitor.getThreadUserTime(thID) - CPUStartTime; trainTimeElapsed = System.currentTimeMillis() - trainTimeStart; testTimeStart = System.currentTimeMillis(); if (canMeasureCPUTime) CPUStartTime = thMonitor.getThreadUserTime(thID); eval.evaluateModel(m_Classifier, test); if (canMeasureCPUTime) testCPUTimeElapsed = thMonitor.getThreadUserTime(thID) - CPUStartTime; testTimeElapsed = System.currentTimeMillis() - testTimeStart; thMonitor = null; m_result = eval.toSummaryString(); // The results stored are all per instance -- can be multiplied by the // number of instances to get absolute numbers int current = 0; result[current++] = new Double(train.numInstances()); result[current++] = new Double(eval.numInstances()); result[current++] = new Double(eval.meanAbsoluteError()); result[current++] = new Double(eval.rootMeanSquaredError()); result[current++] = new Double(eval.relativeAbsoluteError()); result[current++] = new Double(eval.rootRelativeSquaredError()); result[current++] = new Double(eval.correlationCoefficient()); result[current++] = new Double(eval.SFPriorEntropy()); result[current++] = new Double(eval.SFSchemeEntropy()); result[current++] = new Double(eval.SFEntropyGain()); result[current++] = new Double(eval.SFMeanPriorEntropy()); result[current++] = new Double(eval.SFMeanSchemeEntropy()); result[current++] = new Double(eval.SFMeanEntropyGain()); // Timing stats result[current++] = 
new Double(trainTimeElapsed / 1000.0); result[current++] = new Double(testTimeElapsed / 1000.0); if (canMeasureCPUTime) { result[current++] = new Double((trainCPUTimeElapsed / 1000000.0) / 1000.0); result[current++] = new Double((testCPUTimeElapsed / 1000000.0) / 1000.0); } else { result[current++] = new Double(Utils.missingValue()); result[current++] = new Double(Utils.missingValue()); } // sizes if (m_NoSizeDetermination) { result[current++] = -1.0; result[current++] = -1.0; result[current++] = -1.0; } else { ByteArrayOutputStream bastream = new ByteArrayOutputStream(); ObjectOutputStream oostream = new ObjectOutputStream(bastream); oostream.writeObject(m_Classifier); result[current++] = new Double(bastream.size()); bastream = new ByteArrayOutputStream(); oostream = new ObjectOutputStream(bastream); oostream.writeObject(train); result[current++] = new Double(bastream.size()); bastream = new ByteArrayOutputStream(); oostream = new ObjectOutputStream(bastream); oostream.writeObject(test); result[current++] = new Double(bastream.size()); } // Prediction interval statistics result[current++] = new Double(eval.coverageOfTestCasesByPredictedRegions()); result[current++] = new Double(eval.sizeOfPredictedRegions()); if (m_Classifier instanceof Summarizable) { result[current++] = ((Summarizable) m_Classifier).toSummaryString(); } else { result[current++] = null; } for (int i = 0; i < addm; i++) { if (m_doesProduce[i]) { try { double dv = ((AdditionalMeasureProducer) m_Classifier).getMeasure(m_AdditionalMeasures[i]); if (!Utils.isMissingValue(dv)) { Double value = new Double(dv); result[current++] = value; } else { result[current++] = null; } } catch (Exception ex) { System.err.println(ex); } } else { result[current++] = null; } } // get the actual metrics from the evaluation object List<AbstractEvaluationMetric> metrics = eval.getPluginMetrics(); if (metrics != null) { for (AbstractEvaluationMetric m : metrics) { if (m.appliesToNumericClass()) { List<String> statNames = 
m.getStatisticNames(); for (String s : statNames) { result[current++] = new Double(m.getStatistic(s)); } } } } if (current != RESULT_SIZE + addm + m_numPluginStatistics) { throw new Error("Results didn't fit RESULT_SIZE"); } return result; }