/** * Process a classifier's prediction for an instance and update a set of plotting instances and * additional plotting info. m_PlotShape for nominal class datasets holds shape types (actual data * points have automatic shape type assignment; classifier error data points have box shape type). * For numeric class datasets, the actual data points are stored in m_PlotInstances and m_PlotSize * stores the error (which is later converted to shape size values). * * @param toPredict the actual data point * @param classifier the classifier * @param eval the evaluation object to use for evaluating the classifier on the instance to * predict * @see #m_PlotShapes * @see #m_PlotSizes * @see #m_PlotInstances */ public void process(Instance toPredict, Classifier classifier, Evaluation eval) { double pred; double[] values; int i; try { pred = eval.evaluateModelOnceAndRecordPrediction(classifier, toPredict); if (classifier instanceof weka.classifiers.misc.InputMappedClassifier) { toPredict = ((weka.classifiers.misc.InputMappedClassifier) classifier) .constructMappedInstance(toPredict); } if (!m_SaveForVisualization) return; if (m_PlotInstances != null) { values = new double[m_PlotInstances.numAttributes()]; for (i = 0; i < m_PlotInstances.numAttributes(); i++) { if (i < toPredict.classIndex()) { values[i] = toPredict.value(i); } else if (i == toPredict.classIndex()) { values[i] = pred; values[i + 1] = toPredict.value(i); i++; } else { values[i] = toPredict.value(i - 1); } } m_PlotInstances.add(new DenseInstance(1.0, values)); if (toPredict.classAttribute().isNominal()) { if (toPredict.isMissing(toPredict.classIndex()) || Utils.isMissingValue(pred)) { m_PlotShapes.addElement(new Integer(Plot2D.MISSING_SHAPE)); } else if (pred != toPredict.classValue()) { // set to default error point shape m_PlotShapes.addElement(new Integer(Plot2D.ERROR_SHAPE)); } else { // otherwise set to constant (automatically assigned) point shape m_PlotShapes.addElement(new 
Integer(Plot2D.CONST_AUTOMATIC_SHAPE)); } m_PlotSizes.addElement(new Integer(Plot2D.DEFAULT_SHAPE_SIZE)); } else { // store the error (to be converted to a point size later) Double errd = null; if (!toPredict.isMissing(toPredict.classIndex()) && !Utils.isMissingValue(pred)) { errd = new Double(pred - toPredict.classValue()); m_PlotShapes.addElement(new Integer(Plot2D.CONST_AUTOMATIC_SHAPE)); } else { // missing shape if actual class not present or prediction is missing m_PlotShapes.addElement(new Integer(Plot2D.MISSING_SHAPE)); } m_PlotSizes.addElement(errd); } } } catch (Exception ex) { ex.printStackTrace(); } }
/**
 * Accepts and processes a classifier encapsulated in an incremental classifier event.
 * On a NEW_BATCH status the evaluation state is (re)initialized; otherwise the current
 * instance is evaluated, a chart data point is emitted to registered chart listeners,
 * and on BATCH_FINISHED (or a null instance) summary text is sent to text listeners.
 *
 * @param ce an <code>IncrementalClassifierEvent</code> value
 */
@Override
public void acceptClassifier(final IncrementalClassifierEvent ce) {
  try {
    if (ce.getStatus() == IncrementalClassifierEvent.NEW_BATCH) {
      // reset all per-batch state: throughput tracking, evaluation, legend, chart reset flag
      m_throughput = new StreamThroughput(statusMessagePrefix());
      m_throughput.setSamplePeriod(m_statusFrequency);
      // m_eval = new Evaluation(ce.getCurrentInstance().dataset());
      m_eval = new Evaluation(ce.getStructure());
      m_eval.useNoPriors();
      m_dataLegend = new Vector();
      m_reset = true;
      m_dataPoint = new double[0]; // sized lazily once the class type is known
      Instances inst = ce.getStructure();
      System.err.println("NEW BATCH");
      m_instanceCount = 0;
      if (m_windowSize > 0) {
        // windowed evaluation: keep the last m_windowSize instances/predictions so
        // chart statistics reflect a sliding window rather than the whole stream
        m_window = new LinkedList<Instance>();
        m_windowEval = new Evaluation(ce.getStructure());
        m_windowEval.useNoPriors();
        m_windowedPreds = new LinkedList<double[]>();
        if (m_logger != null) {
          m_logger.logMessage(statusMessagePrefix()
            + "[IncrementalClassifierEvaluator] Chart output using windowed "
            + "evaluation over " + m_windowSize + " instances");
        }
      }
      /*
       * if (m_logger != null) { m_logger.statusMessage(statusMessagePrefix()
       * + "IncrementalClassifierEvaluator: started processing...");
       * m_logger.logMessage(statusMessagePrefix() +
       * " [IncrementalClassifierEvaluator]" + statusMessagePrefix() +
       * " started processing..."); }
       */
    } else {
      Instance inst = ce.getCurrentInstance();
      if (inst != null) {
        m_throughput.updateStart();
        m_instanceCount++;
        // if (inst.attribute(inst.classIndex()).isNominal()) {
        double[] dist = ce.getClassifier().distributionForInstance(inst);
        double pred = 0;
        if (!inst.isMissing(inst.classIndex())) {
          if (m_outputInfoRetrievalStats) {
            // store predictions so AUC etc can be output.
            m_eval.evaluateModelOnceAndRecordPrediction(dist, inst);
          } else {
            m_eval.evaluateModelOnce(dist, inst);
          }
          if (m_windowSize > 0) {
            m_windowEval.evaluateModelOnce(dist, inst);
            m_window.addFirst(inst);
            m_windowedPreds.addFirst(dist);
            if (m_instanceCount > m_windowSize) {
              // "forget" the oldest prediction by re-evaluating it with a
              // temporarily negated weight (subtracts its contribution)
              Instance oldest = m_window.removeLast();
              double[] oldDist = m_windowedPreds.removeLast();
              oldest.setWeight(-oldest.weight());
              m_windowEval.evaluateModelOnce(oldDist, oldest);
              oldest.setWeight(-oldest.weight()); // restore the original weight
            }
          }
        } else {
          // class is missing: just record the prediction for charting
          pred = ce.getClassifier().classifyInstance(inst);
        }
        if (inst.classIndex() >= 0) {
          // need to check that the class is not missing
          if (inst.attribute(inst.classIndex()).isNominal()) {
            if (!inst.isMissing(inst.classIndex())) {
              // lazily size the data point for the nominal-class chart series
              if (m_dataPoint.length < 2) {
                m_dataPoint = new double[3];
                m_dataLegend.addElement("Accuracy");
                m_dataLegend.addElement("RMSE (prob)");
                m_dataLegend.addElement("Kappa");
              }
              // int classV = (int) inst.value(inst.classIndex());
              if (m_windowSize > 0) {
                m_dataPoint[1] = m_windowEval.rootMeanSquaredError();
                m_dataPoint[2] = m_windowEval.kappa();
              } else {
                m_dataPoint[1] = m_eval.rootMeanSquaredError();
                m_dataPoint[2] = m_eval.kappa();
              }
              // int maxO = Utils.maxIndex(dist);
              // if (maxO == classV) {
              // dist[classV] = -1;
              // maxO = Utils.maxIndex(dist);
              // }
              // m_dataPoint[1] -= dist[maxO];
            } else {
              if (m_dataPoint.length < 1) {
                m_dataPoint = new double[1];
                m_dataLegend.addElement("Confidence");
              }
            }
            double primaryMeasure = 0;
            if (!inst.isMissing(inst.classIndex())) {
              if (m_windowSize > 0) {
                primaryMeasure = 1.0 - m_windowEval.errorRate();
              } else {
                primaryMeasure = 1.0 - m_eval.errorRate();
              }
            } else {
              // record confidence as the primary measure
              // (another possibility would be entropy of
              // the distribution, or perhaps average
              // confidence)
              primaryMeasure = dist[Utils.maxIndex(dist)];
            }
            // double [] dataPoint = new double[1];
            m_dataPoint[0] = primaryMeasure;
            // double min = 0; double max = 100;
            /*
             * ChartEvent e = new
             * ChartEvent(IncrementalClassifierEvaluator.this, m_dataLegend,
             * min, max, dataPoint);
             */
            m_ce.setLegendText(m_dataLegend);
            m_ce.setMin(0);
            m_ce.setMax(1);
            m_ce.setDataPoint(m_dataPoint);
            m_ce.setReset(m_reset);
            m_reset = false;
          } else {
            // numeric class
            if (m_dataPoint.length < 1) {
              m_dataPoint = new double[1];
              if (inst.isMissing(inst.classIndex())) {
                m_dataLegend.addElement("Prediction");
              } else {
                m_dataLegend.addElement("RMSE");
              }
            }
            if (!inst.isMissing(inst.classIndex())) {
              double update;
              // NOTE(review): this inner missing-check duplicates the enclosing
              // condition, so the 'update = pred' branch looks unreachable here
              if (!inst.isMissing(inst.classIndex())) {
                if (m_windowSize > 0) {
                  update = m_windowEval.rootMeanSquaredError();
                } else {
                  update = m_eval.rootMeanSquaredError();
                }
              } else {
                update = pred;
              }
              m_dataPoint[0] = update;
              // track the observed chart range
              if (update > m_max) {
                m_max = update;
              }
              if (update < m_min) {
                m_min = update;
              }
            }
            m_ce.setLegendText(m_dataLegend);
            m_ce.setMin((inst.isMissing(inst.classIndex()) ? m_min : 0));
            m_ce.setMax(m_max);
            m_ce.setDataPoint(m_dataPoint);
            m_ce.setReset(m_reset);
            m_reset = false;
          }
          notifyChartListeners(m_ce);
        }
        m_throughput.updateEnd(m_logger);
      }
      if (ce.getStatus() == IncrementalClassifierEvent.BATCH_FINISHED
        || inst == null) {
        if (m_logger != null) {
          m_logger.logMessage("[IncrementalClassifierEvaluator]"
            + statusMessagePrefix() + " Finished processing.");
        }
        m_throughput.finished(m_logger);
        // save memory if using windowed evaluation for charting
        m_windowEval = null;
        m_window = null;
        m_windowedPreds = null;
        if (m_textListeners.size() > 0) {
          // build the textual performance summary for text listeners
          String textTitle = ce.getClassifier().getClass().getName();
          textTitle = textTitle.substring(textTitle.lastIndexOf('.') + 1,
            textTitle.length());
          String results = "=== Performance information ===\n\n" + "Scheme: "
            + textTitle + "\n" + "Relation: "
            + m_eval.getHeader().relationName() + "\n\n"
            + m_eval.toSummaryString();
          if (m_eval.getHeader().classIndex() >= 0
            && m_eval.getHeader().classAttribute().isNominal()
            && (m_outputInfoRetrievalStats)) {
            results += "\n" + m_eval.toClassDetailsString();
          }
          if (m_eval.getHeader().classIndex() >= 0
            && m_eval.getHeader().classAttribute().isNominal()) {
            results += "\n" + m_eval.toMatrixString();
          }
          textTitle = "Results: " + textTitle;
          TextEvent te = new TextEvent(this, results, textTitle);
          notifyTextListeners(te);
        }
      }
    }
  } catch (Exception ex) {
    if (m_logger != null) {
      m_logger.logMessage("[IncrementalClassifierEvaluator]"
        + statusMessagePrefix() + " Error processing prediction "
        + ex.getMessage());
      m_logger.statusMessage(statusMessagePrefix()
        + "ERROR: problem processing prediction (see log for details)");
    }
    ex.printStackTrace();
    stop();
  }
}
/**
 * Calculates the accuracy on a test fold for internal cross validation of feature sets.
 * Works in three phases: (1) subtract each fold instance's contribution from the cached
 * decision-table distributions and class prior counts, (2) classify each fold instance
 * against the downdated table via m_evaluation, (3) add the contributions back.
 *
 * NOTE(review): as written, {@code acc} is never updated, so this always returns 0.0;
 * the real statistics accumulate in {@code m_evaluation} — confirm callers rely on that.
 *
 * @param fold set of instances to be "left out" and classified
 * @param fs currently selected feature set
 * @return the accuracy for the fold
 * @throws Exception if something goes wrong
 */
double evaluateFoldCV(Instances fold, int[] fs) throws Exception {

  int i;
  int ruleCount = 0;
  int numFold = fold.numInstances();
  int numCl = m_theInstances.classAttribute().numValues();
  double[][] class_distribs = new double[numFold][numCl];
  double[] instA = new double[fs.length];
  double[] normDist;
  DecisionTableHashKey thekey;
  double acc = 0.0;
  int classI = m_theInstances.classIndex();
  Instance inst;

  if (m_classIsNominal) {
    normDist = new double[numCl];
  } else {
    // numeric class: [0] = weighted sum of class values, [1] = sum of weights
    normDist = new double[2];
  }

  // first *remove* instances
  for (i = 0; i < numFold; i++) {
    inst = fold.instance(i);
    // build the hash key from the selected features only
    for (int j = 0; j < fs.length; j++) {
      if (fs[j] == classI) {
        instA[j] = Double.MAX_VALUE; // missing for the class
      } else if (inst.isMissing(fs[j])) {
        instA[j] = Double.MAX_VALUE;
      } else {
        instA[j] = inst.value(fs[j]);
      }
    }
    thekey = new DecisionTableHashKey(instA);
    if ((class_distribs[i] = (double[]) m_entries.get(thekey)) == null) {
      // every fold instance must already be in the table
      throw new Error("This should never happen!");
    } else {
      // downdate the shared cached distribution in place (restored in phase 3)
      if (m_classIsNominal) {
        class_distribs[i][(int) inst.classValue()] -= inst.weight();
      } else {
        class_distribs[i][0] -= (inst.classValue() * inst.weight());
        class_distribs[i][1] -= inst.weight();
      }
      ruleCount++;
    }
    m_classPriorCounts[(int) inst.classValue()] -= inst.weight();
  }
  double[] classPriors = m_classPriorCounts.clone();
  Utils.normalize(classPriors);

  // now classify instances
  for (i = 0; i < numFold; i++) {
    inst = fold.instance(i);
    System.arraycopy(class_distribs[i], 0, normDist, 0, normDist.length);
    if (m_classIsNominal) {
      // fall back to the class priors if no class count exceeds 1.0
      boolean ok = false;
      for (int j = 0; j < normDist.length; j++) {
        if (Utils.gr(normDist[j], 1.0)) {
          ok = true;
          break;
        }
      }

      if (!ok) { // majority class
        normDist = classPriors.clone();
      }

      // if (ok) {
      Utils.normalize(normDist);
      if (m_evaluationMeasure == EVAL_AUC) {
        m_evaluation.evaluateModelOnceAndRecordPrediction(normDist, inst);
      } else {
        m_evaluation.evaluateModelOnce(normDist, inst);
      }
      /*
       * } else { normDist[(int)m_majority] = 1.0; if (m_evaluationMeasure ==
       * EVAL_AUC) { m_evaluation.evaluateModelOnceAndRecordPrediction(normDist,
       * inst); } else { m_evaluation.evaluateModelOnce(normDist, inst); } }
       */
    } else {
      // numeric class: predict the (downdated) mean, or the majority value
      // when no weight remains in the cell
      if (Utils.eq(normDist[1], 0.0)) {
        double[] temp = new double[1];
        temp[0] = m_majority;
        m_evaluation.evaluateModelOnce(temp, inst);
      } else {
        double[] temp = new double[1];
        temp[0] = normDist[0] / normDist[1];
        m_evaluation.evaluateModelOnce(temp, inst);
      }
    }
  }

  // now re-insert instances
  for (i = 0; i < numFold; i++) {
    inst = fold.instance(i);
    m_classPriorCounts[(int) inst.classValue()] += inst.weight();
    if (m_classIsNominal) {
      class_distribs[i][(int) inst.classValue()] += inst.weight();
    } else {
      class_distribs[i][0] += (inst.classValue() * inst.weight());
      class_distribs[i][1] += inst.weight();
    }
  }
  return acc;
}
/**
 * Classifies an instance for internal leave one out cross validation of feature sets.
 * Looks up the cached decision-table distribution for the instance's feature-key,
 * subtracts the instance's own contribution (working on a copy — the cached entry is
 * not mutated), records the prediction in m_evaluation, and returns the prediction.
 *
 * @param instance instance to be "left out" and classified
 * @param instA feature values of the selected features for the instance
 * @return the classification of the instance (class index for nominal, value for numeric)
 * @throws Exception if something goes wrong
 */
double evaluateInstanceLeaveOneOut(Instance instance, double[] instA) throws Exception {

  DecisionTableHashKey thekey;
  double[] tempDist;
  double[] normDist;

  thekey = new DecisionTableHashKey(instA);
  if (m_classIsNominal) {

    // if this one is not in the table
    if ((tempDist = (double[]) m_entries.get(thekey)) == null) {
      throw new Error("This should never happen!");
    } else {
      // copy the cached distribution and remove this instance's contribution
      normDist = new double[tempDist.length];
      System.arraycopy(tempDist, 0, normDist, 0, tempDist.length);
      normDist[(int) instance.classValue()] -= instance.weight();

      // update the table
      // first check to see if the class counts are all zero now
      boolean ok = false;
      for (int i = 0; i < normDist.length; i++) {
        if (Utils.gr(normDist[i], 1.0)) {
          ok = true;
          break;
        }
      }

      // downdate the class prior counts
      m_classPriorCounts[(int) instance.classValue()] -= instance.weight();
      double[] classPriors = m_classPriorCounts.clone();
      Utils.normalize(classPriors);
      if (!ok) { // majority class
        // no usable counts left in this cell: fall back to the class priors
        normDist = classPriors;
      }

      // restore the prior counts before returning
      m_classPriorCounts[(int) instance.classValue()] += instance.weight();

      // if (ok) {
      Utils.normalize(normDist);
      if (m_evaluationMeasure == EVAL_AUC) {
        m_evaluation.evaluateModelOnceAndRecordPrediction(normDist, instance);
      } else {
        m_evaluation.evaluateModelOnce(normDist, instance);
      }
      return Utils.maxIndex(normDist);
      /*
       * } else { normDist = new double [normDist.length];
       * normDist[(int)m_majority] = 1.0; if (m_evaluationMeasure == EVAL_AUC) {
       * m_evaluation.evaluateModelOnceAndRecordPrediction(normDist, instance);
       * } else { m_evaluation.evaluateModelOnce(normDist, instance); } return
       * m_majority; }
       */
    }
    // return Utils.maxIndex(tempDist);
  } else {

    // see if this one is already in the table
    if ((tempDist = (double[]) m_entries.get(thekey)) != null) {
      // copy and downdate: [0] = weighted sum of class values, [1] = sum of weights
      normDist = new double[tempDist.length];
      System.arraycopy(tempDist, 0, normDist, 0, tempDist.length);
      normDist[0] -= (instance.classValue() * instance.weight());
      normDist[1] -= instance.weight();
      if (Utils.eq(normDist[1], 0.0)) {
        // no weight remains: predict the majority value
        double[] temp = new double[1];
        temp[0] = m_majority;
        m_evaluation.evaluateModelOnce(temp, instance);
        return m_majority;
      } else {
        // predict the leave-one-out mean for this cell
        double[] temp = new double[1];
        temp[0] = normDist[0] / normDist[1];
        m_evaluation.evaluateModelOnce(temp, instance);
        return temp[0];
      }
    } else {
      throw new Error("This should never happen!");
    }
  }
  // shouldn't get here
  // return 0.0;
}