Exemple #1
0
  private static void evaluateClassifier(Classifier c, Instances trainData, Instances testData)
      throws Exception {
    System.err.println(
        "INFO: Starting split validation to predict '"
            + trainData.classAttribute().name()
            + "' using '"
            + c.getClass().getCanonicalName()
            + ":"
            + Arrays.toString(c.getOptions())
            + "' (#train="
            + trainData.numInstances()
            + ",#test="
            + testData.numInstances()
            + ") ...");

    if (trainData.classIndex() < 0) throw new IllegalStateException("class attribute not set");

    c.buildClassifier(trainData);
    Evaluation eval = new Evaluation(testData);
    eval.useNoPriors();
    double[] predictions = eval.evaluateModel(c, testData);

    System.out.println(eval.toClassDetailsString());
    System.out.println(eval.toSummaryString("\nResults\n======\n", false));

    // write predictions to file
    {
      System.err.println("INFO: Writing predictions to file ...");
      Writer out = new FileWriter("prediction.trec");
      writePredictionsTrecEval(predictions, testData, 0, trainData.classIndex(), out);
      out.close();
    }

    // write predicted distributions to CSV
    {
      System.err.println("INFO: Writing predicted distributions to CSV ...");
      Writer out = new FileWriter("predicted_distribution.csv");
      writePredictedDistributions(c, testData, 0, out);
      out.close();
    }
  }
  /**
   * Accept a classifier to be evaluated.
   *
   * @param ce a <code>BatchClassifierEvent</code> value
   */
  public void acceptClassifier(BatchClassifierEvent ce) {
    if (ce.getTestSet() == null || ce.getTestSet().isStructureOnly()) {
      return; // can't evaluate empty/non-existent test instances
    }

    Classifier classifier = ce.getClassifier();

    try {
      if (ce.getGroupIdentifier() != m_currentBatchIdentifier) {
        if (m_setsComplete > 0) {
          if (m_logger != null) {
            m_logger.statusMessage(
                statusMessagePrefix() + "BUSY. Can't accept data " + "at this time.");
            m_logger.logMessage(
                "[ClassifierPerformanceEvaluator] "
                    + statusMessagePrefix()
                    + " BUSY. Can't accept data at this time.");
          }
          return;
        }
        if (ce.getTrainSet().getDataSet() == null
            || ce.getTrainSet().getDataSet().numInstances() == 0) {
          // we have no training set to estimate majority class
          // or mean of target from
          Evaluation eval = new Evaluation(ce.getTestSet().getDataSet());
          m_PlotInstances = ExplorerDefaults.getClassifierErrorsPlotInstances();
          m_PlotInstances.setInstances(ce.getTestSet().getDataSet());
          m_PlotInstances.setClassifier(ce.getClassifier());
          m_PlotInstances.setClassIndex(ce.getTestSet().getDataSet().classIndex());
          m_PlotInstances.setEvaluation(eval);

          eval =
              adjustForInputMappedClassifier(
                  eval, ce.getClassifier(), ce.getTestSet().getDataSet(), m_PlotInstances);
          eval.useNoPriors();
          m_eval = new AggregateableEvaluation(eval);
        } else {
          // we can set up with the training set here
          Evaluation eval = new Evaluation(ce.getTrainSet().getDataSet());
          m_PlotInstances = ExplorerDefaults.getClassifierErrorsPlotInstances();
          m_PlotInstances.setInstances(ce.getTrainSet().getDataSet());
          m_PlotInstances.setClassifier(ce.getClassifier());
          m_PlotInstances.setClassIndex(ce.getTestSet().getDataSet().classIndex());
          m_PlotInstances.setEvaluation(eval);

          eval =
              adjustForInputMappedClassifier(
                  eval, ce.getClassifier(), ce.getTrainSet().getDataSet(), m_PlotInstances);
          m_eval = new AggregateableEvaluation(eval);
        }

        m_PlotInstances.setUp();

        m_currentBatchIdentifier = ce.getGroupIdentifier();
        m_setsComplete = 0;

        m_aggregatedPlotInstances = null;

        String msg =
            "[ClassifierPerformanceEvaluator] "
                + statusMessagePrefix()
                + " starting executor pool ("
                + getExecutionSlots()
                + " slots)...";
        // start the execution pool
        if (m_executorPool == null) {
          startExecutorPool();
        }
        m_tasks = new ArrayList<EvaluationTask>();

        if (m_logger != null) {
          m_logger.logMessage(msg);
        } else {
          System.out.println(msg);
        }
      }

      // if m_tasks == null then we've been stopped
      if (m_setsComplete < ce.getMaxSetNumber() && m_tasks != null) {
        EvaluationTask newTask =
            new EvaluationTask(
                classifier,
                ce.getTrainSet().getDataSet(),
                ce.getTestSet().getDataSet(),
                ce.getSetNumber(),
                ce.getMaxSetNumber());
        String msg =
            "[ClassifierPerformanceEvaluator] "
                + statusMessagePrefix()
                + " scheduling "
                + " evaluation of fold "
                + ce.getSetNumber()
                + " for execution...";
        if (m_logger != null) {
          m_logger.logMessage(msg);
        } else {
          System.out.println(msg);
        }
        m_tasks.add(newTask);
        m_executorPool.execute(newTask);
      }
    } catch (Exception ex) {
      // stop everything
      stop();
    }
  }
  /**
   * Accepts and processes a classifier encapsulated in an incremental classifier event
   *
   * @param ce an <code>IncrementalClassifierEvent</code> value
   */
  @Override
  public void acceptClassifier(final IncrementalClassifierEvent ce) {
    try {
      if (ce.getStatus() == IncrementalClassifierEvent.NEW_BATCH) {
        m_throughput = new StreamThroughput(statusMessagePrefix());
        m_throughput.setSamplePeriod(m_statusFrequency);

        // m_eval = new Evaluation(ce.getCurrentInstance().dataset());
        m_eval = new Evaluation(ce.getStructure());
        m_eval.useNoPriors();

        m_dataLegend = new Vector();
        m_reset = true;
        m_dataPoint = new double[0];
        Instances inst = ce.getStructure();
        System.err.println("NEW BATCH");
        m_instanceCount = 0;

        if (m_windowSize > 0) {
          m_window = new LinkedList<Instance>();
          m_windowEval = new Evaluation(ce.getStructure());
          m_windowEval.useNoPriors();
          m_windowedPreds = new LinkedList<double[]>();

          if (m_logger != null) {
            m_logger.logMessage(
                statusMessagePrefix()
                    + "[IncrementalClassifierEvaluator] Chart output using windowed "
                    + "evaluation over "
                    + m_windowSize
                    + " instances");
          }
        }

        /*
         * if (m_logger != null) { m_logger.statusMessage(statusMessagePrefix()
         * + "IncrementalClassifierEvaluator: started processing...");
         * m_logger.logMessage(statusMessagePrefix() +
         * " [IncrementalClassifierEvaluator]" + statusMessagePrefix() +
         * " started processing..."); }
         */
      } else {
        Instance inst = ce.getCurrentInstance();
        if (inst != null) {
          m_throughput.updateStart();
          m_instanceCount++;
          // if (inst.attribute(inst.classIndex()).isNominal()) {
          double[] dist = ce.getClassifier().distributionForInstance(inst);
          double pred = 0;
          if (!inst.isMissing(inst.classIndex())) {
            if (m_outputInfoRetrievalStats) {
              // store predictions so AUC etc can be output.
              m_eval.evaluateModelOnceAndRecordPrediction(dist, inst);
            } else {
              m_eval.evaluateModelOnce(dist, inst);
            }

            if (m_windowSize > 0) {

              m_windowEval.evaluateModelOnce(dist, inst);
              m_window.addFirst(inst);
              m_windowedPreds.addFirst(dist);

              if (m_instanceCount > m_windowSize) {
                // "forget" the oldest prediction
                Instance oldest = m_window.removeLast();

                double[] oldDist = m_windowedPreds.removeLast();
                oldest.setWeight(-oldest.weight());
                m_windowEval.evaluateModelOnce(oldDist, oldest);
                oldest.setWeight(-oldest.weight());
              }
            }
          } else {
            pred = ce.getClassifier().classifyInstance(inst);
          }
          if (inst.classIndex() >= 0) {
            // need to check that the class is not missing
            if (inst.attribute(inst.classIndex()).isNominal()) {
              if (!inst.isMissing(inst.classIndex())) {
                if (m_dataPoint.length < 2) {
                  m_dataPoint = new double[3];
                  m_dataLegend.addElement("Accuracy");
                  m_dataLegend.addElement("RMSE (prob)");
                  m_dataLegend.addElement("Kappa");
                }
                // int classV = (int) inst.value(inst.classIndex());

                if (m_windowSize > 0) {
                  m_dataPoint[1] = m_windowEval.rootMeanSquaredError();
                  m_dataPoint[2] = m_windowEval.kappa();
                } else {
                  m_dataPoint[1] = m_eval.rootMeanSquaredError();
                  m_dataPoint[2] = m_eval.kappa();
                }
                // int maxO = Utils.maxIndex(dist);
                // if (maxO == classV) {
                // dist[classV] = -1;
                // maxO = Utils.maxIndex(dist);
                // }
                // m_dataPoint[1] -= dist[maxO];
              } else {
                if (m_dataPoint.length < 1) {
                  m_dataPoint = new double[1];
                  m_dataLegend.addElement("Confidence");
                }
              }
              double primaryMeasure = 0;
              if (!inst.isMissing(inst.classIndex())) {
                if (m_windowSize > 0) {
                  primaryMeasure = 1.0 - m_windowEval.errorRate();
                } else {
                  primaryMeasure = 1.0 - m_eval.errorRate();
                }
              } else {
                // record confidence as the primary measure
                // (another possibility would be entropy of
                // the distribution, or perhaps average
                // confidence)
                primaryMeasure = dist[Utils.maxIndex(dist)];
              }
              // double [] dataPoint = new double[1];
              m_dataPoint[0] = primaryMeasure;
              // double min = 0; double max = 100;
              /*
               * ChartEvent e = new
               * ChartEvent(IncrementalClassifierEvaluator.this, m_dataLegend,
               * min, max, dataPoint);
               */

              m_ce.setLegendText(m_dataLegend);
              m_ce.setMin(0);
              m_ce.setMax(1);
              m_ce.setDataPoint(m_dataPoint);
              m_ce.setReset(m_reset);
              m_reset = false;
            } else {
              // numeric class
              if (m_dataPoint.length < 1) {
                m_dataPoint = new double[1];
                if (inst.isMissing(inst.classIndex())) {
                  m_dataLegend.addElement("Prediction");
                } else {
                  m_dataLegend.addElement("RMSE");
                }
              }
              if (!inst.isMissing(inst.classIndex())) {
                double update;
                if (!inst.isMissing(inst.classIndex())) {
                  if (m_windowSize > 0) {
                    update = m_windowEval.rootMeanSquaredError();
                  } else {
                    update = m_eval.rootMeanSquaredError();
                  }
                } else {
                  update = pred;
                }
                m_dataPoint[0] = update;
                if (update > m_max) {
                  m_max = update;
                }
                if (update < m_min) {
                  m_min = update;
                }
              }

              m_ce.setLegendText(m_dataLegend);
              m_ce.setMin((inst.isMissing(inst.classIndex()) ? m_min : 0));
              m_ce.setMax(m_max);
              m_ce.setDataPoint(m_dataPoint);
              m_ce.setReset(m_reset);
              m_reset = false;
            }
            notifyChartListeners(m_ce);
          }
          m_throughput.updateEnd(m_logger);
        }

        if (ce.getStatus() == IncrementalClassifierEvent.BATCH_FINISHED || inst == null) {
          if (m_logger != null) {
            m_logger.logMessage(
                "[IncrementalClassifierEvaluator]"
                    + statusMessagePrefix()
                    + " Finished processing.");
          }
          m_throughput.finished(m_logger);

          // save memory if using windowed evaluation for charting
          m_windowEval = null;
          m_window = null;
          m_windowedPreds = null;

          if (m_textListeners.size() > 0) {
            String textTitle = ce.getClassifier().getClass().getName();
            textTitle = textTitle.substring(textTitle.lastIndexOf('.') + 1, textTitle.length());
            String results =
                "=== Performance information ===\n\n"
                    + "Scheme:   "
                    + textTitle
                    + "\n"
                    + "Relation: "
                    + m_eval.getHeader().relationName()
                    + "\n\n"
                    + m_eval.toSummaryString();
            if (m_eval.getHeader().classIndex() >= 0
                && m_eval.getHeader().classAttribute().isNominal()
                && (m_outputInfoRetrievalStats)) {
              results += "\n" + m_eval.toClassDetailsString();
            }

            if (m_eval.getHeader().classIndex() >= 0
                && m_eval.getHeader().classAttribute().isNominal()) {
              results += "\n" + m_eval.toMatrixString();
            }
            textTitle = "Results: " + textTitle;
            TextEvent te = new TextEvent(this, results, textTitle);
            notifyTextListeners(te);
          }
        }
      }
    } catch (Exception ex) {
      if (m_logger != null) {
        m_logger.logMessage(
            "[IncrementalClassifierEvaluator]"
                + statusMessagePrefix()
                + " Error processing prediction "
                + ex.getMessage());
        m_logger.statusMessage(
            statusMessagePrefix() + "ERROR: problem processing prediction (see log for details)");
      }
      ex.printStackTrace();
      stop();
    }
  }
    public void execute() {
      if (m_stopped) {
        return;
      }

      if (m_logger != null) {
        m_logger.statusMessage(statusMessagePrefix() + "Evaluating (" + m_setNum + ")...");
        m_visual.setAnimated();
      }
      try {

        ClassifierErrorsPlotInstances plotInstances =
            ExplorerDefaults.getClassifierErrorsPlotInstances();
        Evaluation eval = null;

        if (m_trainData == null || m_trainData.numInstances() == 0) {
          eval = new Evaluation(m_testData);
          plotInstances.setInstances(m_testData);
          plotInstances.setClassifier(m_classifier);
          plotInstances.setClassIndex(m_testData.classIndex());
          plotInstances.setEvaluation(eval);
          eval = adjustForInputMappedClassifier(eval, m_classifier, m_testData, plotInstances);

          eval.useNoPriors();
        } else {
          eval = new Evaluation(m_trainData);
          plotInstances.setInstances(m_trainData);
          plotInstances.setClassifier(m_classifier);
          plotInstances.setClassIndex(m_trainData.classIndex());
          plotInstances.setEvaluation(eval);
          eval = adjustForInputMappedClassifier(eval, m_classifier, m_trainData, plotInstances);
        }

        plotInstances.setUp();

        for (int i = 0; i < m_testData.numInstances(); i++) {
          if (m_stopped) {
            break;
          }
          Instance temp = m_testData.instance(i);
          plotInstances.process(temp, m_classifier, eval);
        }

        if (m_stopped) {
          return;
        }

        aggregateEvalTask(eval, m_classifier, m_testData, plotInstances, m_setNum, m_maxSetNum);

      } catch (Exception ex) {
        ClassifierPerformanceEvaluator.this.stop(); // stop all processing
        if (m_logger != null) {
          m_logger.logMessage(
              "[ClassifierPerformanceEvaluator] "
                  + statusMessagePrefix()
                  + " problem evaluating classifier. "
                  + ex.getMessage());
        }
        ex.printStackTrace();
      }
    }