/**
  * Tests a classifier on a data set
  *
  * @param cls the classifier to test
  * @param data the data set to test on
  * @return the performance for each class
  */
 public static Map<Object, PerformanceMeasure> testDataset(Classifier cls, Dataset data) {
   Map<Object, PerformanceMeasure> out = new HashMap<Object, PerformanceMeasure>();
   for (Object o : data.classes()) {
     out.put(o, new PerformanceMeasure());
   }
   for (Instance instance : data) {
     Object prediction = cls.classify(instance);
     if (instance.classValue().equals(prediction)) { // prediction
       // ==class
       for (Object o : out.keySet()) {
         if (o.equals(instance.classValue())) {
           out.get(o).tp++;
         } else {
           out.get(o).tn++;
         }
       }
     } else { // prediction != class
       for (Object o : out.keySet()) {
         /* prediction is positive class */
         if (prediction.equals(o)) {
           out.get(o).fp++;
         }
         /* instance is positive class */
         else if (o.equals(instance.classValue())) {
           out.get(o).fn++;
         }
         /* none is positive class */
         else {
           out.get(o).tn++;
         }
       }
     }
   }
   return out;
 }
Пример #2
0
  /** Shows the default usage of the KNN algorithm. */
  public static void main(String[] args) throws Exception {

    /* Load a data set */
    Dataset data = FileHandler.loadDataset(new File(DATASET), 4, ",");
    /*
     * Contruct a KNN classifier that uses 5 neighbors to make a decision.
     */
    Classifier knn = new KNearestNeighbors(5);
    knn.buildClassifier(data);

    Classifier kdtKnn = new KDtreeKNN(5);
    kdtKnn.buildClassifier(data);

    /*
     * Load a data set for evaluation, this can be a different one, but we
     * will use the same one.
     */
    Dataset dataForClassification = FileHandler.loadDataset(new File(DATASET), 4, ",");
    /* Counters for correct and wrong predictions. */
    int correct = 0, wrong = 0;
    /* Classify all instances and check with the correct class values */
    for (Instance inst : dataForClassification) {
      Object predictedClassValue = knn.classify(inst);
      Object realClassValue = inst.classValue();
      // System.out.println("predicted=" + predictedClassValue.toString() + "
      // real="+realClassValue.toString());
      if (predictedClassValue.equals(realClassValue)) {
        correct++;
      } else {
        wrong++;
      }
    }
    System.out.println("Correct predictions  " + correct);
    System.out.println("Wrong predictions " + wrong);

    /* Performance
     *
     */
    System.out.println("Performance ...");
    Map<Object, PerformanceMeasure> pm = EvaluateDataset.testDataset(knn, dataForClassification);
    printPerfMeasure(pm);
    /*
     * Cross validation
     */
    System.out.println("Cross validation ...");
    /* Construct new cross validation instance with the KNN classifier, */
    CrossValidation cv = new CrossValidation(knn);
    /* 5-fold CV with fixed random generator */
    Map<Object, PerformanceMeasure> p0 = cv.crossValidation(data, 5, new Random(1));
    Map<Object, PerformanceMeasure> p1 = cv.crossValidation(data, 5, new Random(1));
    Map<Object, PerformanceMeasure> p2 = cv.crossValidation(data, 5, new Random(25));
    printPerfMeasure(p0);
    printPerfMeasure(p1);
    printPerfMeasure(p2);

    /*
     * Create Weka classifier
     */
    System.out.println("Weka classifier ...");
    SMO smo = new SMO();
    /* Wrap Weka classifier in bridge */
    Classifier javamlsmo = new WekaClassifier(smo);
    /* Initialize cross-validation */
    CrossValidation wekaCV = new CrossValidation(javamlsmo);
    /* Perform cross-validation */
    Map<Object, PerformanceMeasure> wekaPm = wekaCV.crossValidation(data);
    /* Output results
     * see http://en.wikipedia.org/wiki/Precision_and_recall
     */
    System.out.println("see http://en.wikipedia.org/wiki/Precision_and_recall" + wekaPm);
    printPerfMeasure(wekaPm);

    /*
     * Feature scoring
     */
    System.out.println("Feature scoring ");
    GainRatio ga = new GainRatio();
    /* Apply the algorithm to the data set */
    ga.build(data);
    /* Print out the score of each attribute */
    for (int i = 0; i < ga.noAttributes(); i++) {
      System.out.println("Attribute[" + i + "] relevance" + ga.score(i));
    }
  }