// Example #1
// 0
  /**
   * Runs one experiment end to end: parses the command line, builds the requested learner, loads
   * the ARFF data set, optionally normalizes it, prints summary statistics and then evaluates the
   * learner with the requested evaluation method.
   *
   * <p>The last column of the data set is used as the label; all preceding columns are used as
   * features.
   *
   * <p>Recognized evaluation methods:
   *
   * <ul>
   *   <li>{@code training} - train and measure accuracy on the full data set
   *   <li>{@code static} - train on the data set, test on a second ARFF file whose name is the
   *       evaluation parameter
   *   <li>{@code random} - shuffle, then hold out the fraction given by the evaluation parameter
   *       for testing
   *   <li>{@code cross} - cross-validation with the number of folds given by the evaluation
   *       parameter
   * </ul>
   *
   * @param args command-line arguments as interpreted by {@code ArgParser}, for example {@code -L
   *     baseline -A data/iris.arff -E cross 10 -N}
   * @throws IllegalArgumentException if the evaluation method is not recognized, the hold-out
   *     fraction is not within [0, 1] (including NaN), or the fold count is not positive
   * @throws Exception any other failure propagated from argument parsing, file loading, training
   *     or accuracy measurement
   */
  public void run(String[] args) throws Exception {
    // Unseeded on purpose so that every run shuffles differently. For reproducible debugging
    // runs, seed the generator instead, e.g. new Random(1234).
    Random rand = new Random();

    // Parse the command line arguments
    ArgParser parser = new ArgParser(args);
    String fileName = parser.getARFF(); // File specified by the user
    String learnerName = parser.getLearner(); // Learning algorithm specified by the user
    String evalMethod = parser.getEvaluation(); // Evaluation method specified by the user
    String evalParameter = parser.getEvalParameter(); // Evaluation parameter specified by the user
    boolean printConfusionMatrix = parser.getVerbose();
    boolean normalize = parser.getNormalize();

    // Build the learner before touching the file so a bad learner name fails fast
    SupervisedLearner learner = getLearner(learnerName, rand);

    // Load the ARFF file
    Matrix data = new Matrix();
    data.loadArff(fileName);

    if (normalize) {
      System.out.println("Using normalized data\n");
      data.normalize();
    }

    // Print some stats
    System.out.println();
    System.out.println("Dataset name: " + fileName);
    System.out.println("Number of instances: " + data.rows());
    System.out.println("Number of attributes: " + data.cols());
    System.out.println("Learning algorithm: " + learnerName);
    System.out.println("Evaluation method: " + evalMethod);
    System.out.println();

    // Constant-first equals() keeps a missing (null) evaluation method from raising a bare
    // NullPointerException; it falls through to the descriptive error below instead.
    if ("training".equals(evalMethod)) {
      evaluateOnTrainingSet(learner, data, printConfusionMatrix);
    } else if ("static".equals(evalMethod)) {
      evaluateOnSeparateTestSet(learner, data, evalParameter, normalize, printConfusionMatrix);
    } else if ("random".equals(evalMethod)) {
      evaluateOnRandomHoldOut(learner, data, evalParameter, rand, printConfusionMatrix);
    } else if ("cross".equals(evalMethod)) {
      evaluateWithCrossValidation(learner, data, evalParameter, rand);
    } else {
      throw new IllegalArgumentException("Unknown evaluation method: " + evalMethod);
    }
  }

  /** Trains on the whole data set and reports accuracy on that same data. */
  private void evaluateOnTrainingSet(
      SupervisedLearner learner, Matrix data, boolean printConfusionMatrix) throws Exception {
    System.out.println("Calculating accuracy on training set...");
    System.out.println("# cols: " + data.cols());
    Matrix features = featuresOf(data, 0, data.rows());
    Matrix labels = labelsOf(data, 0, data.rows());
    Matrix confusion = new Matrix();
    long trainMillis = trainTimed(learner, features, labels);
    System.out.println("Time to train (in seconds): " + trainMillis / 1000.0);
    double accuracy = learner.measureAccuracy(features, labels, confusion);
    System.out.println("Training set accuracy: " + accuracy);
    if (printConfusionMatrix) {
      reportConfusion(confusion);
    }
  }

  /** Trains on {@code data} and reports accuracy on the ARFF file named by {@code testFile}. */
  private void evaluateOnSeparateTestSet(
      SupervisedLearner learner,
      Matrix data,
      String testFile,
      boolean normalize,
      boolean printConfusionMatrix)
      throws Exception {
    Matrix testData = new Matrix();
    testData.loadArff(testFile);
    if (normalize) {
      // KNOWN LIMITATION: the test set is normalized on its own, so it may be scaled differently
      // from the training data. It should reuse the training set's ranges for normalization.
      testData.normalize();
    }

    System.out.println("Calculating accuracy on separate test set...");
    System.out.println("Test set name: " + testFile);
    System.out.println("Number of test instances: " + testData.rows());
    Matrix features = featuresOf(data, 0, data.rows());
    Matrix labels = labelsOf(data, 0, data.rows());
    long trainMillis = trainTimed(learner, features, labels);
    System.out.println("Time to train (in seconds): " + trainMillis / 1000.0);
    double trainAccuracy = learner.measureAccuracy(features, labels, null);
    System.out.println("Training set accuracy: " + trainAccuracy);
    Matrix testFeatures = featuresOf(testData, 0, testData.rows());
    Matrix testLabels = labelsOf(testData, 0, testData.rows());
    Matrix confusion = new Matrix();
    double testAccuracy = learner.measureAccuracy(testFeatures, testLabels, confusion);
    System.out.println("Test set accuracy: " + testAccuracy);
    if (printConfusionMatrix) {
      reportConfusion(confusion);
    }
  }

  /**
   * Shuffles {@code data}, trains on the leading rows and tests on the remaining rows.
   *
   * @param evalParameter fraction of the rows to hold out for testing, as text
   */
  private void evaluateOnRandomHoldOut(
      SupervisedLearner learner,
      Matrix data,
      String evalParameter,
      Random rand,
      boolean printConfusionMatrix)
      throws Exception {
    System.out.println("Calculating accuracy on a random hold-out set...");
    double testPercent = Double.parseDouble(evalParameter);
    double trainPercent = 1 - testPercent;
    // Written as a negated range test so that NaN (which parseDouble accepts) is rejected too;
    // every ordered comparison with NaN is false.
    if (!(trainPercent >= 0 && trainPercent <= 1)) {
      throw new IllegalArgumentException(
          "Percentage for random evaluation must be between 0 and 1");
    }
    System.out.println("Percentage used for training: " + trainPercent);
    System.out.println("Percentage used for testing: " + testPercent);
    data.shuffle(rand);
    int trainSize = (int) (trainPercent * data.rows());
    int testSize = data.rows() - trainSize;
    Matrix trainFeatures = featuresOf(data, 0, trainSize);
    Matrix trainLabels = labelsOf(data, 0, trainSize);
    Matrix testFeatures = featuresOf(data, trainSize, testSize);
    Matrix testLabels = labelsOf(data, trainSize, testSize);
    long trainMillis = trainTimed(learner, trainFeatures, trainLabels);
    System.out.println("Time to train (in seconds): " + trainMillis / 1000.0);
    double trainAccuracy = learner.measureAccuracy(trainFeatures, trainLabels, null);
    System.out.println("Training set accuracy: " + trainAccuracy);
    Matrix confusion = new Matrix();
    double testAccuracy = learner.measureAccuracy(testFeatures, testLabels, confusion);
    System.out.println("Test set accuracy: " + testAccuracy);
    if (printConfusionMatrix) {
      reportConfusion(confusion);
    }
  }

  /**
   * Shuffles {@code data} and runs cross-validation, printing per-fold accuracy followed by the
   * average training time and the mean accuracy.
   *
   * @param evalParameter number of folds, as text
   */
  private void evaluateWithCrossValidation(
      SupervisedLearner learner, Matrix data, String evalParameter, Random rand) throws Exception {
    System.out.println("Calculating accuracy using cross-validation...");
    int folds = Integer.parseInt(evalParameter);
    if (folds <= 0) {
      throw new IllegalArgumentException("Number of folds must be greater than 0");
    }
    System.out.println("Number of folds: " + folds);
    final int reps = 1;
    int labelCol = data.cols() - 1;
    double sumAccuracy = 0.0;
    long totalTrainMillis = 0;
    for (int rep = 0; rep < reps; rep++) {
      data.shuffle(rand);
      int rows = data.rows();
      for (int fold = 0; fold < folds; fold++) {
        // Fold boundaries are computed in long arithmetic so fold * rows cannot overflow int.
        int begin = (int) ((long) fold * rows / folds);
        int end = (int) ((long) (fold + 1) * rows / folds);
        // Rows [begin, end) form the test fold; rows before and after it form the training set.
        Matrix trainFeatures = featuresOf(data, 0, begin);
        Matrix trainLabels = labelsOf(data, 0, begin);
        Matrix testFeatures = featuresOf(data, begin, end - begin);
        Matrix testLabels = labelsOf(data, begin, end - begin);
        trainFeatures.add(data, end, 0, rows - end);
        trainLabels.add(data, end, labelCol, rows - end);
        totalTrainMillis += trainTimed(learner, trainFeatures, trainLabels);
        double accuracy = learner.measureAccuracy(testFeatures, testLabels, null);
        sumAccuracy += accuracy;
        System.out.println("Rep=" + rep + ", Fold=" + fold + ", Accuracy=" + accuracy);
      }
    }
    double meanTrainMillis = (double) totalTrainMillis / (reps * folds);
    System.out.println("Average time to train (in seconds): " + meanTrainMillis / 1000.0);
    System.out.println("Mean accuracy=" + (sumAccuracy / (reps * folds)));
  }

  /** Copies every column except the last for {@code rowCount} rows starting at {@code rowStart}. */
  private Matrix featuresOf(Matrix source, int rowStart, int rowCount) throws Exception {
    return new Matrix(source, rowStart, 0, rowCount, source.cols() - 1);
  }

  /** Copies only the last (label) column for {@code rowCount} rows starting at {@code rowStart}. */
  private Matrix labelsOf(Matrix source, int rowStart, int rowCount) throws Exception {
    return new Matrix(source, rowStart, source.cols() - 1, rowCount, 1);
  }

  /** Trains the learner and returns the wall-clock training time in milliseconds. */
  private long trainTimed(SupervisedLearner learner, Matrix features, Matrix labels)
      throws Exception {
    long start = System.currentTimeMillis();
    learner.train(features, labels);
    return System.currentTimeMillis() - start;
  }

  /** Prints the confusion matrix framed by its heading and trailing blank lines. */
  private void reportConfusion(Matrix confusion) throws Exception {
    System.out.println("\nConfusion matrix: (Row=target value, Col=predicted value)");
    confusion.print();
    System.out.println("\n");
  }