Example #1
0
 /**
  * Creates a worker that will evaluate the given classifier on one train/test split.
  *
  * @param classifier the (untrained) classifier this thread will evaluate
  * @param train training instances for this split
  * @param test test instances for this split; its size seeds the progress counter
  */
 public classifierThread(Classifier classifier, Instances train, Instances test) {
   this.classifier = classifier;
   this.train = train;
   this.test = test;
   this.totCnt = test.size();
   // Ask the coordinator for a unique id last; the other fields are plain copies.
   this.threadId = multiThreadEval.getThreadId();
 }
Example #2
0
  /**
   * Collects every TEST instance that none of the compared runs predicted correctly.
   *
   * <p>Fills {@code resultSet} (relation "all-wrong") with one (repeat, fold, rowid) row per
   * such instance and returns the number of rows recorded.
   *
   * @return number of instances misclassified by all runs in {@code run_ids}
   * @throws RuntimeException if fewer than 2 runs are available to compare
   */
  public int calculateAllWrong() {
    if (run_ids.size() < 2) {
      throw new RuntimeException("Too few runs to compare. Should be at least 2. ");
    }

    ArrayList<Attribute> attributes = new ArrayList<Attribute>();
    attributes.add(new Attribute("repeat"));
    attributes.add(new Attribute("fold"));
    attributes.add(new Attribute("rowid"));
    resultSet = new Instances("all-wrong", attributes, task_splits.numInstances());

    // Hoist attribute lookups out of the per-instance loop.
    Attribute typeAttr = task_splits.attribute("type");
    Attribute rowidAttr = task_splits.attribute("rowid");
    Attribute repeatAttr = task_splits.attribute("repeat");
    Attribute foldAttr = task_splits.attribute("fold");
    // "sample" only exists for tasks that use sampling; default to 0 otherwise.
    // (Replaces the old empty catch that swallowed the lookup failure.)
    Attribute sampleAttr = task_splits.attribute("sample");

    for (int i = 0; i < task_splits.numInstances(); ++i) {
      Instance current = task_splits.get(i);
      if (!current.stringValue(typeAttr).equals("TEST")) {
        continue; // only TEST rows carry predictions to compare
      }

      Integer row_id = (int) current.value(rowidAttr);
      Integer repeat = (int) current.value(repeatAttr);
      Integer fold = (int) current.value(foldAttr);
      Integer sample = (sampleAttr == null) ? 0 : (int) current.value(sampleAttr);

      String correctLabel = correct.get(row_id);
      int correctPredictions = 0;

      for (Integer run_id : run_ids) {
        if (predictions
            .get(run_id)
            .get(repeat)
            .get(fold)
            .get(sample)
            .get(row_id)
            .equals(correctLabel)) {
          correctPredictions += 1;
        }
      }

      // No run got this instance right -> record it.
      if (correctPredictions == 0) {
        double[] instance = {repeat, fold, row_id};
        resultSet.add(new DenseInstance(1.0, instance));
      }
    }
    return resultSet.size();
  }
  /**
   * Chooses between the supervised (cross-validated) and un-supervised learning paths.
   *
   * <p>If the training data carries a class attribute ("class"/"Class") and the training set
   * is at least as large as the test set, folds are generated for supervised evaluation;
   * otherwise the un-supervised {@code uc.autoProbClass} path is taken.
   */
  public void chooseClassifier() {
    // The class attribute is assumed to be the last one; number of attributes must be > 1.
    int classIndex = traindata.numAttributes() - 1;
    traindata.setClassIndex(classIndex);

    // FIX: the original condition started with "classIndex == traindata.numAttributes() - 1",
    // which was always true (classIndex was just assigned that value), so the else branch was
    // unreachable; additionally '&&' bound tighter than '||', so the size comparison only
    // applied to the "Class" spelling. Per the comment above and the log messages below, the
    // supervised path requires a class attribute AND a training set at least as large as the
    // test set. NOTE(review): confirm this matches the intended decision rule.
    boolean hasClassAttribute =
        traindata.attribute("class") != null || traindata.attribute("Class") != null;

    if (hasClassAttribute && traindata.size() >= testdata.size()) {
      System.out.println("class attribute found....");
      System.out.println("Initial training set is larger than the test set...." + traindata.size());

      // Go ahead to generate folds, then call classifier.
      try {
        ce.generateFolds(traindata);
      } catch (Exception ex) {
        Logger.getLogger(FileTypeEnablerAndProcessor.class.getName()).log(Level.SEVERE, null, ex);
      }
    } else {
      // No labeled instances: use an un-supervised algorithm directly; no need for the
      // cross-validation folds.
      try {
        System.out.println("class attribute not found");
        traindata.setClassIndex(classIndex);
        System.out.println("Class to predict is = " + traindata.classAttribute() + "\n");
        uc.autoProbClass(traindata);
      } catch (Exception ex) {
        Logger.getLogger(FileTypeEnablerAndProcessor.class.getName()).log(Level.SEVERE, null, ex);
      }
    }
  }
Example #4
0
  /**
   * Compares the predictions of exactly two runs and records every TEST instance on which
   * they disagree, together with which run (if any) predicted the correct label.
   *
   * <p>Fills {@code resultSet} (relation "difference") with (repeat, fold, rowid,
   * whichCorrect) rows and uploads the count via {@code openml.setupDifferences} on a
   * best-effort basis (the upload requires admin rights).
   *
   * @return the number of instances the two runs labeled differently
   * @throws RuntimeException if {@code run_ids} does not hold exactly 2 runs
   */
  public int calculateDifference() {
    if (run_ids.size() != 2) {
      // FIX: the old message claimed "Too many runs", but this also fires for fewer than 2.
      throw new RuntimeException(
          "Exactly 2 runs are required to compare, found: " + run_ids.size());
    }

    // Nominal values for the "whichCorrect" attribute: each run id, plus "none".
    List<String> values = new ArrayList<String>();
    for (Integer run : run_ids) {
      values.add(run + "");
    }
    values.add("none");

    ArrayList<Attribute> attributes = new ArrayList<Attribute>();
    attributes.add(new Attribute("repeat"));
    attributes.add(new Attribute("fold"));
    attributes.add(new Attribute("rowid"));
    attributes.add(new Attribute("whichCorrect", values));

    resultSet = new Instances("difference", attributes, task_splits.numInstances());

    // Hoist attribute lookups out of the per-instance loop.
    Attribute typeAttr = task_splits.attribute("type");
    Attribute rowidAttr = task_splits.attribute("rowid");
    Attribute repeatAttr = task_splits.attribute("repeat");
    Attribute foldAttr = task_splits.attribute("fold");
    // "sample" only exists for tasks that use sampling; default to 0 otherwise.
    // (Replaces the old empty catch that swallowed the lookup failure.)
    Attribute sampleAttr = task_splits.attribute("sample");
    Attribute whichCorrectAttr = resultSet.attribute("whichCorrect");

    for (int i = 0; i < task_splits.numInstances(); ++i) {
      Instance current = task_splits.get(i);
      if (!current.stringValue(typeAttr).equals("TEST")) {
        continue; // only TEST rows carry predictions to compare
      }

      Integer row_id = (int) current.value(rowidAttr);
      Integer repeat = (int) current.value(repeatAttr);
      Integer fold = (int) current.value(foldAttr);
      Integer sample = (sampleAttr == null) ? 0 : (int) current.value(sampleAttr);

      String label = null;
      boolean difference = false;
      String correctLabel = correct.get(row_id);
      double whichCorrect = whichCorrectAttr.indexOfValue("none");

      for (Integer run_id : run_ids) {
        String currentLabel = predictions.get(run_id).get(repeat).get(fold).get(sample).get(row_id);
        // The first run sets the reference label; a differing second label marks disagreement.
        if (label == null) {
          label = currentLabel;
        } else if (!label.equals(currentLabel)) {
          difference = true;
        }

        // Remember which run matched the ground truth (a later correct run wins).
        if (currentLabel.equals(correctLabel)) {
          whichCorrect = whichCorrectAttr.indexOfValue(run_id + "");
        }
      }

      if (difference) {
        double[] instance = {repeat, fold, row_id, whichCorrect};
        resultSet.add(new DenseInstance(1.0, instance));
      }
    }

    // Best effort: uploading the differences requires admin rights on the server.
    try {
      openml.setupDifferences(
          setup_ids.get(0), setup_ids.get(1), task_id, task_splits_size, resultSet.size());
    } catch (Exception ignored) {
      // Not fatal: the caller only needs the locally computed difference count.
    }

    return resultSet.size();
  }
    /**
     * Extracts features for the test set, evaluates the persisted ANN/SVM/NB models on it,
     * and pushes accuracy, per-class recall and timing results to the JavaFX charts and the
     * summary table. Runs on the SwingWorker background thread; chart mutation is deferred
     * to the FX application thread via {@link Platform#runLater}.
     *
     * @return always {@code null}; results are reported through UI side effects
     */
    @Override
    public Void doInBackground() {
      publish("Computing features...");
      // Side effect: writes the extracted features to testing_data.arff.
      p.getAllFeatures2(path, "testing_data");

      try {
        publish("Reading data...");

        final Instances testingdata;
        // try-with-resources: the reader is closed even if Instances(...) throws.
        try (BufferedReader reader = new BufferedReader(new FileReader("testing_data.arff"))) {
          testingdata = new Instances(reader);
        }
        // Class label is the 14th attribute (index 13).
        testingdata.setClassIndex(13);
        testingdata.randomize(new Random(1));

        Classifier ann = (Classifier) weka.core.SerializationHelper.read("mlp.model");
        publish("Evaluating ANN...");
        evalANN = new Evaluation(testingdata);
        long startTime = System.nanoTime();
        evalANN.evaluateModel(ann, testingdata);
        long runningTimeANN = (System.nanoTime() - startTime) / 1000000; // ms
        publish("Done evaluating ANN");

        publish("Evaluating SVM...");
        Classifier svm = (Classifier) weka.core.SerializationHelper.read("svm.model");
        evalSVM = new Evaluation(testingdata);
        startTime = System.nanoTime();
        evalSVM.evaluateModel(svm, testingdata);
        long runningTimeSVM = (System.nanoTime() - startTime) / 1000000; // ms
        publish("Done evaluating SVM");

        publish("Evaluating NB...");
        Classifier nb = (Classifier) weka.core.SerializationHelper.read("naivebayes.model");
        evalNB = new Evaluation(testingdata);
        startTime = System.nanoTime();
        evalNB.evaluateModel(nb, testingdata);
        long runningTimeNB = (System.nanoTime() - startTime) / 1000000; // ms
        // FIX: this message previously said "Done evaluating ANN" (copy-paste slip).
        publish("Done evaluating NB");

        // Chart updates must happen on the JavaFX application thread.
        Platform.runLater(
            new Runnable() {
              @Override
              public void run() {
                // Bar chart: accuracy (%) per classifier (series 0 = ANN, 1 = SVM, 2 = NB).
                bc.getData()
                    .get(0)
                    .getData()
                    .get(0)
                    .setYValue(evalANN.correct() / testingdata.size() * 100);
                bc.getData()
                    .get(0)
                    .getData()
                    .get(1)
                    .setYValue(evalSVM.correct() / testingdata.size() * 100);
                bc.getData()
                    .get(0)
                    .getData()
                    .get(2)
                    .setYValue(evalNB.correct() / testingdata.size() * 100);

                // Line chart: per-class recall (%) for each classifier.
                for (int i = 0; i < NUM_CLASSES; i++) {
                  lineChart.getData().get(0).getData().get(i).setYValue(evalANN.recall(i) * 100);
                  lineChart.getData().get(1).getData().get(i).setYValue(evalSVM.recall(i) * 100);
                  lineChart.getData().get(2).getData().get(i).setYValue(evalNB.recall(i) * 100);
                }
              }
            });

        panel.fillConfTable(evalSVM.confusionMatrix());

        // Summary table: row 0 holds accuracy (%), row 1 holds evaluation time (ms).
        summaryTable.setValueAt(evalANN.correct() / testingdata.size() * 100., 0, 1);
        summaryTable.setValueAt(evalSVM.correct() / testingdata.size() * 100, 0, 2);
        summaryTable.setValueAt(evalNB.correct() / testingdata.size() * 100, 0, 3);

        summaryTable.setValueAt(runningTimeANN, 1, 1);
        summaryTable.setValueAt(runningTimeSVM, 1, 2);
        summaryTable.setValueAt(runningTimeNB, 1, 3);

      } catch (Exception e1) {
        // TODO: surface the failure to the user instead of only printing the stack trace.
        e1.printStackTrace();
      }
      return null;
    }
    /**
     * Trains NB, ANN (MLP) and SVM classifiers on cross_validation_data.arff, persists each
     * model to disk, evaluates them, and publishes accuracy, per-class recall and training
     * times to the JavaFX charts and the summary table. Runs on the SwingWorker background
     * thread; chart mutation is deferred to the FX thread via {@link Platform#runLater}.
     *
     * @return always {@code null}; results are reported through UI side effects
     */
    @Override
    public Void doInBackground() {
      try {
        publish("Reading data...");
        final Instances trainingdata;
        // try-with-resources: the reader is closed even if parsing fails.
        try (BufferedReader reader =
            new BufferedReader(new FileReader("cross_validation_data.arff"))) {
          trainingdata = new Instances(reader);
        }
        // Class label is the 14th attribute (index 13).
        trainingdata.setClassIndex(13);
        trainingdata.randomize(new Random(1));

        publish("Training Naive Bayes Classifier...");
        NaiveBayes nb = new NaiveBayes();
        long startTime = System.nanoTime();
        nb.buildClassifier(trainingdata);
        double runningTimeNB = (System.nanoTime() - startTime) / 1000000; // ms
        runningTimeNB /= 1000; // -> seconds
        // Persist the Naive Bayes model for later evaluation runs.
        weka.core.SerializationHelper.write("naivebayes.model", nb);
        System.out.println("running time" + runningTimeNB);
        publish("Done training NB.\nEvaluating NB using 10-fold cross-validation...");
        evalNB = new Evaluation(trainingdata);
        evalNB.crossValidateModel(nb, trainingdata, 10, new Random(1));
        publish("Done evaluating NB.");

        MultilayerPerceptron mlp = new MultilayerPerceptron();
        mlp.setOptions(Utils.splitOptions("-L 0.3 -M 0.2 -N 500 -V 0 -S 0 -E 20 -H a"));
        publish("Training ANN...");
        startTime = System.nanoTime();
        mlp.buildClassifier(trainingdata);
        long runningTimeANN = (System.nanoTime() - startTime) / 1000000; // ms
        runningTimeANN /= 1000; // -> seconds
        // Persist the MLP model.
        weka.core.SerializationHelper.write("mlp.model", mlp);

        publish("Done training ANN.\nEvaluating ANN using 10-fold cross-validation...");
        evalANN = new Evaluation(trainingdata);
        // NOTE(review): despite the status message above, the ANN is evaluated on the
        // training data itself (crossValidateModel was left disabled here) — confirm intent.
        evalANN.evaluateModel(mlp, trainingdata);
        publish("Done evaluating ANN.");

        publish("Training SVM...");
        SMO svm = new SMO();
        startTime = System.nanoTime();
        svm.buildClassifier(trainingdata);
        long runningTimeSVM = (System.nanoTime() - startTime) / 1000000; // ms
        runningTimeSVM /= 1000; // -> seconds
        weka.core.SerializationHelper.write("svm.model", svm);
        publish("Done training SVM.\nEvaluating SVM using 10-fold cross-validation...");
        evalSVM = new Evaluation(trainingdata);
        // NOTE(review): SVM is likewise evaluated on the training data, not cross-validated.
        evalSVM.evaluateModel(svm, trainingdata);
        publish("Done evaluating SVM.");

        // Chart updates must happen on the JavaFX application thread.
        Platform.runLater(
            new Runnable() {
              @Override
              public void run() {
                // Bar chart: accuracy (%) per classifier (series 0 = ANN, 1 = SVM, 2 = NB).
                bc.getData()
                    .get(0)
                    .getData()
                    .get(0)
                    .setYValue(evalANN.correct() / trainingdata.size() * 100);
                bc.getData()
                    .get(0)
                    .getData()
                    .get(1)
                    .setYValue(evalSVM.correct() / trainingdata.size() * 100);
                bc.getData()
                    .get(0)
                    .getData()
                    .get(2)
                    .setYValue(evalNB.correct() / trainingdata.size() * 100);

                // Line chart: per-class recall (%) for each classifier.
                for (int i = 0; i < NUM_CLASSES; i++) {
                  lineChart.getData().get(0).getData().get(i).setYValue(evalANN.recall(i) * 100);
                  lineChart.getData().get(1).getData().get(i).setYValue(evalSVM.recall(i) * 100);
                  lineChart.getData().get(2).getData().get(i).setYValue(evalNB.recall(i) * 100);
                }
              }
            });

        panel.fillConfTable(evalSVM.confusionMatrix());

        // Summary table: row 0 holds accuracy (%), row 1 holds training time (s).
        summaryTable.setValueAt(evalANN.correct() / trainingdata.size() * 100., 0, 1);
        summaryTable.setValueAt(evalSVM.correct() / trainingdata.size() * 100, 0, 2);
        summaryTable.setValueAt(evalNB.correct() / trainingdata.size() * 100, 0, 3);

        summaryTable.setValueAt(runningTimeANN, 1, 1);
        summaryTable.setValueAt(runningTimeSVM, 1, 2);
        summaryTable.setValueAt(runningTimeNB, 1, 3);

      } catch (Exception e1) {
        // TODO: surface the failure to the user instead of only printing the stack trace.
        e1.printStackTrace();
      }
      return null;
    }