/**
 * Worker that evaluates a single classifier on one train/test split.
 * The thread id is obtained from the shared multiThreadEval dispenser;
 * totCnt caches the number of test instances to be scored.
 */
public classifierThread(Classifier classifier, Instances train, Instances test) {
  this.threadId = multiThreadEval.getThreadId();
  this.classifier = classifier;
  this.train = train;
  this.test = test;
  this.totCnt = test.size();
}
public int calculateAllWrong() { if (run_ids.size() < 2) { throw new RuntimeException("Too few runs to compare. Should be at least 2. "); } ArrayList<Attribute> attributes = new ArrayList<Attribute>(); attributes.add(new Attribute("repeat")); attributes.add(new Attribute("fold")); attributes.add(new Attribute("rowid")); resultSet = new Instances("all-wrong", attributes, task_splits.numInstances()); for (int i = 0; i < task_splits.numInstances(); ++i) { Instance current = task_splits.get(i); boolean test = current.stringValue(task_splits.attribute("type")).equals("TEST"); if (!test) { continue; } Integer row_id = (int) current.value(task_splits.attribute("rowid")); Integer repeat = (int) current.value(task_splits.attribute("repeat")); Integer fold = (int) current.value(task_splits.attribute("fold")); Integer sample = 0; try { sample = (int) current.value(task_splits.attribute("sample")); } catch (Exception e) { } String correctLabel = correct.get(row_id); Integer correctPredictions = 0; for (Integer run_id : run_ids) { // System.out.println(predictions.get(run_id)); // System.out.println(repeat + "," + fold + "," + sample + "," + row_id); if (predictions .get(run_id) .get(repeat) .get(fold) .get(sample) .get(row_id) .equals(correctLabel)) { correctPredictions += 1; } } if (correctPredictions == 0) { double[] instance = {repeat, fold, row_id}; resultSet.add(new DenseInstance(1.0, instance)); } } return resultSet.size(); }
public void chooseClassifier() { int classIndex = 0; // number of attributes must be greater than 1 /** * We can use either a supervised or an un-supervised algorithm if a class attribute already * exists in the dataset (meaning some labeled instances exists), depending on the size of the * training set, the decision is taken. */ classIndex = traindata.numAttributes() - 1; traindata.setClassIndex(classIndex); if (classIndex == traindata.numAttributes() - 1 || traindata.attribute("class") != null || traindata.attribute("Class") != null && traindata.size() >= testdata.size()) { System.out.println("class attribute found...."); System.out.println("Initial training set is larger than the test set...." + traindata.size()); // Go ahead to generate folds, then call classifier try { ce.generateFolds(traindata); } catch (Exception ex) { Logger.getLogger(FileTypeEnablerAndProcessor.class.getName()).log(Level.SEVERE, null, ex); } } /** * When there is no class attribute to show labeled instances exists then use an un-supervised * algorithm straight; no need for the cross-validation folds. */ else { try { System.out.println("class attribute not found"); classIndex = traindata.numAttributes() - 1; traindata.setClassIndex(classIndex); System.out.println("Class to predict is = " + traindata.classAttribute() + "\n"); uc.autoProbClass(traindata); } catch (Exception ex) { Logger.getLogger(FileTypeEnablerAndProcessor.class.getName()).log(Level.SEVERE, null, ex); } } }
/**
 * Builds the "difference" result set: every TEST row on which the two registered runs
 * disagree, tagging which run (if any) predicted the correct label. Finally attempts a
 * best-effort upload of the differences to OpenML.
 *
 * @return the number of rows on which the two runs disagree
 * @throws RuntimeException if {@code run_ids} does not contain exactly two entries
 */
public int calculateDifference() {
  if (run_ids.size() != 2) {
    throw new RuntimeException("Too many runs to compare. Should be 2. ");
  }
  // Nominal domain for the "whichCorrect" attribute: each run id, plus "none".
  List<String> values = new ArrayList<String>();
  for (Integer run : run_ids) {
    values.add(run + "");
  }
  values.add("none");
  ArrayList<Attribute> attributes = new ArrayList<Attribute>();
  attributes.add(new Attribute("repeat"));
  attributes.add(new Attribute("fold"));
  attributes.add(new Attribute("rowid"));
  attributes.add(new Attribute("whichCorrect", values));
  resultSet = new Instances("difference", attributes, task_splits.numInstances());
  for (int i = 0; i < task_splits.numInstances(); ++i) {
    Instance current = task_splits.get(i);
    // Only TEST rows carry predictions to compare.
    boolean test = current.stringValue(task_splits.attribute("type")).equals("TEST");
    if (!test) {
      continue;
    }
    Integer row_id = (int) current.value(task_splits.attribute("rowid"));
    Integer repeat = (int) current.value(task_splits.attribute("repeat"));
    Integer fold = (int) current.value(task_splits.attribute("fold"));
    Integer sample = 0;
    try {
      sample = (int) current.value(task_splits.attribute("sample"));
    } catch (Exception e) {
      // Task has no "sample" attribute; default to sample 0.
    }
    String label = null;
    boolean difference = false;
    String correctLabel = correct.get(row_id);
    // Default tag: neither run predicted this row correctly.
    double whichCorrect = resultSet.attribute("whichCorrect").indexOfValue("none");
    for (Integer run_id : run_ids) {
      String currentLabel = predictions.get(run_id).get(repeat).get(fold).get(sample).get(row_id);
      // System.out.println(predictions.get(run_id));
      // System.out.println(repeat + "," + fold + "," + sample + "," + row_id);
      // check for difference
      if (label == null) {
        label = currentLabel;
      } else if (label.equals(currentLabel) == false) {
        difference = true;
      }
      // check for correct label
      // NOTE: if both runs are correct, the run iterated last wins the tag.
      if (currentLabel.equals(correctLabel)) {
        whichCorrect = resultSet.attribute("whichCorrect").indexOfValue(run_id + "");
      }
    }
    if (difference) {
      double[] instance = {repeat, fold, row_id, whichCorrect};
      resultSet.add(new DenseInstance(1.0, instance));
    }
  }
  try {
    // put it in try catch, as admin rights are required.
    openml.setupDifferences(
        setup_ids.get(0), setup_ids.get(1), task_id, task_splits_size, resultSet.size());
  } catch (Exception e) {
    // Best-effort upload; ignored when the caller lacks admin rights.
  }
  return resultSet.size();
}
@Override public Void doInBackground() { BufferedReader reader; publish("Computing features..."); int testingSamples = p.getAllFeatures2(path, "testing_data"); try { publish("Reading data..."); reader = new BufferedReader(new FileReader("testing_data.arff")); final Instances testingdata = new Instances(reader); reader.close(); // setting class attribute testingdata.setClassIndex(13); testingdata.randomize(new Random(1)); long startTime = System.nanoTime(); Classifier ann = (Classifier) weka.core.SerializationHelper.read("mlp.model"); publish("Evaluating ANN..."); evalANN = new Evaluation(testingdata); startTime = System.nanoTime(); evalANN.evaluateModel(ann, testingdata); long runningTimeANN = (System.nanoTime() - startTime) / 1000000; // runningTimeANN /= 100; publish("Done evaluating ANN"); publish("Evaluating SVM..."); Classifier svm = (Classifier) weka.core.SerializationHelper.read("svm.model"); evalSVM = new Evaluation(testingdata); startTime = System.nanoTime(); evalSVM.evaluateModel(svm, testingdata); long runningTimeSVM = (System.nanoTime() - startTime) / 1000000; // runningTimeSVM /= 100; publish("Done evaluating SVM"); publish("Evaluating NB..."); Classifier nb = (Classifier) weka.core.SerializationHelper.read("naivebayes.model"); evalNB = new Evaluation(testingdata); startTime = System.nanoTime(); evalNB.evaluateModel(nb, testingdata); long runningTimeNB = (System.nanoTime() - startTime) / 1000000; // runningTimeNB /= 100; publish("Done evaluating ANN"); Platform.runLater( new Runnable() { @Override public void run() { bc.getData() .get(0) .getData() .get(0) .setYValue(evalANN.correct() / testingdata.size() * 100); bc.getData() .get(0) .getData() .get(1) .setYValue(evalSVM.correct() / testingdata.size() * 100); bc.getData() .get(0) .getData() .get(2) .setYValue(evalNB.correct() / testingdata.size() * 100); for (int i = 0; i < NUM_CLASSES; i++) { lineChart.getData().get(0).getData().get(i).setYValue(evalANN.recall(i) * 100); 
lineChart.getData().get(1).getData().get(i).setYValue(evalSVM.recall(i) * 100); lineChart.getData().get(2).getData().get(i).setYValue(evalNB.recall(i) * 100); } } }); panel.fillConfTable(evalSVM.confusionMatrix()); summaryTable.setValueAt(evalANN.correct() / testingdata.size() * 100., 0, 1); summaryTable.setValueAt(evalSVM.correct() / testingdata.size() * 100, 0, 2); summaryTable.setValueAt(evalNB.correct() / testingdata.size() * 100, 0, 3); summaryTable.setValueAt(runningTimeANN, 1, 1); summaryTable.setValueAt(runningTimeSVM, 1, 2); summaryTable.setValueAt(runningTimeNB, 1, 3); } catch (Exception e1) { // TODO Auto-generated catch block e1.printStackTrace(); } return null; }
@Override public Void doInBackground() { BufferedReader reader; try { publish("Reading data..."); reader = new BufferedReader(new FileReader("cross_validation_data.arff")); final Instances trainingdata = new Instances(reader); reader.close(); // setting class attribute trainingdata.setClassIndex(13); trainingdata.randomize(new Random(1)); long startTime = System.nanoTime(); publish("Training Naive Bayes Classifier..."); NaiveBayes nb = new NaiveBayes(); startTime = System.nanoTime(); nb.buildClassifier(trainingdata); double runningTimeNB = (System.nanoTime() - startTime) / 1000000; runningTimeNB /= 1000; // saving the naive bayes model weka.core.SerializationHelper.write("naivebayes.model", nb); System.out.println("running time" + runningTimeNB); publish("Done training NB.\nEvaluating NB using 10-fold cross-validation..."); evalNB = new Evaluation(trainingdata); evalNB.crossValidateModel(nb, trainingdata, 10, new Random(1)); publish("Done evaluating NB."); // System.out.println(evalNB.toSummaryString("\nResults for Naive Bayes\n======\n", false)); MultilayerPerceptron mlp = new MultilayerPerceptron(); mlp.setOptions(Utils.splitOptions("-L 0.3 -M 0.2 -N 500 -V 0 -S 0 -E 20 -H a")); publish("Training ANN..."); startTime = System.nanoTime(); mlp.buildClassifier(trainingdata); long runningTimeANN = (System.nanoTime() - startTime) / 1000000; runningTimeANN /= 1000; // saving the MLP model weka.core.SerializationHelper.write("mlp.model", mlp); publish("Done training ANN.\nEvaluating ANN using 10-fold cross-validation..."); evalANN = new Evaluation(trainingdata); evalANN.evaluateModel(mlp, trainingdata); // evalMLP.crossValidateModel(mlp, trainingdata, 10, new Random(1)); publish("Done evaluating ANN."); publish("Training SVM..."); SMO svm = new SMO(); startTime = System.nanoTime(); svm.buildClassifier(trainingdata); long runningTimeSVM = (System.nanoTime() - startTime) / 1000000; runningTimeSVM /= 1000; weka.core.SerializationHelper.write("svm.model", svm); publish("Done 
training SVM.\nEvaluating SVM using 10-fold cross-validation..."); evalSVM = new Evaluation(trainingdata); evalSVM.evaluateModel(svm, trainingdata); publish("Done evaluating SVM."); Platform.runLater( new Runnable() { @Override public void run() { bc.getData() .get(0) .getData() .get(0) .setYValue(evalANN.correct() / trainingdata.size() * 100); bc.getData() .get(0) .getData() .get(1) .setYValue(evalSVM.correct() / trainingdata.size() * 100); bc.getData() .get(0) .getData() .get(2) .setYValue(evalNB.correct() / trainingdata.size() * 100); for (int i = 0; i < NUM_CLASSES; i++) { lineChart.getData().get(0).getData().get(i).setYValue(evalANN.recall(i) * 100); lineChart.getData().get(1).getData().get(i).setYValue(evalSVM.recall(i) * 100); lineChart.getData().get(2).getData().get(i).setYValue(evalNB.recall(i) * 100); } } }); panel.fillConfTable(evalSVM.confusionMatrix()); summaryTable.setValueAt(evalANN.correct() / trainingdata.size() * 100., 0, 1); summaryTable.setValueAt(evalSVM.correct() / trainingdata.size() * 100, 0, 2); summaryTable.setValueAt(evalNB.correct() / trainingdata.size() * 100, 0, 3); summaryTable.setValueAt(runningTimeANN, 1, 1); summaryTable.setValueAt(runningTimeSVM, 1, 2); summaryTable.setValueAt(runningTimeNB, 1, 3); } catch (Exception e1) { // TODO Auto-generated catch block e1.printStackTrace(); } return null; }