public static void main(String[] args) throws Exception { BufferedReader reader = new BufferedReader(new FileReader("spambase.arff")); Instances data = new Instances(reader); reader.close(); // setting class attribute data.setClassIndex(data.numAttributes() - 1); int i = data.numInstances(); int j = data.numAttributes() - 1; File file = new File("tablelog.csv"); Writer output = null; output = new BufferedWriter(new FileWriter(file)); output.write( "%missing,auc1,correct1,fmeasure1,auc2,correct2,fmeasure2,auc3,correct3,fmeasure3\n"); Random randomGenerator = new Random(); data.randomize(randomGenerator); int numBlock = data.numInstances() / 2; double num0 = 0, num1 = 0, num2 = 0; /*mdata.instance(0).setMissing(0); mdata.deleteWithMissing(0); System.out.println(mdata.numInstances()+","+data.numInstances());*/ // Instances traindata=null; // Instances testdata=null; // System.out.println(data.instance(3).stringValue(1)); for (int perc = 10; perc < 101; perc = perc + 10) { Instances mdata = new Instances(data); int numMissing = perc * numBlock / 100; double y11[] = new double[2]; double y21[] = new double[2]; double y31[] = new double[2]; double y12[] = new double[2]; double y22[] = new double[2]; double y32[] = new double[2]; double y13[] = new double[2]; double y23[] = new double[2]; double y33[] = new double[2]; for (int p = 0; p < 2; p++) { Instances traindata = mdata.trainCV(2, p); Instances testdata = mdata.testCV(2, p); num0 = 0; num1 = 0; num2 = 0; for (int t = 0; t < numBlock; t++) { if (traindata.instance(t).classValue() == 0) num0++; if (traindata.instance(t).classValue() == 1) num1++; // if (traindata.instance(t).classValue()==2) num2++; } // System.out.println(mdata.instance(0).classValue()); Instances trainwithmissing = new Instances(traindata); Instances testwithmissing = new Instances(testdata); for (int q = 0; q < j; q++) { int r = randomGenerator.nextInt((int) i / 2); for (int k = 0; k < numMissing; k++) { // int r = randomGenerator.nextInt((int) i/2); // int c = randomGenerator.nextInt(j); trainwithmissing.instance((r + k) % numBlock).setMissing(q); testwithmissing.instance((r + k) % numBlock).setMissing(q); } } // trainwithmissing.deleteWithMissing(0);System.out.println(traindata.numInstances()+","+trainwithmissing.numInstances()); Classifier cModel = (Classifier) new Logistic(); // try for different classifiers and datasets cModel.buildClassifier(trainwithmissing); Evaluation eTest1 = new Evaluation(trainwithmissing); eTest1.evaluateModel(cModel, testdata); // eTest.crossValidateModel(cModel,mdata,10,mdata.getRandomNumberGenerator(1)); y11[p] = num0 / numBlock * eTest1.areaUnderROC(0) + num1 / numBlock * eTest1.areaUnderROC(1) /*+num2/numBlock*eTest1.areaUnderROC(2)*/; y21[p] = eTest1.correct(); y31[p] = num0 / numBlock * eTest1.fMeasure(0) + num1 / numBlock * eTest1.fMeasure(1) /*+num2/numBlock*eTest1.fMeasure(2)*/; Classifier cModel2 = (Classifier) new Logistic(); cModel2.buildClassifier(traindata); Evaluation eTest2 = new Evaluation(traindata); eTest2.evaluateModel(cModel2, testwithmissing); y12[p] = num0 / numBlock * eTest2.areaUnderROC(0) + num1 / numBlock * eTest2.areaUnderROC(1) /*+num2/numBlock*eTest2.areaUnderROC(2)*/; y22[p] = eTest2.correct(); y32[p] = num0 / numBlock * eTest2.fMeasure(0) + num1 / numBlock * eTest2.fMeasure(1) /*+num2/numBlock*eTest2.fMeasure(2)*/; Classifier cModel3 = (Classifier) new Logistic(); cModel3.buildClassifier(trainwithmissing); Evaluation eTest3 = new Evaluation(trainwithmissing); eTest3.evaluateModel(cModel3, testwithmissing); y13[p] = num0 / numBlock * eTest3.areaUnderROC(0) + num1 / numBlock * eTest3.areaUnderROC(1) /*+num2/numBlock*eTest3.areaUnderROC(2)*/; y23[p] = eTest3.correct(); y33[p] = num0 / numBlock * eTest3.fMeasure(0) + num1 / numBlock * eTest3.fMeasure(1) /*+num2/numBlock*eTest3.fMeasure(2)*/; // System.out.println(num0+","+num1+","+num2+"\n"); } double auc1 = (y11[0] + y11[1]) / 2; double auc2 = (y12[0] + y12[1]) / 2; double auc3 = (y13[0] + y13[1]) / 2; double corr1 = (y21[0] + y21[1]) / i; double corr2 = (y22[0] + y22[1]) / i; double corr3 = (y23[0] + y23[1]) / i; double fm1 = (y31[0] + y31[1]) / 2; double fm2 = (y32[0] + y32[1]) / 2; double fm3 = (y33[0] + y33[1]) / 2; output.write( perc + "," + auc1 + "," + corr1 + "," + fm1 + "," + auc2 + "," + corr2 + "," + fm2 + "," + auc3 + "," + corr3 + "," + fm3 + "\n"); // System.out.println(num0); // mdata=data; } output.close(); }
/** * Evaluates a feature subset by cross validation * * @param feature_set the subset to be evaluated * @param num_atts the number of attributes in the subset * @return the estimated accuracy * @throws Exception if subset can't be evaluated */ protected double estimatePerformance(BitSet feature_set, int num_atts) throws Exception { m_evaluation = new Evaluation(m_theInstances); int i; int[] fs = new int[num_atts]; double[] instA = new double[num_atts]; int classI = m_theInstances.classIndex(); int index = 0; for (i = 0; i < m_numAttributes; i++) { if (feature_set.get(i)) { fs[index++] = i; } } // create new hash table m_entries = new Hashtable((int) (m_theInstances.numInstances() * 1.5)); // insert instances into the hash table for (i = 0; i < m_numInstances; i++) { Instance inst = m_theInstances.instance(i); for (int j = 0; j < fs.length; j++) { if (fs[j] == classI) { instA[j] = Double.MAX_VALUE; // missing for the class } else if (inst.isMissing(fs[j])) { instA[j] = Double.MAX_VALUE; } else { instA[j] = inst.value(fs[j]); } } insertIntoTable(inst, instA); } if (m_CVFolds == 1) { // calculate leave one out error for (i = 0; i < m_numInstances; i++) { Instance inst = m_theInstances.instance(i); for (int j = 0; j < fs.length; j++) { if (fs[j] == classI) { instA[j] = Double.MAX_VALUE; // missing for the class } else if (inst.isMissing(fs[j])) { instA[j] = Double.MAX_VALUE; } else { instA[j] = inst.value(fs[j]); } } evaluateInstanceLeaveOneOut(inst, instA); } } else { m_theInstances.randomize(m_rr); m_theInstances.stratify(m_CVFolds); // calculate 10 fold cross validation error for (i = 0; i < m_CVFolds; i++) { Instances insts = m_theInstances.testCV(m_CVFolds, i); evaluateFoldCV(insts, fs); } } switch (m_evaluationMeasure) { case EVAL_DEFAULT: if (m_classIsNominal) { return m_evaluation.pctCorrect(); } return -m_evaluation.rootMeanSquaredError(); case EVAL_ACCURACY: return m_evaluation.pctCorrect(); case EVAL_RMSE: return -m_evaluation.rootMeanSquaredError(); case EVAL_MAE: return -m_evaluation.meanAbsoluteError(); case EVAL_AUC: double[] classPriors = m_evaluation.getClassPriors(); Utils.normalize(classPriors); double weightedAUC = 0; for (i = 0; i < m_theInstances.classAttribute().numValues(); i++) { double tempAUC = m_evaluation.areaUnderROC(i); if (!Utils.isMissingValue(tempAUC)) { weightedAUC += (classPriors[i] * tempAUC); } else { System.err.println("Undefined AUC!!"); } } return weightedAUC; } // shouldn't get here return 0.0; }