// regular DM method private static void regular() { List<Point> positiveTrainData = new ArrayList<Point>(); // select the minority class instances for (Point point : dataSet) { if (point.getLabel() == 0) positiveTrainData.add(point); } System.out.print("train data :" + dataSet.size() + "\t"); System.out.println("train positive :" + positiveTrainData.size()); // 将训练集进行smote操作 smote(positiveTrainData); // generate new dataset String trainFileName = NAME + "SMOTETrain" + ".arff"; String testFileName = NAME + "SMOTETest" + ".arff"; Generate.generate(dataSet, pointSet, COL, fileName, trainFileName); Generate.generate(testSet, new ArrayList<Point>(), COL, fileName, testFileName); pointSet.clear(); classify(trainFileName, testFileName); // 不进行任何处理 trainFileName = NAME + "TrainWS" + ".arff"; testFileName = NAME + "TestWS" + ".arff"; Generate.generate(dataSet, new ArrayList<Point>(), COL, fileName, trainFileName); Generate.generate(testSet, new ArrayList<Point>(), COL, fileName, testFileName); classify(trainFileName, testFileName); }
/** divide the dataset into k folds */ public static void crossValidation() { // double sum = 0; // ---------------------分为k折----------------------------- // 初始化为k fold for (int i = 0; i < FOLD; i++) { ArrayList<Point> tmp = new ArrayList<Point>(); allData.add(tmp); } // 选一个 删一个 List<Integer> chosen = new ArrayList<Integer>(); for (int i = 0; i < dataSet.size(); i++) { chosen.add(i); } for (int i = 0; i < FOLD; i++) { int choose = 0; while (choose < ROW / FOLD && i != FOLD - 1) { int rand = new Random().nextInt(dataSet.size()); if (chosen.contains(rand)) { chosen.remove(new Integer(rand)); choose++; allData.get(i).add(dataSet.get(rand)); } } // 最后一折全部添加 if (i == FOLD - 1) { for (Integer o : chosen) { allData.get(i).add(dataSet.get(o)); } } } // ------------------取一折为测试,其余为训练集----------------------------- for (int fold = 0; fold < FOLD; fold++) { List<Point> trainData = new ArrayList<Point>(); List<Point> testData = new ArrayList<Point>(); List<Point> positiveTrainData = new ArrayList<Point>(); List<Point> positiveTestData = new ArrayList<Point>(); testData.addAll(allData.get(fold)); for (List<Point> ps : allData) { if (ps != allData.get(fold)) trainData.addAll(ps); } // select the minority class instances for (Point point : trainData) { if (point.getLabel() == 0) positiveTrainData.add(point); } System.out.print("train data :" + trainData.size() + "\t"); System.out.println("train positive :" + positiveTrainData.size()); for (Point point : testData) { if (point.getLabel() == 0) positiveTestData.add(point); } System.out.print("test data :" + testData.size() + "\t"); System.out.println("test positive :" + positiveTestData.size()); // 不进行任何处理 // System.out.println("----------------Original result-----------------------"); String trainFileName = NAME + "OriginalTrain" + fold + ".arff"; String testFileName = NAME + "OriginalTest" + fold + ".arff"; // Generate.generate(trainData,new ArrayList<Point>(),COL,fileName,trainFileName); // Generate.generate(testData,new ArrayList<Point>(),COL,fileName,testFileName); // // classify(trainFileName,testFileName); // generate new dataset // 将训练集进行smote操作 smote(positiveTrainData); System.out.println("----------------SMOTE result-----------------------"); trainFileName = NAME + "SMOTETrain" + fold + ".arff"; testFileName = NAME + "SMOTETest" + fold + ".arff"; Generate.generate(trainData, pointSet, COL, fileName, trainFileName); Generate.generate(testData, new ArrayList<Point>(), COL, fileName, testFileName); pointSet.clear(); classify(trainFileName, testFileName); System.out.println("-----------------------------------------------"); } // System.out.println("average precision is :" + sum/FOLD); }