Ejemplo n.º 1
0
  /**
   * Regular (non cross-validated) run on the fixed train/test split.
   *
   * <p>Collects the label-0 (minority class) points of {@code dataSet}, passes them to
   * {@code smote}, then writes and classifies two pairs of ARFF files: one built from
   * {@code dataSet} together with {@code pointSet}, and one built from {@code dataSet} alone.
   * {@code pointSet} is emptied once the first training file has been written.
   * NOTE(review): presumably {@code smote} fills {@code pointSet} with the synthetic samples;
   * confirm against its definition.
   */
  private static void regular() {
    // Gather the minority class instances (label 0) from the training data.
    List<Point> minorityPoints = new ArrayList<Point>();
    for (Point candidate : dataSet) {
      if (candidate.getLabel() != 0) {
        continue;
      }
      minorityPoints.add(candidate);
    }
    System.out.print("train data :" + dataSet.size() + "\t");
    System.out.println("train positive :" + minorityPoints.size());

    // Apply SMOTE to the minority part of the training set.
    smote(minorityPoints);

    // Run 1: training file = original training data plus pointSet; test file = test data only.
    String smoteTrainFile = NAME + "SMOTETrain" + ".arff";
    String smoteTestFile = NAME + "SMOTETest" + ".arff";
    Generate.generate(dataSet, pointSet, COL, fileName, smoteTrainFile);
    Generate.generate(testSet, new ArrayList<Point>(), COL, fileName, smoteTestFile);
    // Empty pointSet now that the SMOTE training file has been written.
    pointSet.clear();
    classify(smoteTrainFile, smoteTestFile);

    // Run 2: no preprocessing at all, both files are generated without extra points.
    String plainTrainFile = NAME + "TrainWS" + ".arff";
    String plainTestFile = NAME + "TestWS" + ".arff";
    Generate.generate(dataSet, new ArrayList<Point>(), COL, fileName, plainTrainFile);
    Generate.generate(testSet, new ArrayList<Point>(), COL, fileName, plainTestFile);
    classify(plainTrainFile, plainTestFile);
  }
Ejemplo n.º 2
0
  /**
   * Splits {@code dataSet} randomly into {@code FOLD} folds and evaluates each fold in turn.
   *
   * <p>Each of the first {@code FOLD - 1} folds receives up to {@code ROW / FOLD} points drawn
   * without replacement; the last fold receives every point that is still unassigned. For each
   * fold, that fold is the test set and all other folds form the training set. The label-0
   * (minority class) training points are passed to {@code smote}, a training file is generated
   * from the training data together with {@code pointSet}, a test file is generated from the
   * test data alone, and {@code classify} is run on the two files. {@code pointSet} is emptied
   * after every fold.
   *
   * <p>{@code allData} is cleared and rebuilt on every call, so repeated calls do not
   * accumulate folds.
   * NOTE(review): the fold size is derived from {@code ROW}, not {@code dataSet.size()};
   * confirm the two are meant to be equal.
   */
  public static void crossValidation() {
    // --------------------- split into k folds ---------------------
    // Start from FOLD empty folds; clearing first keeps a second call from stacking folds.
    allData.clear();
    for (int i = 0; i < FOLD; i++) {
      allData.add(new ArrayList<Point>());
    }

    // Indices into dataSet that have not been assigned to a fold yet.
    List<Integer> remaining = new ArrayList<Integer>();
    for (int i = 0; i < dataSet.size(); i++) {
      remaining.add(i);
    }

    // One generator for the whole split instead of a new Random per draw.
    Random random = new Random();
    for (int i = 0; i < FOLD; i++) {
      List<Point> currentFold = allData.get(i);
      if (i == FOLD - 1) {
        // The last fold takes everything that is left.
        for (Integer index : remaining) {
          currentFold.add(dataSet.get(index));
        }
      } else {
        // Draw a random position among the remaining indices: every draw succeeds, and the
        // isEmpty guard prevents spinning forever when ROW / FOLD asks for more points than
        // dataSet actually holds.
        for (int taken = 0; taken < ROW / FOLD && !remaining.isEmpty(); taken++) {
          int position = random.nextInt(remaining.size());
          int index = remaining.remove(position);
          currentFold.add(dataSet.get(index));
        }
      }
    }

    // ------------- one fold for testing, the others for training -------------
    for (int fold = 0; fold < FOLD; fold++) {
      List<Point> trainData = new ArrayList<Point>();
      List<Point> testData = new ArrayList<Point>();
      List<Point> positiveTrainData = new ArrayList<Point>();
      List<Point> positiveTestData = new ArrayList<Point>();

      List<Point> testFold = allData.get(fold);
      testData.addAll(testFold);
      for (List<Point> ps : allData) {
        // Identity comparison on purpose: skip the very list used as the test fold.
        if (ps != testFold) {
          trainData.addAll(ps);
        }
      }

      // Select the minority class instances (label 0) of the training data.
      for (Point point : trainData) {
        if (point.getLabel() == 0) {
          positiveTrainData.add(point);
        }
      }
      System.out.print("train data :" + trainData.size() + "\t");
      System.out.println("train positive :" + positiveTrainData.size());

      // Same count for the test fold; used for reporting only.
      for (Point point : testData) {
        if (point.getLabel() == 0) {
          positiveTestData.add(point);
        }
      }
      System.out.print("test data :" + testData.size() + "\t");
      System.out.println("test positive :" + positiveTestData.size());

      // Apply SMOTE to the minority part of the training data.
      // NOTE(review): presumably smote fills pointSet with the synthetic samples; confirm.
      smote(positiveTrainData);

      System.out.println("----------------SMOTE result-----------------------");
      String trainFileName = NAME + "SMOTETrain" + fold + ".arff";
      String testFileName = NAME + "SMOTETest" + fold + ".arff";
      Generate.generate(trainData, pointSet, COL, fileName, trainFileName);
      Generate.generate(testData, new ArrayList<Point>(), COL, fileName, testFileName);
      // Empty pointSet so the next fold does not reuse this fold's points.
      pointSet.clear();

      classify(trainFileName, testFileName);

      System.out.println("-----------------------------------------------");
    }
  }