Beispiel #1
0
  /**
   * Oversamples the class labelled {@code posID} and writes the enlarged data set into the
   * {@code *Art} output arrays.
   *
   * <p>The method performs these steps in order:
   *
   * <ol>
   *   <li>collects the indices of every entry of {@code clasesTrain} equal to {@code posID};
   *   <li>shuffles those indices using {@code Randomize.Randint};
   *   <li>fills a {@code kSMOTE}-wide neighbour list for each of them with the search selected by
   *       {@code ASMO};
   *   <li>builds the synthetic rows by calling {@code interpola} on a positive instance and one of
   *       its neighbours chosen at random, cycling over the shuffled positives;
   *   <li>copies the training rows into the output arrays and appends the synthetic rows after
   *       them.
   * </ol>
   *
   * @param datosTrain training matrix; handed to the neighbour search and copied into
   *     {@code datosArt}. Its first row determines the number of columns processed.
   * @param realTrain second per-attribute view of the training set (presumably the real-valued
   *     attributes - confirm against the caller); used by the neighbour search and by
   *     {@code interpola}
   * @param nominalTrain integer per-attribute view of the training set (presumably nominal
   *     attribute codes - confirm); used by the neighbour search and by {@code interpola}
   * @param nulosTrain boolean per-attribute flags of the training set (presumably missing-value
   *     markers - confirm); used by the neighbour search and by {@code interpola}
   * @param clasesTrain class label of every training instance
   * @param datosArt output: receives {@code datosTrain} followed by the synthetic counterpart.
   *     Its length decides how many synthetic rows are appended, so it must not exceed the
   *     number of training rows plus the number of generated rows.
   * @param realArt output counterpart of {@code realTrain}
   * @param nominalArt output counterpart of {@code nominalTrain}
   * @param nulosArt output counterpart of {@code nulosTrain}
   * @param clasesArt output: training labels followed by {@code posID} for each synthetic row
   * @param kSMOTE number of neighbours stored per positive instance
   * @param ASMO neighbour search selector: 0 calls {@code KNN.evaluacionKNN2}; 1 calls
   *     {@code evaluacionKNNClass} with {@code posID}; 2 calls it with {@code negID}. Any other
   *     value runs no search, leaving every neighbour index at its default of 0.
   * @param smoting when {@code balance} is false, {@code (int) (nPos * smoting)} synthetic rows
   *     are generated
   * @param balance when true, {@code nNeg - nPos} synthetic rows are generated instead
   * @param nPos size of the positive index array; must be at least the number of entries of
   *     {@code clasesTrain} equal to {@code posID}
   * @param posID label of the class to oversample
   * @param nNeg count used together with {@code nPos} when {@code balance} is true
   * @param negID label of the other class; used for the {@code ASMO == 2} search and for the
   *     {@code Math.max(posID, negID) + 1} value given to the neighbour searches
   * @param distanceEu forwarded unchanged to the neighbour searches (presumably selects the
   *     Euclidean distance - confirm)
   */
  public void SMOTE(
      double datosTrain[][],
      double realTrain[][],
      int nominalTrain[][],
      boolean nulosTrain[][],
      int clasesTrain[],
      double datosArt[][],
      double realArt[][],
      int nominalArt[][],
      boolean nulosArt[][],
      int clasesArt[],
      int kSMOTE,
      int ASMO,
      double smoting,
      boolean balance,
      int nPos,
      int posID,
      int nNeg,
      int negID,
      boolean distanceEu) {

    /* Step 1: remember where the positive instances live */
    final int[] minority = new int[nPos];
    int found = 0;
    for (int idx = 0; idx < clasesTrain.length; idx++) {
      if (clasesTrain[idx] == posID) {
        minority[found++] = idx;
      }
    }

    /* Step 2: shuffle the presentation order (one random draw per position) */
    for (int idx = 0; idx < minority.length; idx++) {
      final int held = minority[idx];
      final int swapWith = Randomize.Randint(0, minority.length - 1);
      minority[idx] = minority[swapWith];
      minority[swapWith] = held;
    }

    /* Step 3: k-nearest neighbours of every positive instance */
    final int[][] nearest = new int[minority.length][kSMOTE];
    final int maxClassPlusOne = Math.max(posID, negID) + 1;
    for (int idx = 0; idx < minority.length; idx++) {
      final int anchor = minority[idx];
      if (ASMO == 0) {
        KNN.evaluacionKNN2(
            kSMOTE,
            datosTrain,
            realTrain,
            nominalTrain,
            nulosTrain,
            clasesTrain,
            datosTrain[anchor],
            realTrain[anchor],
            nominalTrain[anchor],
            nulosTrain[anchor],
            maxClassPlusOne,
            distanceEu,
            nearest[idx]);
      } else if (ASMO == 1 || ASMO == 2) {
        /* Same search in both modes; only the class handed to it differs */
        final int searchClass = (ASMO == 1) ? posID : negID;
        evaluacionKNNClass(
            kSMOTE,
            datosTrain,
            realTrain,
            nominalTrain,
            nulosTrain,
            clasesTrain,
            datosTrain[anchor],
            realTrain[anchor],
            nominalTrain[anchor],
            nulosTrain[anchor],
            maxClassPlusOne,
            distanceEu,
            nearest[idx],
            searchClass);
      }
      /* Any other ASMO value: no search, the row keeps its zero defaults */
    }

    /* Step 4: interpolate the synthetic minority instances */
    final int synthCount = balance ? (nNeg - nPos) : (int) (nPos * smoting);
    final int width = datosTrain[0].length;
    final double[][] synthData = new double[synthCount][width];
    final double[][] synthReal = new double[synthCount][width];
    final int[][] synthNominal = new int[synthCount][width];
    final boolean[][] synthNull = new boolean[synthCount][width];
    final int[] synthLabels = new int[synthCount];

    for (int g = 0; g < synthData.length; g++) {
      synthLabels[g] = posID;
      /* Draw the neighbour before anything else so the random sequence is consumed in order */
      final int pick = Randomize.Randint(0, kSMOTE - 1);
      /* Wrap around the shuffled positives when more rows are needed than positives exist */
      final int slot = g % minority.length;
      final int anchor = minority[slot];
      final int[] candidates = nearest[slot];
      interpola(
          realTrain[anchor],
          realTrain[candidates[pick]],
          nominalTrain[anchor],
          nominalTrain[candidates[pick]],
          nulosTrain[anchor],
          nulosTrain[candidates[pick]],
          synthData[g],
          synthReal[g],
          synthNominal[g],
          synthNull[g]);
    }

    /* Step 5a: the original training instances come first in the output */
    final int trainRows = datosTrain.length;
    for (int row = 0; row < trainRows; row++) {
      for (int col = 0; col < width; col++) {
        datosArt[row][col] = datosTrain[row][col];
        realArt[row][col] = realTrain[row][col];
        nominalArt[row][col] = nominalTrain[row][col];
        nulosArt[row][col] = nulosTrain[row][col];
      }
      clasesArt[row] = clasesTrain[row];
    }

    /* Step 5b: the synthetic instances fill the remaining output rows */
    for (int row = trainRows, g = 0; row < datosArt.length; row++, g++) {
      for (int col = 0; col < width; col++) {
        datosArt[row][col] = synthData[g][col];
        realArt[row][col] = synthReal[g][col];
        nominalArt[row][col] = synthNominal[g][col];
        nulosArt[row][col] = synthNull[g][col];
      }
      clasesArt[row] = synthLabels[g];
    }
  }