/**
 * Oversamples the positive class with SMOTE and writes the enlarged data set
 * into the caller-supplied output arrays.
 *
 * <p>The procedure runs in five stages:
 * <ol>
 *   <li>collect the indices of every row of {@code clasesTrain} equal to {@code posID};</li>
 *   <li>shuffle those indices using {@code Randomize.Randint};</li>
 *   <li>fill one neighbor list of {@code kSMOTE} entries per positive row, using the
 *       search selected by {@code ASMO};</li>
 *   <li>build the synthetic rows by calling {@code interpola} on a positive row and one
 *       of its neighbors, cycling through the positives as many times as needed;</li>
 *   <li>copy the original rows, then the synthetic rows, into the output arrays.</li>
 * </ol>
 *
 * <p>Caller obligations visible from the code: {@code nPos} must be at least the number
 * of rows labelled {@code posID} (unused slots stay 0, i.e. point at row 0);
 * {@code datosTrain} must have at least one row; when {@code balance} is set,
 * {@code nNeg - nPos} must not be negative; and if any synthetic row is requested
 * {@code nPos} must be greater than zero. Every output row from index
 * {@code datosTrain.length} up to {@code datosArt.length - 1} is filled from the
 * synthetic rows, so {@code datosArt.length} must not exceed
 * {@code datosTrain.length} plus the number of synthetic rows, and the other output
 * arrays must have at least {@code datosArt.length} rows.
 *
 * @param datosTrain input training data; handed to the neighbor search and copied to {@code datosArt}
 * @param realTrain actual training data; source of the interpolation and copied to {@code realArt}
 * @param nominalTrain nominal attribute values; interpolated and copied to {@code nominalArt}
 * @param nulosTrain null-value flags; interpolated and copied to {@code nulosArt}
 * @param clasesTrain training classes; copied to {@code clasesArt}
 * @param datosArt output: original rows of {@code datosTrain} followed by the synthetic instances
 * @param realArt output counterpart of {@code realTrain}
 * @param nominalArt output counterpart of {@code nominalTrain}
 * @param nulosArt output counterpart of {@code nulosTrain}
 * @param clasesArt output classes; every synthetic row is labelled {@code posID}
 * @param kSMOTE number of neighbors stored per positive instance
 * @param ASMO neighbor search selector: 0 calls {@code KNN.evaluacionKNN2}; 1 calls
 *        {@code evaluacionKNNClass} with {@code posID}; 2 calls it with {@code negID};
 *        any other value leaves the neighbor lists at their initial zeros
 * @param smoting factor multiplied by {@code nPos} (truncated to int) to obtain the number
 *        of synthetic rows when {@code balance} is false
 * @param balance when true, {@code nNeg - nPos} synthetic rows are generated instead
 * @param nPos capacity reserved for positive-instance indices
 * @param posID class label treated as positive
 * @param nNeg count used together with {@code nPos} when {@code balance} is true
 * @param negID the other class label
 * @param distanceEu flag forwarded unchanged to the neighbor search
 *        (presumably selects Euclidean distance; confirm against the KNN helpers)
 */
public void SMOTE(
        double datosTrain[][],
        double realTrain[][],
        int nominalTrain[][],
        boolean nulosTrain[][],
        int clasesTrain[],
        double datosArt[][],
        double realArt[][],
        int nominalArt[][],
        boolean nulosArt[][],
        int clasesArt[],
        int kSMOTE,
        int ASMO,
        double smoting,
        boolean balance,
        int nPos,
        int posID,
        int nNeg,
        int negID,
        boolean distanceEu) {

    /* Stage 1: gather the row indices of the positive instances */
    int positives[] = new int[nPos];
    int found = 0;
    for (int row = 0; row < clasesTrain.length; row++) {
        if (clasesTrain[row] == posID) {
            positives[found++] = row;
        }
    }

    /* Stage 2: shuffle the order in which positives will be visited */
    for (int slot = 0; slot < positives.length; slot++) {
        int held = positives[slot];
        int other = Randomize.Randint(0, positives.length - 1);
        positives[slot] = positives[other];
        positives[other] = held;
    }

    /* Stage 3: one neighbor list (kSMOTE entries) per positive instance */
    int neighbors[][] = new int[positives.length][kSMOTE];
    /* Same value for every query, so it is computed once */
    final int labelBound = Math.max(posID, negID) + 1;
    for (int p = 0; p < positives.length; p++) {
        final int query = positives[p];
        if (ASMO == 0) {
            KNN.evaluacionKNN2(
                    kSMOTE,
                    datosTrain, realTrain, nominalTrain, nulosTrain, clasesTrain,
                    datosTrain[query], realTrain[query], nominalTrain[query], nulosTrain[query],
                    labelBound, distanceEu, neighbors[p]);
        } else if (ASMO == 1 || ASMO == 2) {
            /* Modes 1 and 2 differ only in the class label handed to the search */
            final int searchedClass = (ASMO == 1) ? posID : negID;
            evaluacionKNNClass(
                    kSMOTE,
                    datosTrain, realTrain, nominalTrain, nulosTrain, clasesTrain,
                    datosTrain[query], realTrain[query], nominalTrain[query], nulosTrain[query],
                    labelBound, distanceEu, neighbors[p], searchedClass);
        }
        /* Any other ASMO value: the neighbor list keeps its initial zeros */
    }

    /* Stage 4: interpolate the synthetic minority instances */
    final int nSynthetic = balance ? (nNeg - nPos) : (int) (nPos * smoting);
    final int nFeatures = datosTrain[0].length;
    double genS[][] = new double[nSynthetic][nFeatures];
    double genR[][] = new double[nSynthetic][nFeatures];
    int genN[][] = new int[nSynthetic][nFeatures];
    boolean genM[][] = new boolean[nSynthetic][nFeatures];
    int clasesGen[] = new int[nSynthetic];

    for (int g = 0; g < genS.length; g++) {
        clasesGen[g] = posID;
        int pick = Randomize.Randint(0, kSMOTE - 1);
        /* Wrap around so the positives are reused when more rows are needed */
        int slot = g % positives.length;
        int seed = positives[slot];
        int candidates[] = neighbors[slot];
        interpola(
                realTrain[seed], realTrain[candidates[pick]],
                nominalTrain[seed], nominalTrain[candidates[pick]],
                nulosTrain[seed], nulosTrain[candidates[pick]],
                genS[g], genR[g], genN[g], genM[g]);
    }

    /* Stage 5a: the original instances occupy the leading output rows */
    final int nOriginal = datosTrain.length;
    for (int row = 0; row < nOriginal; row++) {
        for (int col = 0; col < nFeatures; col++) {
            datosArt[row][col] = datosTrain[row][col];
            realArt[row][col] = realTrain[row][col];
            nominalArt[row][col] = nominalTrain[row][col];
            nulosArt[row][col] = nulosTrain[row][col];
        }
        clasesArt[row] = clasesTrain[row];
    }

    /* Stage 5b: the synthetic instances fill every remaining output row */
    for (int g = 0, row = nOriginal; row < datosArt.length; row++, g++) {
        for (int col = 0; col < nFeatures; col++) {
            datosArt[row][col] = genS[g][col];
            realArt[row][col] = genR[g][col];
            nominalArt[row][col] = genN[g][col];
            nulosArt[row][col] = genM[g][col];
        }
        clasesArt[row] = clasesGen[g];
    }
}