/**
 * Runs random over-sampling on the training set and writes the result files.
 *
 * The label 0 is counted against every other label; the smaller of the two groups is treated as
 * the minority ("positive") class. Minority instances are then drawn at random, with replacement,
 * until both groups have the same size. The output training set consists of the original
 * instances followed by the drawn copies. The test set is written through unchanged.
 *
 * NOTE(review): the minority label is taken to be either 0 or 1, so this presumably expects a
 * two-class problem - confirm against the callers.
 *
 * If no minority instance exists there is nothing to replicate; in that case the training set is
 * written unchanged instead of failing while drawing from an empty pool.
 */
public void run() {
  int nPos = 0;
  int nNeg = 0;
  int i, j;
  int tmp;
  int posID;
  int positives[];
  int overs[];
  double conjS[][];
  int clasesS[];
  int tamS;
  int nAtributos;

  long tiempo = System.currentTimeMillis();

  /* Count label 0 against every other label. */
  for (i = 0; i < clasesTrain.length; i++) {
    if (clasesTrain[i] == 0) {
      nPos++;
    } else {
      nNeg++;
    }
  }

  /* The smaller group is the minority class; swap the counters when label 0 is the larger one. */
  if (nPos > nNeg) {
    tmp = nPos;
    nPos = nNeg;
    nNeg = tmp;
    posID = 1;
  } else {
    posID = 0;
  }

  /* Collect the indices of the minority instances. */
  positives = new int[nPos];
  for (i = 0, j = 0; i < clasesTrain.length; i++) {
    if (clasesTrain[i] == posID) {
      positives[j] = i;
      j++;
    }
  }

  /* The generator is always seeded so that its state does not depend on the branch taken. */
  Randomize.setSeed(semilla);
  if (nPos == 0) {
    /* No minority instance to copy: drawing from an empty pool would index out of bounds. */
    overs = new int[0];
    if (nNeg > 0) {
      System.err.println(
          "RandomOverSampling: no minority class instances found, training set left unchanged");
    }
  } else {
    /* Draw (nNeg - nPos) minority indices with replacement to balance both groups. */
    overs = new int[nNeg - nPos];
    for (i = 0; i < overs.length; i++) {
      tmp = Randomize.Randint(0, nPos - 1);
      overs[i] = positives[tmp];
    }
  }

  /* Original instances plus the drawn copies (equals 2 * nNeg whenever nPos > 0). */
  tamS = datosTrain.length + overs.length;
  nAtributos = (datosTrain.length > 0) ? datosTrain[0].length : 0;

  /* Build the output set: first the original instances, then the over-sampled copies. */
  conjS = new double[tamS][nAtributos];
  clasesS = new int[tamS];
  for (j = 0; j < datosTrain.length; j++) {
    System.arraycopy(datosTrain[j], 0, conjS[j], 0, nAtributos);
    clasesS[j] = clasesTrain[j];
  }
  for (i = 0; i < overs.length; i++, j++) {
    System.arraycopy(datosTrain[overs[i]], 0, conjS[j], 0, nAtributos);
    clasesS[j] = clasesTrain[overs[i]];
  }

  System.out.println(
      "RandomOverSampling "
          + relation
          + " "
          + (double) (System.currentTimeMillis() - tiempo) / 1000.0
          + "s");

  OutputIS.escribeSalida(ficheroSalida[0], conjS, clasesS, entradas, salida, nEntradas, relation);
  OutputIS.escribeSalida(ficheroSalida[1], test, entradas, salida, nEntradas, relation);
}
/** Executes the algorithm */ public void ejecutar() { int i, j, l; int nClases; double conjS[][]; double conjR[][]; int conjN[][]; boolean conjM[][]; int clasesS[]; int nSel = 0; Cromosoma poblacion[]; int ev = 0; double prob[]; double NUmax = 1.5; double NUmin = 0.5; // used for lineal ranking double aux; double pos1, pos2; int sel1, sel2, comp1, comp2; Cromosoma newPob[]; long tiempo = System.currentTimeMillis(); /*Getting the number of different clases*/ nClases = 0; for (i = 0; i < clasesTrain.length; i++) if (clasesTrain[i] > nClases) nClases = clasesTrain[i]; nClases++; /*Random inicialization of the population*/ Randomize.setSeed(semilla); poblacion = new Cromosoma[tamPoblacion]; for (i = 0; i < tamPoblacion; i++) poblacion[i] = new Cromosoma(datosTrain.length); /*Initial evaluation of the population*/ for (i = 0; i < tamPoblacion; i++) poblacion[i].evalua( datosTrain, realTrain, nominalTrain, nulosTrain, clasesTrain, alfa, kNeigh, nClases, distanceEu); if (torneo) { while (ev < nEval) { newPob = new Cromosoma[2]; /*Binary tournament selection*/ comp1 = Randomize.Randint(0, tamPoblacion - 1); do { comp2 = Randomize.Randint(0, tamPoblacion - 1); } while (comp2 == comp1); if (poblacion[comp1].getCalidad() > poblacion[comp2].getCalidad()) sel1 = comp1; else sel1 = comp2; comp1 = Randomize.Randint(0, tamPoblacion - 1); do { comp2 = Randomize.Randint(0, tamPoblacion - 1); } while (comp2 == comp1); if (poblacion[comp1].getCalidad() > poblacion[comp2].getCalidad()) sel2 = comp1; else sel2 = comp2; if (Randomize.Rand() < pCruce) { // there is cross crucePMX(poblacion, newPob, sel1, sel2); } else { // there is not cross newPob[0] = new Cromosoma(datosTrain.length, poblacion[sel1]); newPob[1] = new Cromosoma(datosTrain.length, poblacion[sel2]); } /*Mutation of the cromosomes*/ for (i = 0; i < 2; i++) newPob[i].mutacion(pMutacion1to0, pMutacion0to1); /*Evaluation of the population*/ for (i = 0; i < 2; i++) if (!(newPob[i].estaEvaluado())) { newPob[i].evalua( 
datosTrain, realTrain, nominalTrain, nulosTrain, clasesTrain, alfa, kNeigh, nClases, distanceEu); ev++; } /*Replace the two worst*/ Arrays.sort(poblacion); poblacion[tamPoblacion - 2] = new Cromosoma(datosTrain.length, newPob[0]); poblacion[tamPoblacion - 1] = new Cromosoma(datosTrain.length, newPob[1]); } } else { /*Get the probabilities of lineal ranking in case of not use binary tournament*/ prob = new double[tamPoblacion]; for (i = 0; i < tamPoblacion; i++) { aux = (double) (NUmax - NUmin) * ((double) i / (tamPoblacion - 1)); prob[i] = (double) (1.0 / (tamPoblacion)) * (NUmax - aux); } for (i = 1; i < tamPoblacion; i++) prob[i] = prob[i] + prob[i - 1]; while (ev < nEval) { /*Sort the population by quality criterion*/ Arrays.sort(poblacion); newPob = new Cromosoma[2]; pos1 = Randomize.Rand(); pos2 = Randomize.Rand(); for (j = 0; j < tamPoblacion && prob[j] < pos1; j++) ; sel1 = j; for (j = 0; j < tamPoblacion && prob[j] < pos2; j++) ; sel2 = j; if (Randomize.Rand() < pCruce) { // there is cross crucePMX(poblacion, newPob, sel1, sel2); } else { // there is not cross newPob[0] = new Cromosoma(datosTrain.length, poblacion[sel1]); newPob[1] = new Cromosoma(datosTrain.length, poblacion[sel2]); } /*Mutation of the cromosomes*/ for (i = 0; i < 2; i++) newPob[i].mutacion(pMutacion1to0, pMutacion0to1); /*Evaluation of the population*/ for (i = 0; i < 2; i++) if (!(newPob[i].estaEvaluado())) { newPob[i].evalua( datosTrain, realTrain, nominalTrain, nulosTrain, clasesTrain, alfa, kNeigh, nClases, distanceEu); ev++; } /*Replace the two worst*/ poblacion[tamPoblacion - 2] = new Cromosoma(datosTrain.length, newPob[0]); poblacion[tamPoblacion - 1] = new Cromosoma(datosTrain.length, newPob[1]); } } nSel = poblacion[0].genesActivos(); /*Building of S set from the best cromosome obtained*/ conjS = new double[nSel][datosTrain[0].length]; conjR = new double[nSel][datosTrain[0].length]; conjN = new int[nSel][datosTrain[0].length]; conjM = new boolean[nSel][datosTrain[0].length]; 
clasesS = new int[nSel]; for (i = 0, l = 0; i < datosTrain.length; i++) { if (poblacion[0].getGen(i)) { // the instance must be copied to the solution for (j = 0; j < datosTrain[0].length; j++) { conjS[l][j] = datosTrain[i][j]; conjR[l][j] = realTrain[i][j]; conjN[l][j] = nominalTrain[i][j]; conjM[l][j] = nulosTrain[i][j]; } clasesS[l] = clasesTrain[i]; l++; } } System.out.println( "SGA " + relation + " " + (double) (System.currentTimeMillis() - tiempo) / 1000.0 + "s"); OutputIS.escribeSalida( ficheroSalida[0], conjR, conjN, conjM, clasesS, entradas, salida, nEntradas, relation); OutputIS.escribeSalida(ficheroSalida[1], test, entradas, salida, nEntradas, relation); } // end-method
/** * The main method of the class that includes the operations of the algorithm. It includes all the * operations that the algorithm has and finishes when it writes the output information into * files. */ public void run() { int S[]; int i, j, l, m; int nPos = 0, nNeg = 0; int posID; int nClases; int pos; int baraje[]; int tmp; double conjS[][]; int clasesS[]; int tamS = 0; int claseObt; int cont; int busq; boolean marcas[]; int nSel; double conjS2[][]; int clasesS2[]; double minDist, dist; long tiempo = System.currentTimeMillis(); /*CNN PART*/ /*Count of number of positive and negative examples*/ for (i = 0; i < clasesTrain.length; i++) { if (clasesTrain[i] == 0) nPos++; else nNeg++; } if (nPos > nNeg) { tmp = nPos; nPos = nNeg; nNeg = tmp; posID = 1; } else { posID = 0; } /*Inicialization of the candidates set*/ S = new int[datosTrain.length]; for (i = 0; i < S.length; i++) S[i] = Integer.MAX_VALUE; /*Inserting an element of mayority class*/ Randomize.setSeed(semilla); pos = Randomize.Randint(0, clasesTrain.length - 1); while (clasesTrain[pos] == posID) pos = (pos + 1) % clasesTrain.length; S[tamS] = pos; tamS++; /*Insert all subset of minority class*/ for (i = 0; i < clasesTrain.length; i++) { if (clasesTrain[i] == posID) { S[tamS] = i; tamS++; } } /*Algorithm body. We resort randomly the instances of T and compare with the rest of S. 
If an instance doesn´t classified correctly, it is inserted in S*/ baraje = new int[datosTrain.length]; for (i = 0; i < datosTrain.length; i++) baraje[i] = i; for (i = 0; i < datosTrain.length; i++) { pos = Randomize.Randint(i, clasesTrain.length - 1); tmp = baraje[i]; baraje[i] = baraje[pos]; baraje[pos] = tmp; } for (i = 0; i < datosTrain.length; i++) { if (clasesTrain[i] != posID) { // only for mayority class instances /*Construction of the S set from the previous vector S*/ conjS = new double[tamS][datosTrain[0].length]; clasesS = new int[tamS]; for (j = 0; j < tamS; j++) { for (l = 0; l < datosTrain[0].length; l++) conjS[j][l] = datosTrain[S[j]][l]; clasesS[j] = clasesTrain[S[j]]; } /*Do KNN to the instance*/ claseObt = KNN.evaluacionKNN(k, conjS, clasesS, datosTrain[baraje[i]], 2); if (claseObt != clasesTrain[baraje[i]]) { // fail in the class, it is included in S Arrays.sort(S); busq = Arrays.binarySearch(S, baraje[i]); if (busq < 0) { S[tamS] = baraje[i]; tamS++; } } } } /*Construction of the S set from the previous vector S*/ conjS = new double[tamS][datosTrain[0].length]; clasesS = new int[tamS]; for (j = 0; j < tamS; j++) { for (l = 0; l < datosTrain[0].length; l++) conjS[j][l] = datosTrain[S[j]][l]; clasesS[j] = clasesTrain[S[j]]; } /*TOMEK LINKS PART*/ /*Inicialization of the instance flagged vector of the S set*/ marcas = new boolean[conjS.length]; for (i = 0; i < conjS.length; i++) { marcas[i] = true; } nSel = conjS.length; for (i = 0; i < conjS.length; i++) { minDist = Double.POSITIVE_INFINITY; pos = 0; for (j = 0; j < conjS.length; j++) { if (i != j) { dist = KNN.distancia(conjS[i], conjS[j]); if (dist < minDist) { minDist = dist; pos = j; } } } if (clasesS[i] != clasesS[pos]) { if (clasesS[i] != posID) { if (marcas[i] == true) { marcas[i] = false; nSel--; } } else { if (marcas[pos] == true) { marcas[pos] = false; nSel--; } } } } /*Construction of the S set from the flags*/ conjS2 = new double[nSel][conjS[0].length]; clasesS2 = new int[nSel]; for 
(m = 0, l = 0; m < conjS.length; m++) { if (marcas[m]) { // the instance will evaluate for (j = 0; j < conjS[0].length; j++) { conjS2[l][j] = conjS[m][j]; } clasesS2[l] = clasesS[m]; l++; } } System.out.println( "CNN_TomekLinks " + relation + " " + (double) (System.currentTimeMillis() - tiempo) / 1000.0 + "s"); OutputIS.escribeSalida( ficheroSalida[0], conjS2, clasesS2, entradas, salida, nEntradas, relation); OutputIS.escribeSalida(ficheroSalida[1], test, entradas, salida, nEntradas, relation); }
public void ejecutar() { double conjS[][]; double conjR[][]; int conjN[][]; boolean conjM[][]; int clasesS[]; int S[]; /* Binary Vector, to decide if the instance will be included*/ int i, j, l, cont; int nClases; int tamS; int transformations; int claseObt[]; int clasePredominante; long tiempo = System.currentTimeMillis(); transformations = 0; /*Getting the number of different classes*/ nClases = 0; for (i = 0; i < clasesTrain.length; i++) if (clasesTrain[i] > nClases) nClases = clasesTrain[i]; nClases++; if (nClases < 2) { System.err.println("Input dataset is empty"); nClases = 0; } /*Algorithm body. First, S=TS. Then, for each instance of TS, the first step is to repeat the aplication of the k-nn, and then we decide if we need to change the label of the instance or we don't need it. */ /*Inicialization of the candidates set, S=X, where X is the original Training Set*/ S = new int[datosTrain.length]; for (i = 0; i < S.length; i++) S[i] = 1; /* All included*/ tamS = datosTrain.length; System.out.print("K= " + k + "\n"); System.out.print("K'= " + k2 + "\n"); for (i = 0; i < datosTrain.length; i++) { /* I need find the k-nn of i in X - {i}, so I make conjS without i*/ conjS = new double[datosTrain.length - 1][datosTrain[0].length]; conjR = new double[datosTrain.length - 1][datosTrain[0].length]; conjN = new int[datosTrain.length - 1][datosTrain[0].length]; conjM = new boolean[datosTrain.length - 1][datosTrain[0].length]; clasesS = new int[datosTrain.length - 1]; cont = 0; for (j = 0; j < datosTrain.length; j++) { if (i != j) { for (l = 0; l < datosTrain[0].length; l++) { conjS[cont][l] = datosTrain[j][l]; conjR[cont][l] = realTrain[j][l]; conjN[cont][l] = nominalTrain[j][l]; conjM[cont][l] = nulosTrain[j][l]; } clasesS[cont] = clasesTrain[j]; cont++; } } /*Do KNN to the instance*/ claseObt = KNN.evaluacionKNN3( k, conjS, conjR, conjN, conjM, clasesS, datosTrain[i], realTrain[i], nominalTrain[i], nulosTrain[i], nClases, distanceEu); /* System.out.print("Las clases de 
los k vecinos m�s cercanos son\n"); for(int m=0;m<k;m++){ System.out.print(claseObt[m]+ " "); } System.out.print("\n-----------------------------------------------\n"); */ /*Now, we must check that we have at least k2 neighboors with the same class. */ int max = 0; clasePredominante = 0; for (int m = 0; m < claseObt.length; m++) { int claseDeInstancia = claseObt[m]; // Select one class. int iguales = 0; for (j = 0; j < claseObt.length; j++) { // Check numbers of instances with this class if (j != m) { // I can't count the same. if (claseObt[j] == claseDeInstancia) { iguales++; } } } // I must check if there is another class with more instances. if (iguales > max) { max = iguales; clasePredominante = claseObt[m]; } } // System.out.print("max " + max +"\n"); // System.out.print("Clase Predominante: "+clasePredominante+"\n"); /* Max+1 = number of neighbours with the same class*/ if ((max) >= k2) { /* if there are at least k2 neighbour, we change the class in S, */ if (clasePredominante != clasesTrain[i]) transformations++; clasesTrain[i] = clasePredominante; S[i] = 1; } else { /* Discard.*/ tamS--; S[i] = 0; } } System.out.print("S size resultante= " + tamS + "\n"); System.out.print("Transformations = " + transformations + "\n"); /*Construction of the S set from the previous vector S*/ conjS = new double[tamS][datosTrain[0].length]; conjR = new double[tamS][datosTrain[0].length]; conjN = new int[tamS][datosTrain[0].length]; conjM = new boolean[tamS][datosTrain[0].length]; clasesS = new int[tamS]; cont = 0; /* To establish the sets' sizes */ for (j = 0; j < datosTrain.length; j++) { if (S[j] == 1) { /* Checking the instance is included*/ for (l = 0; l < datosTrain[0].length; l++) { conjS[cont][l] = datosTrain[j][l]; conjR[cont][l] = realTrain[j][l]; conjN[cont][l] = nominalTrain[j][l]; conjM[cont][l] = nulosTrain[j][l]; } clasesS[cont] = clasesTrain[j]; cont++; } } System.out.println( "Time elapse: " + (double) (System.currentTimeMillis() - tiempo) / 1000.0 + "s"); 
OutputIS.escribeSalida( ficheroSalida[0], conjR, conjN, conjM, clasesS, entradas, salida, nEntradas, relation); OutputIS.escribeSalida(ficheroSalida[1], test, entradas, salida, nEntradas, relation); }