/**
 * Finds the row of {@code conj} closest to {@code ejemplo} according to {@code KNN.distancia}.
 *
 * <p>Only the first {@code nSel} rows are examined. On ties the lowest index wins, because a
 * candidate replaces the current best only when its distance is strictly smaller.
 *
 * @param nSel number of leading rows of {@code conj} to examine
 * @param conj candidate instances, one per row
 * @param ejemplo instance whose nearest neighbour is sought
 * @return index of the nearest row, or -1 when no row produced a distance below positive
 *     infinity (for example when {@code nSel} is not positive)
 */
protected int NN(int nSel, double conj[][], double ejemplo[]) {
  int closest = -1;
  double best = Double.POSITIVE_INFINITY;

  int idx = 0;
  while (idx < nSel) {
    final double d = KNN.distancia(conj[idx], ejemplo);
    if (d < best) {
      best = d;
      closest = idx;
    }
    idx++;
  }

  return closest;
}
/**
 * Chooses the next neighbour of instance {@code ej} by a centroid criterion.
 *
 * <p>For every flagged candidate other than {@code ej}, the mean of the already chosen
 * neighbours plus that candidate is computed, and the candidate whose mean lies closest to
 * {@code datos[ej]} (per {@code KNN.distancia}) is returned. Ties keep the lowest index.
 *
 * @param marcas flags telling which rows of {@code datos} may be chosen
 * @param datos instances, one per row
 * @param ej index of the reference instance; never returned
 * @param vecinos indices of the neighbours selected so far
 * @return index of the chosen candidate, or -1 when no candidate yields a distance below
 *     positive infinity
 */
int nextNeighbour(boolean marcas[], double datos[][], int ej, Vector<Integer> vecinos) {
  final int nAtts = datos[0].length;

  // Attribute-wise sum of the neighbours selected so far (new arrays start at zero).
  final double[] sum = new double[nAtts];
  for (Integer neighbour : vecinos) {
    for (int a = 0; a < nAtts; a++) {
      sum[a] += datos[neighbour][a];
    }
  }

  final int divisor = vecinos.size() + 1;
  final double[] mean = new double[nAtts];
  int chosen = -1;
  double best = Double.POSITIVE_INFINITY;

  for (int cand = 0; cand < datos.length; cand++) {
    if (!marcas[cand] || cand == ej) {
      continue;
    }

    // Centroid obtained if this candidate joined the current neighbours.
    for (int a = 0; a < nAtts; a++) {
      mean[a] = (sum[a] + datos[cand][a]) / divisor;
    }

    final double d = KNN.distancia(datos[ej], mean);
    if (d < best) {
      best = d;
      chosen = cand;
    }
  }

  return chosen;
}
/** * The main method of the class that includes the operations of the algorithm. It includes all the * operations that the algorithm has and finishes when it writes the output information into * files. */ public void run() { int S[]; int i, j, l, m; int nPos = 0, nNeg = 0; int posID; int nClases; int pos; int baraje[]; int tmp; double conjS[][]; int clasesS[]; int tamS = 0; int claseObt; int cont; int busq; boolean marcas[]; int nSel; double conjS2[][]; int clasesS2[]; double minDist, dist; long tiempo = System.currentTimeMillis(); /*CNN PART*/ /*Count of number of positive and negative examples*/ for (i = 0; i < clasesTrain.length; i++) { if (clasesTrain[i] == 0) nPos++; else nNeg++; } if (nPos > nNeg) { tmp = nPos; nPos = nNeg; nNeg = tmp; posID = 1; } else { posID = 0; } /*Inicialization of the candidates set*/ S = new int[datosTrain.length]; for (i = 0; i < S.length; i++) S[i] = Integer.MAX_VALUE; /*Inserting an element of mayority class*/ Randomize.setSeed(semilla); pos = Randomize.Randint(0, clasesTrain.length - 1); while (clasesTrain[pos] == posID) pos = (pos + 1) % clasesTrain.length; S[tamS] = pos; tamS++; /*Insert all subset of minority class*/ for (i = 0; i < clasesTrain.length; i++) { if (clasesTrain[i] == posID) { S[tamS] = i; tamS++; } } /*Algorithm body. We resort randomly the instances of T and compare with the rest of S. 
If an instance doesn´t classified correctly, it is inserted in S*/ baraje = new int[datosTrain.length]; for (i = 0; i < datosTrain.length; i++) baraje[i] = i; for (i = 0; i < datosTrain.length; i++) { pos = Randomize.Randint(i, clasesTrain.length - 1); tmp = baraje[i]; baraje[i] = baraje[pos]; baraje[pos] = tmp; } for (i = 0; i < datosTrain.length; i++) { if (clasesTrain[i] != posID) { // only for mayority class instances /*Construction of the S set from the previous vector S*/ conjS = new double[tamS][datosTrain[0].length]; clasesS = new int[tamS]; for (j = 0; j < tamS; j++) { for (l = 0; l < datosTrain[0].length; l++) conjS[j][l] = datosTrain[S[j]][l]; clasesS[j] = clasesTrain[S[j]]; } /*Do KNN to the instance*/ claseObt = KNN.evaluacionKNN(k, conjS, clasesS, datosTrain[baraje[i]], 2); if (claseObt != clasesTrain[baraje[i]]) { // fail in the class, it is included in S Arrays.sort(S); busq = Arrays.binarySearch(S, baraje[i]); if (busq < 0) { S[tamS] = baraje[i]; tamS++; } } } } /*Construction of the S set from the previous vector S*/ conjS = new double[tamS][datosTrain[0].length]; clasesS = new int[tamS]; for (j = 0; j < tamS; j++) { for (l = 0; l < datosTrain[0].length; l++) conjS[j][l] = datosTrain[S[j]][l]; clasesS[j] = clasesTrain[S[j]]; } /*TOMEK LINKS PART*/ /*Inicialization of the instance flagged vector of the S set*/ marcas = new boolean[conjS.length]; for (i = 0; i < conjS.length; i++) { marcas[i] = true; } nSel = conjS.length; for (i = 0; i < conjS.length; i++) { minDist = Double.POSITIVE_INFINITY; pos = 0; for (j = 0; j < conjS.length; j++) { if (i != j) { dist = KNN.distancia(conjS[i], conjS[j]); if (dist < minDist) { minDist = dist; pos = j; } } } if (clasesS[i] != clasesS[pos]) { if (clasesS[i] != posID) { if (marcas[i] == true) { marcas[i] = false; nSel--; } } else { if (marcas[pos] == true) { marcas[pos] = false; nSel--; } } } } /*Construction of the S set from the flags*/ conjS2 = new double[nSel][conjS[0].length]; clasesS2 = new int[nSel]; for 
(m = 0, l = 0; m < conjS.length; m++) { if (marcas[m]) { // the instance will evaluate for (j = 0; j < conjS[0].length; j++) { conjS2[l][j] = conjS[m][j]; } clasesS2[l] = clasesS[m]; l++; } } System.out.println( "CNN_TomekLinks " + relation + " " + (double) (System.currentTimeMillis() - tiempo) / 1000.0 + "s"); OutputIS.escribeSalida( ficheroSalida[0], conjS2, clasesS2, entradas, salida, nEntradas, relation); OutputIS.escribeSalida(ficheroSalida[1], test, entradas, salida, nEntradas, relation); }
/** * Evaluates a chromosome * * @param datos Reference to the training set * @param real Reference to the training set (real valued) * @param nominal Reference to the training set (nominal valued) * @param nulos Reference to the training set (null values) * @param clases Output attribute of each instance * @param alfa Alpha value of the fitness function * @param kNeigh Number of neighbors for the KNN algorithm * @param nClases Number of classes of the problem * @param distanceEu True= Euclidean distance; False= HVDM */ public void evalua( double datos[][], double real[][], int nominal[][], boolean nulos[][], int clases[], double alfa, int kNeigh, int nClases, boolean distanceEu) { int i, j, l, m; int aciertos = 0; double M, s; double conjS[][]; double conjR[][]; int conjN[][]; boolean conjM[][]; int clasesS[]; int vecinos[]; int claseObt; int vecinoCercano; double dist, minDist; M = (double) datos.length; s = (double) genesActivos(); if (kNeigh > 1) { vecinos = new int[kNeigh]; conjS = new double[(int) s][datos[0].length]; conjR = new double[(int) s][datos[0].length]; conjN = new int[(int) s][datos[0].length]; conjM = new boolean[(int) s][datos[0].length]; clasesS = new int[(int) s]; for (j = 0, l = 0; j < datos.length; j++) { if (cuerpo[j]) { // the instance must be copied to the solution for (m = 0; m < datos[j].length; m++) { conjS[l][m] = datos[j][m]; conjR[l][m] = real[j][m]; conjN[l][m] = nominal[j][m]; conjM[l][m] = nulos[j][m]; } clasesS[l] = clases[j]; l++; } } for (i = 0; i < datos.length; i++) { claseObt = KNN.evaluacionKNN2( kNeigh, conjS, conjR, conjN, conjM, clasesS, datos[i], real[i], nominal[i], nulos[i], nClases, distanceEu, vecinos); if (claseObt >= 0) if (clases[i] == claseObt) aciertos++; } } else { for (i = 0; i < datos.length; i++) { vecinoCercano = -1; minDist = Double.POSITIVE_INFINITY; for (j = 0; j < datos.length; j++) { if (cuerpo[j]) { // It is in S dist = KNN.distancia( datos[i], real[i], nominal[i], nulos[i], datos[j], real[j], 
nominal[j], nulos[j], distanceEu); if (dist < minDist && dist != 0) { minDist = dist; vecinoCercano = j; } } } if (vecinoCercano >= 0) if (clases[i] == clases[vecinoCercano]) aciertos++; } } calidad = ((double) (aciertos) / M) * alfa * 100.0; calidad += ((1.0 - alfa) * 100.0 * (M - s) / M); cruzado = false; } // end-method