/**
 * Computes the mean and (population) standard deviation of every input attribute and of the
 * real-valued output, storing them in {@code average} and {@code stdev}. The last slot of each
 * array corresponds to the output variable. X is attribute-major here: X[attribute][example].
 */
private void computeStatistics() {
  average = new double[this.getnVars()];
  stdev = new double[this.getnVars()];
  int out = average.length - 1; // index reserved for the output variable

  // Mean of every input attribute.
  for (int attr = 0; attr < this.getnInputs(); attr++) {
    double acc = 0.0;
    for (double v : X[attr]) {
      acc += v;
    }
    average[attr] = acc / X[attr].length;
  }

  // Mean of the real-valued output.
  double accOut = 0.0;
  for (double v : outputReal) {
    accOut += v;
  }
  average[out] = accOut / outputReal.length;

  // Population standard deviation of every input attribute.
  for (int attr = 0; attr < this.getnInputs(); attr++) {
    double sq = 0.0;
    for (double v : X[attr]) {
      double d = v - average[attr];
      sq += d * d;
    }
    stdev[attr] = Math.sqrt(sq / X[attr].length);
  }

  // Population standard deviation of the real-valued output.
  double sqOut = 0.0;
  for (double v : outputReal) {
    double d = v - average[out];
    sqOut += d * d;
  }
  stdev[out] = Math.sqrt(sqOut / outputReal.length);
}
/**
 * Applies mutation over the new population. Instead of testing every gene, the next gene to
 * mutate is reached by a geometric jump driven by {@code prob_mutacion} (Mühlenbein-style skip
 * sampling), so the expected number of mutated genes matches the mutation probability. Mutated
 * chromosomes are flagged for re-evaluation.
 */
public void mutate() {
  final int totalGenePositions = n_genes * long_poblacion;

  if (prob_mutacion > 0) {
    while (Mu_next < totalGenePositions) {
      // Map the flat gene position onto (chromosome, gene-within-chromosome).
      int chrom = Mu_next / n_genes;
      int gene = Mu_next % n_genes;

      // Mutate that gene.
      poblacion[chrom].mutate(gene);

      // Flag the mutated chromosome so it is evaluated again.
      poblacion[chrom].setEvaluated(false);

      // Jump to the next position to mutate: geometric skip when p < 1, every gene when p == 1.
      if (prob_mutacion < 1) {
        double u = Randomize.Rand();
        Mu_next += Math.ceil(Math.log(u) / Math.log(1.0 - prob_mutacion));
      } else {
        Mu_next += 1;
      }
    }
  }

  // Carry the remaining offset over to the next generation.
  Mu_next -= totalGenePositions;
}
/**
 * Computes the mean and (population) standard deviation of each input attribute and of the
 * real-valued output, storing them in {@code average} and {@code stdev}. Missing input values
 * are skipped when accumulating. X is example-major here: X[example][attribute].
 */
private void computeStatistics() {
  int nVars = this.getNvariables();
  int nData = this.getNdatos();
  int outIdx = nVars - 1; // last slot holds the output variable's statistics

  average = new double[nVars];
  stdev = new double[nVars];

  // Mean of each input attribute, skipping missing values in the sum.
  for (int attr = 0; attr < this.getnInputs(); attr++) {
    double total = 0.0;
    for (int ex = 0; ex < nData; ex++) {
      if (!this.isMissing(ex, attr)) {
        total += X[ex][attr];
      }
    }
    // NOTE(review): the divisor is the full example count, not the number of non-missing
    // values — kept as-is to preserve the original behavior; confirm this is intended.
    average[attr] = total / nData;
  }

  // Mean of the real-valued output.
  double outTotal = 0.0;
  for (int ex = 0; ex < outputReal.length; ex++) {
    outTotal += outputReal[ex];
  }
  average[outIdx] = outTotal / outputReal.length;

  // Population standard deviation of each input attribute (same divisor caveat as above).
  for (int attr = 0; attr < this.getnInputs(); attr++) {
    double sq = 0.0;
    for (int ex = 0; ex < nData; ex++) {
      if (!this.isMissing(ex, attr)) {
        double d = X[ex][attr] - average[attr];
        sq += d * d;
      }
    }
    stdev[attr] = Math.sqrt(sq / nData);
  }

  // Population standard deviation of the real-valued output.
  double outSq = 0.0;
  for (int ex = 0; ex < outputReal.length; ex++) {
    double d = outputReal[ex] - average[outIdx];
    outSq += d * d;
  }
  stdev[outIdx] = Math.sqrt(outSq / outputReal.length);
}
/**
 * Builds one synthetic (SMOTE) example by interpolating, attribute by attribute, between a
 * seed instance "a" and one of its neighbors "b".
 *
 * <p>Real and integer attributes are interpolated at a random point of the segment between the
 * two raw values (integers are rounded); nominal attributes take the value of one parent chosen
 * at random. The result is produced both in raw form ({@code resR}/{@code resN}) and normalized
 * to [0, 1] ({@code resS}); {@code resM} flags attributes missing in both parents.
 *
 * @param ra raw real/integer attribute values of instance a
 * @param rb raw real/integer attribute values of instance b
 * @param na nominal attribute value indices of instance a
 * @param nb nominal attribute value indices of instance b
 * @param ma missing-value flags of instance a
 * @param mb missing-value flags of instance b
 * @param resS output: normalized attribute values of the synthetic instance
 * @param resR output: raw real/integer attribute values of the synthetic instance
 * @param resN output: nominal attribute value indices of the synthetic instance
 * @param resM output: missing-value flags of the synthetic instance
 */
private void interpola(
    double ra[],
    double rb[],
    int na[],
    int nb[],
    boolean ma[],
    boolean mb[],
    double resS[],
    double resR[],
    int resN[],
    boolean resM[]) {
  for (int i = 0; i < ra.length; i++) {
    if (ma[i] == true && mb[i] == true) {
      // Attribute missing in both parents: it stays missing in the child.
      resM[i] = true;
      resS[i] = 0;
    } else {
      resM[i] = false;
      if (entradas[i].getType() == Attribute.REAL) {
        // Random point on the segment [ra[i], rb[i]].
        double diff = rb[i] - ra[i];
        double gap = Randomize.Rand();
        resR[i] = ra[i] + gap * diff;
        // FIX: normalize the *interpolated* value as (v - min) / (max - min). The previous
        // code computed (ra[i] + min) / (max - min), which ignored the interpolation result
        // and, by adding the minimum, pushed the value out of the [0, 1] scaled range.
        resS[i] =
            (resR[i] - entradas[i].getMinAttribute())
                / (entradas[i].getMaxAttribute() - entradas[i].getMinAttribute());
      } else if (entradas[i].getType() == Attribute.INTEGER) {
        // Same interpolation, rounded to the nearest integer value.
        double diff = rb[i] - ra[i];
        double gap = Randomize.Rand();
        resR[i] = Math.round(ra[i] + gap * diff);
        // FIX: same normalization correction as the REAL branch.
        resS[i] =
            (resR[i] - entradas[i].getMinAttribute())
                / (entradas[i].getMaxAttribute() - entradas[i].getMinAttribute());
      } else {
        // Nominal attribute: inherit the value of one parent chosen at random, and scale the
        // value index into [0, 1] by the number of nominal values.
        int suerte = Randomize.Randint(0, 2);
        if (suerte == 0) {
          resN[i] = na[i];
        } else {
          resN[i] = nb[i];
        }
        resS[i] = (double) resN[i] / (double) (entradas[i].getNominalValuesList().size() - 1);
      }
    }
  }
}
/**
 * Runs the complete LVQ (Learning Vector Quantization) experiment: selects the initial neurons
 * (at least one per class, the rest at random), trains them over the shuffled training set for
 * at most T epochs, then classifies the reference (train) and test partitions with a 1-NN rule
 * over the trained neurons, writing results and the network itself to the output files.
 */
public void ejecutar() {
  int i, j, l, m;
  double alfai;
  int nClases;
  int claseObt;
  boolean marcas[];
  boolean notFound; // NOTE(review): unused; leftover from the commented-out selection scheme
  int init;
  int clasSel[]; // NOTE(review): allocated but only referenced by commented-out code
  int baraje[];
  int pos, tmp;
  String instanciasIN[];
  String instanciasOUT[];

  long tiempo = System.currentTimeMillis();

  /* Getting the number of differents classes */
  // Class labels are assumed to be 0..max, so the class count is max + 1.
  nClases = 0;
  for (i = 0; i < clasesTrain.length; i++) if (clasesTrain[i] > nClases) nClases = clasesTrain[i];
  nClases++;

  /* Shuffle the train set (Fisher-Yates permutation of the index array) */
  baraje = new int[datosTrain.length];
  Randomize.setSeed(semilla);
  for (i = 0; i < datosTrain.length; i++) baraje[i] = i;
  for (i = 0; i < datosTrain.length; i++) {
    pos = Randomize.Randint(i, datosTrain.length - 1);
    tmp = baraje[i];
    baraje[i] = baraje[pos];
    baraje[pos] = tmp;
  }

  /*
   * Inicialization of the flagged instaces vector for a posterior
   * elimination
   */
  marcas = new boolean[datosTrain.length];
  for (i = 0; i < datosTrain.length; i++) marcas[i] = false;
  if (datosTrain.length > 0) {
    // marcas[baraje[0]] = true; //the first instance is included always
    // Number of neurons: the configured n_p, but never fewer than one per class.
    nSel = n_p;
    if (nSel < nClases) nSel = nClases;
  } else {
    System.err.println("Input dataset is empty");
    nSel = 0;
  }
  clasSel = new int[nClases];
  System.out.print("Selecting initial neurons... \n");

  // at least, there must be 1 neuron of each class at the beginning
  init = nClases;
  for (i = 0; i < nClases && i < datosTrain.length; i++) {
    // Start at a random position and scan circularly for an unmarked instance of class i.
    pos = Randomize.Randint(0, datosTrain.length - 1);
    tmp = 0;
    while ((clasesTrain[pos] != i || marcas[pos]) && tmp < datosTrain.length) {
      pos = (pos + 1) % datosTrain.length;
      tmp++;
    }
    if (tmp < datosTrain.length) marcas[pos] = true;
    else init--; // class i has no available instance: one fewer initial neuron
    // clasSel[i] = i;
  }

  // Fill the remaining neuron slots with random unmarked instances of any class.
  for (i = init; i < Math.min(nSel, datosTrain.length); i++) {
    tmp = 0;
    pos = Randomize.Randint(0, datosTrain.length - 1);
    while (marcas[pos]) {
      pos = (pos + 1) % datosTrain.length;
      tmp++;
    }
    // if(i<nClases){
    //   notFound = true;
    //   do{
    //     for(j=i-1;j>=0 && notFound;j--){
    //       if(clasSel[j] == clasesTrain[pos])
    //         notFound = false;
    //     }
    //     if(!notFound)
    //       pos = Randomize.Randint (0, datosTrain.length-1);
    //   }while(!notFound);
    // }
    // clasSel[i] = clasesTrain[pos];
    marcas[pos] = true;
    init++;
  }
  nSel = init;
  System.out.println("Initial neurons selected: " + nSel);

  /* Building of the S set from the flags */
  conjS = new double[nSel][datosTrain[0].length];
  clasesS = new int[nSel];
  for (m = 0, l = 0; m < datosTrain.length; m++) {
    if (marcas[m]) { // the instance must be copied to the solution
      for (j = 0; j < datosTrain[0].length; j++) {
        conjS[l][j] = datosTrain[m][j];
      }
      clasesS[l] = clasesTrain[m];
      l++;
    }
  }

  alfai = alpha; // remember the initial learning rate; it is restored at every epoch
  boolean change = true;

  /* Body of the LVQ algorithm. */
  // Train the network: stop after T epochs, or earlier if an epoch repels no neuron.
  for (int it = 0; it < T && change; it++) {
    change = false;
    alpha = alfai;
    // NOTE(review): this loop starts at i = 1, so the first shuffled instance of each epoch
    // is never presented — looks unintended; confirm against the reference implementation.
    for (i = 1; i < datosTrain.length; i++) {
      // search for the nearest neuron to training instance
      pos = NN(nSel, conjS, datosTrain[baraje[i]]);

      // nearest neuron labels correctly the class of training instance?
      if (clasesS[pos] != clasesTrain[baraje[i]]) { // NO - repel the neuron
        for (j = 0; j < conjS[pos].length; j++) {
          conjS[pos][j] = conjS[pos][j] - alpha * (datosTrain[baraje[i]][j] - conjS[pos][j]);
        }
        change = true;
      } else { // YES - migrate the neuron towards the input vector
        for (j = 0; j < conjS[pos].length; j++) {
          conjS[pos][j] = conjS[pos][j] + alpha * (datosTrain[baraje[i]][j] - conjS[pos][j]);
        }
      }
      alpha = nu * alpha; // decay the learning rate after every presentation
    }

    // Shuffle again the training partition
    baraje = new int[datosTrain.length];
    for (i = 0; i < datosTrain.length; i++) baraje[i] = i;
    for (i = 0; i < datosTrain.length; i++) {
      pos = Randomize.Randint(i, datosTrain.length - 1);
      tmp = baraje[i];
      baraje[i] = baraje[pos];
      baraje[pos] = tmp;
    }
  }

  System.out.println(
      "LVQ " + relation + " " + (double) (System.currentTimeMillis() - tiempo) / 1000.0 + "s");

  // Classify the train data set (the reference partition) with 1-NN over the neurons.
  instanciasIN = new String[datosReferencia.length];
  instanciasOUT = new String[datosReferencia.length];
  for (i = 0; i < datosReferencia.length; i++) {
    /* Classify the instance selected in this iteration */
    Attribute a = Attributes.getOutputAttribute(0);
    int tipo = a.getType();
    claseObt = KNN.evaluacionKNN2(1, conjS, clasesS, datosReferencia[i], nClases);
    if (tipo != Attribute.NOMINAL) {
      // Numeric output: emit the class index itself.
      instanciasIN[i] = new String(String.valueOf(clasesReferencia[i]));
      instanciasOUT[i] = new String(String.valueOf(claseObt));
    } else {
      // Nominal output: emit the class name.
      instanciasIN[i] = new String(a.getNominalValue(clasesReferencia[i]));
      instanciasOUT[i] = new String(a.getNominalValue(claseObt));
    }
  }
  escribeSalida(
      ficheroSalida[0], instanciasIN, instanciasOUT, entradas, salida, nEntradas, relation);

  // Classify the test data set
  normalizarTest();
  instanciasIN = new String[datosTest.length];
  instanciasOUT = new String[datosTest.length];
  for (i = 0; i < datosTest.length; i++) {
    /* Classify the instance selected in this iteration */
    Attribute a = Attributes.getOutputAttribute(0);
    int tipo = a.getType();
    claseObt = KNN.evaluacionKNN2(1, conjS, clasesS, datosTest[i], nClases);
    if (tipo != Attribute.NOMINAL) {
      instanciasIN[i] = new String(String.valueOf(clasesTest[i]));
      instanciasOUT[i] = new String(String.valueOf(claseObt));
    } else {
      instanciasIN[i] = new String(a.getNominalValue(clasesTest[i]));
      instanciasOUT[i] = new String(a.getNominalValue(claseObt));
    }
  }
  escribeSalida(
      ficheroSalida[1], instanciasIN, instanciasOUT, entradas, salida, nEntradas, relation);

  // Print the network to a file
  printNetworkToFile(ficheroSalida[2], referencia.getHeader());
}
/** * Computes and stores the information gain of each attribute (variable) of the dataset * * @param Examples Set of instances of the dataset * @param nFile Name of the file */ public void GainInit(TableDat Examples, String nFile) { int i, j, h, v; boolean encontrado; float info_gk, suma, suma1, suma2, p_clase, logaritmo; int num_clase[] = new int[n_clases]; int n_vars = this.getNVars(); int MaxVal = this.getMaxVal(); float p[][] = new float[n_vars][MaxVal]; float p_cond[][][] = new float[n_clases][n_vars][MaxVal]; GI = new float[n_vars]; intervalosGI = new float[n_vars][MaxVal]; String contents; contents = "\n--------------------------------------------\n"; contents += "| Computation of the info gain |\n"; contents += "--------------------------------------------\n"; contents += "Points for computation of the info gain:\n"; // Loads the values for "intervalosGI" float marca, p_corte; for (int v1 = 0; v1 < n_vars; v1++) { if (this.getContinuous(v1) == true) { contents += "\tVariable " + var[v1].getName() + ": "; marca = (this.getMax(v1) - this.getMin(v1)) / ((float) (this.getNLabelVar(v1) - 1)); p_corte = this.getMin(v1) + marca / 2; for (int et = 0; et < this.getNLabelVar(v1); et++) { intervalosGI[v1][et] = p_corte; contents += intervalosGI[v1][et] + " "; p_corte += marca; } contents += "\n"; } } // Structure initialization for (i = 0; i < n_clases; i++) num_clase[i] = 0; for (i = 0; i < n_vars; i++) for (j = 0; j < MaxVal; j++) { p[i][j] = 0; // Simple probabilities matrix for (h = 0; h < n_clases; h++) p_cond[h][i][j] = 0; // Conditional probabilities matrix } // Computation of the Simple and Conditional probabilities matrixs for (i = 0; i < Examples.getNEx(); i++) { num_clase[Examples.getClass(i)]++; // distribution by classes for (j = 0; j < n_vars; j++) { // distribution by values if (!this.getContinuous(j)) { // Discrete variable if (!Examples.getLost(this, i, j)) { // if the value is not a lost one p[j][(int) Examples.getDat(i, j)]++; p_cond[(int) 
Examples.getClass(i)][j][(int) Examples.getDat(i, j)]++; } } else { // Continuous variable encontrado = false; h = 0; while (!encontrado && h < this.getNLabelVar(j)) { if (Examples.getDat(i, j) <= intervalosGI[j][h]) encontrado = true; else h++; } if (encontrado == true) { p[j][h]++; p_cond[(int) Examples.getClass(i)][j][h]++; } else { if (!Examples.getLost(this, i, j)) { // Lost value System.out.println( "Fallo al calcular la ganancia de infor, Variable " + j + " Ejemplo " + i); return; } } } } } for (h = 0; h < n_clases; h++) for (i = 0; i < n_vars; i++) { if (!this.getContinuous(i)) // Discrete variable for (j = (int) this.getMin(i); j <= (int) this.getMax(i); j++) p_cond[h][i][j] = p_cond[h][i][j] / Examples.getNEx(); else // Continuous variable for (j = 0; j < this.getNLabelVar(i); j++) p_cond[h][i][j] = p_cond[h][i][j] / Examples.getNEx(); } for (i = 0; i < n_vars; i++) { if (!this.getContinuous(i)) // Discrete variable for (j = (int) this.getMin(i); j <= (int) this.getMax(i); j++) p[i][j] = p[i][j] / Examples.getNEx(); else // Continuous variable for (j = 0; j < this.getNLabelVar(i); j++) p[i][j] = p[i][j] / Examples.getNEx(); } // Info Gk computation suma = 0; for (i = 0; i < n_clases; i++) { p_clase = ((float) num_clase[i]) / Examples.getNEx(); if (p_clase > 0) { logaritmo = (float) (Math.log((double) p_clase) / Math.log(2)); suma += p_clase * logaritmo; } } info_gk = (-1) * suma; // Information gain computation for each attibute for (v = 0; v < n_vars; v++) { suma = info_gk; suma1 = 0; if (!this.getContinuous(v)) { // Discrete Variable for (i = (int) this.getMin(v); i <= (int) this.getMax(v); i++) { suma2 = 0; for (j = 0; j < n_clases; j++) if (p_cond[j][v][i] > 0) { logaritmo = (float) (Math.log(p_cond[j][v][i]) / Math.log(2)); suma2 += p_cond[j][v][i] * logaritmo; } suma1 += p[v][i] * (-1) * suma2; } } else { // Continuous Variable for (i = 0; i < this.getNLabelVar(v); i++) { suma2 = 0; for (j = 0; j < n_clases; j++) if (p_cond[j][v][i] > 0) { logaritmo 
= (float) (Math.log(p_cond[j][v][i]) / Math.log(2)); suma2 += p_cond[j][v][i] * logaritmo; } suma1 += p[v][i] * (-1) * suma2; } } GI[v] = suma + (-1) * suma1; } contents += "Information Gain of the variables:\n"; for (v = 0; v < n_vars; v++) { if (this.getContinuous(v) == true) contents += "\tVariable " + var[v].getName() + ": " + GI[v] + "\n"; } if (nFile != "") Files.addToFile(nFile, contents); }
/**
 * SMOTE preprocessing procedure: generates synthetic positive (minority-class) instances by
 * interpolating each positive instance with one of its k nearest neighbors, then copies the
 * original training set followed by the synthetic instances into the *Art output arrays.
 *
 * @param datosTrain input training data (scaled values)
 * @param realTrain actual (raw) training data
 * @param nominalTrain nominal attribute values
 * @param nulosTrain null (missing-value) flags
 * @param clasesTrain training classes
 * @param datosArt output: original plus synthetic instances (scaled values)
 * @param realArt output: original plus synthetic instances (raw values)
 * @param nominalArt output: original plus synthetic nominal values
 * @param nulosArt output: original plus synthetic missing-value flags
 * @param clasesArt output: classes of the original plus synthetic instances
 * @param kSMOTE number of nearest neighbors used for interpolation
 * @param ASMO neighbor search mode: 0 = plain k-NN over all classes; 1 = k-NN restricted to
 *     class posID; 2 = k-NN restricted to class negID (see the switch below)
 * @param smoting amount of smoting: when not balancing, (int) (nPos * smoting) synthetic
 *     instances are generated
 * @param balance if true, generate exactly nNeg - nPos synthetic positives instead
 * @param nPos number of positive (minority) instances
 * @param posID class id of the positive class
 * @param nNeg number of negative (majority) instances
 * @param negID class id of the negative class
 * @param distanceEu true to use Euclidean distance in the k-NN search
 */
public void SMOTE(
    double datosTrain[][],
    double realTrain[][],
    int nominalTrain[][],
    boolean nulosTrain[][],
    int clasesTrain[],
    double datosArt[][],
    double realArt[][],
    int nominalArt[][],
    boolean nulosArt[][],
    int clasesArt[],
    int kSMOTE,
    int ASMO,
    double smoting,
    boolean balance,
    int nPos,
    int posID,
    int nNeg,
    int negID,
    boolean distanceEu) {
  int i, j, l, m;
  int tmp, pos;
  int positives[];
  int neighbors[][];
  double genS[][];
  double genR[][];
  int genN[][];
  boolean genM[][];
  int clasesGen[];
  int nn;

  /* Localize the positive instances */
  positives = new int[nPos];
  for (i = 0, j = 0; i < clasesTrain.length; i++) {
    if (clasesTrain[i] == posID) {
      positives[j] = i;
      j++;
    }
  }

  /* Randomize the instance presentation (swap-based shuffle of the positives array) */
  for (i = 0; i < positives.length; i++) {
    tmp = positives[i];
    pos = Randomize.Randint(0, positives.length - 1);
    positives[i] = positives[pos];
    positives[pos] = tmp;
  }

  /* Obtain k-nearest neighbors of each positive instance; the search strategy depends on
   * ASMO (all classes, positives only, or negatives only). */
  neighbors = new int[positives.length][kSMOTE];
  for (i = 0; i < positives.length; i++) {
    switch (ASMO) {
      case 0:
        KNN.evaluacionKNN2(
            kSMOTE,
            datosTrain,
            realTrain,
            nominalTrain,
            nulosTrain,
            clasesTrain,
            datosTrain[positives[i]],
            realTrain[positives[i]],
            nominalTrain[positives[i]],
            nulosTrain[positives[i]],
            Math.max(posID, negID) + 1,
            distanceEu,
            neighbors[i]);
        break;
      case 1:
        evaluacionKNNClass(
            kSMOTE,
            datosTrain,
            realTrain,
            nominalTrain,
            nulosTrain,
            clasesTrain,
            datosTrain[positives[i]],
            realTrain[positives[i]],
            nominalTrain[positives[i]],
            nulosTrain[positives[i]],
            Math.max(posID, negID) + 1,
            distanceEu,
            neighbors[i],
            posID);
        break;
      case 2:
        evaluacionKNNClass(
            kSMOTE,
            datosTrain,
            realTrain,
            nominalTrain,
            nulosTrain,
            clasesTrain,
            datosTrain[positives[i]],
            realTrain[positives[i]],
            nominalTrain[positives[i]],
            nulosTrain[positives[i]],
            Math.max(posID, negID) + 1,
            distanceEu,
            neighbors[i],
            negID);
        break;
    }
  }

  /* Interpolation of the minority instances: size the buffers either to exactly balance the
   * two classes, or to the requested smoting fraction. */
  if (balance) {
    genS = new double[nNeg - nPos][datosTrain[0].length];
    genR = new double[nNeg - nPos][datosTrain[0].length];
    genN = new int[nNeg - nPos][datosTrain[0].length];
    genM = new boolean[nNeg - nPos][datosTrain[0].length];
    clasesGen = new int[nNeg - nPos];
  } else {
    genS = new double[(int) (nPos * smoting)][datosTrain[0].length];
    genR = new double[(int) (nPos * smoting)][datosTrain[0].length];
    genN = new int[(int) (nPos * smoting)][datosTrain[0].length];
    genM = new boolean[(int) (nPos * smoting)][datosTrain[0].length];
    clasesGen = new int[(int) (nPos * smoting)];
  }

  // Each synthetic instance interpolates a positive seed (cycled round-robin via the modulo)
  // with one of its kSMOTE neighbors chosen at random.
  for (i = 0; i < genS.length; i++) {
    clasesGen[i] = posID;
    nn = Randomize.Randint(0, kSMOTE - 1);
    interpola(
        realTrain[positives[i % positives.length]],
        realTrain[neighbors[i % positives.length][nn]],
        nominalTrain[positives[i % positives.length]],
        nominalTrain[neighbors[i % positives.length][nn]],
        nulosTrain[positives[i % positives.length]],
        nulosTrain[neighbors[i % positives.length][nn]],
        genS[i],
        genR[i],
        genN[i],
        genM[i]);
  }

  // Copy the original training instances into the output arrays.
  for (j = 0; j < datosTrain.length; j++) {
    for (l = 0; l < datosTrain[0].length; l++) {
      datosArt[j][l] = datosTrain[j][l];
      realArt[j][l] = realTrain[j][l];
      nominalArt[j][l] = nominalTrain[j][l];
      nulosArt[j][l] = nulosTrain[j][l];
    }
    clasesArt[j] = clasesTrain[j];
  }

  // Append the synthetic instances; note that j deliberately carries over from the previous
  // loop (it starts at datosTrain.length) while m indexes the generated buffers from 0.
  for (m = 0; j < datosArt.length; j++, m++) {
    for (l = 0; l < datosTrain[0].length; l++) {
      datosArt[j][l] = genS[m][l];
      realArt[j][l] = genR[m][l];
      nominalArt[j][l] = genN[m][l];
      nulosArt[j][l] = genM[m][l];
    }
    clasesArt[j] = clasesGen[m];
  }
}
/**
 * Process the training and test files provided in the parameters file to the constructor.
 *
 * <p>Missing values in the training set are imputed by k-means: the data is clustered, and each
 * missing value is replaced by the corresponding value of its instance's cluster center. The
 * transformed matrices are written with {@code write_results}. If a distinct test file exists,
 * it is transformed the same way using the centers fitted on the training data.
 */
public void process() {
  // declarations
  double[] outputs; // NOTE(review): unused
  double[] outputs2; // NOTE(review): unused
  Instance neighbor; // NOTE(review): unused
  double dist, mean; // NOTE(review): unused
  int actual;
  Randomize rnd = new Randomize();
  Instance ex;
  gCenter kmeans = null;
  int iterations = 0;
  double E;
  double prevE;
  int totalMissing = 0;
  boolean allMissing = true;

  rnd.setSeed(semilla);
  // PROCESS
  try {
    // Load in memory a dataset that contains a classification problem
    IS.readSet(input_train_name, true);
    int in = 0;
    int out = 0;

    ndatos = IS.getNumInstances();
    nvariables = Attributes.getNumAttributes();
    nentradas = Attributes.getInputNumAttributes();
    nsalidas = Attributes.getOutputNumAttributes();

    X = new String[ndatos][nvariables]; // matrix with transformed data
    kmeans = new gCenter(K, ndatos, nvariables);

    timesSeen = new FreqList[nvariables];
    mostCommon = new String[nvariables];

    // first, we choose k 'means' randomly from all
    // instances
    totalMissing = 0;
    for (int i = 0; i < ndatos; i++) {
      Instance inst = IS.getInstance(i);
      if (inst.existsAnyMissingValue()) totalMissing++;
    }
    // If every instance has a missing value we cannot insist on complete seed instances,
    // so the rejection loop below is disabled in that case.
    if (totalMissing == ndatos) allMissing = true;
    else allMissing = false;
    for (int numMeans = 0; numMeans < K; numMeans++) {
      do {
        actual = (int) (ndatos * rnd.Rand());
        ex = IS.getInstance(actual);
      } while (ex.existsAnyMissingValue() && !allMissing);
      kmeans.copyCenter(ex, numMeans);
    }

    // now, iterate adjusting clusters' centers and
    // instances to them
    prevE = 0;
    iterations = 0;
    do {
      // Assign every instance to its nearest center.
      for (int i = 0; i < ndatos; i++) {
        Instance inst = IS.getInstance(i);
        kmeans.setClusterOf(inst, i);
      }
      // set new centers
      kmeans.recalculateCenters(IS);
      // compute RMSE
      E = 0;
      for (int i = 0; i < ndatos; i++) {
        Instance inst = IS.getInstance(i);
        E += kmeans.distance(inst, kmeans.getClusterOf(i));
      }
      iterations++;
      // System.out.println(iterations+"\t"+E);
      // Force termination when the error has fully converged (no change at all).
      if (Math.abs(prevE - E) == 0) iterations = maxIter;
      else prevE = E;
    } while (E > minError && iterations < maxIter);

    // Build the transformed train matrix: keep present values as strings, replace missing
    // values with the corresponding value of the instance's cluster center.
    for (int i = 0; i < ndatos; i++) {
      Instance inst = IS.getInstance(i);
      in = 0;
      out = 0;
      for (int j = 0; j < nvariables; j++) {
        Attribute a = Attributes.getAttribute(j);
        direccion = a.getDirectionAttribute();
        tipo = a.getType();
        if (direccion == Attribute.INPUT) {
          if (tipo != Attribute.NOMINAL && !inst.getInputMissingValues(in)) {
            // Present numeric input: copy its value.
            X[i][j] = new String(String.valueOf(inst.getInputRealValues(in)));
          } else {
            if (!inst.getInputMissingValues(in)) X[i][j] = inst.getInputNominalValues(in);
            else {
              // Missing input: impute from the instance's cluster center.
              actual = kmeans.getClusterOf(i);
              X[i][j] = new String(kmeans.valueAt(actual, j));
            }
          }
          in++;
        } else {
          if (direccion == Attribute.OUTPUT) {
            if (tipo != Attribute.NOMINAL && !inst.getOutputMissingValues(out)) {
              X[i][j] = new String(String.valueOf(inst.getOutputRealValues(out)));
            } else {
              if (!inst.getOutputMissingValues(out)) X[i][j] = inst.getOutputNominalValues(out);
              else {
                // Missing output: imputed from the cluster center as well.
                actual = kmeans.getClusterOf(i);
                X[i][j] = new String(kmeans.valueAt(actual, j));
              }
            }
            out++;
          }
        }
      }
    }
  } catch (Exception e) {
    System.out.println("Dataset exception = " + e);
    e.printStackTrace();
    System.exit(-1);
  }
  write_results(output_train_name);
  /** ************************************************************************************ */
  // does a test file associated exist?
  if (input_train_name.compareTo(input_test_name) != 0) {
    try {
      // Load in memory a dataset that contains a classification problem
      IStest.readSet(input_test_name, false);
      int in = 0;
      int out = 0;

      ndatos = IStest.getNumInstances();
      nvariables = Attributes.getNumAttributes();
      nentradas = Attributes.getInputNumAttributes();
      nsalidas = Attributes.getOutputNumAttributes();

      // NOTE(review): X is NOT reallocated here — it keeps the training-set dimensions, and
      // kmeans.getClusterOf(i) below reuses the cluster assignment of training instance i for
      // test instance i. This looks fragile if the test partition differs in size/ordering;
      // confirm against the intended behavior before relying on the test output.
      for (int i = 0; i < ndatos; i++) {
        Instance inst = IStest.getInstance(i);
        in = 0;
        out = 0;
        for (int j = 0; j < nvariables; j++) {
          Attribute a = Attributes.getAttribute(j);
          direccion = a.getDirectionAttribute();
          tipo = a.getType();
          if (direccion == Attribute.INPUT) {
            if (tipo != Attribute.NOMINAL && !inst.getInputMissingValues(in)) {
              X[i][j] = new String(String.valueOf(inst.getInputRealValues(in)));
            } else {
              if (!inst.getInputMissingValues(in)) X[i][j] = inst.getInputNominalValues(in);
              else {
                // Missing input: impute from the (train-fitted) cluster centers.
                actual = kmeans.getClusterOf(i);
                X[i][j] = new String(kmeans.valueAt(actual, j));
              }
            }
            in++;
          } else {
            if (direccion == Attribute.OUTPUT) {
              if (tipo != Attribute.NOMINAL && !inst.getOutputMissingValues(out)) {
                X[i][j] = new String(String.valueOf(inst.getOutputRealValues(out)));
              } else {
                if (!inst.getOutputMissingValues(out)) X[i][j] = inst.getOutputNominalValues(out);
                else {
                  actual = kmeans.getClusterOf(i);
                  X[i][j] = new String(kmeans.valueAt(actual, j));
                }
              }
              out++;
            }
          }
        }
      }
    } catch (Exception e) {
      System.out.println("Dataset exception = " + e);
      e.printStackTrace();
      System.exit(-1);
    }
    write_results(output_test_name);
  }
}