/**
 * Chi square distribution: upper-tail probability P(X >= x) for a chi-squared variate with n
 * degrees of freedom, computed via a series expansion of the lower-tail CDF.
 *
 * @param x Chi^2 value
 * @param n Degrees of freedom
 * @return P-value associated
 */
private static double ChiSq(double x, int n) {
    // With 1 degree of freedom an extreme statistic has a vanishing p-value.
    if (n == 1 && x > 1000) {
        return 0;
    }
    // For very large x or n, reduce to a 1-d.o.f. evaluation of the squared
    // standardized statistic (normal approximation), halving the tail mass.
    if (x > 1000 || n > 1000) {
        double q = ChiSq((x - n) * (x - n) / (2 * n), 1) / 2;
        return (x > n) ? q : 1 - q;
    }
    double p = Math.exp(-0.5 * x);
    // Odd degrees of freedom carry an extra sqrt(2x/pi) factor in the leading term.
    if ((n % 2) == 1) {
        p = p * Math.sqrt(2 * x / Math.PI);
    }
    // Leading coefficient: product of x/k over k = n, n-2, ..., down to 2 or 1.
    double k = n;
    while (k >= 2) {
        p = p * x / k;
        k = k - 2;
    }
    // Series expansion of the lower-tail CDF; stop once terms are negligible
    // relative to the accumulated sum.
    double t = p;
    double a = n;
    while (t > 0.0000000001 * p) {
        a = a + 2;
        t = t * x / a;
        p = p + t;
    }
    // Convert the lower-tail CDF to the upper-tail p-value.
    return 1 - p;
}
/** * Claculates the upper bound for the Chi-Squared value of a rule. * * @param suppAnte the support for the antecedent of a rule. * @param suppCons the support for the consequent of a rule. * @return the Chi-Squared upper bound. */ private double calcChiSquaredUpperBound(double suppAnte, double suppCons) { double term; // Test support for antecedent and confidence and choose minimum if (suppAnte < suppCons) term = Math.pow(suppAnte - ((suppAnte * suppCons) / numRecords), 2.0); else term = Math.pow(suppCons - ((suppAnte * suppCons) / numRecords), 2.0); // Determine e double eVlaue = calcWCSeValue(suppAnte, suppCons); // Rerturn upper bound return (term * eVlaue * numRecords); }
/** * This private method computes the distance between an example in the dataset and a cluster * centroid. The distance is measure as the square root of the sum of the squares of the * differences between the example and the cetroid for all the dimensions. * * @param a The example in the dataset * @param b The culster centroid * @return The distance between a and b as a double precision float value. */ private static double distance(double a[], double b[]) { // Euclid distance between two patterns double d = 0; for (int i = 0; i < a.length; i++) d += (a[i] - b[i]) * (a[i] - b[i]); return (double) Math.sqrt(d); }
/**
 * Computes the output of a RBF (Gaussian activation of the squared Euclidean
 * distance to the centre, scaled by the radius).
 *
 * @param _input Input vector
 * @return The output of a RBF
 */
public double evaluationRbf(double[] _input) {
    double dist = RBFUtils.euclidean(_input, centre);
    double exponent = (dist * dist) / (2.0 * radius * radius);
    return Math.exp(-exponent);
}
/**
 * It converts double to char by the gray's code: each real gene is quantized to
 * BITS_GEN bits and appended, Gray-encoded, to the output string.
 *
 * @param ds the vector which is going to change
 * @param length the size of the vector
 * @return the changed vector
 */
private char[] StringRep(double[] ds, int length) {
    char[] grayOut = new char[Genes * BITS_GEN + 1];
    // Lazily allocate the shared scratch buffer the first time this runs.
    if (flag == 1) {
        tmpstring = new char[Genes * BITS_GEN];
        flag = 0;
    }
    int offset = 0;
    for (int g = 0; g < length; g++) {
        // Quantization step for this gene's range using BITS_GEN bits.
        double step = (Gene[g].max() - Gene[g].min()) / (Math.pow(2.0, (double) BITS_GEN) - 1.0);
        // Nearest quantization level (the +0.5 rounds to nearest on truncation).
        double level = ((ds[g] - Gene[g].min()) / step) + 0.5;
        tmpstring = F.Itoc((int) level, BITS_GEN);
        F.Gray(tmpstring, grayOut, BITS_GEN, offset);
        offset += BITS_GEN;
    }
    return grayOut;
}
/**
 * Computes the euclidean distance between a neuron and a vector, over the
 * first nInput components.
 *
 * @param v A vector
 * @return A double with the euclidean distance
 */
public double euclideaDist(double[] v) {
    double sumSq = 0.0;
    for (int d = 0; d < nInput; d++) {
        double diff = v[d] - centre[d];
        sumSq += diff * diff;
    }
    return Math.sqrt(sumSq);
}
/**
 * Fisher distribution: upper-tail p-value of the F distribution with (n1, n2) degrees of
 * freedom. Relies on the series helper {@code StatCom} (defined elsewhere in this file) —
 * presumably an incomplete-beta style expansion; confirm against its definition.
 *
 * @param f Fisher value
 * @param n1 N1 value
 * @param n2 N2 value
 * @return P-value associated
 */
private static double FishF(double f, int n1, int n2) {
    // Transform to the beta-distribution argument x in (0, 1].
    // n1 * f is a double, so the division is floating-point.
    double x = n2 / (n1 * f + n2);
    // Even degrees of freedom admit a closed series form directly.
    if ((n1 % 2) == 0) {
        return StatCom(1 - x, n2, n1 + n2 - 4, n2 - 2) * Math.pow(x, n2 / 2.0);
    }
    if ((n2 % 2) == 0) {
        return 1 - StatCom(x, n1, n1 + n2 - 4, n1 - 2) * Math.pow(1 - x, n1 / 2.0);
    }
    // Both n1 and n2 odd: trigonometric (arctangent) expansion.
    double th = Math.atan(Math.sqrt(n1 * f / (1.0 * n2)));
    double a = th / (Math.PI / 2.0);
    double sth = Math.sin(th);
    double cth = Math.cos(th);
    if (n2 > 1) {
        a = a + sth * cth * StatCom(cth * cth, 2, n2 - 3, -1) / (Math.PI / 2.0);
    }
    if (n1 == 1) {
        return 1 - a;
    }
    double c = 4 * StatCom(sth * sth, n2 + 1, n1 + n2 - 4, n2 - 2) * sth * Math.pow(cth, n2) / Math.PI;
    if (n2 == 1) {
        return 1 - a + c / 2.0;
    }
    // Correction-factor product applied when n2 > 1.
    int k = 2;
    while (k <= (n2 - 1) / 2.0) {
        c = c * k / (k - .5);
        k = k + 1;
    }
    return 1 - a + c;
}
/** * Calculates the Chi squared values and returns their sum. * * @return the sum of the Chi Squared values. */ private double calcChiSquaredValue() { double sumChiSquaredValues = 0.0; for (int index = 0; index < obsValues.length; index++) { double chiValue = Math.pow((obsValues[index] - expValues[index]), 2.0) / expValues[index]; sumChiSquaredValues = sumChiSquaredValues + chiValue; } // Return return (sumChiSquaredValues); }
/**
 * Computes the adjusted residual of a rule for a given class: the sum, over every condition
 * of the rule, of its standardized residual divided by the square root of its maximum
 * likelihood estimate.
 *
 * @param data the dataset values
 * @param classData the class of each example
 * @param regla the rule whose conditions are evaluated
 * @param clase the class index the residual is computed for
 * @return the accumulated adjusted residual
 */
private double computeAdjustedResidual(int data[][], int classData[], Rule regla, int clase) {
    double total = 0;
    int numConditions = regla.getRule().length;
    for (int c = 0; c < numConditions; c++) {
        double standardized =
            computeStandarizedResidual(data, classData, regla.getiCondition(c), clase);
        double mle =
            computeMaximumLikelohoodEstimate(data, classData, regla.getiCondition(c), clase);
        total += standardized / Math.sqrt(mle);
    }
    return total;
}
/**
 * It calculate the matching degree between the antecedent of the rule and a given example,
 * as a Gaussian of the accumulated normalized squared deviations.
 *
 * @param indiv Individual The individual representing a fuzzy rule
 * @param ejemplo double [] A given example
 * @return double The matching degree between the example and the antecedent of the rule
 */
double Matching_degree(Individual indiv, double[] ejemplo) {
    // Sigma factors indexed by the encoded label (1..4); any other label keeps -1.0,
    // exactly as the original switch's fall-through did.
    final double[] SIGMA_FACTOR = {0.3, 0.4, 0.5, 0.6};
    double acc = 0.0;
    for (int v = 0; v < entradas; v++) {
        double range = train.getMax(v) - train.getMin(v);
        int label = indiv.antecedente[v].sigma;
        double sigma = (label >= 1 && label <= 4) ? SIGMA_FACTOR[label - 1] : -1.0;
        // Scale the width factor to the variable's actual range.
        sigma *= range;
        double num = Math.pow(ejemplo[v] - indiv.antecedente[v].m, 2.0);
        double den = Math.pow(sigma, 2.0);
        acc += num / den;
    }
    return Math.exp(-1.0 * acc);
}
/**
 * Interpolates one offspring chromosome between two parents, attribute by attribute. Real and
 * integer attributes take a uniformly random point on the segment between the parents' values
 * (integers are rounded); nominal attributes copy one parent's value at random. The missing
 * flag is set only when both parents have the attribute missing.
 *
 * @param ra real-coded values of the first parent
 * @param rb real-coded values of the second parent
 * @param na nominal-coded values of the first parent
 * @param nb nominal-coded values of the second parent
 * @param ma missing-value flags of the first parent
 * @param mb missing-value flags of the second parent
 * @param resS output: a normalized value per attribute
 * @param resR output: interpolated real value per attribute (real/integer attributes only)
 * @param resN output: chosen nominal value per attribute (nominal attributes only)
 * @param resM output: missing-value flags of the offspring
 */
private void interpola(
    double ra[],
    double rb[],
    int na[],
    int nb[],
    boolean ma[],
    boolean mb[],
    double resS[],
    double resR[],
    int resN[],
    boolean resM[]) {
  int i;
  double diff;
  double gap;
  int suerte;
  for (i = 0; i < ra.length; i++) {
    // Missing in both parents: the offspring inherits the missing mark.
    if (ma[i] == true && mb[i] == true) {
      resM[i] = true;
      resS[i] = 0;
    } else {
      resM[i] = false;
      if (entradas[i].getType() == Attribute.REAL) {
        // Uniform point on the segment [ra[i], rb[i]].
        diff = rb[i] - ra[i];
        gap = Randomize.Rand();
        resR[i] = ra[i] + gap * diff;
        // NOTE(review): a min-max normalization would usually be (ra[i] - min) /
        // (max - min); this adds the minimum instead — confirm this is intentional.
        resS[i] =
            (ra[i] + entradas[i].getMinAttribute())
                / (entradas[i].getMaxAttribute() - entradas[i].getMinAttribute());
      } else if (entradas[i].getType() == Attribute.INTEGER) {
        // Same interpolation, rounded to the nearest integer value.
        diff = rb[i] - ra[i];
        gap = Randomize.Rand();
        resR[i] = Math.round(ra[i] + gap * diff);
        // NOTE(review): same "+ min" normalization as the REAL branch — confirm.
        resS[i] =
            (ra[i] + entradas[i].getMinAttribute())
                / (entradas[i].getMaxAttribute() - entradas[i].getMinAttribute());
      } else {
        // Nominal attribute: inherit one parent's value with equal probability.
        suerte = Randomize.Randint(0, 2);
        if (suerte == 0) {
          resN[i] = na[i];
        } else {
          resN[i] = nb[i];
        }
        // Normalize the nominal index by the number of possible values.
        resS[i] = (double) resN[i] / (double) (entradas[i].getNominalValuesList().size() - 1);
      }
    }
  }
}
/**
 * It Evaluates the performance of the fuzzy system. The Mean Square Error (MSE) by training is
 * used, inverted so that the caller maximizes fitness.
 */
public double Evaluate_fuzzy_system() {
    double squaredErrorSum = 0.0;
    for (int ex = 0; ex < train.getnData(); ex++) {
        double predicted = Output_fuzzy_system(train.getExample(ex));
        squaredErrorSum += Math.pow(train.getOutputAsReal(ex) - predicted, 2.0);
    }
    double mse = squaredErrorSum / train.getnData();
    /* We want to have a maximization problem so, we invert the error */
    if (mse != 0.0) {
        mse = 1.0 / mse;
    }
    return mse;
}
/**
 * It Evaluates the performance of the best evolved fuzzy system on test data. The Mean Square
 * Error (MSE) is used.
 *
 * @return double The MSE error in test data
 */
public double Evaluate_best_fuzzy_system_in_test() {
    // Load the best evolved rule base into the working fuzzy system.
    SistemaDifuso.clear();
    for (int r = 0; r < Nr; r++) {
        SistemaDifuso.add(new Individual(BestSistemaDifuso.get(r)));
    }
    // Accumulate the squared prediction errors over the test set.
    double squaredErrorSum = 0.0;
    for (int ex = 0; ex < test.getnData(); ex++) {
        double predicted = Output_fuzzy_system(test.getExample(ex));
        squaredErrorSum += Math.pow(test.getOutputAsReal(ex) - predicted, 2.0);
    }
    return squaredErrorSum / test.getnData();
}
/**
 * It calculates PCBLX crossover: each child gene is sampled uniformly from an interval
 * around its parent's gene, clipped to the gene bounds.
 *
 * @param d it multiplies the module of the difference of the parents
 * @param P1 it is a father to cross
 * @param P2 it is the other father to do the cross
 * @param Hijo1 the son obtained
 * @param Hijo2 the other obtained
 * @param gens the size of the vector
 */
private void xPC_BLX(
    double d, double[] P1, double[] P2, double[] Hijo1, double[] Hijo2, int gens) {
    for (int g = 0; g < gens; g++) {
        // Exploration interval half-width, proportional to the parents' distance.
        double width = d * Math.abs(P1[g] - P2[g]);
        // First child: sample around P1, clipped to the gene bounds.
        double lo = P1[g] - width;
        if (lo < Gene[g].min()) lo = Gene[g].min();
        double hi = P1[g] + width;
        if (hi > Gene[g].max()) hi = Gene[g].max();
        Hijo1[g] = lo + Randomize.Rand() * (hi - lo);
        // Second child: sample around P2, clipped to the gene bounds.
        lo = P2[g] - width;
        if (lo < Gene[g].min()) lo = Gene[g].min();
        hi = P2[g] + width;
        if (hi > Gene[g].max()) hi = Gene[g].max();
        Hijo2[g] = lo + Randomize.Rand() * (hi - lo);
    }
}
/**
 * This method runs the multiple comparison tests: Friedman ranking, the Friedman and (optional)
 * Iman-Davenport statistics, pairwise NxN post-hoc procedures (Nemenyi, Holm, Shaffer,
 * Bergmann-Hommel) at alpha = 0.05 and alpha = 0.10, and the adjusted p-values table.
 *
 * @param results Array with the results of the methods
 * @param algorithmName Array with the name of the methods employed
 * @return A string with the contents of the test in LaTeX format
 */
private static String runMultiple(double[][] results, String algorithmName[]) {
  int i, j, k;
  int posicion;
  double mean[][];
  MultiplePair orden[][];
  MultiplePair rank[][];
  boolean encontrado;
  int ig;
  double sum;
  boolean visto[];
  Vector<Integer> porVisitar;
  double Rj[];
  double friedman;
  double sumatoria = 0;
  double termino1, termino2, termino3;
  double iman;
  boolean vistos[];
  int pos, tmp, counter;
  String cad;
  double maxVal;
  double Pi[];
  double ALPHAiHolm[];
  double ALPHAiShaffer[];
  String ordenAlgoritmos[];
  double ordenRankings[];
  int order[];
  double adjustedP[][];
  double SE;
  boolean parar;
  Vector<Integer> indices = new Vector<Integer>();
  Vector<Vector<Relation>> exhaustiveI = new Vector<Vector<Relation>>();
  boolean[][] cuadro;
  double minPi, tmpPi, maxAPi, tmpAPi;
  Relation[] parejitas;
  Vector<Integer> T;
  int Tarray[];
  // Number formatters for the LaTeX tables (4 and 6 fraction digits, '.' separator).
  DecimalFormat nf4 = (DecimalFormat) DecimalFormat.getInstance();
  nf4.setMaximumFractionDigits(4);
  nf4.setMinimumFractionDigits(0);
  DecimalFormatSymbols dfs = nf4.getDecimalFormatSymbols();
  dfs.setDecimalSeparator('.');
  nf4.setDecimalFormatSymbols(dfs);
  DecimalFormat nf6 = (DecimalFormat) DecimalFormat.getInstance();
  nf6.setMaximumFractionDigits(6);
  nf6.setMinimumFractionDigits(0);
  nf6.setDecimalFormatSymbols(dfs);
  String out = "";
  // Read which post-hoc procedures are enabled from the global configuration.
  int nDatasets = Configuration.getNDatasets();
  Iman = Configuration.isIman();
  Nemenyi = Configuration.isNemenyi();
  Bonferroni = Configuration.isBonferroni();
  Holm = Configuration.isHolm();
  Hoch = Configuration.isHochberg();
  Hommel = Configuration.isHommel();
  Scha = Configuration.isShaffer();
  Berg = Configuration.isBergman();
  mean = new double[nDatasets][algorithmName.length];
  // Maximize performance
  if (Configuration.getObjective() == 1) {
    /*Compute the average performance per algorithm for each data set*/
    for (i = 0; i < nDatasets; i++) {
      for (j = 0; j < algorithmName.length; j++) {
        mean[i][j] = results[j][i];
      }
    }
  }
  // Minimize performance
  else {
    // Negate and shift by the global maximum so that "smaller is better"
    // becomes "larger is better" without going negative.
    double maxValue = Double.MIN_VALUE;
    /*Compute the average performance per algorithm for each data set*/
    for (i = 0; i < nDatasets; i++) {
      for (j = 0; j < algorithmName.length; j++) {
        if (results[j][i] > maxValue) {
          maxValue = results[j][i];
        }
        mean[i][j] = (-1.0 * results[j][i]);
      }
    }
    for (i = 0; i < nDatasets; i++) {
      for (j = 0; j < algorithmName.length; j++) {
        mean[i][j] += maxValue;
      }
    }
  }
  /*We use the pareja structure to compute and order rankings*/
  orden = new MultiplePair[nDatasets][algorithmName.length];
  for (i = 0; i < nDatasets; i++) {
    for (j = 0; j < algorithmName.length; j++) {
      orden[i][j] = new MultiplePair(j, mean[i][j]);
    }
    Arrays.sort(orden[i]);
  }
  /*building of the rankings table per algorithms and data sets*/
  rank = new MultiplePair[nDatasets][algorithmName.length];
  posicion = 0;
  for (i = 0; i < nDatasets; i++) {
    for (j = 0; j < algorithmName.length; j++) {
      encontrado = false;
      // Locate algorithm j's position in the sorted row.
      for (k = 0; k < algorithmName.length && !encontrado; k++) {
        if (orden[i][k].indice == j) {
          encontrado = true;
          posicion = k + 1;
        }
      }
      rank[i][j] = new MultiplePair(posicion, orden[i][posicion - 1].valor);
    }
  }
  /*In the case of having the same performance, the rankings are equal*/
  // Ties get the average of the tied positions.
  for (i = 0; i < nDatasets; i++) {
    visto = new boolean[algorithmName.length];
    porVisitar = new Vector<Integer>();
    Arrays.fill(visto, false);
    for (j = 0; j < algorithmName.length; j++) {
      porVisitar.removeAllElements();
      sum = rank[i][j].indice;
      visto[j] = true;
      ig = 1;
      for (k = j + 1; k < algorithmName.length; k++) {
        if (rank[i][j].valor == rank[i][k].valor && !visto[k]) {
          sum += rank[i][k].indice;
          ig++;
          porVisitar.add(new Integer(k));
          visto[k] = true;
        }
      }
      sum /= (double) ig;
      rank[i][j].indice = sum;
      for (k = 0; k < porVisitar.size(); k++) {
        rank[i][((Integer) porVisitar.elementAt(k)).intValue()].indice = sum;
      }
    }
  }
  /*compute the average ranking for each algorithm*/
  Rj = new double[algorithmName.length];
  for (i = 0; i < algorithmName.length; i++) {
    Rj[i] = 0;
    for (j = 0; j < nDatasets; j++) {
      Rj[i] += rank[j][i].indice / ((double) nDatasets);
    }
  }
  /*Print the average ranking per algorithm*/
  out += "\n\nAverage ranks obtained by applying the Friedman procedure\n\n";
  out +=
      "\\begin{table}[!htp]\n"
          + "\\centering\n"
          + "\\begin{tabular}{|c|c|}\\hline\n"
          + "Algorithm&Ranking\\\\\\hline\n";
  for (i = 0; i < algorithmName.length; i++) {
    out += (String) algorithmName[i] + " & " + nf4.format(Rj[i]) + "\\\\\n";
  }
  out += "\\hline\n\\end{tabular}\n\\caption{Average Rankings of the algorithms}\n\\end{table}";
  /*Compute the Friedman statistic*/
  termino1 =
      (12 * (double) nDatasets)
          / ((double) algorithmName.length * ((double) algorithmName.length + 1));
  termino2 =
      (double) algorithmName.length
          * ((double) algorithmName.length + 1)
          * ((double) algorithmName.length + 1)
          / (4.0);
  for (i = 0; i < algorithmName.length; i++) {
    sumatoria += Rj[i] * Rj[i];
  }
  friedman = (sumatoria - termino2) * termino1;
  out +=
      "\n\nFriedman statistic considering reduction performance (distributed according to chi-square with "
          + (algorithmName.length - 1)
          + " degrees of freedom: "
          + nf6.format(friedman)
          + ".\n\n";
  double pFriedman;
  pFriedman = ChiSq(friedman, (algorithmName.length - 1));
  System.out.print("P-value computed by Friedman Test: " + pFriedman + ".\\newline\n\n");
  /*Compute the Iman-Davenport statistic*/
  if (Iman) {
    iman = ((nDatasets - 1) * friedman) / (nDatasets * (algorithmName.length - 1) - friedman);
    out +=
        "Iman and Davenport statistic considering reduction performance (distributed according to F-distribution with "
            + (algorithmName.length - 1)
            + " and "
            + (algorithmName.length - 1) * (nDatasets - 1)
            + " degrees of freedom: "
            + nf6.format(iman)
            + ".\n\n";
    double pIman;
    pIman = FishF(iman, (algorithmName.length - 1), (algorithmName.length - 1) * (nDatasets - 1));
    System.out.print("P-value computed by Iman and Daveport Test: " + pIman + ".\\newline\n\n");
  }
  // Standard error of the rank difference between two algorithms.
  termino3 =
      Math.sqrt(
          (double) algorithmName.length
              * ((double) algorithmName.length + 1)
              / (6.0 * (double) nDatasets));
  out += "\n\n\\pagebreak\n\n";
  /** ********** NxN COMPARISON ************* */
  out += "\\section{Post hoc comparisons}";
  out +=
      "\n\nResults achieved on post hoc comparisons for $\\alpha = 0.05$, $\\alpha = 0.10$ and adjusted p-values.\n\n";
  /*Compute the unadjusted p_i value for each comparison alpha=0.05*/
  Pi = new double[(int) combinatoria(2, algorithmName.length)];
  ALPHAiHolm = new double[(int) combinatoria(2, algorithmName.length)];
  ALPHAiShaffer = new double[(int) combinatoria(2, algorithmName.length)];
  ordenAlgoritmos = new String[(int) combinatoria(2, algorithmName.length)];
  ordenRankings = new double[(int) combinatoria(2, algorithmName.length)];
  order = new int[(int) combinatoria(2, algorithmName.length)];
  parejitas = new Relation[(int) combinatoria(2, algorithmName.length)];
  // Shaffer's sequence of true-hypothesis counts.
  T = new Vector<Integer>();
  T = trueHShaffer(algorithmName.length);
  Tarray = new int[T.size()];
  for (i = 0; i < T.size(); i++) {
    Tarray[i] = ((Integer) T.elementAt(i)).intValue();
  }
  Arrays.sort(Tarray);
  SE = termino3;
  vistos = new boolean[(int) combinatoria(2, algorithmName.length)];
  // Build every pairwise comparison and its absolute rank difference.
  for (i = 0, k = 0; i < algorithmName.length; i++) {
    for (j = i + 1; j < algorithmName.length; j++, k++) {
      ordenRankings[k] = Math.abs(Rj[i] - Rj[j]);
      ordenAlgoritmos[k] = (String) algorithmName[i] + " vs. " + (String) algorithmName[j];
      parejitas[k] = new Relation(i, j);
    }
  }
  // Selection sort of comparisons by decreasing rank difference; order[i] holds
  // the index of the i-th largest difference.
  Arrays.fill(vistos, false);
  for (i = 0; i < ordenRankings.length; i++) {
    for (j = 0; vistos[j] == true; j++)
      ;
    pos = j;
    maxVal = ordenRankings[j];
    for (j = j + 1; j < ordenRankings.length; j++) {
      if (vistos[j] == false && ordenRankings[j] > maxVal) {
        pos = j;
        maxVal = ordenRankings[j];
      }
    }
    vistos[pos] = true;
    order[i] = pos;
  }
  /*Computing the logically related hypotheses tests (Shaffer and Bergmann-Hommel)*/
  pos = 0;
  tmp = Tarray.length - 1;
  for (i = 0; i < order.length; i++) {
    // Two-sided normal p-value of the standardized rank difference.
    Pi[i] = 2 * CDF_Normal.normp((-1) * Math.abs((ordenRankings[order[i]]) / SE));
    ALPHAiHolm[i] = 0.05 / ((double) order.length - (double) i);
    ALPHAiShaffer[i] = 0.05 / ((double) order.length - (double) Math.max(pos, i));
    if (i == pos && Pi[i] <= ALPHAiShaffer[i]) {
      tmp--;
      pos = (int) combinatoria(2, algorithmName.length) - Tarray[tmp];
    }
  }
  out += "\\subsection{P-values for $\\alpha=0.05$}\n\n";
  // Table column count depends on which procedures are enabled.
  int count = 4;
  if (Holm) {
    count++;
  }
  if (Scha) {
    count++;
  }
  out +=
      "\\begin{table}[!htp]\n\\centering\\scriptsize\n"
          + "\\begin{tabular}{"
          + printC(count)
          + "}\n"
          + "$i$&algorithms&$z=(R_0 - R_i)/SE$&$p$";
  if (Holm) {
    out += "&Holm";
  }
  if (Scha) {
    out += "&Shaffer";
  }
  out += "\\\\\n\\hline";
  for (i = 0; i < order.length; i++) {
    out +=
        (order.length - i)
            + "&"
            + ordenAlgoritmos[order[i]]
            + "&"
            + nf6.format(Math.abs((ordenRankings[order[i]]) / SE))
            + "&"
            + nf6.format(Pi[i]);
    if (Holm) {
      out += "&" + nf6.format(ALPHAiHolm[i]);
    }
    if (Scha) {
      out += "&" + nf6.format(ALPHAiShaffer[i]);
    }
    out += "\\\\\n";
  }
  out +=
      "\\hline\n"
          + "\\end{tabular}\n\\caption{P-values Table for $\\alpha=0.05$}\n"
          + "\\end{table}";
  /*Compute the rejected hipotheses for each test*/
  if (Nemenyi) {
    out +=
        "Nemenyi's procedure rejects those hypotheses that have a p-value $\\le"
            + nf6.format(0.05 / (double) (order.length))
            + "$.\n\n";
  }
  if (Holm) {
    parar = false;
    for (i = 0; i < order.length && !parar; i++) {
      if (Pi[i] > ALPHAiHolm[i]) {
        out +=
            "Holm's procedure rejects those hypotheses that have a p-value $\\le"
                + nf6.format(ALPHAiHolm[i])
                + "$.\n\n";
        parar = true;
      }
    }
  }
  if (Scha) {
    parar = false;
    for (i = 0; i < order.length && !parar; i++) {
      if (Pi[i] <= ALPHAiShaffer[i]) {
        out +=
            "Shaffer's procedure rejects those hypotheses that have a p-value $\\le"
                + nf6.format(ALPHAiShaffer[i])
                + "$.\n\n";
        parar = true;
      }
    }
  }
  /*For Bergmann-Hommel's procedure, 9 algorithms could suppose intense computation*/
  if (algorithmName.length <= MAX_ALGORITHMS) {
    for (i = 0; i < algorithmName.length; i++) {
      indices.add(new Integer(i));
    }
    // All exhaustive sets of logically compatible hypotheses.
    exhaustiveI = obtainExhaustive(indices);
    cuadro = new boolean[algorithmName.length][algorithmName.length];
    for (i = 0; i < algorithmName.length; i++) {
      Arrays.fill(cuadro[i], false);
    }
    for (i = 0; i < exhaustiveI.size(); i++) {
      // Smallest p-value inside this exhaustive set.
      minPi =
          2
              * CDF_Normal.normp(
                  (-1)
                      * Math.abs(
                          Rj[((Relation) ((Vector<Relation>) exhaustiveI.elementAt(i)).elementAt(0)).i]
                              - Rj[((Relation) ((Vector<Relation>) exhaustiveI.elementAt(i)).elementAt(0)).j])
                      / SE);
      for (j = 1; j < ((Vector<Relation>) exhaustiveI.elementAt(i)).size(); j++) {
        tmpPi =
            2
                * CDF_Normal.normp(
                    (-1)
                        * Math.abs(
                            Rj[((Relation) ((Vector<Relation>) exhaustiveI.elementAt(i)).elementAt(j)).i]
                                - Rj[((Relation) ((Vector<Relation>) exhaustiveI.elementAt(i)).elementAt(j)).j])
                        / SE);
        if (tmpPi < minPi) {
          minPi = tmpPi;
        }
      }
      // If the set survives, mark all of its pairs as accepted (not rejected).
      if (minPi > (0.05 / ((double) ((Vector<Relation>) exhaustiveI.elementAt(i)).size()))) {
        for (j = 0; j < ((Vector<Relation>) exhaustiveI.elementAt(i)).size(); j++) {
          cuadro[((Relation) ((Vector<Relation>) exhaustiveI.elementAt(i)).elementAt(j)).i][
              ((Relation) ((Vector<Relation>) exhaustiveI.elementAt(i)).elementAt(j)).j] = true;
        }
      }
    }
    if (Berg) {
      cad = "";
      cad += "Bergmann's procedure rejects these hypotheses:\n\n";
      cad += "\\begin{itemize}\n\n";
      counter = 0;
      for (i = 0; i < cuadro.length; i++) {
        for (j = i + 1; j < cuadro.length; j++) {
          if (cuadro[i][j] == false) {
            cad += "\\item " + algorithmName[i] + " vs. " + algorithmName[j] + "\n\n";
            counter++;
          }
        }
      }
      cad += "\\end{itemize}\n\n";
      if (counter > 0) {
        out += cad;
      } else {
        out += "Bergmann's procedure does not reject any hypotheses.\n\n";
      }
    }
  }
  out += "\\pagebreak\n\n";
  out += "\\subsection{P-values for $\\alpha=0.10$}\n\n";
  /*Compute the unadjusted p_i value for each comparison alpha=0.10*/
  // Same pipeline as above, repeated for alpha = 0.10.
  Pi = new double[(int) combinatoria(2, algorithmName.length)];
  ALPHAiHolm = new double[(int) combinatoria(2, algorithmName.length)];
  ALPHAiShaffer = new double[(int) combinatoria(2, algorithmName.length)];
  ordenAlgoritmos = new String[(int) combinatoria(2, algorithmName.length)];
  ordenRankings = new double[(int) combinatoria(2, algorithmName.length)];
  order = new int[(int) combinatoria(2, algorithmName.length)];
  SE = termino3;
  vistos = new boolean[(int) combinatoria(2, algorithmName.length)];
  for (i = 0, k = 0; i < algorithmName.length; i++) {
    for (j = i + 1; j < algorithmName.length; j++, k++) {
      ordenRankings[k] = Math.abs(Rj[i] - Rj[j]);
      ordenAlgoritmos[k] = (String) algorithmName[i] + " vs. " + (String) algorithmName[j];
    }
  }
  Arrays.fill(vistos, false);
  for (i = 0; i < ordenRankings.length; i++) {
    for (j = 0; vistos[j] == true; j++)
      ;
    pos = j;
    maxVal = ordenRankings[j];
    for (j = j + 1; j < ordenRankings.length; j++) {
      if (vistos[j] == false && ordenRankings[j] > maxVal) {
        pos = j;
        maxVal = ordenRankings[j];
      }
    }
    vistos[pos] = true;
    order[i] = pos;
  }
  /*Computing the logically related hypotheses tests (Shaffer and Bergmann-Hommel)*/
  pos = 0;
  tmp = Tarray.length - 1;
  for (i = 0; i < order.length; i++) {
    Pi[i] = 2 * CDF_Normal.normp((-1) * Math.abs((ordenRankings[order[i]]) / SE));
    ALPHAiHolm[i] = 0.1 / ((double) order.length - (double) i);
    ALPHAiShaffer[i] = 0.1 / ((double) order.length - (double) Math.max(pos, i));
    if (i == pos && Pi[i] <= ALPHAiShaffer[i]) {
      tmp--;
      pos = (int) combinatoria(2, algorithmName.length) - Tarray[tmp];
    }
  }
  count = 4;
  if (Holm) {
    count++;
  }
  if (Scha) {
    count++;
  }
  out +=
      "\\begin{table}[!htp]\n\\centering\\scriptsize\n"
          + "\\begin{tabular}{"
          + printC(count)
          + "}\n"
          + "$i$&algorithms&$z=(R_0 - R_i)/SE$&$p$";
  if (Holm) {
    out += "&Holm";
  }
  if (Scha) {
    out += "&Shaffer";
  }
  out += "\\\\\n\\hline";
  for (i = 0; i < order.length; i++) {
    out +=
        (order.length - i)
            + "&"
            + ordenAlgoritmos[order[i]]
            + "&"
            + nf6.format(Math.abs((ordenRankings[order[i]]) / SE))
            + "&"
            + nf6.format(Pi[i]);
    if (Holm) {
      out += "&" + nf6.format(ALPHAiHolm[i]);
    }
    if (Scha) {
      out += "&" + nf6.format(ALPHAiShaffer[i]);
    }
    out += "\\\\\n";
  }
  out +=
      "\\hline\n"
          + "\\end{tabular}\n\\caption{P-values Table for $\\alpha=0.10$}\n"
          + "\\end{table}";
  /*Compute the rejected hipotheses for each test*/
  if (Nemenyi) {
    out +=
        "Nemenyi's procedure rejects those hypotheses that have a p-value $\\le"
            + nf6.format(0.10 / (double) (order.length))
            + "$.\n\n";
  }
  if (Holm) {
    parar = false;
    for (i = 0; i < order.length && !parar; i++) {
      if (Pi[i] > ALPHAiHolm[i]) {
        out +=
            "Holm's procedure rejects those hypotheses that have a p-value $\\le"
                + nf6.format(ALPHAiHolm[i])
                + "$.\n\n";
        parar = true;
      }
    }
  }
  if (Scha) {
    parar = false;
    for (i = 0; i < order.length && !parar; i++) {
      if (Pi[i] <= ALPHAiShaffer[i]) {
        out +=
            "Shaffer's procedure rejects those hypotheses that have a p-value $\\le"
                + nf6.format(ALPHAiShaffer[i])
                + "$.\n\n";
        parar = true;
      }
    }
  }
  /*For Bergmann-Hommel's procedure, 9 algorithms could suppose intense computation*/
  if (algorithmName.length <= MAX_ALGORITHMS) {
    indices.removeAllElements();
    for (i = 0; i < algorithmName.length; i++) {
      indices.add(new Integer(i));
    }
    exhaustiveI = obtainExhaustive(indices);
    cuadro = new boolean[algorithmName.length][algorithmName.length];
    for (i = 0; i < algorithmName.length; i++) {
      Arrays.fill(cuadro[i], false);
    }
    for (i = 0; i < exhaustiveI.size(); i++) {
      minPi =
          2
              * CDF_Normal.normp(
                  (-1)
                      * Math.abs(
                          Rj[((Relation) ((Vector<Relation>) exhaustiveI.elementAt(i)).elementAt(0)).i]
                              - Rj[((Relation) ((Vector<Relation>) exhaustiveI.elementAt(i)).elementAt(0)).j])
                      / SE);
      for (j = 1; j < ((Vector<Relation>) exhaustiveI.elementAt(i)).size(); j++) {
        tmpPi =
            2
                * CDF_Normal.normp(
                    (-1)
                        * Math.abs(
                            Rj[((Relation) ((Vector<Relation>) exhaustiveI.elementAt(i)).elementAt(j)).i]
                                - Rj[((Relation) ((Vector<Relation>) exhaustiveI.elementAt(i)).elementAt(j)).j])
                        / SE);
        if (tmpPi < minPi) {
          minPi = tmpPi;
        }
      }
      if (minPi > 0.1 / ((double) ((Vector<Relation>) exhaustiveI.elementAt(i)).size())) {
        for (j = 0; j < ((Vector<Relation>) exhaustiveI.elementAt(i)).size(); j++) {
          cuadro[((Relation) ((Vector<Relation>) exhaustiveI.elementAt(i)).elementAt(j)).i][
              ((Relation) ((Vector<Relation>) exhaustiveI.elementAt(i)).elementAt(j)).j] = true;
        }
      }
    }
    if (Berg) {
      cad = "";
      cad += "Bergmann's procedure rejects these hypotheses:\n\n";
      cad += "\\begin{itemize}\n\n";
      counter = 0;
      for (i = 0; i < cuadro.length; i++) {
        for (j = i + 1; j < cuadro.length; j++) {
          if (cuadro[i][j] == false) {
            cad += "\\item " + algorithmName[i] + " vs. " + algorithmName[j] + "\n\n";
            counter++;
          }
        }
      }
      cad += "\\end{itemize}\n\n";
      if (counter > 0) {
        out += cad;
      } else {
        out += "Bergmann's procedure does not reject any hypotheses.\n\n";
      }
    }
  }
  out += "\\pagebreak\n\n";
  /** ********** ADJUSTED P-VALUES NxN COMPARISON ************* */
  out += "\\subsection{Adjusted p-values}\n\n";
  // Columns: [0] Nemenyi, [1] Holm, [2] Shaffer, [3] Bergmann-Hommel.
  adjustedP = new double[Pi.length][4];
  pos = 0;
  tmp = Tarray.length - 1;
  for (i = 0; i < adjustedP.length; i++) {
    adjustedP[i][0] = Pi[i] * (double) (adjustedP.length);
    adjustedP[i][1] = Pi[i] * (double) (adjustedP.length - i);
    adjustedP[i][2] = Pi[i] * ((double) adjustedP.length - (double) Math.max(pos, i));
    if (i == pos) {
      tmp--;
      pos = (int) combinatoria(2, algorithmName.length) - Tarray[tmp];
    }
    if (algorithmName.length <= MAX_ALGORITHMS) {
      // Bergmann-Hommel adjustment: maximum over exhaustive sets containing this
      // pair of (set size * smallest p-value in the set).
      maxAPi = Double.MIN_VALUE;
      minPi = Double.MAX_VALUE;
      for (j = 0; j < exhaustiveI.size(); j++) {
        if (exhaustiveI.elementAt(j).toString().contains(parejitas[order[i]].toString())) {
          minPi =
              2
                  * CDF_Normal.normp(
                      (-1)
                          * Math.abs(
                              Rj[((Relation) ((Vector<Relation>) exhaustiveI.elementAt(j)).elementAt(0)).i]
                                  - Rj[((Relation) ((Vector<Relation>) exhaustiveI.elementAt(j)).elementAt(0)).j])
                          / SE);
          for (k = 1; k < ((Vector<Relation>) exhaustiveI.elementAt(j)).size(); k++) {
            tmpPi =
                2
                    * CDF_Normal.normp(
                        (-1)
                            * Math.abs(
                                Rj[((Relation) ((Vector<Relation>) exhaustiveI.elementAt(j)).elementAt(k)).i]
                                    - Rj[((Relation) ((Vector<Relation>) exhaustiveI.elementAt(j)).elementAt(k)).j])
                            / SE);
            if (tmpPi < minPi) {
              minPi = tmpPi;
            }
          }
          tmpAPi = minPi * (double) (((Vector<Relation>) exhaustiveI.elementAt(j)).size());
          if (tmpAPi > maxAPi) {
            maxAPi = tmpAPi;
          }
        }
      }
      adjustedP[i][3] = maxAPi;
    }
  }
  // Enforce monotonicity of the adjusted p-values (step-down procedures).
  for (i = 1; i < adjustedP.length; i++) {
    if (adjustedP[i][1] < adjustedP[i - 1][1]) adjustedP[i][1] = adjustedP[i - 1][1];
    if (adjustedP[i][2] < adjustedP[i - 1][2]) adjustedP[i][2] = adjustedP[i - 1][2];
    if (adjustedP[i][3] < adjustedP[i - 1][3]) adjustedP[i][3] = adjustedP[i - 1][3];
  }
  count = 3;
  if (Nemenyi) {
    count++;
  }
  if (Holm) {
    count++;
  }
  if (Scha) {
    count++;
  }
  if (Berg) {
    count++;
  }
  out +=
      "\\begin{table}[!htp]\n\\centering\\scriptsize\n"
          + "\\begin{tabular}{"
          + printC(count)
          + "}\n"
          + "i&hypothesis&unadjusted $p$";
  if (Nemenyi) {
    out += "&$p_{Neme}$";
  }
  if (Holm) {
    out += "&$p_{Holm}$";
  }
  if (Scha) {
    out += "&$p_{Shaf}$";
  }
  if (Berg) {
    out += "&$p_{Berg}$";
  }
  out += "\\\\\n\\hline";
  for (i = 0; i < Pi.length; i++) {
    // NOTE(review): the separator below reads " vs ." (period after the space),
    // unlike the " vs. " used elsewhere — looks like a typo in the emitted LaTeX;
    // left untouched here because it is runtime output.
    out +=
        (i + 1)
            + "&"
            + algorithmName[parejitas[order[i]].i]
            + " vs ."
            + algorithmName[parejitas[order[i]].j]
            + "&"
            + nf6.format(Pi[i]);
    if (Nemenyi) {
      out += "&" + nf6.format(adjustedP[i][0]);
    }
    if (Holm) {
      out += "&" + nf6.format(adjustedP[i][1]);
    }
    if (Scha) {
      out += "&" + nf6.format(adjustedP[i][2]);
    }
    if (Berg) {
      out += "&" + nf6.format(adjustedP[i][3]);
    }
    out += "\\\\\n";
  }
  out += "\\hline\n" + "\\end{tabular}\n\\caption{Adjusted $p$-values}\n" + "\\end{table}\n\n";
  out += "\\end{landscape}\n\\end{document}";
  return out;
} // end-method
/**
 * Process the training and test files provided in the parameters file to the constructor.
 *
 * <p>Runs K-means over the training set, then fills every missing value with the corresponding
 * attribute value of the instance's cluster centre; non-missing values are copied through.
 * Results are written with {@code write_results}. If a distinct test file is configured, the
 * same imputation (using the clusters fitted on training) is applied to it.
 */
public void process() {
  // declarations
  double[] outputs;
  double[] outputs2;
  Instance neighbor;
  double dist, mean;
  int actual;
  Randomize rnd = new Randomize();
  Instance ex;
  gCenter kmeans = null;
  int iterations = 0;
  double E;
  double prevE;
  int totalMissing = 0;
  boolean allMissing = true;
  rnd.setSeed(semilla);
  // PROCESS
  try {
    // Load in memory a dataset that contains a classification problem
    IS.readSet(input_train_name, true);
    int in = 0;
    int out = 0;
    ndatos = IS.getNumInstances();
    nvariables = Attributes.getNumAttributes();
    nentradas = Attributes.getInputNumAttributes();
    nsalidas = Attributes.getOutputNumAttributes();
    X = new String[ndatos][nvariables]; // matrix with transformed data
    kmeans = new gCenter(K, ndatos, nvariables);
    timesSeen = new FreqList[nvariables];
    mostCommon = new String[nvariables];
    // first, we choose k 'means' randomly from all
    // instances
    totalMissing = 0;
    for (int i = 0; i < ndatos; i++) {
      Instance inst = IS.getInstance(i);
      if (inst.existsAnyMissingValue()) totalMissing++;
    }
    // Only if every instance has missing values do we allow seeds with missing values;
    // otherwise the seed-picking loop below rejects them.
    if (totalMissing == ndatos) allMissing = true;
    else allMissing = false;
    for (int numMeans = 0; numMeans < K; numMeans++) {
      do {
        actual = (int) (ndatos * rnd.Rand());
        ex = IS.getInstance(actual);
      } while (ex.existsAnyMissingValue() && !allMissing);
      kmeans.copyCenter(ex, numMeans);
    }
    // now, iterate adjusting clusters' centers and
    // instances to them
    prevE = 0;
    iterations = 0;
    do {
      // Assign each instance to its nearest cluster.
      for (int i = 0; i < ndatos; i++) {
        Instance inst = IS.getInstance(i);
        kmeans.setClusterOf(inst, i);
      }
      // set new centers
      kmeans.recalculateCenters(IS);
      // compute RMSE
      // NOTE(review): E accumulates raw distances to the assigned centres (no square /
      // mean / root), so despite the comment this is a total distortion, not an RMSE.
      E = 0;
      for (int i = 0; i < ndatos; i++) {
        Instance inst = IS.getInstance(i);
        E += kmeans.distance(inst, kmeans.getClusterOf(i));
      }
      iterations++;
      // System.out.println(iterations+"\t"+E);
      // Convergence: if the error did not change at all, force termination.
      if (Math.abs(prevE - E) == 0) iterations = maxIter;
      else prevE = E;
    } while (E > minError && iterations < maxIter);
    // Imputation pass: copy present values, replace missing ones with the value stored
    // at the instance's cluster centre.
    for (int i = 0; i < ndatos; i++) {
      Instance inst = IS.getInstance(i);
      in = 0;
      out = 0;
      for (int j = 0; j < nvariables; j++) {
        Attribute a = Attributes.getAttribute(j);
        direccion = a.getDirectionAttribute();
        tipo = a.getType();
        if (direccion == Attribute.INPUT) {
          if (tipo != Attribute.NOMINAL && !inst.getInputMissingValues(in)) {
            X[i][j] = new String(String.valueOf(inst.getInputRealValues(in)));
          } else {
            if (!inst.getInputMissingValues(in)) X[i][j] = inst.getInputNominalValues(in);
            else {
              // Missing input: impute from the cluster centre.
              actual = kmeans.getClusterOf(i);
              X[i][j] = new String(kmeans.valueAt(actual, j));
            }
          }
          in++;
        } else {
          if (direccion == Attribute.OUTPUT) {
            if (tipo != Attribute.NOMINAL && !inst.getOutputMissingValues(out)) {
              X[i][j] = new String(String.valueOf(inst.getOutputRealValues(out)));
            } else {
              if (!inst.getOutputMissingValues(out)) X[i][j] = inst.getOutputNominalValues(out);
              else {
                // Missing output: impute from the cluster centre.
                actual = kmeans.getClusterOf(i);
                X[i][j] = new String(kmeans.valueAt(actual, j));
              }
            }
            out++;
          }
        }
      }
    }
  } catch (Exception e) {
    System.out.println("Dataset exception = " + e);
    e.printStackTrace();
    System.exit(-1);
  }
  write_results(output_train_name);
  /** ************************************************************************************ */
  // does a test file associated exist?
  if (input_train_name.compareTo(input_test_name) != 0) {
    try {
      // Load in memory a dataset that contains a classification problem
      IStest.readSet(input_test_name, false);
      int in = 0;
      int out = 0;
      ndatos = IStest.getNumInstances();
      nvariables = Attributes.getNumAttributes();
      nentradas = Attributes.getInputNumAttributes();
      nsalidas = Attributes.getOutputNumAttributes();
      // NOTE(review): X was allocated with the TRAINING instance count; if the test set
      // has more instances this indexing overruns — confirm the intended sizing.
      // Clusters fitted on training are reused here for imputation.
      for (int i = 0; i < ndatos; i++) {
        Instance inst = IStest.getInstance(i);
        in = 0;
        out = 0;
        for (int j = 0; j < nvariables; j++) {
          Attribute a = Attributes.getAttribute(j);
          direccion = a.getDirectionAttribute();
          tipo = a.getType();
          if (direccion == Attribute.INPUT) {
            if (tipo != Attribute.NOMINAL && !inst.getInputMissingValues(in)) {
              X[i][j] = new String(String.valueOf(inst.getInputRealValues(in)));
            } else {
              if (!inst.getInputMissingValues(in)) X[i][j] = inst.getInputNominalValues(in);
              else {
                actual = kmeans.getClusterOf(i);
                X[i][j] = new String(kmeans.valueAt(actual, j));
              }
            }
            in++;
          } else {
            if (direccion == Attribute.OUTPUT) {
              if (tipo != Attribute.NOMINAL && !inst.getOutputMissingValues(out)) {
                X[i][j] = new String(String.valueOf(inst.getOutputRealValues(out)));
              } else {
                if (!inst.getOutputMissingValues(out)) X[i][j] = inst.getOutputNominalValues(out);
                else {
                  actual = kmeans.getClusterOf(i);
                  X[i][j] = new String(kmeans.valueAt(actual, j));
                }
              }
              out++;
            }
          }
        }
      }
    } catch (Exception e) {
      System.out.println("Dataset exception = " + e);
      e.printStackTrace();
      System.exit(-1);
    }
    write_results(output_test_name);
  }
}
/** * Computes and stores the information gain of each attribute (variable) of the dataset * * @param Examples Set of instances of the dataset * @param nFile Name of the file */ public void GainInit(TableDat Examples, String nFile) { int i, j, h, v; boolean encontrado; float info_gk, suma, suma1, suma2, p_clase, logaritmo; int num_clase[] = new int[n_clases]; int n_vars = this.getNVars(); int MaxVal = this.getMaxVal(); float p[][] = new float[n_vars][MaxVal]; float p_cond[][][] = new float[n_clases][n_vars][MaxVal]; GI = new float[n_vars]; intervalosGI = new float[n_vars][MaxVal]; String contents; contents = "\n--------------------------------------------\n"; contents += "| Computation of the info gain |\n"; contents += "--------------------------------------------\n"; contents += "Points for computation of the info gain:\n"; // Loads the values for "intervalosGI" float marca, p_corte; for (int v1 = 0; v1 < n_vars; v1++) { if (this.getContinuous(v1) == true) { contents += "\tVariable " + var[v1].getName() + ": "; marca = (this.getMax(v1) - this.getMin(v1)) / ((float) (this.getNLabelVar(v1) - 1)); p_corte = this.getMin(v1) + marca / 2; for (int et = 0; et < this.getNLabelVar(v1); et++) { intervalosGI[v1][et] = p_corte; contents += intervalosGI[v1][et] + " "; p_corte += marca; } contents += "\n"; } } // Structure initialization for (i = 0; i < n_clases; i++) num_clase[i] = 0; for (i = 0; i < n_vars; i++) for (j = 0; j < MaxVal; j++) { p[i][j] = 0; // Simple probabilities matrix for (h = 0; h < n_clases; h++) p_cond[h][i][j] = 0; // Conditional probabilities matrix } // Computation of the Simple and Conditional probabilities matrixs for (i = 0; i < Examples.getNEx(); i++) { num_clase[Examples.getClass(i)]++; // distribution by classes for (j = 0; j < n_vars; j++) { // distribution by values if (!this.getContinuous(j)) { // Discrete variable if (!Examples.getLost(this, i, j)) { // if the value is not a lost one p[j][(int) Examples.getDat(i, j)]++; p_cond[(int) 
Examples.getClass(i)][j][(int) Examples.getDat(i, j)]++; } } else { // Continuous variable encontrado = false; h = 0; while (!encontrado && h < this.getNLabelVar(j)) { if (Examples.getDat(i, j) <= intervalosGI[j][h]) encontrado = true; else h++; } if (encontrado == true) { p[j][h]++; p_cond[(int) Examples.getClass(i)][j][h]++; } else { if (!Examples.getLost(this, i, j)) { // Lost value System.out.println( "Fallo al calcular la ganancia de infor, Variable " + j + " Ejemplo " + i); return; } } } } } for (h = 0; h < n_clases; h++) for (i = 0; i < n_vars; i++) { if (!this.getContinuous(i)) // Discrete variable for (j = (int) this.getMin(i); j <= (int) this.getMax(i); j++) p_cond[h][i][j] = p_cond[h][i][j] / Examples.getNEx(); else // Continuous variable for (j = 0; j < this.getNLabelVar(i); j++) p_cond[h][i][j] = p_cond[h][i][j] / Examples.getNEx(); } for (i = 0; i < n_vars; i++) { if (!this.getContinuous(i)) // Discrete variable for (j = (int) this.getMin(i); j <= (int) this.getMax(i); j++) p[i][j] = p[i][j] / Examples.getNEx(); else // Continuous variable for (j = 0; j < this.getNLabelVar(i); j++) p[i][j] = p[i][j] / Examples.getNEx(); } // Info Gk computation suma = 0; for (i = 0; i < n_clases; i++) { p_clase = ((float) num_clase[i]) / Examples.getNEx(); if (p_clase > 0) { logaritmo = (float) (Math.log((double) p_clase) / Math.log(2)); suma += p_clase * logaritmo; } } info_gk = (-1) * suma; // Information gain computation for each attibute for (v = 0; v < n_vars; v++) { suma = info_gk; suma1 = 0; if (!this.getContinuous(v)) { // Discrete Variable for (i = (int) this.getMin(v); i <= (int) this.getMax(v); i++) { suma2 = 0; for (j = 0; j < n_clases; j++) if (p_cond[j][v][i] > 0) { logaritmo = (float) (Math.log(p_cond[j][v][i]) / Math.log(2)); suma2 += p_cond[j][v][i] * logaritmo; } suma1 += p[v][i] * (-1) * suma2; } } else { // Continuous Variable for (i = 0; i < this.getNLabelVar(v); i++) { suma2 = 0; for (j = 0; j < n_clases; j++) if (p_cond[j][v][i] > 0) { logaritmo 
= (float) (Math.log(p_cond[j][v][i]) / Math.log(2)); suma2 += p_cond[j][v][i] * logaritmo; } suma1 += p[v][i] * (-1) * suma2; } } GI[v] = suma + (-1) * suma1; } contents += "Information Gain of the variables:\n"; for (v = 0; v < n_vars; v++) { if (this.getContinuous(v) == true) contents += "\tVariable " + var[v].getName() + ": " + GI[v] + "\n"; } if (nFile != "") Files.addToFile(nFile, contents); }
/**
 * It launches the algorithm: seeds the RNG, extracts the nominal training data, seeds the rule
 * container with interesting first-order rules (adjusted residual above 1.96), then runs a
 * steady-state GA level by level to combine rules, and finally writes the training/test outputs
 * and the positive rules with their consequents to the rule file.
 */
public void execute() {
  int i, j, k, l;
  int t;
  int ele;
  double prob[];
  double aux;
  double NUmax = 1.5; // used for lineal ranking
  double NUmin = 0.5; // used for lineal ranking
  double pos1, pos2;
  int sel1, sel2;
  int data[][];
  int infoAttr[];
  int classData[];
  Vector<Rule> contenedor = new Vector<Rule>();
  Vector<Rule> conjR = new Vector<Rule>();
  Rule tmpRule;
  Condition tmpCondition[] = new Condition[1];
  RuleSet population[];
  RuleSet hijo1, hijo2;
  if (somethingWrong) { // We do not execute the program
    System.err.println("An error was found, the data-set has numerical values.");
    System.err.println("Aborting the program");
    // We should not use the statement: System.exit(-1);
  } else {
    Randomize.setSeed(seed);
    nClasses = train.getnClasses();
    /* Build the nominal data information: number of distinct values per input attribute */
    infoAttr = new int[train.getnInputs()];
    for (i = 0; i < infoAttr.length; i++) {
      infoAttr[i] = train.numberValues(i);
    }
    // Copy the training matrix; missing values are encoded as -1
    data = new int[train.getnData()][train.getnInputs()];
    for (i = 0; i < data.length; i++) {
      for (j = 0; j < data[i].length; j++) {
        if (train.isMissing(i, j)) data[i][j] = -1;
        else data[i][j] = train.valueExample(i, j);
      }
    }
    classData = new int[train.getnData()];
    for (i = 0; i < classData.length; i++) {
      classData[i] = train.getOutputAsInteger(i);
    }
    /* Find first-order rules which result interesting: one-condition rules whose adjusted
     * residual exceeds 1.96 (the 5% two-tailed normal critical value) for some class */
    for (i = 0; i < nClasses; i++) {
      for (j = 0; j < infoAttr.length; j++) {
        for (k = 0; k < infoAttr[j]; k++) {
          tmpCondition[0] = new Condition(j, k);
          tmpRule = new Rule(tmpCondition);
          if (Math.abs(computeAdjustedResidual(data, classData, tmpRule, i)) > 1.96) {
            if (!contenedor.contains(tmpRule)) {
              contenedor.add(tmpRule);
              conjR.add(tmpRule);
            }
          }
        }
      }
    }
    // Construct the Baker selection roulette: linear-ranking probabilities, then the
    // cumulative distribution used for sampling below
    prob = new double[popSize];
    for (j = 0; j < popSize; j++) {
      aux = (double) (NUmax - NUmin) * ((double) j / (popSize - 1));
      prob[j] = (double) (1.0 / (popSize)) * (NUmax - aux);
    }
    for (j = 1; j < popSize; j++) prob[j] = prob[j] + prob[j - 1];
    /* Steady-State Genetic Algorithm: each pass builds level-"ele" rules from conjR */
    ele = 2;
    population = new RuleSet[popSize];
    while (conjR.size() >= 2) {
      t = 0;
      System.out.println("Producing rules of level " + ele);
      for (i = 0; i < population.length; i++) {
        population[i] = new RuleSet(conjR);
        population[i].computeFitness(data, classData, infoAttr, contenedor, nClasses);
      }
      Arrays.sort(population);
      // Evolve until generations run out or the population converges (best == worst)
      while (t < numGenerations && !population[0].equals(population[popSize - 1])) {
        System.out.println("Generation " + t);
        t++;
        /* Baker's selection: walk the cumulative roulette until the sampled point is reached */
        pos1 = Randomize.Rand();
        pos2 = Randomize.Rand();
        for (l = 0; l < popSize && prob[l] < pos1; l++) ;
        sel1 = l;
        for (l = 0; l < popSize && prob[l] < pos2; l++) ;
        sel2 = l;
        hijo1 = new RuleSet(population[sel1]);
        hijo2 = new RuleSet(population[sel2]);
        if (Randomize.Rand() < pCross) {
          RuleSet.crossover1(hijo1, hijo2);
        } else {
          RuleSet.crossover2(hijo1, hijo2);
        }
        RuleSet.mutation(hijo1, conjR, pMut, data, classData, infoAttr, contenedor, nClasses);
        RuleSet.mutation(hijo2, conjR, pMut, data, classData, infoAttr, contenedor, nClasses);
        hijo1.computeFitness(data, classData, infoAttr, contenedor, nClasses);
        hijo2.computeFitness(data, classData, infoAttr, contenedor, nClasses);
        // Steady-state replacement: the two offspring overwrite the two worst individuals
        population[popSize - 2] = new RuleSet(hijo1);
        population[popSize - 1] = new RuleSet(hijo2);
        Arrays.sort(population);
      }
      /* Decode function: harvest interesting rules from the best individual for the next level */
      ele++;
      conjR.removeAllElements();
      // NOTE(review): ele was already incremented, so this message reports the NEXT level's
      // number for the population just evolved — confirm whether that is intended.
      System.out.println(
          "Fitness of the best chromosome in rule level " + ele + ": " + population[0].fitness);
      for (i = 0; i < population[0].getRuleSet().length; i++) {
        // NOTE(review): the last argument is the rule index i, but in the first-order loop
        // above the same position held a class label — looks suspicious; verify against
        // computeAdjustedResidual's contract.
        if (Math.abs(computeAdjustedResidual(data, classData, population[0].getRule(i), i))
            > 1.96) {
          if (validarRegla(population[0].getRule(i))
              && !contenedor.contains(population[0].getRule(i))) {
            contenedor.add(population[0].getRule(i));
            conjR.add(population[0].getRule(i));
          }
        }
      }
    }
    // Finally we should fill the training and test output files
    doOutput(this.val, this.outputTr, data, classData, infoAttr, contenedor, nClasses);
    doOutput(this.test, this.outputTst, data, classData, infoAttr, contenedor, nClasses);
    /* Print the rules obtained: only those classified as positive, with their consequent */
    for (i = contenedor.size() - 1; i >= 0; i--) {
      if (reglaPositiva(
          this.train, data, classData, infoAttr, nClasses, contenedor.elementAt(i))) {
        Fichero.AnadirtoFichero(outputRule, contenedor.elementAt(i).toString(train));
        Fichero.AnadirtoFichero(
            outputRule,
            " -> "
                + consecuente(
                    this.train, data, classData, infoAttr, nClasses, contenedor.elementAt(i))
                + "\n");
      }
    }
    System.out.println("Algorithm Finished");
  }
}
/**
 * SMOTE preprocessing procedure: generates synthetic minority-class instances by interpolating
 * each positive instance with one of its k nearest neighbors, then copies the original training
 * set plus the synthetic instances into the output (Art) arrays.
 *
 * @param datosTrain input training data
 * @param realTrain actual (real-valued) training data
 * @param nominalTrain nominal attribute values
 * @param nulosTrain null (missing) value flags
 * @param clasesTrain training classes
 * @param datosArt output array receiving original + synthetic instances
 * @param realArt output real-valued data (parallel to datosArt)
 * @param nominalArt output nominal data (parallel to datosArt)
 * @param nulosArt output missing-value flags (parallel to datosArt)
 * @param clasesArt output classes (parallel to datosArt)
 * @param kSMOTE number of nearest neighbors considered per positive instance
 * @param ASMO neighbor-search variant: 0 = all classes, 1 = positives only, 2 = negatives only
 * @param smoting amount of smoting (fraction of nPos synthetic instances when not balancing)
 * @param balance if true, generate exactly nNeg - nPos synthetic instances to balance classes
 * @param nPos number of positive (minority) instances
 * @param posID class id of the positive class
 * @param nNeg number of negative (majority) instances
 * @param negID class id of the negative class
 * @param distanceEu true to use Euclidean distance in the KNN search
 */
public void SMOTE(
    double datosTrain[][],
    double realTrain[][],
    int nominalTrain[][],
    boolean nulosTrain[][],
    int clasesTrain[],
    double datosArt[][],
    double realArt[][],
    int nominalArt[][],
    boolean nulosArt[][],
    int clasesArt[],
    int kSMOTE,
    int ASMO,
    double smoting,
    boolean balance,
    int nPos,
    int posID,
    int nNeg,
    int negID,
    boolean distanceEu) {
  int i, j, l, m;
  int tmp, pos;
  int positives[];
  int neighbors[][];
  double genS[][];
  double genR[][];
  int genN[][];
  boolean genM[][];
  int clasesGen[];
  int nn;
  /* Localize the positive instances (indices into the training arrays) */
  positives = new int[nPos];
  for (i = 0, j = 0; i < clasesTrain.length; i++) {
    if (clasesTrain[i] == posID) {
      positives[j] = i;
      j++;
    }
  }
  /* Randomize the instance presentation (Fisher–Yates-style swap shuffle) */
  for (i = 0; i < positives.length; i++) {
    tmp = positives[i];
    pos = Randomize.Randint(0, positives.length - 1);
    positives[i] = positives[pos];
    positives[pos] = tmp;
  }
  /* Obtain k-nearest neighbors of each positive instance; the search space depends on ASMO */
  neighbors = new int[positives.length][kSMOTE];
  for (i = 0; i < positives.length; i++) {
    switch (ASMO) {
      case 0: // neighbors among the whole training set
        KNN.evaluacionKNN2(
            kSMOTE,
            datosTrain,
            realTrain,
            nominalTrain,
            nulosTrain,
            clasesTrain,
            datosTrain[positives[i]],
            realTrain[positives[i]],
            nominalTrain[positives[i]],
            nulosTrain[positives[i]],
            Math.max(posID, negID) + 1,
            distanceEu,
            neighbors[i]);
        break;
      case 1: // neighbors restricted to the positive class
        evaluacionKNNClass(
            kSMOTE,
            datosTrain,
            realTrain,
            nominalTrain,
            nulosTrain,
            clasesTrain,
            datosTrain[positives[i]],
            realTrain[positives[i]],
            nominalTrain[positives[i]],
            nulosTrain[positives[i]],
            Math.max(posID, negID) + 1,
            distanceEu,
            neighbors[i],
            posID);
        break;
      case 2: // neighbors restricted to the negative class
        evaluacionKNNClass(
            kSMOTE,
            datosTrain,
            realTrain,
            nominalTrain,
            nulosTrain,
            clasesTrain,
            datosTrain[positives[i]],
            realTrain[positives[i]],
            nominalTrain[positives[i]],
            nulosTrain[positives[i]],
            Math.max(posID, negID) + 1,
            distanceEu,
            neighbors[i],
            negID);
        break;
    }
  }
  /* Interpolation of the minority instances: size the synthetic (gen*) buffers either to
   * balance the classes exactly or proportionally to the smoting factor */
  if (balance) {
    genS = new double[nNeg - nPos][datosTrain[0].length];
    genR = new double[nNeg - nPos][datosTrain[0].length];
    genN = new int[nNeg - nPos][datosTrain[0].length];
    genM = new boolean[nNeg - nPos][datosTrain[0].length];
    clasesGen = new int[nNeg - nPos];
  } else {
    genS = new double[(int) (nPos * smoting)][datosTrain[0].length];
    genR = new double[(int) (nPos * smoting)][datosTrain[0].length];
    genN = new int[(int) (nPos * smoting)][datosTrain[0].length];
    genM = new boolean[(int) (nPos * smoting)][datosTrain[0].length];
    clasesGen = new int[(int) (nPos * smoting)];
  }
  // Each synthetic instance interpolates a positive (cycled with i % positives.length)
  // with one of its kSMOTE neighbors chosen at random.
  for (i = 0; i < genS.length; i++) {
    clasesGen[i] = posID;
    nn = Randomize.Randint(0, kSMOTE - 1);
    interpola(
        realTrain[positives[i % positives.length]],
        realTrain[neighbors[i % positives.length][nn]],
        nominalTrain[positives[i % positives.length]],
        nominalTrain[neighbors[i % positives.length][nn]],
        nulosTrain[positives[i % positives.length]],
        nulosTrain[neighbors[i % positives.length][nn]],
        genS[i],
        genR[i],
        genN[i],
        genM[i]);
  }
  // Copy the original training set into the output arrays...
  for (j = 0; j < datosTrain.length; j++) {
    for (l = 0; l < datosTrain[0].length; l++) {
      datosArt[j][l] = datosTrain[j][l];
      realArt[j][l] = realTrain[j][l];
      nominalArt[j][l] = nominalTrain[j][l];
      nulosArt[j][l] = nulosTrain[j][l];
    }
    clasesArt[j] = clasesTrain[j];
  }
  // ...then append the synthetic instances. NOTE: j deliberately carries over from the loop
  // above, so writing continues right after the last original instance.
  for (m = 0; j < datosArt.length; j++, m++) {
    for (l = 0; l < datosTrain[0].length; l++) {
      datosArt[j][l] = genS[m][l];
      realArt[j][l] = genR[m][l];
      nominalArt[j][l] = genN[m][l];
      nulosArt[j][l] = genM[m][l];
    }
    clasesArt[j] = clasesGen[m];
  }
}
/**
 * Computes the standardized residual for a rule condition and a class:
 * (observed count - expected count) / sqrt(expected count).
 *
 * @param data the nominal training data matrix
 * @param classData the class label of each training instance
 * @param cond the rule condition under evaluation
 * @param clase the class index under evaluation
 * @return the standardized residual as a double
 */
private double computeStandarizedResidual(
    int data[][], int classData[], Condition cond, int clase) {
  final double expected = computeEAipAjq(data, classData, cond, clase);
  final double observed = computeCountAipAjq(data, classData, cond, clase);
  return (observed - expected) / Math.sqrt(expected);
}
/**
 * Runs the LVQ procedure: shuffles the training set, selects the initial neurons (at least one
 * per class when possible), trains the network by attracting/repelling the nearest neuron for
 * each training instance, then classifies the reference and test sets by 1-NN against the
 * trained neurons and writes the outputs and the network file.
 */
public void ejecutar() {
  int i, j, l, m;
  double alfai;
  int nClases;
  int claseObt;
  boolean marcas[];
  boolean notFound;
  int init;
  int clasSel[];
  int baraje[];
  int pos, tmp;
  String instanciasIN[];
  String instanciasOUT[];
  long tiempo = System.currentTimeMillis();
  /* Getting the number of differents classes (max label + 1) */
  nClases = 0;
  for (i = 0; i < clasesTrain.length; i++) if (clasesTrain[i] > nClases) nClases = clasesTrain[i];
  nClases++;
  /* Shuffle the train set (index permutation in baraje) */
  baraje = new int[datosTrain.length];
  Randomize.setSeed(semilla);
  for (i = 0; i < datosTrain.length; i++) baraje[i] = i;
  for (i = 0; i < datosTrain.length; i++) {
    pos = Randomize.Randint(i, datosTrain.length - 1);
    tmp = baraje[i];
    baraje[i] = baraje[pos];
    baraje[pos] = tmp;
  }
  /*
   * Inicialization of the flagged instaces vector for a posterior
   * elimination
   */
  marcas = new boolean[datosTrain.length];
  for (i = 0; i < datosTrain.length; i++) marcas[i] = false;
  if (datosTrain.length > 0) {
    // marcas[baraje[0]] = true; //the first instance is included always
    nSel = n_p;
    if (nSel < nClases) nSel = nClases; // never fewer neurons than classes
  } else {
    System.err.println("Input dataset is empty");
    nSel = 0;
  }
  clasSel = new int[nClases];
  System.out.print("Selecting initial neurons... ");
  // at least, there must be 1 neuron of each class at the beginning
  init = nClases;
  for (i = 0; i < nClases && i < datosTrain.length; i++) {
    // linear probe from a random start until an unmarked instance of class i is found
    pos = Randomize.Randint(0, datosTrain.length - 1);
    tmp = 0;
    while ((clasesTrain[pos] != i || marcas[pos]) && tmp < datosTrain.length) {
      pos = (pos + 1) % datosTrain.length;
      tmp++;
    }
    if (tmp < datosTrain.length) marcas[pos] = true;
    else init--; // class i has no available instance
    // clasSel[i] = i;
  }
  // Fill the remaining neuron slots with random unmarked instances of any class
  for (i = init; i < Math.min(nSel, datosTrain.length); i++) {
    tmp = 0;
    pos = Randomize.Randint(0, datosTrain.length - 1);
    while (marcas[pos]) {
      pos = (pos + 1) % datosTrain.length;
      tmp++;
    }
    // if(i<nClases){
    // notFound = true;
    // do{
    // for(j=i-1;j>=0 && notFound;j--){
    // if(clasSel[j] == clasesTrain[pos])
    // notFound = false;
    // }
    // if(!notFound)
    // pos = Randomize.Randint (0, datosTrain.length-1);
    // }while(!notFound);
    // }
    // clasSel[i] = clasesTrain[pos];
    marcas[pos] = true;
    init++;
  }
  nSel = init;
  System.out.println("Initial neurons selected: " + nSel);
  /* Building of the S set (neurons) from the flags */
  conjS = new double[nSel][datosTrain[0].length];
  clasesS = new int[nSel];
  for (m = 0, l = 0; m < datosTrain.length; m++) {
    if (marcas[m]) { // the instance must be copied to the solution
      for (j = 0; j < datosTrain[0].length; j++) {
        conjS[l][j] = datosTrain[m][j];
      }
      clasesS[l] = clasesTrain[m];
      l++;
    }
  }
  alfai = alpha; // remember the initial learning rate; alpha decays within each epoch
  boolean change = true;
  /* Body of the LVQ algorithm. */
  // Train the network: up to T epochs, stopping early if no neuron was repelled
  for (int it = 0; it < T && change; it++) {
    change = false;
    alpha = alfai;
    // NOTE(review): the loop starts at i = 1, so instance baraje[0] is skipped each epoch —
    // confirm whether that is intended.
    for (i = 1; i < datosTrain.length; i++) {
      // search for the nearest neuron to training instance
      pos = NN(nSel, conjS, datosTrain[baraje[i]]);
      // nearest neuron labels correctly the class of training instance?
      if (clasesS[pos] != clasesTrain[baraje[i]]) { // NO - repel the neuron
        for (j = 0; j < conjS[pos].length; j++) {
          conjS[pos][j] = conjS[pos][j] - alpha * (datosTrain[baraje[i]][j] - conjS[pos][j]);
        }
        change = true;
      } else { // YES - migrate the neuron towards the input vector
        for (j = 0; j < conjS[pos].length; j++) {
          conjS[pos][j] = conjS[pos][j] + alpha * (datosTrain[baraje[i]][j] - conjS[pos][j]);
        }
      }
      alpha = nu * alpha; // geometric decay of the learning rate
    }
    // Shuffle again the training partition for the next epoch
    baraje = new int[datosTrain.length];
    for (i = 0; i < datosTrain.length; i++) baraje[i] = i;
    for (i = 0; i < datosTrain.length; i++) {
      pos = Randomize.Randint(i, datosTrain.length - 1);
      tmp = baraje[i];
      baraje[i] = baraje[pos];
      baraje[pos] = tmp;
    }
  }
  System.out.println(
      "LVQ " + relation + " " + (double) (System.currentTimeMillis() - tiempo) / 1000.0 + "s");
  // Classify the train (reference) data set by 1-NN against the trained neurons
  instanciasIN = new String[datosReferencia.length];
  instanciasOUT = new String[datosReferencia.length];
  for (i = 0; i < datosReferencia.length; i++) {
    /* Classify the instance selected in this iteration */
    Attribute a = Attributes.getOutputAttribute(0);
    int tipo = a.getType();
    claseObt = KNN.evaluacionKNN2(1, conjS, clasesS, datosReferencia[i], nClases);
    if (tipo != Attribute.NOMINAL) {
      instanciasIN[i] = new String(String.valueOf(clasesReferencia[i]));
      instanciasOUT[i] = new String(String.valueOf(claseObt));
    } else {
      instanciasIN[i] = new String(a.getNominalValue(clasesReferencia[i]));
      instanciasOUT[i] = new String(a.getNominalValue(claseObt));
    }
  }
  escribeSalida(
      ficheroSalida[0], instanciasIN, instanciasOUT, entradas, salida, nEntradas, relation);
  // Classify the test data set the same way
  normalizarTest();
  instanciasIN = new String[datosTest.length];
  instanciasOUT = new String[datosTest.length];
  for (i = 0; i < datosTest.length; i++) {
    /* Classify the instance selected in this iteration */
    Attribute a = Attributes.getOutputAttribute(0);
    int tipo = a.getType();
    claseObt = KNN.evaluacionKNN2(1, conjS, clasesS, datosTest[i], nClases);
    if (tipo != Attribute.NOMINAL) {
      instanciasIN[i] = new String(String.valueOf(clasesTest[i]));
      instanciasOUT[i] = new String(String.valueOf(claseObt));
    } else {
      instanciasIN[i] = new String(a.getNominalValue(clasesTest[i]));
      instanciasOUT[i] = new String(a.getNominalValue(claseObt));
    }
  }
  escribeSalida(
      ficheroSalida[1], instanciasIN, instanciasOUT, entradas, salida, nEntradas, relation);
  // Print the network to a file
  printNetworkToFile(ficheroSalida[2], referencia.getHeader());
}
/**
 * Obtain all exhaustive comparisons possible from an array of indexes.
 *
 * <p>Recursively enumerates every binary partition of the index set, combines the relation
 * sets of both halves, and de-duplicates the result by string representation.
 *
 * @param indices A vector of indexes.
 * @return A vector with vectors containing all the possible relations between the indexes
 */
@SuppressWarnings("unchecked")
public static Vector<Vector<Relation>> obtainExhaustive(Vector<Integer> indices) {
  Vector<Vector<Relation>> result = new Vector<Vector<Relation>>();
  int i, j, k;
  String binario;
  boolean[] number = new boolean[indices.size()];
  Vector<Integer> ind1, ind2;
  Vector<Relation> set = new Vector<Relation>();
  Vector<Vector<Relation>> res1, res2;
  Vector<Relation> temp;
  Vector<Relation> temp2;
  Vector<Relation> temp3;
  ind1 = new Vector<Integer>();
  ind2 = new Vector<Integer>();
  temp = new Vector<Relation>();
  temp2 = new Vector<Relation>();
  temp3 = new Vector<Relation>();
  // Base set: every pairwise relation between the given indexes
  for (i = 0; i < indices.size(); i++) {
    for (j = i + 1; j < indices.size(); j++) {
      // Autoboxing replaces the deprecated "new Integer(...)" wrappers of the original.
      set.addElement(new Relation(indices.elementAt(i).intValue(), indices.elementAt(j).intValue()));
    }
  }
  if (set.size() > 0) result.addElement(set);
  // Enumerate every proper binary partition: i's binary digits mark membership in ind1
  for (i = 1; i < (int) (Math.pow(2, indices.size() - 1)); i++) {
    Arrays.fill(number, false);
    ind1.removeAllElements();
    ind2.removeAllElements();
    temp.removeAllElements();
    temp2.removeAllElements();
    temp3.removeAllElements();
    binario = Integer.toString(i, 2);
    // Arrays.fill above already cleared the flags; the original's extra false-fill loop was
    // redundant and is replaced by computing its only side effect (the left-pad offset k).
    k = number.length - binario.length();
    for (j = 0; j < binario.length(); j++, k++) {
      if (binario.charAt(j) == '1') number[k] = true;
    }
    for (j = 0; j < number.length; j++) {
      if (number[j]) {
        ind1.addElement(Integer.valueOf(indices.elementAt(j).intValue()));
      } else {
        ind2.addElement(Integer.valueOf(indices.elementAt(j).intValue()));
      }
    }
    // Recurse on both halves and keep their relation sets
    res1 = obtainExhaustive(ind1);
    res2 = obtainExhaustive(ind2);
    for (j = 0; j < res1.size(); j++) {
      result.addElement(new Vector<Relation>((Vector<Relation>) res1.elementAt(j)));
    }
    for (j = 0; j < res2.size(); j++) {
      result.addElement(new Vector<Relation>((Vector<Relation>) res2.elementAt(j)));
    }
    // Cross-combine every set from one half with every set from the other, ordering the
    // concatenation by the first relation's i index
    for (j = 0; j < res1.size(); j++) {
      temp = (Vector<Relation>) ((Vector<Relation>) res1.elementAt(j)).clone();
      for (k = 0; k < res2.size(); k++) {
        temp2 = (Vector<Relation>) temp.clone();
        temp3 = (Vector<Relation>) ((Vector<Relation>) res2.elementAt(k)).clone();
        if (((Relation) temp2.elementAt(0)).i < ((Relation) temp3.elementAt(0)).i) {
          temp2.addAll((Vector<Relation>) temp3);
          result.addElement(new Vector<Relation>(temp2));
        } else {
          temp3.addAll((Vector<Relation>) temp2);
          result.addElement(new Vector<Relation>(temp3));
        }
      }
    }
  }
  // Drop empty sets
  for (i = 0; i < result.size(); i++) {
    if (((Vector<Relation>) result.elementAt(i)).toString().equalsIgnoreCase("[]")) {
      result.removeElementAt(i);
      i--;
    }
  }
  // De-duplicate by string representation (matches the original's comparison strategy)
  for (i = 0; i < result.size(); i++) {
    for (j = i + 1; j < result.size(); j++) {
      if (((Vector<Relation>) result.elementAt(i))
          .toString()
          .equalsIgnoreCase(((Vector<Relation>) result.elementAt(j)).toString())) {
        result.removeElementAt(j);
        j--;
      }
    }
  }
  return result;
} // end-method