/**
 * Main method for ABB, that explores the search space by pruning nodes and checking their
 * inconsistency ratio.
 *
 * <p>The starting solution is copied into {@code features} (where {@code abb} keeps the best
 * subset found) and the recursive search is launched from it. When the search finishes without
 * any selected feature the error is reported and the program is terminated.
 */
private void runABB() {
  boolean[] root = startSolution();

  System.arraycopy(root, 0, features, 0, root.length);
  abb(root);

  /* checks if a subset satisfies the condition (more than 0 selected features) */
  // A plain "features == null" test can never succeed at this point: the arraycopy above would
  // already have thrown for a null destination. The selected features are counted instead.
  boolean anySelected = false;
  if (features != null) {
    for (boolean selected : features) {
      if (selected) {
        anySelected = true;
        break;
      }
    }
  }

  if (!anySelected) {
    System.err.println("ERROR: It couldn't be possible to find any solution.");
    System.exit(0);
  }
}
/**
 * Loads the test instances into {@code datosTest} / {@code clasesTest} and rescales every input
 * value.
 *
 * <p>The program is terminated with exit code -1 unless the data set declares exactly one output
 * attribute and that attribute is not of type {@code Attribute.REAL}. For each test instance the
 * input values are stored, missing inputs are replaced by 0.0, and the first output value is
 * truncated to {@code int} and stored as the class. Nominal inputs are divided by (number of
 * nominal values - 1); every other input is shifted by the attribute minimum and divided by
 * (maximum - minimum).
 *
 * <p>NOTE(review): there is no guard for a zero divisor (single-valued nominal attribute, or
 * maximum equal to minimum); the stored value is not finite in that case.
 */
private void normalizarTest() {
  /* Check if dataset corresponding with a classification problem */
  final int outputCount = Attributes.getOutputNumAttributes();
  if (outputCount < 1) {
    System.err.println(
        "This dataset haven´t outputs, so it not corresponding to a classification problem.");
    System.exit(-1);
  } else if (outputCount > 1) {
    System.err.println("This dataset have more of one output.");
    System.exit(-1);
  }

  if (Attributes.getOutputAttribute(0).getType() == Attribute.REAL) {
    System.err.println(
        "This dataset have an input attribute with floating values, so it not corresponding to a classification problem.");
    System.exit(-1);
  }

  final int instanceCount = test.getNumInstances();
  datosTest = new double[instanceCount][Attributes.getInputNumAttributes()];
  clasesTest = new int[instanceCount];

  for (int row = 0; row < instanceCount; row++) {
    Instance current = test.getInstance(row);
    boolean[] missing = current.getInputMissingValues();

    /* the row is replaced by the array handed out by the instance */
    datosTest[row] = current.getAllInputValues();
    for (int col = 0; col < missing.length; col++) {
      if (missing[col]) {
        datosTest[row][col] = 0.0;
      }
    }

    /* the class is the first (and only) output value */
    double[] outputs = current.getAllOutputValues();
    clasesTest[row] = (int) outputs[0];

    for (int col = 0; col < datosTest[row].length; col++) {
      if (Attributes.getInputAttribute(col).getType() == Attribute.NOMINAL) {
        datosTest[row][col] /= Attributes.getInputAttribute(col).getNominalValuesList().size() - 1;
      } else {
        datosTest[row][col] -= Attributes.getInputAttribute(col).getMinAttribute();
        datosTest[row][col] /=
            Attributes.getInputAttribute(col).getMaxAttribute()
                - Attributes.getInputAttribute(col).getMinAttribute();
      }
    }
  }
}
private void generateModel() { String salida = new String(""); double max_auc = 0; ArrayList<String> solutions = this.getAllSolutions(); models = new ArrayList<Farchd>(); int nEjemplos = train.getnData(); if (this.instances == this.MAJ) { nEjemplos = train.getMajority(); } boolean[] variables = new boolean[train.getnInputs()]; boolean[] ejemplos = new boolean[nEjemplos]; this.weightsAUC = new double[solutions.size() / 2]; // Hay 2 soluciones FS e IS for (int i = 0, j = 0; i < solutions.size(); i += 2, j++) { int vars, ejs; vars = ejs = 0; variables = decode(solutions.get(i)); ejemplos = decode(solutions.get(i + 1)); for (int l = 0; l < variables.length; l++) { // variables[j] = solution[j]; if (variables[l]) vars++; } for (int l = 0; l < ejemplos.length; l++) { if (ejemplos[l]) ejs++; } try { Farchd model = new Farchd(train, val, test, variables, ejemplos); /** ******** */ // double fit = model.getAUCTr(); double auc_tr = model.execute(true); double auc_tst = model.getAUCTst(); if (auc_tr > max_auc) { max_auc = auc_tr; indexBest = j; } this.weightsAUC[j] = auc_tr; salida += "Solution[" + j + "]:\t" + vars + "\t" + ejs + "\t" + auc_tr + "\t" + auc_tst + "\n"; /** ******** */ models.add(model); } catch (Exception e) { System.err.println("Liada maxima al generar modelo "); e.printStackTrace(System.err); System.exit(-1); } } System.out.print(salida); Files.writeFile(header + "_AUC.txt", salida); }
/**
 * Creates the method from its script file, then loads and normalizes the reference set.
 *
 * <p>After the superclass constructor has processed {@code ficheroScript}, the reference set is
 * read from {@code ficheroReferencia} and passed through {@code normalizarReferencia()}. Any
 * exception raised while doing so is printed to standard error and the program is terminated
 * with exit code 1.
 *
 * @param ficheroScript script file forwarded to the superclass constructor
 */
public LVQ(String ficheroScript) {
  super(ficheroScript);

  try {
    referencia = new InstanceSet();
    referencia.readSet(ficheroReferencia, false);

    /* Normalize the data */
    normalizarReferencia();
  } catch (Exception failure) {
    System.err.println(failure.toString());
    System.exit(1);
  }
}
/** * Constructor of the Class Parametros * * @param nombreFileParametros is the pathname of input parameter file */ Parametros(String nombreFileParametros) { try { int i; String fichero, linea, tok; StringTokenizer lineasFile, tokens; /* read the parameter file using Files class */ fichero = Files.readFile(nombreFileParametros); fichero += "\n"; /* remove all \r characters. it is neccesary for a correst use in Windows and UNIX */ fichero = fichero.replace('\r', ' '); /* extracts the differents tokens of the file */ lineasFile = new StringTokenizer(fichero, "\n"); i = 0; while (lineasFile.hasMoreTokens()) { linea = lineasFile.nextToken(); i++; tokens = new StringTokenizer(linea, " ,\t"); if (tokens.hasMoreTokens()) { tok = tokens.nextToken(); if (tok.equalsIgnoreCase("algorithm")) nameAlgorithm = getParamString(tokens); else if (tok.equalsIgnoreCase("inputdata")) getInputFiles(tokens); else if (tok.equalsIgnoreCase("outputdata")) getOutputFiles(tokens); else if (tok.equalsIgnoreCase("seed")) seed = getParamLong(tokens); else throw new java.io.IOException("Syntax error on line " + i + ": [" + tok + "]\n"); } } } catch (java.io.FileNotFoundException e) { System.err.println(e + "Parameter file"); } catch (java.io.IOException e) { System.err.println(e + "Aborting program"); System.exit(-1); } /** show the read parameter in the standard output */ String contents = "-- Parameters echo --- \n"; contents += "Algorithm name: " + nameAlgorithm + "\n"; contents += "Input Train File: " + trainFileNameInput + "\n"; contents += "Input Test File: " + testFileNameInput + "\n"; contents += "Output Train File: " + trainFileNameOutput + "\n"; contents += "Output Test File: " + testFileNameOutput + "\n"; System.out.println(contents); }
/**
 * Returns a copy of the given solution in which one selected feature has been switched off.
 *
 * <p>Selected features are numbered from 1, starting at the right end of the vector. When
 * {@code which} does not match any selected feature (zero, negative, or larger than the number
 * of selected features) the copy is returned unchanged. The argument itself is never modified.
 *
 * @param featuresVector solution to generate its neighbor
 * @param which number of the feature to remove starting from the right
 * @return next neighbor of the given solution with one less feature
 */
private static boolean[] removeOne(boolean featuresVector[], int which) {
  boolean[] neighbor = featuresVector.clone();

  int seen = 0;
  for (int idx = neighbor.length - 1; idx >= 0; idx--) {
    if (!neighbor[idx]) {
      continue;
    }
    seen++;
    if (seen == which) {
      neighbor[idx] = false;
      break;
    }
  }
  return neighbor;
}
/** It launches the algorithm */ public void execute() { if (this.somethingWrong) { // We do not execute the program System.err.println("An error was found, either the data-set has missing values."); System.err.println( "Please remove the examples with missing data or apply a MV preprocessing."); System.err.println("Aborting the program"); // We should not use the statement: System.exit(-1); } else { // We do here the algorithm's operations int nClasses = train.getnClasses(); aprioriClassDistribution = new double[nClasses]; for (int i = 0; i < nClasses; i++) { aprioriClassDistribution[i] = 1.0 * val.numberInstances(i) / val.size(); } if (model) { // el modelo no esta generado en fichero previamente NSGA2 search = new NSGA2( train, seed, populationSize, maxTrials, crossover, mutation, instances, fitness); try { search.execute(); } catch (Exception e) { e.printStackTrace(System.err); } } // Finally we should fill the training and test output files this.generateModel(); double avgTr = this.doOutput(val, this.outputTr, false); double aucTr = getAUC(val); double avgTst = this.doOutput(test, this.outputTst, true); double aucTst = getAUC(test); System.out.print("AUC Train: " + aucTr); System.out.println("; AvgAcc Train: " + avgTr); System.out.print("AUC Test: " + aucTst); System.out.println("; AvgAcc Test: " + avgTst); totalTime = System.currentTimeMillis() - startTime; System.out.println("Algorithm Finished: " + totalTime); } }
/** Recursive method for ABB */ private void abb(boolean feat[]) { boolean[] child; double measure; threshold = data.measureIEP(feat); for (int i = 0; i < cardinalidadCto(feat); i++) { child = removeOne(feat, i); measure = data.measureIEP(child); if (legitimate(child) && measure < threshold) { if (measure < data.measureIEP(features)) { // we keep the best found in 'features' System.arraycopy(child, 0, features, 0, child.length); } abb(child); } else { // we prune this node pruned.add(child); } } }
// Write data matrix X to disk, in KEEL format private void write_results(String output) { // File OutputFile = new File(output_train_name.substring(1, output_train_name.length()-1)); try { FileWriter file_write = new FileWriter(output); file_write.write(IS.getHeader()); // now, print the normalized data file_write.write("@data\n"); for (int i = 0; i < ndatos; i++) { file_write.write(X[i][0]); for (int j = 1; j < nvariables; j++) { file_write.write("," + X[i][j]); } file_write.write("\n"); } file_write.close(); } catch (IOException e) { System.out.println("IO exception = " + e); System.exit(-1); } }
public void ejecutar() { int i, j, l, m, o; int nClases; int claseObt; boolean marcas[]; double conjS[][]; int clasesS[]; int eleS[], eleT[]; int bestAc, aciertos; int temp[]; int pos, tmp; long tiempo = System.currentTimeMillis(); /*Getting the number of different classes*/ nClases = 0; for (i = 0; i < clasesTrain.length; i++) if (clasesTrain[i] > nClases) nClases = clasesTrain[i]; nClases++; /*Inicialization of the flagged instance vector of the S set*/ marcas = new boolean[datosTrain.length]; for (i = 0; i < datosTrain.length; i++) marcas[i] = false; /*Allocate memory for the random selection*/ m = (int) ((porcentaje * datosTrain.length) / 100.0); eleS = new int[m]; eleT = new int[datosTrain.length - m]; temp = new int[datosTrain.length]; for (i = 0; i < datosTrain.length; i++) temp[i] = i; /** Random distribution of elements in each set */ Randomize.setSeed(semilla); for (i = 0; i < eleS.length; i++) { pos = Randomize.Randint(i, datosTrain.length - 1); tmp = temp[i]; temp[i] = temp[pos]; temp[pos] = tmp; eleS[i] = temp[i]; } for (i = 0; i < eleT.length; i++) { pos = Randomize.Randint(m + i, datosTrain.length - 1); tmp = temp[m + i]; temp[m + i] = temp[pos]; temp[pos] = tmp; eleT[i] = temp[m + i]; } for (i = 0; i < eleS.length; i++) marcas[eleS[i]] = true; /*Building of the S set from the flags*/ conjS = new double[m][datosTrain[0].length]; clasesS = new int[m]; for (o = 0, l = 0; o < datosTrain.length; o++) { if (marcas[o]) { // the instance will be evaluated for (j = 0; j < datosTrain[0].length; j++) { conjS[l][j] = datosTrain[o][j]; } clasesS[l] = clasesTrain[o]; l++; } } /*Evaluation of the S set*/ bestAc = 0; for (i = 0; i < datosTrain.length; i++) { claseObt = KNN.evaluacionKNN2(k, conjS, clasesS, datosTrain[i], nClases); if (claseObt == clasesTrain[i]) // correct clasification bestAc++; } /*Body of the ENNRS algorithm. 
Change the S set in each iteration for instances of the T set until get a complete sustitution*/ for (i = 0; i < n; i++) { /*Preparation the set to interchange*/ for (j = 0; j < eleS.length; j++) { pos = Randomize.Randint(j, eleT.length - 1); tmp = eleT[j]; eleT[j] = eleT[pos]; eleT[pos] = tmp; } /*Interchange of instances*/ for (j = 0; j < eleS.length; j++) { tmp = eleS[j]; eleS[j] = eleT[j]; eleT[j] = tmp; marcas[eleS[j]] = true; marcas[eleT[j]] = false; } /*Building of the S set from the flags*/ for (o = 0, l = 0; o < datosTrain.length; o++) { if (marcas[o]) { // the instance will evaluate for (j = 0; j < datosTrain[0].length; j++) { conjS[l][j] = datosTrain[o][j]; } clasesS[l] = clasesTrain[o]; l++; } } /*Evaluation of the S set*/ aciertos = 0; for (j = 0; j < datosTrain.length; j++) { claseObt = KNN.evaluacionKNN2(k, conjS, clasesS, datosTrain[j], nClases); if (claseObt == clasesTrain[j]) // correct clasification aciertos++; } if (aciertos > bestAc) { // keep S bestAc = aciertos; } else { // undo changes for (j = 0; j < eleS.length; j++) { tmp = eleS[j]; eleS[j] = eleT[j]; eleT[j] = tmp; marcas[eleS[j]] = true; marcas[eleT[j]] = false; } } } /*Building of the S set from the flags*/ /*Building of the S set from the flags*/ for (o = 0, l = 0; o < datosTrain.length; o++) { if (marcas[o]) { // the instance will evaluate for (j = 0; j < datosTrain[0].length; j++) { conjS[l][j] = datosTrain[o][j]; } clasesS[l] = clasesTrain[o]; l++; } } System.out.println( "ENNRS " + relation + " " + (double) (System.currentTimeMillis() - tiempo) / 1000.0 + "s"); // COn conjS me vale. 
int trainRealClass[][]; int trainPrediction[][]; trainRealClass = new int[datosTrain.length][1]; trainPrediction = new int[datosTrain.length][1]; // Working on training for (i = 0; i < datosTrain.length; i++) { trainRealClass[i][0] = clasesTrain[i]; trainPrediction[i][0] = KNN.evaluate(datosTrain[i], conjS, nClases, clasesS, this.k); } KNN.writeOutput(ficheroSalida[0], trainRealClass, trainPrediction, entradas, salida, relation); // Working on test int realClass[][] = new int[datosTest.length][1]; int prediction[][] = new int[datosTest.length][1]; // Check time for (i = 0; i < realClass.length; i++) { realClass[i][0] = clasesTest[i]; prediction[i][0] = KNN.evaluate(datosTest[i], conjS, nClases, clasesS, this.k); } KNN.writeOutput(ficheroSalida[1], realClass, prediction, entradas, salida, relation); }
/** * It reads the data from the input files (training, validation and test) and parse all the * parameters from the parameters array. * * @param parameters parseParameters It contains the input files, output files and parameters */ public Wrapper(parseParameters parameters) { this.startTime = System.currentTimeMillis(); this.train = new myDataset(); this.val = new myDataset(); this.test = new myDataset(); try { System.out.println("\nReading the training set: " + parameters.getTrainingInputFile()); this.train.readClassificationSet(parameters.getTrainingInputFile(), true); System.out.println("\nReading the validation set: " + parameters.getValidationInputFile()); this.val.readClassificationSet(parameters.getValidationInputFile(), false); System.out.println("\nReading the test set: " + parameters.getTestInputFile()); this.test.readClassificationSet(parameters.getTestInputFile(), false); } catch (IOException e) { System.err.println("There was a problem while reading the input data-sets: " + e); this.somethingWrong = true; } // We may check if there are some numerical attributes, because our algorithm may not handle // them: // somethingWrong = somethingWrong || train.hasNumericalAttributes(); this.somethingWrong = this.somethingWrong || this.train.hasMissingAttributes(); this.outputTr = parameters.getTrainingOutputFile(); this.outputTst = parameters.getTestOutputFile(); this.fileRB = parameters.getOutputFile(0); this.data = parameters.getTrainingInputFile(); // Now we parse the parameters seed = Long.parseLong(parameters.getParameter(0)); this.populationSize = Integer.parseInt(parameters.getParameter(1)); this.maxTrials = Integer.parseInt(parameters.getParameter(2)); if (this.populationSize % 2 > 0) this.populationSize++; this.crossover = Double.parseDouble(parameters.getParameter(3)); // crossover probability this.mutation = Double.parseDouble(parameters.getParameter(4)); String aux2 = parameters.getParameter(5); this.fitness = this.aucVal; if 
(aux2.equalsIgnoreCase("AUC_TR")) { fitness = this.aucTrain; } else if (aux2.equalsIgnoreCase("GM_VAL")) { fitness = this.gmVal; } aux2 = parameters.getParameter(6); this.instances = this.ALL; if (aux2.equalsIgnoreCase("MAJ")) { instances = this.MAJ; } aux2 = parameters.getParameter(7); this.ensemble = this.NONE; if (aux2.equalsIgnoreCase("WV")) { ensemble = this.WV; } else if (aux2.equalsIgnoreCase("WTA")) { ensemble = this.WTA; } else if (aux2.equalsIgnoreCase("VOTE")) { ensemble = this.VOTE; } // ensemble = aux2.equalsIgnoreCase("true"); aux2 = parameters.getParameter(8); model = aux2.equalsIgnoreCase("true"); aux2 = parameters.getParameter(9); this.weighting = aux2.equalsIgnoreCase("true"); header = parameters.getTestInputFile(); String[] aux = null; aux = header.split("\\."); header = aux[aux.length - 2]; // aux.length-1 is the extension aux = header.split("/"); header = aux[aux.length - 1]; // To be run in SGE Randomize.setSeed(seed); }
/**
 * The main method of the class that includes the operations of the algorithm. It includes all the
 * operations that the algorithm has and finishes when it writes the output information into
 * files.
 *
 * <p>Instances labelled 0 are counted against all the others; the smaller group is the one that
 * gets oversampled (label 0, or label 1 when the label-0 group is the larger one). Randomly
 * chosen instances of that group are appended to a full copy of the training set until the
 * result holds twice the size of the larger group. The enlarged training set and the test set
 * are then written with {@code OutputIS.escribeSalida}.
 */
public void run() {
  long startMillis = System.currentTimeMillis();

  /* Count of number of positive and negative examples */
  int minorityCount = 0;
  int majorityCount = 0;
  for (int label : clasesTrain) {
    if (label == 0) {
      minorityCount++;
    } else {
      majorityCount++;
    }
  }

  int minorityLabel = 0;
  if (minorityCount > majorityCount) {
    /* label 0 is the larger group: swap the roles */
    int held = minorityCount;
    minorityCount = majorityCount;
    majorityCount = held;
    minorityLabel = 1;
  }

  /* Localize the positive instances */
  int[] minorityRows = new int[minorityCount];
  int found = 0;
  for (int row = 0; row < clasesTrain.length; row++) {
    if (clasesTrain[row] == minorityLabel) {
      minorityRows[found++] = row;
    }
  }

  /* Obtain the oversampling array taking account the previous array */
  int[] extraRows = new int[majorityCount - minorityCount];
  Randomize.setSeed(semilla);
  for (int e = 0; e < extraRows.length; e++) {
    extraRows[e] = minorityRows[Randomize.Randint(0, minorityCount - 1)];
  }

  /* Construction of the S set: the whole training set first, the drawn copies after it */
  int sizeS = 2 * majorityCount;
  double[][] dataS = new double[sizeS][datosTrain[0].length];
  int[] classesS = new int[sizeS];

  int target = 0;
  while (target < datosTrain.length) {
    for (int col = 0; col < datosTrain[0].length; col++) {
      dataS[target][col] = datosTrain[target][col];
    }
    classesS[target] = clasesTrain[target];
    target++;
  }
  for (int e = 0; target < sizeS; target++, e++) {
    int source = extraRows[e];
    for (int col = 0; col < datosTrain[0].length; col++) {
      dataS[target][col] = datosTrain[source][col];
    }
    classesS[target] = clasesTrain[source];
  }

  System.out.println(
      "RandomOverSampling "
          + relation
          + " "
          + (double) (System.currentTimeMillis() - startMillis) / 1000.0
          + "s");

  OutputIS.escribeSalida(ficheroSalida[0], dataS, classesS, entradas, salida, nEntradas, relation);
  OutputIS.escribeSalida(ficheroSalida[1], test, entradas, salida, nEntradas, relation);
}
/** * The main method of the class that includes the operations of the algorithm. It includes all the * operations that the algorithm has and finishes when it writes the output information into * files. */ public void run() { int S[]; int i, j, l, m; int nPos = 0, nNeg = 0; int posID; int nClases; int pos; int baraje[]; int tmp; double conjS[][]; int clasesS[]; int tamS = 0; int claseObt; int cont; int busq; boolean marcas[]; int nSel; double conjS2[][]; int clasesS2[]; double minDist, dist; long tiempo = System.currentTimeMillis(); /*CNN PART*/ /*Count of number of positive and negative examples*/ for (i = 0; i < clasesTrain.length; i++) { if (clasesTrain[i] == 0) nPos++; else nNeg++; } if (nPos > nNeg) { tmp = nPos; nPos = nNeg; nNeg = tmp; posID = 1; } else { posID = 0; } /*Inicialization of the candidates set*/ S = new int[datosTrain.length]; for (i = 0; i < S.length; i++) S[i] = Integer.MAX_VALUE; /*Inserting an element of mayority class*/ Randomize.setSeed(semilla); pos = Randomize.Randint(0, clasesTrain.length - 1); while (clasesTrain[pos] == posID) pos = (pos + 1) % clasesTrain.length; S[tamS] = pos; tamS++; /*Insert all subset of minority class*/ for (i = 0; i < clasesTrain.length; i++) { if (clasesTrain[i] == posID) { S[tamS] = i; tamS++; } } /*Algorithm body. We resort randomly the instances of T and compare with the rest of S. 
If an instance doesn´t classified correctly, it is inserted in S*/ baraje = new int[datosTrain.length]; for (i = 0; i < datosTrain.length; i++) baraje[i] = i; for (i = 0; i < datosTrain.length; i++) { pos = Randomize.Randint(i, clasesTrain.length - 1); tmp = baraje[i]; baraje[i] = baraje[pos]; baraje[pos] = tmp; } for (i = 0; i < datosTrain.length; i++) { if (clasesTrain[i] != posID) { // only for mayority class instances /*Construction of the S set from the previous vector S*/ conjS = new double[tamS][datosTrain[0].length]; clasesS = new int[tamS]; for (j = 0; j < tamS; j++) { for (l = 0; l < datosTrain[0].length; l++) conjS[j][l] = datosTrain[S[j]][l]; clasesS[j] = clasesTrain[S[j]]; } /*Do KNN to the instance*/ claseObt = KNN.evaluacionKNN(k, conjS, clasesS, datosTrain[baraje[i]], 2); if (claseObt != clasesTrain[baraje[i]]) { // fail in the class, it is included in S Arrays.sort(S); busq = Arrays.binarySearch(S, baraje[i]); if (busq < 0) { S[tamS] = baraje[i]; tamS++; } } } } /*Construction of the S set from the previous vector S*/ conjS = new double[tamS][datosTrain[0].length]; clasesS = new int[tamS]; for (j = 0; j < tamS; j++) { for (l = 0; l < datosTrain[0].length; l++) conjS[j][l] = datosTrain[S[j]][l]; clasesS[j] = clasesTrain[S[j]]; } /*TOMEK LINKS PART*/ /*Inicialization of the instance flagged vector of the S set*/ marcas = new boolean[conjS.length]; for (i = 0; i < conjS.length; i++) { marcas[i] = true; } nSel = conjS.length; for (i = 0; i < conjS.length; i++) { minDist = Double.POSITIVE_INFINITY; pos = 0; for (j = 0; j < conjS.length; j++) { if (i != j) { dist = KNN.distancia(conjS[i], conjS[j]); if (dist < minDist) { minDist = dist; pos = j; } } } if (clasesS[i] != clasesS[pos]) { if (clasesS[i] != posID) { if (marcas[i] == true) { marcas[i] = false; nSel--; } } else { if (marcas[pos] == true) { marcas[pos] = false; nSel--; } } } } /*Construction of the S set from the flags*/ conjS2 = new double[nSel][conjS[0].length]; clasesS2 = new int[nSel]; for 
(m = 0, l = 0; m < conjS.length; m++) { if (marcas[m]) { // the instance will evaluate for (j = 0; j < conjS[0].length; j++) { conjS2[l][j] = conjS[m][j]; } clasesS2[l] = clasesS[m]; l++; } } System.out.println( "CNN_TomekLinks " + relation + " " + (double) (System.currentTimeMillis() - tiempo) / 1000.0 + "s"); OutputIS.escribeSalida( ficheroSalida[0], conjS2, clasesS2, entradas, salida, nEntradas, relation); OutputIS.escribeSalida(ficheroSalida[1], test, entradas, salida, nEntradas, relation); }
/**
 * It runs the Qstatistic.
 *
 * <p>Outline of what the method does:
 *
 * <ol>
 *   <li>takes the class of the first training instance as positive and the first different
 *       class as negative, swapping both when the positive one turns out to be the larger group;
 *   <li>builds the working set ({@code datosArt} and companions): the output of {@code SMOTE}
 *       when {@code hybrid} is "smote + eus", a plain copy of the training data otherwise;
 *   <li>evolves a population of {@code popSize} chromosomes until {@code nEval} evaluations have
 *       been spent: shuffled copies are recombined, the valid offspring are evaluated and merged
 *       with the current population, {@code d} is decreased when no offspring gets in, and the
 *       population is diverged from its best member when {@code d} reaches zero;
 *   <li>builds the selected set from the best chromosome (adding every positive instance when
 *       {@code majSelection} is set), optionally applies {@code SMOTE} to it when {@code hybrid}
 *       is "eus + smote", stores the best chromosome body and predictions in {@code best} /
 *       {@code bestOutputs} and writes the selected set to {@code ficheroSalida[0]}.
 * </ol>
 */
public void runAlgorithm() {
  int i, j, l, h;
  double conjS[][];
  double conjR[][];
  int conjN[][];
  boolean conjM[][];
  int clasesS[];
  int nSel = 0;
  Chromosome poblacion[];
  int ev = 0;
  Chromosome C[];
  int baraje[];
  int pos, tmp;
  Chromosome newPob[];
  int d;
  int tamC;
  Chromosome pobTemp[];
  int nPos = 0, nNeg = 0, posID, negID;
  double datosArt[][];
  double realArt[][];
  int nominalArt[][];
  boolean nulosArt[][];
  int clasesArt[];
  int tamS;

  long tiempo = System.currentTimeMillis();

  // Randomize.setSeed (semilla);

  /* The first class seen is the positive candidate; negID stays -1 when no other class exists */
  posID = clasesTrain[0];
  negID = -1;
  for (i = 0; i < clasesTrain.length; i++) {
    if (clasesTrain[i] != posID) {
      negID = clasesTrain[i];
      break;
    }
  }

  /* Count of number of positive and negative examples */
  for (i = 0; i < clasesTrain.length; i++) {
    if (clasesTrain[i] == posID) nPos++;
    else nNeg++;
  }

  /* The positive class must be the smaller group: swap counts and labels otherwise */
  if (nPos > nNeg) {
    tmp = nPos;
    nPos = nNeg;
    nNeg = tmp;
    tmp = posID;
    posID = negID;
    negID = tmp;
  } else {
    /*
     * tmp = posID; posID = negID; negID = tmp;
     */
  }

  if (hybrid.equalsIgnoreCase("smote + eus")) {
    /* Size of the oversampled set: balanced, or enlarged by the smoting ratio */
    if (balance) {
      tamS = 2 * nNeg;
    } else {
      tamS = nNeg + nPos + (int) (nPos * smoting);
    }
    datosArt = new double[tamS][datosTrain[0].length];
    realArt = new double[tamS][datosTrain[0].length];
    nominalArt = new int[tamS][datosTrain[0].length];
    nulosArt = new boolean[tamS][datosTrain[0].length];
    clasesArt = new int[tamS];

    SMOTE(
        datosTrain,
        realTrain,
        nominalTrain,
        nulosTrain,
        clasesTrain,
        datosArt,
        realArt,
        nominalArt,
        nulosArt,
        clasesArt,
        kSMOTE,
        ASMO,
        smoting,
        balance,
        nPos,
        posID,
        nNeg,
        negID,
        distanceEu);
  } else {
    /* No previous oversampling: the working set is a copy of the training set */
    datosArt = new double[datosTrain.length][datosTrain[0].length];
    realArt = new double[datosTrain.length][datosTrain[0].length];
    nominalArt = new int[datosTrain.length][datosTrain[0].length];
    nulosArt = new boolean[datosTrain.length][datosTrain[0].length];
    clasesArt = new int[clasesTrain.length];
    for (i = 0; i < datosTrain.length; i++) {
      for (j = 0; j < datosTrain[i].length; j++) {
        datosArt[i][j] = datosTrain[i][j];
        realArt[i][j] = realTrain[i][j];
        nominalArt[i][j] = nominalTrain[i][j];
        nulosArt[i][j] = nulosTrain[i][j];
      }
      clasesArt[i] = clasesTrain[i];
    }
  }

  /* Count of number of positive and negative examples */
  nPos = nNeg = 0;
  for (i = 0; i < clasesArt.length; i++) {
    if (clasesArt[i] == posID) nPos++;
    else nNeg++;
  }

  /* Initial value of d: a quarter of the chromosome length */
  if (majSelection) d = nNeg / 4;
  else d = datosArt.length / 4;

  /* Random initialization of the population */
  /* chromosomes have one gene per negative instance when majSelection is set, one per instance
  of the working set otherwise */
  poblacion = new Chromosome[popSize];
  baraje = new int[popSize];
  for (i = 0; i < popSize; i++)
    if (majSelection) poblacion[i] = new Chromosome(nNeg);
    else poblacion[i] = new Chromosome(datosArt.length);

  /* Initial evaluation of the population */
  /* NOTE: these evaluations are not added to the ev counter */
  for (i = 0; i < popSize; i++)
    poblacion[i].evalua(
        datosTrain,
        realTrain,
        nominalTrain,
        nulosTrain,
        clasesTrain,
        datosArt,
        realArt,
        nominalArt,
        nulosArt,
        clasesArt,
        wrapper,
        k,
        evMeas,
        majSelection,
        pFactor,
        P,
        posID,
        nPos,
        distanceEu,
        entradas,
        anteriores,
        salidasAnteriores);

  /* Until stop condition */
  while (ev < nEval) {
    C = new Chromosome[popSize];

    /* Selection(r) of C(t) from P(t) */
    /* C(t) is a copy of the current population in random order */
    for (i = 0; i < popSize; i++) baraje[i] = i;
    for (i = 0; i < popSize; i++) {
      pos = Randomize.Randint(i, popSize - 1);
      tmp = baraje[i];
      baraje[i] = baraje[pos];
      baraje[pos] = tmp;
    }
    for (i = 0; i < popSize; i++)
      if (majSelection) C[i] = new Chromosome(nNeg, poblacion[baraje[i]]);
      else C[i] = new Chromosome(datosArt.length, poblacion[baraje[i]]);

    /* Structure recombination in C(t) constructing C'(t) */
    /* the value returned by recombinar is used as the number of chromosomes of C'(t) */
    tamC = recombinar(C, d, nNeg, nPos, majSelection);
    newPob = new Chromosome[tamC];
    for (i = 0, l = 0; i < C.length; i++) {
      if (C[i].esValido()) { // the chromosome must be copied to the
        // new population C'(t)
        if (majSelection) newPob[l] = new Chromosome(nNeg, C[i]);
        else newPob[l] = new Chromosome(datosArt.length, C[i]);
        l++;
      }
    }

    /* Structure evaluation in C'(t) */
    for (i = 0; i < newPob.length; i++) {
      newPob[i].evalua(
          datosTrain,
          realTrain,
          nominalTrain,
          nulosTrain,
          clasesTrain,
          datosArt,
          realArt,
          nominalArt,
          nulosArt,
          clasesArt,
          wrapper,
          k,
          evMeas,
          majSelection,
          pFactor,
          P,
          posID,
          nPos,
          distanceEu,
          entradas,
          anteriores,
          salidasAnteriores);
      ev++;
    }

    /* Selection(s) of P(t) from C'(t) and P(t-1) */
    /* both arrays are sorted with the ordering defined by Chromosome; the code below reads index
    0 as the best element and the last index as the worst */
    Arrays.sort(poblacion);
    Arrays.sort(newPob);

    /*
     * If the best of C' is worse than the worst of P(t-1), then there
     * will no changes
     */
    if (tamC == 0 || newPob[0].getCalidad() < poblacion[popSize - 1].getCalidad()) {
      d--;
    } else {
      /* merge of both sorted arrays, keeping the popSize chromosomes of highest quality */
      pobTemp = new Chromosome[popSize];
      for (i = 0, j = 0, l = 0; i < popSize && l < tamC; i++) {
        if (poblacion[j].getCalidad() > newPob[l].getCalidad()) {
          if (majSelection) pobTemp[i] = new Chromosome(nNeg, poblacion[j]);
          else pobTemp[i] = new Chromosome(datosArt.length, poblacion[j]);
          j++;
        } else {
          if (majSelection) pobTemp[i] = new Chromosome(nNeg, newPob[l]);
          else pobTemp[i] = new Chromosome(datosArt.length, newPob[l]);
          l++;
        }
      }
      if (l == tamC) { // there are chromosomes for copying
        /* the offspring are exhausted: the remaining slots come from the old population */
        for (; i < popSize; i++) {
          if (majSelection) pobTemp[i] = new Chromosome(nNeg, poblacion[j]);
          else pobTemp[i] = new Chromosome(datosArt.length, poblacion[j]);
          j++;
        }
      }
      poblacion = pobTemp;
    }

    /* Last step of the algorithm */
    /* when d is exhausted every chromosome but the first is diverged from the first one and the
    chromosomes left without evaluation are evaluated again */
    if (d <= 0) {
      for (i = 1; i < popSize; i++) {
        poblacion[i].divergeCHC(r, poblacion[0], prob0to1Div);
      }
      for (i = 0; i < popSize; i++)
        if (!(poblacion[i].estaEvaluado())) {
          poblacion[i].evalua(
              datosTrain,
              realTrain,
              nominalTrain,
              nulosTrain,
              clasesTrain,
              datosArt,
              realArt,
              nominalArt,
              nulosArt,
              clasesArt,
              wrapper,
              k,
              evMeas,
              majSelection,
              pFactor,
              P,
              posID,
              nPos,
              distanceEu,
              entradas,
              anteriores,
              salidasAnteriores);
          ev++;
        }

      /* Reinicialization of d value */
      if (majSelection) d = (int) (r * (1.0 - r) * (double) nNeg);
      else d = (int) (r * (1.0 - r) * (double) datosArt.length);
    }
  }

  /* After sorting, poblacion[0] is used as the best chromosome */
  Arrays.sort(poblacion);

  if (majSelection) {
    /* genes only cover the negative instances; every positive instance is added afterwards */
    nSel = poblacion[0].genesActivos() + nPos;

    /* Construction of S set from the best chromosome */
    conjS = new double[nSel][datosArt[0].length];
    conjR = new double[nSel][datosArt[0].length];
    conjN = new int[nSel][datosArt[0].length];
    conjM = new boolean[nSel][datosArt[0].length];
    clasesS = new int[nSel];
    h = 0;
    for (i = 0, l = 0; i < nNeg; i++, h++) {
      /* advance h to the next negative instance: gene i refers to the i-th negative instance */
      /* NOTE(review): clasesArt[h] is read before h is compared with the array length */
      for (; clasesArt[h] == posID && h < clasesArt.length; h++) ;
      if (poblacion[0].getGen(i)) { // the instance must be copied to
        // the solution
        for (j = 0; j < datosArt[h].length; j++) {
          conjS[l][j] = datosArt[h][j];
          conjR[l][j] = realArt[h][j];
          conjN[l][j] = nominalArt[h][j];
          conjM[l][j] = nulosArt[h][j];
        }
        clasesS[l] = clasesArt[h];
        l++;
      }
    }
    /* all the positive instances follow the selected negative ones */
    for (i = 0; i < datosArt.length; i++) {
      if (clasesArt[i] == posID) {
        for (j = 0; j < datosArt[i].length; j++) {
          conjS[l][j] = datosArt[i][j];
          conjR[l][j] = realArt[i][j];
          conjN[l][j] = nominalArt[i][j];
          conjM[l][j] = nulosArt[i][j];
        }
        clasesS[l] = clasesArt[i];
        l++;
      }
    }
  } else {
    nSel = poblacion[0].genesActivos();

    /* Construction of S set from the best chromosome */
    conjS = new double[nSel][datosArt[0].length];
    conjR = new double[nSel][datosArt[0].length];
    conjN = new int[nSel][datosArt[0].length];
    conjM = new boolean[nSel][datosArt[0].length];
    clasesS = new int[nSel];
    for (i = 0, l = 0; i < datosArt.length; i++) {
      if (poblacion[0].getGen(i)) { // the instance must be copied to
        // the solution
        for (j = 0; j < datosArt[i].length; j++) {
          conjS[l][j] = datosArt[i][j];
          conjR[l][j] = realArt[i][j];
          conjN[l][j] = nominalArt[i][j];
          conjM[l][j] = nulosArt[i][j];
        }
        clasesS[l] = clasesArt[i];
        l++;
      }
    }
  }

  if (hybrid.equalsIgnoreCase("eus + smote")) {
    /* SMOTE is applied to the selected set, only while the positive class is still smaller */
    nPos = nNeg = 0;
    for (i = 0; i < clasesS.length; i++) {
      if (clasesS[i] == posID) nPos++;
      else nNeg++;
    }
    if (nPos < nNeg) {
      if (balance) {
        tamS = 2 * nNeg;
      } else {
        tamS = nNeg + nPos + (int) (nPos * smoting);
      }
      datosArt = new double[tamS][datosTrain[0].length];
      realArt = new double[tamS][datosTrain[0].length];
      nominalArt = new int[tamS][datosTrain[0].length];
      nulosArt = new boolean[tamS][datosTrain[0].length];
      clasesArt = new int[tamS];

      SMOTE(
          conjS,
          conjR,
          conjN,
          conjM,
          clasesS,
          datosArt,
          realArt,
          nominalArt,
          nulosArt,
          clasesArt,
          kSMOTE,
          ASMO,
          smoting,
          balance,
          nPos,
          posID,
          nNeg,
          negID,
          distanceEu);

      nSel = datosArt.length;

      /* The selected set is replaced by a copy of the arrays filled by SMOTE */
      conjS = new double[nSel][datosArt[0].length];
      conjR = new double[nSel][datosArt[0].length];
      conjN = new int[nSel][datosArt[0].length];
      conjM = new boolean[nSel][datosArt[0].length];
      clasesS = new int[nSel];
      for (i = 0; i < datosArt.length; i++) {
        for (j = 0; j < datosArt[i].length; j++) {
          conjS[i][j] = datosArt[i][j];
          conjR[i][j] = realArt[i][j];
          conjN[i][j] = nominalArt[i][j];
          conjM[i][j] = nulosArt[i][j];
        }
        clasesS[i] = clasesArt[i];
      }
    }
  }

  /*
   * for (i = 0; i < poblacion.length; i++){ for (j = 0; j <
   * poblacion[0].cuerpo.length; j++){
   * System.out.print((poblacion[i].cuerpo[j] ? 1 : 0)); }
   * System.out.println(" Calidad: " + poblacion[i].calidad); }
   */

  /* Body and predictions of the best chromosome are kept in the fields best / bestOutputs */
  best = poblacion[0].cuerpo.clone();
  bestOutputs = poblacion[0].prediction.clone();

  System.out.println(
      "QstatEUSCHC "
          + relation
          + " "
          + (double) (System.currentTimeMillis() - tiempo) / 1000.0
          + "s");

  /* Only the selected training set is written; the test output below is disabled */
  OutputIS.escribeSalida(
      ficheroSalida[0], conjR, conjN, conjM, clasesS, entradas, salida, nEntradas, relation);
  // OutputIS.escribeSalida(ficheroSalida[1], test, entradas, salida,
  // nEntradas, relation);
}
public void ejecutar() { double conjS[][]; double conjR[][]; int conjN[][]; boolean conjM[][]; int clasesS[]; int S[]; /* Binary Vector, to decide if the instance will be included*/ int i, j, l, cont; int nClases; int tamS; int transformations; int claseObt[]; int clasePredominante; long tiempo = System.currentTimeMillis(); transformations = 0; /*Getting the number of different classes*/ nClases = 0; for (i = 0; i < clasesTrain.length; i++) if (clasesTrain[i] > nClases) nClases = clasesTrain[i]; nClases++; if (nClases < 2) { System.err.println("Input dataset is empty"); nClases = 0; } /*Algorithm body. First, S=TS. Then, for each instance of TS, the first step is to repeat the aplication of the k-nn, and then we decide if we need to change the label of the instance or we don't need it. */ /*Inicialization of the candidates set, S=X, where X is the original Training Set*/ S = new int[datosTrain.length]; for (i = 0; i < S.length; i++) S[i] = 1; /* All included*/ tamS = datosTrain.length; System.out.print("K= " + k + "\n"); System.out.print("K'= " + k2 + "\n"); for (i = 0; i < datosTrain.length; i++) { /* I need find the k-nn of i in X - {i}, so I make conjS without i*/ conjS = new double[datosTrain.length - 1][datosTrain[0].length]; conjR = new double[datosTrain.length - 1][datosTrain[0].length]; conjN = new int[datosTrain.length - 1][datosTrain[0].length]; conjM = new boolean[datosTrain.length - 1][datosTrain[0].length]; clasesS = new int[datosTrain.length - 1]; cont = 0; for (j = 0; j < datosTrain.length; j++) { if (i != j) { for (l = 0; l < datosTrain[0].length; l++) { conjS[cont][l] = datosTrain[j][l]; conjR[cont][l] = realTrain[j][l]; conjN[cont][l] = nominalTrain[j][l]; conjM[cont][l] = nulosTrain[j][l]; } clasesS[cont] = clasesTrain[j]; cont++; } } /*Do KNN to the instance*/ claseObt = KNN.evaluacionKNN3( k, conjS, conjR, conjN, conjM, clasesS, datosTrain[i], realTrain[i], nominalTrain[i], nulosTrain[i], nClases, distanceEu); /* System.out.print("Las clases de 
los k vecinos m�s cercanos son\n"); for(int m=0;m<k;m++){ System.out.print(claseObt[m]+ " "); } System.out.print("\n-----------------------------------------------\n"); */ /*Now, we must check that we have at least k2 neighboors with the same class. */ int max = 0; clasePredominante = 0; for (int m = 0; m < claseObt.length; m++) { int claseDeInstancia = claseObt[m]; // Select one class. int iguales = 0; for (j = 0; j < claseObt.length; j++) { // Check numbers of instances with this class if (j != m) { // I can't count the same. if (claseObt[j] == claseDeInstancia) { iguales++; } } } // I must check if there is another class with more instances. if (iguales > max) { max = iguales; clasePredominante = claseObt[m]; } } // System.out.print("max " + max +"\n"); // System.out.print("Clase Predominante: "+clasePredominante+"\n"); /* Max+1 = number of neighbours with the same class*/ if ((max) >= k2) { /* if there are at least k2 neighbour, we change the class in S, */ if (clasePredominante != clasesTrain[i]) transformations++; clasesTrain[i] = clasePredominante; S[i] = 1; } else { /* Discard.*/ tamS--; S[i] = 0; } } System.out.print("S size resultante= " + tamS + "\n"); System.out.print("Transformations = " + transformations + "\n"); /*Construction of the S set from the previous vector S*/ conjS = new double[tamS][datosTrain[0].length]; conjR = new double[tamS][datosTrain[0].length]; conjN = new int[tamS][datosTrain[0].length]; conjM = new boolean[tamS][datosTrain[0].length]; clasesS = new int[tamS]; cont = 0; /* To establish the sets' sizes */ for (j = 0; j < datosTrain.length; j++) { if (S[j] == 1) { /* Checking the instance is included*/ for (l = 0; l < datosTrain[0].length; l++) { conjS[cont][l] = datosTrain[j][l]; conjR[cont][l] = realTrain[j][l]; conjN[cont][l] = nominalTrain[j][l]; conjM[cont][l] = nulosTrain[j][l]; } clasesS[cont] = clasesTrain[j]; cont++; } } System.out.println( "Time elapse: " + (double) (System.currentTimeMillis() - tiempo) / 1000.0 + "s"); 
OutputIS.escribeSalida( ficheroSalida[0], conjR, conjN, conjM, clasesS, entradas, salida, nEntradas, relation); OutputIS.escribeSalida(ficheroSalida[1], test, entradas, salida, nEntradas, relation); }
public void ejecutar() { int i, j, l; boolean marcas[]; boolean marcas2[]; boolean marcastmp[]; boolean incorrect[]; int nSel; double conjS[][]; double conjR[][]; int conjN[][]; boolean conjM[][]; int clasesS[]; Vector<Integer> vecinos[]; int next; int maxneigh; int pos; int borrado; int claseObt; int nClases; long tiempo = System.currentTimeMillis(); /*Getting the number of differents classes*/ nClases = 0; for (i = 0; i < clasesTrain.length; i++) if (clasesTrain[i] > nClases) nClases = clasesTrain[i]; nClases++; /*Inicialization of the flagged instances vector for a posterior copy*/ marcas = new boolean[datosTrain.length]; marcas2 = new boolean[datosTrain.length]; incorrect = new boolean[datosTrain.length]; marcastmp = new boolean[datosTrain.length]; Arrays.fill(marcas, true); Arrays.fill(marcas2, true); Arrays.fill(incorrect, false); Arrays.fill(marcastmp, true); vecinos = new Vector[datosTrain.length]; for (i = 0; i < datosTrain.length; i++) vecinos[i] = new Vector<Integer>(); for (i = 0; i < datosTrain.length; i++) { next = nextNeighbour(marcas, datosTrain, i, vecinos[i]); for (j = 0; j < datosTrain.length; j++) marcastmp[j] = marcas[j]; while (next >= 0 && clasesTrain[next] == clasesTrain[i]) { vecinos[i].add(new Integer(next)); marcastmp[next] = false; next = nextNeighbour(marcastmp, datosTrain, i, vecinos[i]); } } maxneigh = vecinos[0].size(); pos = 0; for (i = 1; i < datosTrain.length; i++) { if (vecinos[i].size() > maxneigh) { maxneigh = vecinos[i].size(); pos = i; } } while (maxneigh > 0) { for (i = 0; i < vecinos[pos].size(); i++) { borrado = vecinos[pos].elementAt(i).intValue(); marcas[borrado] = false; for (j = 0; j < datosTrain.length; j++) { vecinos[j].removeElement(new Integer(borrado)); } vecinos[borrado].clear(); } vecinos[pos].clear(); maxneigh = vecinos[0].size(); pos = 0; for (i = 1; i < datosTrain.length; i++) { if (vecinos[i].size() > maxneigh) { maxneigh = vecinos[i].size(); pos = i; } } } /*Building of the S set from the flags*/ nSel = 0; 
for (i = 0; i < datosTrain.length; i++) if (marcas[i]) nSel++; conjS = new double[nSel][datosTrain[0].length]; conjR = new double[nSel][datosTrain[0].length]; conjN = new int[nSel][datosTrain[0].length]; conjM = new boolean[nSel][datosTrain[0].length]; clasesS = new int[nSel]; for (i = 0, l = 0; i < datosTrain.length; i++) { if (marcas[i]) { // the instance will be copied to the solution for (j = 0; j < datosTrain[0].length; j++) { conjS[l][j] = datosTrain[i][j]; conjR[l][j] = realTrain[i][j]; conjN[l][j] = nominalTrain[i][j]; conjM[l][j] = nulosTrain[i][j]; } clasesS[l] = clasesTrain[i]; l++; } } for (i = 0; i < datosTrain.length; i++) { /*Apply 1-NN to the instance*/ claseObt = KNN.evaluacionKNN2( 1, conjS, conjR, conjN, conjM, clasesTrain, datosTrain[i], realTrain[i], nominalTrain[i], nulosTrain[i], nClases, true); if (claseObt != clasesTrain[i]) { incorrect[i] = true; } } for (i = 0; i < datosTrain.length; i++) vecinos[i] = new Vector<Integer>(); for (i = 0; i < datosTrain.length; i++) { if (incorrect[i]) { next = nextNeighbour(marcas2, datosTrain, i, vecinos[i]); for (j = 0; j < datosTrain.length; j++) marcastmp[j] = marcas2[j]; while (next >= 0 && clasesTrain[next] == clasesTrain[i]) { vecinos[i].add(new Integer(next)); marcastmp[next] = false; next = nextNeighbour(marcastmp, datosTrain, i, vecinos[i]); } } } maxneigh = vecinos[0].size(); pos = 0; for (i = 1; i < datosTrain.length; i++) { if (vecinos[i].size() > maxneigh) { maxneigh = vecinos[i].size(); pos = i; } } while (maxneigh > 0) { for (i = 0; i < vecinos[pos].size(); i++) { borrado = vecinos[pos].elementAt(i).intValue(); marcas2[borrado] = false; for (j = 0; j < datosTrain.length; j++) { vecinos[j].removeElement(new Integer(borrado)); } vecinos[borrado].clear(); } vecinos[pos].clear(); maxneigh = vecinos[0].size(); pos = 0; for (i = 1; i < datosTrain.length; i++) { if (vecinos[i].size() > maxneigh) { maxneigh = vecinos[i].size(); pos = i; } } } for (i = 0; i < marcas.length; i++) marcas[i] |= 
(marcas2[i] & incorrect[i]); /*Building of the S set from the flags*/ nSel = 0; for (i = 0; i < datosTrain.length; i++) if (marcas[i]) nSel++; conjS = new double[nSel][datosTrain[0].length]; conjR = new double[nSel][datosTrain[0].length]; conjN = new int[nSel][datosTrain[0].length]; conjM = new boolean[nSel][datosTrain[0].length]; clasesS = new int[nSel]; for (i = 0, l = 0; i < datosTrain.length; i++) { if (marcas[i]) { // the instance will be copied to the solution for (j = 0; j < datosTrain[0].length; j++) { conjS[l][j] = datosTrain[i][j]; conjR[l][j] = realTrain[i][j]; conjN[l][j] = nominalTrain[i][j]; conjM[l][j] = nulosTrain[i][j]; } clasesS[l] = clasesTrain[i]; l++; } } System.out.println( "Reconsistent " + relation + " " + (double) (System.currentTimeMillis() - tiempo) / 1000.0 + "s"); // COn conjS me vale. int trainRealClass[][]; int trainPrediction[][]; trainRealClass = new int[datosTrain.length][1]; trainPrediction = new int[datosTrain.length][1]; // Working on training for (i = 0; i < datosTrain.length; i++) { trainRealClass[i][0] = clasesTrain[i]; trainPrediction[i][0] = KNN.evaluate(datosTrain[i], conjS, nClases, clasesS, 1); } KNN.writeOutput(ficheroSalida[0], trainRealClass, trainPrediction, entradas, salida, relation); // Working on test int realClass[][] = new int[datosTest.length][1]; int prediction[][] = new int[datosTest.length][1]; // Check time for (i = 0; i < realClass.length; i++) { realClass[i][0] = clasesTest[i]; prediction[i][0] = KNN.evaluate(datosTest[i], conjS, nClases, clasesS, 1); } KNN.writeOutput(ficheroSalida[1], realClass, prediction, entradas, salida, relation); }
/** Process the training and test files provided in the parameters file to the constructor. */ public void process() { // declarations double[] outputs; double[] outputs2; Instance neighbor; double dist, mean; int actual; Randomize rnd = new Randomize(); Instance ex; gCenter kmeans = null; int iterations = 0; double E; double prevE; int totalMissing = 0; boolean allMissing = true; rnd.setSeed(semilla); // PROCESS try { // Load in memory a dataset that contains a classification problem IS.readSet(input_train_name, true); int in = 0; int out = 0; ndatos = IS.getNumInstances(); nvariables = Attributes.getNumAttributes(); nentradas = Attributes.getInputNumAttributes(); nsalidas = Attributes.getOutputNumAttributes(); X = new String[ndatos][nvariables]; // matrix with transformed data kmeans = new gCenter(K, ndatos, nvariables); timesSeen = new FreqList[nvariables]; mostCommon = new String[nvariables]; // first, we choose k 'means' randomly from all // instances totalMissing = 0; for (int i = 0; i < ndatos; i++) { Instance inst = IS.getInstance(i); if (inst.existsAnyMissingValue()) totalMissing++; } if (totalMissing == ndatos) allMissing = true; else allMissing = false; for (int numMeans = 0; numMeans < K; numMeans++) { do { actual = (int) (ndatos * rnd.Rand()); ex = IS.getInstance(actual); } while (ex.existsAnyMissingValue() && !allMissing); kmeans.copyCenter(ex, numMeans); } // now, iterate adjusting clusters' centers and // instances to them prevE = 0; iterations = 0; do { for (int i = 0; i < ndatos; i++) { Instance inst = IS.getInstance(i); kmeans.setClusterOf(inst, i); } // set new centers kmeans.recalculateCenters(IS); // compute RMSE E = 0; for (int i = 0; i < ndatos; i++) { Instance inst = IS.getInstance(i); E += kmeans.distance(inst, kmeans.getClusterOf(i)); } iterations++; // System.out.println(iterations+"\t"+E); if (Math.abs(prevE - E) == 0) iterations = maxIter; else prevE = E; } while (E > minError && iterations < maxIter); for (int i = 0; i < ndatos; i++) 
{ Instance inst = IS.getInstance(i); in = 0; out = 0; for (int j = 0; j < nvariables; j++) { Attribute a = Attributes.getAttribute(j); direccion = a.getDirectionAttribute(); tipo = a.getType(); if (direccion == Attribute.INPUT) { if (tipo != Attribute.NOMINAL && !inst.getInputMissingValues(in)) { X[i][j] = new String(String.valueOf(inst.getInputRealValues(in))); } else { if (!inst.getInputMissingValues(in)) X[i][j] = inst.getInputNominalValues(in); else { actual = kmeans.getClusterOf(i); X[i][j] = new String(kmeans.valueAt(actual, j)); } } in++; } else { if (direccion == Attribute.OUTPUT) { if (tipo != Attribute.NOMINAL && !inst.getOutputMissingValues(out)) { X[i][j] = new String(String.valueOf(inst.getOutputRealValues(out))); } else { if (!inst.getOutputMissingValues(out)) X[i][j] = inst.getOutputNominalValues(out); else { actual = kmeans.getClusterOf(i); X[i][j] = new String(kmeans.valueAt(actual, j)); } } out++; } } } } } catch (Exception e) { System.out.println("Dataset exception = " + e); e.printStackTrace(); System.exit(-1); } write_results(output_train_name); /** ************************************************************************************ */ // does a test file associated exist? 
if (input_train_name.compareTo(input_test_name) != 0) { try { // Load in memory a dataset that contains a classification problem IStest.readSet(input_test_name, false); int in = 0; int out = 0; ndatos = IStest.getNumInstances(); nvariables = Attributes.getNumAttributes(); nentradas = Attributes.getInputNumAttributes(); nsalidas = Attributes.getOutputNumAttributes(); for (int i = 0; i < ndatos; i++) { Instance inst = IStest.getInstance(i); in = 0; out = 0; for (int j = 0; j < nvariables; j++) { Attribute a = Attributes.getAttribute(j); direccion = a.getDirectionAttribute(); tipo = a.getType(); if (direccion == Attribute.INPUT) { if (tipo != Attribute.NOMINAL && !inst.getInputMissingValues(in)) { X[i][j] = new String(String.valueOf(inst.getInputRealValues(in))); } else { if (!inst.getInputMissingValues(in)) X[i][j] = inst.getInputNominalValues(in); else { actual = kmeans.getClusterOf(i); X[i][j] = new String(kmeans.valueAt(actual, j)); } } in++; } else { if (direccion == Attribute.OUTPUT) { if (tipo != Attribute.NOMINAL && !inst.getOutputMissingValues(out)) { X[i][j] = new String(String.valueOf(inst.getOutputRealValues(out))); } else { if (!inst.getOutputMissingValues(out)) X[i][j] = inst.getOutputNominalValues(out); else { actual = kmeans.getClusterOf(i); X[i][j] = new String(kmeans.valueAt(actual, j)); } } out++; } } } } } catch (Exception e) { System.out.println("Dataset exception = " + e); e.printStackTrace(); System.exit(-1); } write_results(output_test_name); } }
// Read the pattern file, and parse data into strings private void config_read(String fileParam) { File inputFile = new File(fileParam); if (inputFile == null || !inputFile.exists()) { System.out.println("parameter " + fileParam + " file doesn't exists!"); System.exit(-1); } // begin the configuration read from file try { FileReader file_reader = new FileReader(inputFile); BufferedReader buf_reader = new BufferedReader(file_reader); // FileWriter file_write = new FileWriter(outputFile); String line; do { line = buf_reader.readLine(); } while (line.length() == 0); // avoid empty lines for processing -> produce exec failure String out[] = line.split("algorithm = "); // alg_name = new String(out[1]); //catch the algorithm name // input & output filenames do { line = buf_reader.readLine(); } while (line.length() == 0); out = line.split("inputData = "); out = out[1].split("\\s\""); input_train_name = new String(out[0].substring(1, out[0].length() - 1)); input_test_name = new String(out[1].substring(0, out[1].length() - 1)); if (input_test_name.charAt(input_test_name.length() - 1) == '"') input_test_name = input_test_name.substring(0, input_test_name.length() - 1); do { line = buf_reader.readLine(); } while (line.length() == 0); out = line.split("outputData = "); out = out[1].split("\\s\""); output_train_name = new String(out[0].substring(1, out[0].length() - 1)); output_test_name = new String(out[1].substring(0, out[1].length() - 1)); if (output_test_name.charAt(output_test_name.length() - 1) == '"') output_test_name = output_test_name.substring(0, output_test_name.length() - 1); // parameters do { line = buf_reader.readLine(); } while (line.length() == 0); out = line.split("seed = "); semilla = (new Long(out[1])).longValue(); // parse the string into a integer do { line = buf_reader.readLine(); } while (line.length() == 0); out = line.split("k = "); K = (new Integer(out[1])).intValue(); // parse the string into a integer do { line = buf_reader.readLine(); } while 
(line.length() == 0); out = line.split("error = "); minError = (new Double(out[1])).doubleValue(); // parse the string into a double do { line = buf_reader.readLine(); } while (line.length() == 0); out = line.split("iterations = "); maxIter = (new Integer(out[1])).intValue(); // parse the string into a double file_reader.close(); } catch (IOException e) { System.out.println("IO exception = " + e); e.printStackTrace(); System.exit(-1); } }
public void ejecutar() { int i, j, l, m; double alfai; int nClases; int claseObt; boolean marcas[]; boolean notFound; int init; int clasSel[]; int baraje[]; int pos, tmp; String instanciasIN[]; String instanciasOUT[]; long tiempo = System.currentTimeMillis(); /* Getting the number of differents classes */ nClases = 0; for (i = 0; i < clasesTrain.length; i++) if (clasesTrain[i] > nClases) nClases = clasesTrain[i]; nClases++; /* Shuffle the train set */ baraje = new int[datosTrain.length]; Randomize.setSeed(semilla); for (i = 0; i < datosTrain.length; i++) baraje[i] = i; for (i = 0; i < datosTrain.length; i++) { pos = Randomize.Randint(i, datosTrain.length - 1); tmp = baraje[i]; baraje[i] = baraje[pos]; baraje[pos] = tmp; } /* * Inicialization of the flagged instaces vector for a posterior * elimination */ marcas = new boolean[datosTrain.length]; for (i = 0; i < datosTrain.length; i++) marcas[i] = false; if (datosTrain.length > 0) { // marcas[baraje[0]] = true; //the first instance is included always nSel = n_p; if (nSel < nClases) nSel = nClases; } else { System.err.println("Input dataset is empty"); nSel = 0; } clasSel = new int[nClases]; System.out.print("Selecting initial neurons... 
"); // at least, there must be 1 neuron of each class at the beginning init = nClases; for (i = 0; i < nClases && i < datosTrain.length; i++) { pos = Randomize.Randint(0, datosTrain.length - 1); tmp = 0; while ((clasesTrain[pos] != i || marcas[pos]) && tmp < datosTrain.length) { pos = (pos + 1) % datosTrain.length; tmp++; } if (tmp < datosTrain.length) marcas[pos] = true; else init--; // clasSel[i] = i; } for (i = init; i < Math.min(nSel, datosTrain.length); i++) { tmp = 0; pos = Randomize.Randint(0, datosTrain.length - 1); while (marcas[pos]) { pos = (pos + 1) % datosTrain.length; tmp++; } // if(i<nClases){ // notFound = true; // do{ // for(j=i-1;j>=0 && notFound;j--){ // if(clasSel[j] == clasesTrain[pos]) // notFound = false; // } // if(!notFound) // pos = Randomize.Randint (0, datosTrain.length-1); // }while(!notFound); // } // clasSel[i] = clasesTrain[pos]; marcas[pos] = true; init++; } nSel = init; System.out.println("Initial neurons selected: " + nSel); /* Building of the S set from the flags */ conjS = new double[nSel][datosTrain[0].length]; clasesS = new int[nSel]; for (m = 0, l = 0; m < datosTrain.length; m++) { if (marcas[m]) { // the instance must be copied to the solution for (j = 0; j < datosTrain[0].length; j++) { conjS[l][j] = datosTrain[m][j]; } clasesS[l] = clasesTrain[m]; l++; } } alfai = alpha; boolean change = true; /* Body of the LVQ algorithm. */ // Train the network for (int it = 0; it < T && change; it++) { change = false; alpha = alfai; for (i = 1; i < datosTrain.length; i++) { // search for the nearest neuron to training instance pos = NN(nSel, conjS, datosTrain[baraje[i]]); // nearest neuron labels correctly the class of training // instance? 
if (clasesS[pos] != clasesTrain[baraje[i]]) { // NO - repel // the neuron for (j = 0; j < conjS[pos].length; j++) { conjS[pos][j] = conjS[pos][j] - alpha * (datosTrain[baraje[i]][j] - conjS[pos][j]); } change = true; } else { // YES - migrate the neuron towards the input vector for (j = 0; j < conjS[pos].length; j++) { conjS[pos][j] = conjS[pos][j] + alpha * (datosTrain[baraje[i]][j] - conjS[pos][j]); } } alpha = nu * alpha; } // Shuffle again the training partition baraje = new int[datosTrain.length]; for (i = 0; i < datosTrain.length; i++) baraje[i] = i; for (i = 0; i < datosTrain.length; i++) { pos = Randomize.Randint(i, datosTrain.length - 1); tmp = baraje[i]; baraje[i] = baraje[pos]; baraje[pos] = tmp; } } System.out.println( "LVQ " + relation + " " + (double) (System.currentTimeMillis() - tiempo) / 1000.0 + "s"); // Classify the train data set instanciasIN = new String[datosReferencia.length]; instanciasOUT = new String[datosReferencia.length]; for (i = 0; i < datosReferencia.length; i++) { /* Classify the instance selected in this iteration */ Attribute a = Attributes.getOutputAttribute(0); int tipo = a.getType(); claseObt = KNN.evaluacionKNN2(1, conjS, clasesS, datosReferencia[i], nClases); if (tipo != Attribute.NOMINAL) { instanciasIN[i] = new String(String.valueOf(clasesReferencia[i])); instanciasOUT[i] = new String(String.valueOf(claseObt)); } else { instanciasIN[i] = new String(a.getNominalValue(clasesReferencia[i])); instanciasOUT[i] = new String(a.getNominalValue(claseObt)); } } escribeSalida( ficheroSalida[0], instanciasIN, instanciasOUT, entradas, salida, nEntradas, relation); // Classify the test data set normalizarTest(); instanciasIN = new String[datosTest.length]; instanciasOUT = new String[datosTest.length]; for (i = 0; i < datosTest.length; i++) { /* Classify the instance selected in this iteration */ Attribute a = Attributes.getOutputAttribute(0); int tipo = a.getType(); claseObt = KNN.evaluacionKNN2(1, conjS, clasesS, datosTest[i], nClases); 
if (tipo != Attribute.NOMINAL) { instanciasIN[i] = new String(String.valueOf(clasesTest[i])); instanciasOUT[i] = new String(String.valueOf(claseObt)); } else { instanciasIN[i] = new String(a.getNominalValue(clasesTest[i])); instanciasOUT[i] = new String(a.getNominalValue(claseObt)); } } escribeSalida( ficheroSalida[1], instanciasIN, instanciasOUT, entradas, salida, nEntradas, relation); // Print the network to a file printNetworkToFile(ficheroSalida[2], referencia.getHeader()); }
/** Executes the algorithm */ public void ejecutar() { int i, j, l; int nClases; double conjS[][]; double conjR[][]; int conjN[][]; boolean conjM[][]; int clasesS[]; int nSel = 0; Cromosoma poblacion[]; int ev = 0; double prob[]; double NUmax = 1.5; double NUmin = 0.5; // used for lineal ranking double aux; double pos1, pos2; int sel1, sel2, comp1, comp2; Cromosoma newPob[]; long tiempo = System.currentTimeMillis(); /*Getting the number of different clases*/ nClases = 0; for (i = 0; i < clasesTrain.length; i++) if (clasesTrain[i] > nClases) nClases = clasesTrain[i]; nClases++; /*Random inicialization of the population*/ Randomize.setSeed(semilla); poblacion = new Cromosoma[tamPoblacion]; for (i = 0; i < tamPoblacion; i++) poblacion[i] = new Cromosoma(datosTrain.length); /*Initial evaluation of the population*/ for (i = 0; i < tamPoblacion; i++) poblacion[i].evalua( datosTrain, realTrain, nominalTrain, nulosTrain, clasesTrain, alfa, kNeigh, nClases, distanceEu); if (torneo) { while (ev < nEval) { newPob = new Cromosoma[2]; /*Binary tournament selection*/ comp1 = Randomize.Randint(0, tamPoblacion - 1); do { comp2 = Randomize.Randint(0, tamPoblacion - 1); } while (comp2 == comp1); if (poblacion[comp1].getCalidad() > poblacion[comp2].getCalidad()) sel1 = comp1; else sel1 = comp2; comp1 = Randomize.Randint(0, tamPoblacion - 1); do { comp2 = Randomize.Randint(0, tamPoblacion - 1); } while (comp2 == comp1); if (poblacion[comp1].getCalidad() > poblacion[comp2].getCalidad()) sel2 = comp1; else sel2 = comp2; if (Randomize.Rand() < pCruce) { // there is cross crucePMX(poblacion, newPob, sel1, sel2); } else { // there is not cross newPob[0] = new Cromosoma(datosTrain.length, poblacion[sel1]); newPob[1] = new Cromosoma(datosTrain.length, poblacion[sel2]); } /*Mutation of the cromosomes*/ for (i = 0; i < 2; i++) newPob[i].mutacion(pMutacion1to0, pMutacion0to1); /*Evaluation of the population*/ for (i = 0; i < 2; i++) if (!(newPob[i].estaEvaluado())) { newPob[i].evalua( 
datosTrain, realTrain, nominalTrain, nulosTrain, clasesTrain, alfa, kNeigh, nClases, distanceEu); ev++; } /*Replace the two worst*/ Arrays.sort(poblacion); poblacion[tamPoblacion - 2] = new Cromosoma(datosTrain.length, newPob[0]); poblacion[tamPoblacion - 1] = new Cromosoma(datosTrain.length, newPob[1]); } } else { /*Get the probabilities of lineal ranking in case of not use binary tournament*/ prob = new double[tamPoblacion]; for (i = 0; i < tamPoblacion; i++) { aux = (double) (NUmax - NUmin) * ((double) i / (tamPoblacion - 1)); prob[i] = (double) (1.0 / (tamPoblacion)) * (NUmax - aux); } for (i = 1; i < tamPoblacion; i++) prob[i] = prob[i] + prob[i - 1]; while (ev < nEval) { /*Sort the population by quality criterion*/ Arrays.sort(poblacion); newPob = new Cromosoma[2]; pos1 = Randomize.Rand(); pos2 = Randomize.Rand(); for (j = 0; j < tamPoblacion && prob[j] < pos1; j++) ; sel1 = j; for (j = 0; j < tamPoblacion && prob[j] < pos2; j++) ; sel2 = j; if (Randomize.Rand() < pCruce) { // there is cross crucePMX(poblacion, newPob, sel1, sel2); } else { // there is not cross newPob[0] = new Cromosoma(datosTrain.length, poblacion[sel1]); newPob[1] = new Cromosoma(datosTrain.length, poblacion[sel2]); } /*Mutation of the cromosomes*/ for (i = 0; i < 2; i++) newPob[i].mutacion(pMutacion1to0, pMutacion0to1); /*Evaluation of the population*/ for (i = 0; i < 2; i++) if (!(newPob[i].estaEvaluado())) { newPob[i].evalua( datosTrain, realTrain, nominalTrain, nulosTrain, clasesTrain, alfa, kNeigh, nClases, distanceEu); ev++; } /*Replace the two worst*/ poblacion[tamPoblacion - 2] = new Cromosoma(datosTrain.length, newPob[0]); poblacion[tamPoblacion - 1] = new Cromosoma(datosTrain.length, newPob[1]); } } nSel = poblacion[0].genesActivos(); /*Building of S set from the best cromosome obtained*/ conjS = new double[nSel][datosTrain[0].length]; conjR = new double[nSel][datosTrain[0].length]; conjN = new int[nSel][datosTrain[0].length]; conjM = new boolean[nSel][datosTrain[0].length]; 
clasesS = new int[nSel]; for (i = 0, l = 0; i < datosTrain.length; i++) { if (poblacion[0].getGen(i)) { // the instance must be copied to the solution for (j = 0; j < datosTrain[0].length; j++) { conjS[l][j] = datosTrain[i][j]; conjR[l][j] = realTrain[i][j]; conjN[l][j] = nominalTrain[i][j]; conjM[l][j] = nulosTrain[i][j]; } clasesS[l] = clasesTrain[i]; l++; } } System.out.println( "SGA " + relation + " " + (double) (System.currentTimeMillis() - tiempo) / 1000.0 + "s"); OutputIS.escribeSalida( ficheroSalida[0], conjR, conjN, conjM, clasesS, entradas, salida, nEntradas, relation); OutputIS.escribeSalida(ficheroSalida[1], test, entradas, salida, nEntradas, relation); } // end-method