/** * Function to read the .dat file that contains the information of the dataset. * * @param name The reader object where the itemsets are readed. * @param train The flag if the file is for training */ public MyDataset(String name, boolean train) { try { // create the set of instances IS = new InstanceSet(); // Read the itemsets. IS.readSet(name, train); } catch (DatasetException e) { System.out.println("Error loading dataset instances"); e.printStackTrace(); System.exit(-1); } catch (HeaderFormatException e) { System.out.println("Error loading dataset instances"); e.printStackTrace(); System.exit(-1); } // Store Dataset file attributes readHeader(); itemsets = new Vector(IS.getNumInstances()); // read all the itemsets getItemsetFull(); totalCond = this.numAllConditions(); }
/** * It reads the whole input data-set and it stores each example and its associated output value in * local arrays to ease their use. * * @param datasetFile String name of the file containing the dataset * @param train boolean It must have the value "true" if we are reading the training data-set * @throws IOException If there ocurs any problem with the reading of the data-set */ public void readRegressionSet(String datasetFile, boolean train) throws IOException { try { // Load in memory a dataset that contains a regression problem IS.readSet(datasetFile, train); nData = IS.getNumInstances(); nInputs = Attributes.getInputNumAttributes(); nVars = nInputs + Attributes.getOutputNumAttributes(); // outputIntegerheck that there is only one output variable if (Attributes.getOutputNumAttributes() > 1) { System.out.println("This algorithm can not process MIMO datasets"); System.out.println("All outputs but the first one will be removed"); System.exit(1); } boolean noOutputs = false; if (Attributes.getOutputNumAttributes() < 1) { System.out.println("This algorithm can not process datasets without outputs"); System.out.println("Zero-valued output generated"); noOutputs = true; System.exit(1); } // Initialice and fill our own tables X = new double[nData][nInputs]; missing = new boolean[nData][nInputs]; outputInteger = new int[nData]; // Maximum and minimum of inputs emax = new double[nInputs]; emin = new double[nInputs]; for (int i = 0; i < nInputs; i++) { emax[i] = Attributes.getAttribute(i).getMaxAttribute(); emin[i] = Attributes.getAttribute(i).getMinAttribute(); } // All values are casted into double/integer nClasses = 0; for (int i = 0; i < nData; i++) { Instance inst = IS.getInstance(i); for (int j = 0; j < nInputs; j++) { X[i][j] = IS.getInputNumericValue(i, j); // inst.getInputRealValues(j); missing[i][j] = inst.getInputMissingValues(j); if (missing[i][j]) { X[i][j] = emin[j] - 1; } } if (noOutputs) { outputReal[i] = outputInteger[i] = 0; } else { outputReal[i] = IS.getOutputNumericValue(i, 0); outputInteger[i] = (int) outputReal[i]; } } } catch (Exception e) { System.out.println("DBG: Exception in readSet"); e.printStackTrace(); } computeStatistics(); }
/** * Main method for ABB, that explores the search space by pruning nodes and checking their * inconsistency ratio. */ private void runABB() { boolean[] root = startSolution(); System.arraycopy(root, 0, features, 0, root.length); abb(root); /* checks if a subset satisfies the condition (more than 0 selected features) */ if (features == null) { System.err.println("ERROR: It couldn't be possible to find any solution."); System.exit(0); } }
static void checkDataset() { Attribute[] outputs = Attributes.getOutputAttributes(); if (outputs.length != 1) { LogManager.printErr("Only datasets with one output are supported"); System.exit(1); } if (outputs[0].getType() != Attribute.NOMINAL) { LogManager.printErr("Output attribute should be nominal"); System.exit(1); } Parameters.numClasses = outputs[0].getNumNominalValues(); Parameters.numAttributes = Attributes.getInputAttributes().length; }
// Read the patron file, and parse data into strings private void config_read(String fileParam) { File inputFile = new File(fileParam); if (inputFile == null || !inputFile.exists()) { System.out.println("parameter " + fileParam + " file doesn't exists!"); System.exit(-1); } // begin the configuration read from file try { FileReader file_reader = new FileReader(inputFile); BufferedReader buf_reader = new BufferedReader(file_reader); // FileWriter file_write = new FileWriter(outputFile); String line; do { line = buf_reader.readLine(); } while (line.length() == 0); // avoid empty lines for processing -> produce exec failure String out[] = line.split("algorithm = "); // alg_name = new String(out[1]); //catch the algorithm name // input & output filenames do { line = buf_reader.readLine(); } while (line.length() == 0); out = line.split("inputData = "); out = out[1].split("\\s\""); input_train_name = new String(out[0].substring(1, out[0].length() - 1)); input_test_name = new String(out[1].substring(0, out[1].length() - 1)); if (input_test_name.charAt(input_test_name.length() - 1) == '"') input_test_name = input_test_name.substring(0, input_test_name.length() - 1); do { line = buf_reader.readLine(); } while (line.length() == 0); out = line.split("outputData = "); out = out[1].split("\\s\""); output_train_name = new String(out[0].substring(1, out[0].length() - 1)); output_test_name = new String(out[1].substring(0, out[1].length() - 1)); if (output_test_name.charAt(output_test_name.length() - 1) == '"') output_test_name = output_test_name.substring(0, output_test_name.length() - 1); file_reader.close(); } catch (IOException e) { System.out.println("IO exception = " + e); e.printStackTrace(); System.exit(-1); } }
private void normalizarTest() { int i, j, cont = 0, k; Instance temp; boolean hecho; double caja[]; StringTokenizer tokens; boolean nulls[]; /* Check if dataset corresponding with a classification problem */ if (Attributes.getOutputNumAttributes() < 1) { System.err.println( "This dataset haven´t outputs, so it not corresponding to a classification problem."); System.exit(-1); } else if (Attributes.getOutputNumAttributes() > 1) { System.err.println("This dataset have more of one output."); System.exit(-1); } if (Attributes.getOutputAttribute(0).getType() == Attribute.REAL) { System.err.println( "This dataset have an input attribute with floating values, so it not corresponding to a classification problem."); System.exit(-1); } datosTest = new double[test.getNumInstances()][Attributes.getInputNumAttributes()]; clasesTest = new int[test.getNumInstances()]; caja = new double[1]; for (i = 0; i < test.getNumInstances(); i++) { temp = test.getInstance(i); nulls = temp.getInputMissingValues(); datosTest[i] = test.getInstance(i).getAllInputValues(); for (j = 0; j < nulls.length; j++) if (nulls[j]) datosTest[i][j] = 0.0; caja = test.getInstance(i).getAllOutputValues(); clasesTest[i] = (int) caja[0]; for (k = 0; k < datosTest[i].length; k++) { if (Attributes.getInputAttribute(k).getType() == Attribute.NOMINAL) { datosTest[i][k] /= Attributes.getInputAttribute(k).getNominalValuesList().size() - 1; } else { datosTest[i][k] -= Attributes.getInputAttribute(k).getMinAttribute(); datosTest[i][k] /= Attributes.getInputAttribute(k).getMaxAttribute() - Attributes.getInputAttribute(k).getMinAttribute(); } } } }
public LVQ(String ficheroScript) { super(ficheroScript); try { referencia = new InstanceSet(); referencia.readSet(ficheroReferencia, false); /*Normalize the data*/ normalizarReferencia(); } catch (Exception e) { System.err.println(e); System.exit(1); } }
/** * Constructor of the Class Parametros * * @param nombreFileParametros is the pathname of input parameter file */ Parametros(String nombreFileParametros) { try { int i; String fichero, linea, tok; StringTokenizer lineasFile, tokens; /* read the parameter file using Files class */ fichero = Files.readFile(nombreFileParametros); fichero += "\n"; /* remove all \r characters. it is neccesary for a correst use in Windows and UNIX */ fichero = fichero.replace('\r', ' '); /* extracts the differents tokens of the file */ lineasFile = new StringTokenizer(fichero, "\n"); i = 0; while (lineasFile.hasMoreTokens()) { linea = lineasFile.nextToken(); i++; tokens = new StringTokenizer(linea, " ,\t"); if (tokens.hasMoreTokens()) { tok = tokens.nextToken(); if (tok.equalsIgnoreCase("algorithm")) nameAlgorithm = getParamString(tokens); else if (tok.equalsIgnoreCase("inputdata")) getInputFiles(tokens); else if (tok.equalsIgnoreCase("outputdata")) getOutputFiles(tokens); else if (tok.equalsIgnoreCase("seed")) seed = getParamLong(tokens); else throw new java.io.IOException("Syntax error on line " + i + ": [" + tok + "]\n"); } } } catch (java.io.FileNotFoundException e) { System.err.println(e + "Parameter file"); } catch (java.io.IOException e) { System.err.println(e + "Aborting program"); System.exit(-1); } /** show the read parameter in the standard output */ String contents = "-- Parameters echo --- \n"; contents += "Algorithm name: " + nameAlgorithm + "\n"; contents += "Input Train File: " + trainFileNameInput + "\n"; contents += "Input Test File: " + testFileNameInput + "\n"; contents += "Output Train File: " + trainFileNameOutput + "\n"; contents += "Output Test File: " + testFileNameOutput + "\n"; System.out.println(contents); }
/** * Removes one feature at a time, starting from the furthest on the right * * @param featuresVector solution to generate its neighbor * @param which number of the feature to remove starting from the right * @return next neighbor of the given solution with one less feature */ private static boolean[] removeOne(boolean featuresVector[], int which) { boolean[] fv = new boolean[featuresVector.length]; System.arraycopy(featuresVector, 0, fv, 0, fv.length); boolean stop = false; int count = 0; for (int i = fv.length - 1; i >= 0 && !stop; i--) { if (fv[i]) { count++; if (count == which) { fv[i] = false; stop = true; } } } return fv; }
/** @param args the command line arguments */ public static void main(String[] args) { ParserParameters.doParse(args[0]); LogManager.initLogManager(); InstanceSet is = new InstanceSet(); try { is.readSet(Parameters.trainInputFile, true); } catch (Exception e) { LogManager.printErr(e.toString()); System.exit(1); } checkDataset(); Discretizer dis; String name = Parameters.algorithmName; dis = new FayyadDiscretizer(); dis.buildCutPoints(is); dis.applyDiscretization(Parameters.trainInputFile, Parameters.trainOutputFile); dis.applyDiscretization(Parameters.testInputFile, Parameters.testOutputFile); LogManager.closeLog(); }
/** Recursive method for ABB */ private void abb(boolean feat[]) { boolean[] child; double measure; threshold = data.measureIEP(feat); for (int i = 0; i < cardinalidadCto(feat); i++) { child = removeOne(feat, i); measure = data.measureIEP(child); if (legitimate(child) && measure < threshold) { if (measure < data.measureIEP(features)) { // we keep the best found in 'features' System.arraycopy(child, 0, features, 0, child.length); } abb(child); } else { // we prune this node pruned.add(child); } } }
// Write data matrix X to disk, in KEEL format private void write_results(String output) { // File OutputFile = new File(output_train_name.substring(1, output_train_name.length()-1)); try { FileWriter file_write = new FileWriter(output); file_write.write(IS.getHeader()); // now, print the normalized data file_write.write("@data\n"); for (int i = 0; i < ndatos; i++) { file_write.write(X[i][0]); for (int j = 1; j < nvariables; j++) { file_write.write("," + X[i][j]); } file_write.write("\n"); } file_write.close(); } catch (IOException e) { System.out.println("IO exception = " + e); System.exit(-1); } }
public void ejecutar() { int i, j, l, m; double alfai; int nClases; int claseObt; boolean marcas[]; boolean notFound; int init; int clasSel[]; int baraje[]; int pos, tmp; String instanciasIN[]; String instanciasOUT[]; long tiempo = System.currentTimeMillis(); /* Getting the number of differents classes */ nClases = 0; for (i = 0; i < clasesTrain.length; i++) if (clasesTrain[i] > nClases) nClases = clasesTrain[i]; nClases++; /* Shuffle the train set */ baraje = new int[datosTrain.length]; Randomize.setSeed(semilla); for (i = 0; i < datosTrain.length; i++) baraje[i] = i; for (i = 0; i < datosTrain.length; i++) { pos = Randomize.Randint(i, datosTrain.length - 1); tmp = baraje[i]; baraje[i] = baraje[pos]; baraje[pos] = tmp; } /* * Inicialization of the flagged instaces vector for a posterior * elimination */ marcas = new boolean[datosTrain.length]; for (i = 0; i < datosTrain.length; i++) marcas[i] = false; if (datosTrain.length > 0) { // marcas[baraje[0]] = true; //the first instance is included always nSel = n_p; if (nSel < nClases) nSel = nClases; } else { System.err.println("Input dataset is empty"); nSel = 0; } clasSel = new int[nClases]; System.out.print("Selecting initial neurons... "); // at least, there must be 1 neuron of each class at the beginning init = nClases; for (i = 0; i < nClases && i < datosTrain.length; i++) { pos = Randomize.Randint(0, datosTrain.length - 1); tmp = 0; while ((clasesTrain[pos] != i || marcas[pos]) && tmp < datosTrain.length) { pos = (pos + 1) % datosTrain.length; tmp++; } if (tmp < datosTrain.length) marcas[pos] = true; else init--; // clasSel[i] = i; } for (i = init; i < Math.min(nSel, datosTrain.length); i++) { tmp = 0; pos = Randomize.Randint(0, datosTrain.length - 1); while (marcas[pos]) { pos = (pos + 1) % datosTrain.length; tmp++; } // if(i<nClases){ // notFound = true; // do{ // for(j=i-1;j>=0 && notFound;j--){ // if(clasSel[j] == clasesTrain[pos]) // notFound = false; // } // if(!notFound) // pos = Randomize.Randint (0, datosTrain.length-1); // }while(!notFound); // } // clasSel[i] = clasesTrain[pos]; marcas[pos] = true; init++; } nSel = init; System.out.println("Initial neurons selected: " + nSel); /* Building of the S set from the flags */ conjS = new double[nSel][datosTrain[0].length]; clasesS = new int[nSel]; for (m = 0, l = 0; m < datosTrain.length; m++) { if (marcas[m]) { // the instance must be copied to the solution for (j = 0; j < datosTrain[0].length; j++) { conjS[l][j] = datosTrain[m][j]; } clasesS[l] = clasesTrain[m]; l++; } } alfai = alpha; boolean change = true; /* Body of the LVQ algorithm. */ // Train the network for (int it = 0; it < T && change; it++) { change = false; alpha = alfai; for (i = 1; i < datosTrain.length; i++) { // search for the nearest neuron to training instance pos = NN(nSel, conjS, datosTrain[baraje[i]]); // nearest neuron labels correctly the class of training // instance? if (clasesS[pos] != clasesTrain[baraje[i]]) { // NO - repel // the neuron for (j = 0; j < conjS[pos].length; j++) { conjS[pos][j] = conjS[pos][j] - alpha * (datosTrain[baraje[i]][j] - conjS[pos][j]); } change = true; } else { // YES - migrate the neuron towards the input vector for (j = 0; j < conjS[pos].length; j++) { conjS[pos][j] = conjS[pos][j] + alpha * (datosTrain[baraje[i]][j] - conjS[pos][j]); } } alpha = nu * alpha; } // Shuffle again the training partition baraje = new int[datosTrain.length]; for (i = 0; i < datosTrain.length; i++) baraje[i] = i; for (i = 0; i < datosTrain.length; i++) { pos = Randomize.Randint(i, datosTrain.length - 1); tmp = baraje[i]; baraje[i] = baraje[pos]; baraje[pos] = tmp; } } System.out.println( "LVQ " + relation + " " + (double) (System.currentTimeMillis() - tiempo) / 1000.0 + "s"); // Classify the train data set instanciasIN = new String[datosReferencia.length]; instanciasOUT = new String[datosReferencia.length]; for (i = 0; i < datosReferencia.length; i++) { /* Classify the instance selected in this iteration */ Attribute a = Attributes.getOutputAttribute(0); int tipo = a.getType(); claseObt = KNN.evaluacionKNN2(1, conjS, clasesS, datosReferencia[i], nClases); if (tipo != Attribute.NOMINAL) { instanciasIN[i] = new String(String.valueOf(clasesReferencia[i])); instanciasOUT[i] = new String(String.valueOf(claseObt)); } else { instanciasIN[i] = new String(a.getNominalValue(clasesReferencia[i])); instanciasOUT[i] = new String(a.getNominalValue(claseObt)); } } escribeSalida( ficheroSalida[0], instanciasIN, instanciasOUT, entradas, salida, nEntradas, relation); // Classify the test data set normalizarTest(); instanciasIN = new String[datosTest.length]; instanciasOUT = new String[datosTest.length]; for (i = 0; i < datosTest.length; i++) { /* Classify the instance selected in this iteration */ Attribute a = Attributes.getOutputAttribute(0); int tipo = a.getType(); claseObt = KNN.evaluacionKNN2(1, conjS, clasesS, datosTest[i], nClases); if (tipo != Attribute.NOMINAL) { instanciasIN[i] = new String(String.valueOf(clasesTest[i])); instanciasOUT[i] = new String(String.valueOf(claseObt)); } else { instanciasIN[i] = new String(a.getNominalValue(clasesTest[i])); instanciasOUT[i] = new String(a.getNominalValue(claseObt)); } } escribeSalida( ficheroSalida[1], instanciasIN, instanciasOUT, entradas, salida, nEntradas, relation); // Print the network to a file printNetworkToFile(ficheroSalida[2], referencia.getHeader()); }
/** Process the training and test files provided in the parameters file to the constructor. */ public void process() { double[] outputs; double[] outputs2; Instance neighbor; double dist, mean; int actual; int[] N = new int[nneigh]; double[] Ndist = new double[nneigh]; boolean allNull; svm_problem SVMp = null; svm_parameter SVMparam = new svm_parameter(); svm_model svr = null; svm_node SVMn[]; double[] outputsCandidate = null; boolean same = true; Vector instancesSelected = new Vector(); Vector instancesSelected2 = new Vector(); // SVM PARAMETERS SVMparam.C = C; SVMparam.cache_size = 10; // 10MB of cache SVMparam.degree = degree; SVMparam.eps = eps; SVMparam.gamma = gamma; SVMparam.nr_weight = 0; SVMparam.nu = nu; SVMparam.p = p; SVMparam.shrinking = shrinking; SVMparam.probability = 0; if (kernelType.compareTo("LINEAR") == 0) { SVMparam.kernel_type = svm_parameter.LINEAR; } else if (kernelType.compareTo("POLY") == 0) { SVMparam.kernel_type = svm_parameter.POLY; } else if (kernelType.compareTo("RBF") == 0) { SVMparam.kernel_type = svm_parameter.RBF; } else if (kernelType.compareTo("SIGMOID") == 0) { SVMparam.kernel_type = svm_parameter.SIGMOID; } SVMparam.svm_type = svm_parameter.EPSILON_SVR; try { // Load in memory a dataset that contains a classification problem IS.readSet(input_train_name, true); int in = 0; int out = 0; ndatos = IS.getNumInstances(); nvariables = Attributes.getNumAttributes(); nentradas = Attributes.getInputNumAttributes(); nsalidas = Attributes.getOutputNumAttributes(); X = new String[ndatos][2]; // matrix with transformed data mostCommon = new String[nvariables]; SVMp = new svm_problem(); SVMp.l = ndatos; SVMp.y = new double[SVMp.l]; SVMp.x = new svm_node[SVMp.l][nentradas + 1]; for (int l = 0; l < SVMp.l; l++) { for (int n = 0; n < Attributes.getInputNumAttributes() + 1; n++) { SVMp.x[l][n] = new svm_node(); } } for (int i = 0; i < ndatos; i++) { Instance inst = IS.getInstance(i); SVMp.y[i] = inst.getAllOutputValues()[0]; for (int n = 0; n < Attributes.getInputNumAttributes(); n++) { SVMp.x[i][n].index = n; SVMp.x[i][n].value = inst.getAllInputValues()[n]; SVMp.y[i] = inst.getAllOutputValues()[0]; } // end of instance SVMp.x[i][nentradas].index = -1; } if (svm.svm_check_parameter(SVMp, SVMparam) != null) { System.out.println("SVM parameter error in training:"); System.out.println(svm.svm_check_parameter(SVMp, SVMparam)); System.exit(-1); } // train the SVM if (ndatos > 0) { svr = svm.svm_train(SVMp, SVMparam); } for (int i = 0; i < ndatos; i++) { Instance inst = IS.getInstance(i); X[i][0] = new String(String.valueOf(inst.getAllOutputValues()[0])); // the values used for regression SVMn = new svm_node[Attributes.getInputNumAttributes() + 1]; for (int n = 0; n < Attributes.getInputNumAttributes(); n++) { SVMn[n] = new svm_node(); SVMn[n].index = n; SVMn[n].value = inst.getAllInputValues()[n]; } SVMn[nentradas] = new svm_node(); SVMn[nentradas].index = -1; // pedict the class X[i][1] = new String(String.valueOf((svm.svm_predict(svr, SVMn)))); } } catch (Exception e) { System.out.println("Dataset exception = " + e); e.printStackTrace(); System.exit(-1); } write_results(output_train_name); /** ************************************************************************************ */ try { // Load in memory a dataset that contains a classification // problem IS.readSet(input_test_name, false); int in = 0; int out = 0; ndatos = IS.getNumInstances(); nvariables = Attributes.getNumAttributes(); nentradas = Attributes.getInputNumAttributes(); nsalidas = Attributes.getOutputNumAttributes(); X = new String[ndatos][2]; // matrix with transformed data // data mostCommon = new String[nvariables]; for (int i = 0; i < ndatos; i++) { Instance inst = IS.getInstance(i); X[i][0] = new String(String.valueOf(inst.getAllOutputValues()[0])); SVMn = new svm_node[Attributes.getInputNumAttributes() + 1]; for (int n = 0; n < Attributes.getInputNumAttributes(); n++) { SVMn[n] = new svm_node(); SVMn[n].index = n; SVMn[n].value = inst.getAllInputValues()[n]; } SVMn[nentradas] = new svm_node(); SVMn[nentradas].index = -1; // pedict the class X[i][1] = new String(String.valueOf(svm.svm_predict(svr, SVMn))); } } catch (Exception e) { System.out.println("Dataset exception = " + e); e.printStackTrace(); System.exit(-1); } System.out.println("escribiendo test"); write_results(output_test_name); }
/** * The main method of the class that includes the operations of the algorithm. It includes all the * operations that the algorithm has and finishes when it writes the output information into * files. */ public void run() { int S[]; int i, j, l, m; int nPos = 0, nNeg = 0; int posID; int nClases; int pos; int baraje[]; int tmp; double conjS[][]; int clasesS[]; int tamS = 0; int claseObt; int cont; int busq; boolean marcas[]; int nSel; double conjS2[][]; int clasesS2[]; double minDist, dist; long tiempo = System.currentTimeMillis(); /*CNN PART*/ /*Count of number of positive and negative examples*/ for (i = 0; i < clasesTrain.length; i++) { if (clasesTrain[i] == 0) nPos++; else nNeg++; } if (nPos > nNeg) { tmp = nPos; nPos = nNeg; nNeg = tmp; posID = 1; } else { posID = 0; } /*Inicialization of the candidates set*/ S = new int[datosTrain.length]; for (i = 0; i < S.length; i++) S[i] = Integer.MAX_VALUE; /*Inserting an element of mayority class*/ Randomize.setSeed(semilla); pos = Randomize.Randint(0, clasesTrain.length - 1); while (clasesTrain[pos] == posID) pos = (pos + 1) % clasesTrain.length; S[tamS] = pos; tamS++; /*Insert all subset of minority class*/ for (i = 0; i < clasesTrain.length; i++) { if (clasesTrain[i] == posID) { S[tamS] = i; tamS++; } } /*Algorithm body. We resort randomly the instances of T and compare with the rest of S. If an instance doesn´t classified correctly, it is inserted in S*/ baraje = new int[datosTrain.length]; for (i = 0; i < datosTrain.length; i++) baraje[i] = i; for (i = 0; i < datosTrain.length; i++) { pos = Randomize.Randint(i, clasesTrain.length - 1); tmp = baraje[i]; baraje[i] = baraje[pos]; baraje[pos] = tmp; } for (i = 0; i < datosTrain.length; i++) { if (clasesTrain[i] != posID) { // only for mayority class instances /*Construction of the S set from the previous vector S*/ conjS = new double[tamS][datosTrain[0].length]; clasesS = new int[tamS]; for (j = 0; j < tamS; j++) { for (l = 0; l < datosTrain[0].length; l++) conjS[j][l] = datosTrain[S[j]][l]; clasesS[j] = clasesTrain[S[j]]; } /*Do KNN to the instance*/ claseObt = KNN.evaluacionKNN(k, conjS, clasesS, datosTrain[baraje[i]], 2); if (claseObt != clasesTrain[baraje[i]]) { // fail in the class, it is included in S Arrays.sort(S); busq = Arrays.binarySearch(S, baraje[i]); if (busq < 0) { S[tamS] = baraje[i]; tamS++; } } } } /*Construction of the S set from the previous vector S*/ conjS = new double[tamS][datosTrain[0].length]; clasesS = new int[tamS]; for (j = 0; j < tamS; j++) { for (l = 0; l < datosTrain[0].length; l++) conjS[j][l] = datosTrain[S[j]][l]; clasesS[j] = clasesTrain[S[j]]; } /*TOMEK LINKS PART*/ /*Inicialization of the instance flagged vector of the S set*/ marcas = new boolean[conjS.length]; for (i = 0; i < conjS.length; i++) { marcas[i] = true; } nSel = conjS.length; for (i = 0; i < conjS.length; i++) { minDist = Double.POSITIVE_INFINITY; pos = 0; for (j = 0; j < conjS.length; j++) { if (i != j) { dist = KNN.distancia(conjS[i], conjS[j]); if (dist < minDist) { minDist = dist; pos = j; } } } if (clasesS[i] != clasesS[pos]) { if (clasesS[i] != posID) { if (marcas[i] == true) { marcas[i] = false; nSel--; } } else { if (marcas[pos] == true) { marcas[pos] = false; nSel--; } } } } /*Construction of the S set from the flags*/ conjS2 = new double[nSel][conjS[0].length]; clasesS2 = new int[nSel]; for (m = 0, l = 0; m < conjS.length; m++) { if (marcas[m]) { // the instance will evaluate for (j = 0; j < conjS[0].length; j++) { conjS2[l][j] = conjS[m][j]; } clasesS2[l] = clasesS[m]; l++; } } System.out.println( "CNN_TomekLinks " + relation + " " + (double) (System.currentTimeMillis() - tiempo) / 1000.0 + "s"); OutputIS.escribeSalida( ficheroSalida[0], conjS2, clasesS2, entradas, salida, nEntradas, relation); OutputIS.escribeSalida(ficheroSalida[1], test, entradas, salida, nEntradas, relation); }
/** It runs the Qstatistic */ public void runAlgorithm() { int i, j, l, h; double conjS[][]; double conjR[][]; int conjN[][]; boolean conjM[][]; int clasesS[]; int nSel = 0; Chromosome poblacion[]; int ev = 0; Chromosome C[]; int baraje[]; int pos, tmp; Chromosome newPob[]; int d; int tamC; Chromosome pobTemp[]; int nPos = 0, nNeg = 0, posID, negID; double datosArt[][]; double realArt[][]; int nominalArt[][]; boolean nulosArt[][]; int clasesArt[]; int tamS; long tiempo = System.currentTimeMillis(); // Randomize.setSeed (semilla); posID = clasesTrain[0]; negID = -1; for (i = 0; i < clasesTrain.length; i++) { if (clasesTrain[i] != posID) { negID = clasesTrain[i]; break; } } /* Count of number of positive and negative examples */ for (i = 0; i < clasesTrain.length; i++) { if (clasesTrain[i] == posID) nPos++; else nNeg++; } if (nPos > nNeg) { tmp = nPos; nPos = nNeg; nNeg = tmp; tmp = posID; posID = negID; negID = tmp; } else { /* * tmp = posID; posID = negID; negID = tmp; */ } if (hybrid.equalsIgnoreCase("smote + eus")) { if (balance) { tamS = 2 * nNeg; } else { tamS = nNeg + nPos + (int) (nPos * smoting); } datosArt = new double[tamS][datosTrain[0].length]; realArt = new double[tamS][datosTrain[0].length]; nominalArt = new int[tamS][datosTrain[0].length]; nulosArt = new boolean[tamS][datosTrain[0].length]; clasesArt = new int[tamS]; SMOTE( datosTrain, realTrain, nominalTrain, nulosTrain, clasesTrain, datosArt, realArt, nominalArt, nulosArt, clasesArt, kSMOTE, ASMO, smoting, balance, nPos, posID, nNeg, negID, distanceEu); } else { datosArt = new double[datosTrain.length][datosTrain[0].length]; realArt = new double[datosTrain.length][datosTrain[0].length]; nominalArt = new int[datosTrain.length][datosTrain[0].length]; nulosArt = new boolean[datosTrain.length][datosTrain[0].length]; clasesArt = new int[clasesTrain.length]; for (i = 0; i < datosTrain.length; i++) { for (j = 0; j < datosTrain[i].length; j++) { datosArt[i][j] = datosTrain[i][j]; realArt[i][j] = realTrain[i][j]; nominalArt[i][j] = nominalTrain[i][j]; nulosArt[i][j] = nulosTrain[i][j]; } clasesArt[i] = clasesTrain[i]; } } /* Count of number of positive and negative examples */ nPos = nNeg = 0; for (i = 0; i < clasesArt.length; i++) { if (clasesArt[i] == posID) nPos++; else nNeg++; } if (majSelection) d = nNeg / 4; else d = datosArt.length / 4; /* Random initialization of the population */ poblacion = new Chromosome[popSize]; baraje = new int[popSize]; for (i = 0; i < popSize; i++) if (majSelection) poblacion[i] = new Chromosome(nNeg); else poblacion[i] = new Chromosome(datosArt.length); /* Initial evaluation of the population */ for (i = 0; i < popSize; i++) poblacion[i].evalua( datosTrain, realTrain, nominalTrain, nulosTrain, clasesTrain, datosArt, realArt, nominalArt, nulosArt, clasesArt, wrapper, k, evMeas, majSelection, pFactor, P, posID, nPos, distanceEu, entradas, anteriores, salidasAnteriores); /* Until stop condition */ while (ev < nEval) { C = new Chromosome[popSize]; /* Selection(r) of C(t) from P(t) */ for (i = 0; i < popSize; i++) baraje[i] = i; for (i = 0; i < popSize; i++) { pos = Randomize.Randint(i, popSize - 1); tmp = baraje[i]; baraje[i] = baraje[pos]; baraje[pos] = tmp; } for (i = 0; i < popSize; i++) if (majSelection) C[i] = new Chromosome(nNeg, poblacion[baraje[i]]); else C[i] = new Chromosome(datosArt.length, poblacion[baraje[i]]); /* Structure recombination in C(t) constructing C'(t) */ tamC = recombinar(C, d, nNeg, nPos, majSelection); newPob = new Chromosome[tamC]; for (i = 0, l = 0; i < C.length; i++) { if (C[i].esValido()) { // the cromosome must be copied to the // new poblation C'(t) if (majSelection) newPob[l] = new Chromosome(nNeg, C[i]); else newPob[l] = new Chromosome(datosArt.length, C[i]); l++; } } /* Structure evaluation in C'(t) */ for (i = 0; i < newPob.length; i++) { newPob[i].evalua( datosTrain, realTrain, nominalTrain, nulosTrain, clasesTrain, datosArt, realArt, nominalArt, nulosArt, clasesArt, wrapper, k, evMeas, majSelection, pFactor, P, posID, nPos, distanceEu, entradas, anteriores, salidasAnteriores); ev++; } /* Selection(s) of P(t) from C'(t) and P(t-1) */ Arrays.sort(poblacion); Arrays.sort(newPob); /* * If the best of C' is worse than the worst of P(t-1), then there * will no changes */ if (tamC == 0 || newPob[0].getCalidad() < poblacion[popSize - 1].getCalidad()) { d--; } else { pobTemp = new Chromosome[popSize]; for (i = 0, j = 0, l = 0; i < popSize && l < tamC; i++) { if (poblacion[j].getCalidad() > newPob[l].getCalidad()) { if (majSelection) pobTemp[i] = new Chromosome(nNeg, poblacion[j]); else pobTemp[i] = new Chromosome(datosArt.length, poblacion[j]); j++; } else { if (majSelection) pobTemp[i] = new Chromosome(nNeg, newPob[l]); else pobTemp[i] = new Chromosome(datosArt.length, newPob[l]); l++; } } if (l == tamC) { // there are cromosomes for copying for (; i < popSize; i++) { if (majSelection) pobTemp[i] = new Chromosome(nNeg, poblacion[j]); else pobTemp[i] = new Chromosome(datosArt.length, poblacion[j]); j++; } } poblacion = pobTemp; } /* Last step of the algorithm */ if (d <= 0) { for (i = 1; i < popSize; i++) { poblacion[i].divergeCHC(r, poblacion[0], prob0to1Div); } for (i = 0; i < popSize; i++) if (!(poblacion[i].estaEvaluado())) { poblacion[i].evalua( datosTrain, realTrain, nominalTrain, nulosTrain, clasesTrain, datosArt, realArt, nominalArt, nulosArt, clasesArt, wrapper, k, evMeas, majSelection, pFactor, P, posID, nPos, distanceEu, entradas, anteriores, salidasAnteriores); ev++; } /* Reinicialization of d value */ if (majSelection) d = (int) (r * (1.0 - r) * (double) nNeg); else d = (int) (r * (1.0 - r) * (double) datosArt.length); } } Arrays.sort(poblacion); if (majSelection) { nSel = poblacion[0].genesActivos() + nPos; /* Construction of S set from the best cromosome */ conjS = new double[nSel][datosArt[0].length]; conjR = new double[nSel][datosArt[0].length]; conjN = new int[nSel][datosArt[0].length]; conjM = new boolean[nSel][datosArt[0].length]; clasesS = new int[nSel]; h = 0; for (i = 0, l = 0; i < nNeg; i++, h++) { for (; clasesArt[h] == posID && h < clasesArt.length; h++) ; if (poblacion[0].getGen(i)) { // the instance must be copied to // the solution for (j = 0; j < datosArt[h].length; j++) { conjS[l][j] = datosArt[h][j]; conjR[l][j] = realArt[h][j]; conjN[l][j] = nominalArt[h][j]; conjM[l][j] = nulosArt[h][j]; } clasesS[l] = clasesArt[h]; l++; } } for (i = 0; i < datosArt.length; i++) { if (clasesArt[i] == posID) { for (j = 0; j < datosArt[i].length; j++) { conjS[l][j] = datosArt[i][j]; conjR[l][j] = realArt[i][j]; conjN[l][j] = nominalArt[i][j]; conjM[l][j] = nulosArt[i][j]; } clasesS[l] = clasesArt[i]; l++; } } } else { nSel = poblacion[0].genesActivos(); /* Construction of S set from the best cromosome */ conjS = new double[nSel][datosArt[0].length]; conjR = new double[nSel][datosArt[0].length]; conjN = new int[nSel][datosArt[0].length]; conjM = new boolean[nSel][datosArt[0].length]; clasesS = new int[nSel]; for (i = 0, l = 0; i < datosArt.length; i++) { if (poblacion[0].getGen(i)) { // the instance must be copied to // the solution for (j = 0; j < datosArt[i].length; j++) { conjS[l][j] = datosArt[i][j]; conjR[l][j] = realArt[i][j]; conjN[l][j] = nominalArt[i][j]; conjM[l][j] = nulosArt[i][j]; } clasesS[l] = clasesArt[i]; l++; } } } if (hybrid.equalsIgnoreCase("eus + smote")) { nPos = nNeg = 0; for (i = 0; i < clasesS.length; i++) { if (clasesS[i] == posID) nPos++; else nNeg++; } if (nPos < nNeg) { if (balance) { tamS = 2 * nNeg; } else { tamS = nNeg + nPos + (int) (nPos * smoting); } datosArt = new double[tamS][datosTrain[0].length]; realArt = new double[tamS][datosTrain[0].length]; nominalArt = new int[tamS][datosTrain[0].length]; nulosArt = new boolean[tamS][datosTrain[0].length]; clasesArt = new int[tamS]; SMOTE( conjS, conjR, conjN, conjM, clasesS, datosArt, realArt, nominalArt, nulosArt, clasesArt, kSMOTE, ASMO, smoting, balance, nPos, posID, nNeg, negID, distanceEu); nSel = datosArt.length; /* Construction of S set from the best cromosome */ conjS = new double[nSel][datosArt[0].length]; conjR = new double[nSel][datosArt[0].length]; conjN = new int[nSel][datosArt[0].length]; conjM = new boolean[nSel][datosArt[0].length]; clasesS = new int[nSel]; for (i = 0; i < datosArt.length; i++) { for (j = 0; j < datosArt[i].length; j++) { conjS[i][j] = datosArt[i][j]; conjR[i][j] = realArt[i][j]; conjN[i][j] = nominalArt[i][j]; conjM[i][j] = nulosArt[i][j]; } clasesS[i] = clasesArt[i]; } } } /* * for (i = 0; i < poblacion.length; i++){ for (j = 0; j < * poblacion[0].cuerpo.length; j++){ * System.out.print((poblacion[i].cuerpo[j] ? 1 : 0)); } * System.out.println(" Calidad: " + poblacion[i].calidad); } */ best = poblacion[0].cuerpo.clone(); bestOutputs = poblacion[0].prediction.clone(); System.out.println( "QstatEUSCHC " + relation + " " + (double) (System.currentTimeMillis() - tiempo) / 1000.0 + "s"); OutputIS.escribeSalida( ficheroSalida[0], conjR, conjN, conjM, clasesS, entradas, salida, nEntradas, relation); // OutputIS.escribeSalida(ficheroSalida[1], test, entradas, salida, // nEntradas, relation); }
public void ejecutar() { int i, j, l, m, o; int nClases; int claseObt; boolean marcas[]; double conjS[][]; int clasesS[]; int eleS[], eleT[]; int bestAc, aciertos; int temp[]; int pos, tmp; long tiempo = System.currentTimeMillis(); /*Getting the number of different classes*/ nClases = 0; for (i = 0; i < clasesTrain.length; i++) if (clasesTrain[i] > nClases) nClases = clasesTrain[i]; nClases++; /*Inicialization of the flagged instance vector of the S set*/ marcas = new boolean[datosTrain.length]; for (i = 0; i < datosTrain.length; i++) marcas[i] = false; /*Allocate memory for the random selection*/ m = (int) ((porcentaje * datosTrain.length) / 100.0); eleS = new int[m]; eleT = new int[datosTrain.length - m]; temp = new int[datosTrain.length]; for (i = 0; i < datosTrain.length; i++) temp[i] = i; /** Random distribution of elements in each set */ Randomize.setSeed(semilla); for (i = 0; i < eleS.length; i++) { pos = Randomize.Randint(i, datosTrain.length - 1); tmp = temp[i]; temp[i] = temp[pos]; temp[pos] = tmp; eleS[i] = temp[i]; } for (i = 0; i < eleT.length; i++) { pos = Randomize.Randint(m + i, datosTrain.length - 1); tmp = temp[m + i]; temp[m + i] = temp[pos]; temp[pos] = tmp; eleT[i] = temp[m + i]; } for (i = 0; i < eleS.length; i++) marcas[eleS[i]] = true; /*Building of the S set from the flags*/ conjS = new double[m][datosTrain[0].length]; clasesS = new int[m]; for (o = 0, l = 0; o < datosTrain.length; o++) { if (marcas[o]) { // the instance will be evaluated for (j = 0; j < datosTrain[0].length; j++) { conjS[l][j] = datosTrain[o][j]; } clasesS[l] = clasesTrain[o]; l++; } } /*Evaluation of the S set*/ bestAc = 0; for (i = 0; i < datosTrain.length; i++) { claseObt = KNN.evaluacionKNN2(k, conjS, clasesS, datosTrain[i], nClases); if (claseObt == clasesTrain[i]) // correct clasification bestAc++; } /*Body of the ENNRS algorithm. Change the S set in each iteration for instances of the T set until get a complete sustitution*/ for (i = 0; i < n; i++) { /*Preparation the set to interchange*/ for (j = 0; j < eleS.length; j++) { pos = Randomize.Randint(j, eleT.length - 1); tmp = eleT[j]; eleT[j] = eleT[pos]; eleT[pos] = tmp; } /*Interchange of instances*/ for (j = 0; j < eleS.length; j++) { tmp = eleS[j]; eleS[j] = eleT[j]; eleT[j] = tmp; marcas[eleS[j]] = true; marcas[eleT[j]] = false; } /*Building of the S set from the flags*/ for (o = 0, l = 0; o < datosTrain.length; o++) { if (marcas[o]) { // the instance will evaluate for (j = 0; j < datosTrain[0].length; j++) { conjS[l][j] = datosTrain[o][j]; } clasesS[l] = clasesTrain[o]; l++; } } /*Evaluation of the S set*/ aciertos = 0; for (j = 0; j < datosTrain.length; j++) { claseObt = KNN.evaluacionKNN2(k, conjS, clasesS, datosTrain[j], nClases); if (claseObt == clasesTrain[j]) // correct clasification aciertos++; } if (aciertos > bestAc) { // keep S bestAc = aciertos; } else { // undo changes for (j = 0; j < eleS.length; j++) { tmp = eleS[j]; eleS[j] = eleT[j]; eleT[j] = tmp; marcas[eleS[j]] = true; marcas[eleT[j]] = false; } } } /*Building of the S set from the flags*/ /*Building of the S set from the flags*/ for (o = 0, l = 0; o < datosTrain.length; o++) { if (marcas[o]) { // the instance will evaluate for (j = 0; j < datosTrain[0].length; j++) { conjS[l][j] = datosTrain[o][j]; } clasesS[l] = clasesTrain[o]; l++; } } System.out.println( "ENNRS " + relation + " " + (double) (System.currentTimeMillis() - tiempo) / 1000.0 + "s"); // COn conjS me vale. int trainRealClass[][]; int trainPrediction[][]; trainRealClass = new int[datosTrain.length][1]; trainPrediction = new int[datosTrain.length][1]; // Working on training for (i = 0; i < datosTrain.length; i++) { trainRealClass[i][0] = clasesTrain[i]; trainPrediction[i][0] = KNN.evaluate(datosTrain[i], conjS, nClases, clasesS, this.k); } KNN.writeOutput(ficheroSalida[0], trainRealClass, trainPrediction, entradas, salida, relation); // Working on test int realClass[][] = new int[datosTest.length][1]; int prediction[][] = new int[datosTest.length][1]; // Check time for (i = 0; i < realClass.length; i++) { realClass[i][0] = clasesTest[i]; prediction[i][0] = KNN.evaluate(datosTest[i], conjS, nClases, clasesS, this.k); } KNN.writeOutput(ficheroSalida[1], realClass, prediction, entradas, salida, relation); }
/** Process the training and test files provided in the parameters file to the constructor. */ public void process() { double[] outputs; double[] outputs2; try { FileWriter file_write = new FileWriter(output_train_name); try { // Load in memory a dataset that contains a classification problem IS.readSet(input_train_name, true); int in = 0; int out = 0; int in2 = 0; int out2 = 0; int lastMissing = -1; boolean fin = false; boolean stepNext = false; ndatos = IS.getNumInstances(); nvariables = Attributes.getNumAttributes(); nentradas = Attributes.getInputNumAttributes(); nsalidas = Attributes.getOutputNumAttributes(); String[] row = null; X = new Vector[ndatos]; // matrix with transformed data for (int i = 0; i < ndatos; i++) X[i] = new Vector(); timesSeen = new FreqList[nvariables]; mostCommon = new String[nvariables]; file_write.write(IS.getHeader()); // now, print the normalized data file_write.write("@data\n"); // now, search for missed data, and replace them with // the most common value for (int i = 0; i < ndatos; i++) { Instance inst = IS.getInstance(i); in = 0; out = 0; row = new String[nvariables]; for (int j = 0; j < nvariables; j++) { Attribute a = Attributes.getAttribute(j); direccion = a.getDirectionAttribute(); tipo = a.getType(); if (direccion == Attribute.INPUT) { if (tipo != Attribute.NOMINAL && !inst.existsAnyMissingValue()) { row[j] = new String(String.valueOf(inst.getInputRealValues(in))); } else { if (!inst.existsAnyMissingValue()) row[j] = inst.getInputNominalValues(in); else { // missing data outputs = inst.getAllOutputValues(); in2 = 0; out2 = 0; for (int attr = 0; attr < nvariables; attr++) { Attribute b = Attributes.getAttribute(attr); direccion = b.getDirectionAttribute(); tipo = b.getType(); if (direccion == Attribute.INPUT) { if (tipo != Attribute.NOMINAL && !inst.getInputMissingValues(in2)) { row[attr] = new String(String.valueOf(inst.getInputRealValues(in2))); } else { if (!inst.getInputMissingValues(in2)) row[attr] = inst.getInputNominalValues(in2); } in2++; } else { if (direccion == Attribute.OUTPUT) { if (tipo != Attribute.NOMINAL && !inst.getOutputMissingValues(out2)) { row[attr] = new String(String.valueOf(inst.getOutputRealValues(out2))); } else { if (!inst.getOutputMissingValues(out2)) row[attr] = inst.getOutputNominalValues(out2); } out2++; } } } // make frecuencies for each attribute for (int attr = 0; attr < nvariables; attr++) { Attribute b = Attributes.getAttribute(attr); direccion = b.getDirectionAttribute(); tipo = b.getType(); if (direccion == Attribute.INPUT && inst.getInputMissingValues(attr)) { lastMissing = attr; timesSeen[attr] = new FreqList(); for (int m = 0; m < ndatos; m++) { Instance inst2 = IS.getInstance(m); outputs2 = inst2.getAllOutputValues(); boolean sameClass = true; // are they same concept instances?? for (int k = 0; k < nsalidas && sameClass; k++) if (outputs[k] != outputs2[k]) sameClass = false; if (sameClass) { if (tipo != Attribute.NOMINAL && !inst2.getInputMissingValues(attr)) { timesSeen[attr].AddElement( new String(String.valueOf(inst2.getInputRealValues(attr)))); } else { if (!inst2.getInputMissingValues(attr)) { timesSeen[attr].AddElement(inst2.getInputNominalValues(attr)); } } } } } } for (int attr = 0; attr < nvariables; attr++) { if (direccion == Attribute.INPUT && inst.getInputMissingValues(attr)) { timesSeen[attr].reset(); } } fin = false; stepNext = false; while (!fin) { in2 = 0; for (int attr = 0; attr < nvariables && !fin; attr++) { Attribute b = Attributes.getAttribute(attr); direccion = b.getDirectionAttribute(); tipo = b.getType(); if (direccion == Attribute.INPUT && inst.getInputMissingValues(in2)) { if (stepNext) { timesSeen[attr].iterate(); stepNext = false; } if (timesSeen[attr].outOfBounds()) { stepNext = true; if (attr == lastMissing) fin = true; timesSeen[attr].reset(); } if (!fin) row[attr] = ((ValueFreq) timesSeen[attr].getCurrent()) .getValue(); // replace missing data } in2++; } if (!fin) { stepNext = true; file_write.write(row[0]); for (int y = 1; y < nvariables; y++) { file_write.write("," + row[y]); } file_write.write("\n"); // X[i].addElement(row); // row = (String[])row.clone(); } } } } in++; } else { if (direccion == Attribute.OUTPUT) { if (tipo != Attribute.NOMINAL && !inst.getOutputMissingValues(out)) { row[j] = new String(String.valueOf(inst.getOutputRealValues(out))); } else { if (!inst.getOutputMissingValues(out)) row[j] = inst.getOutputNominalValues(out); else row[j] = new String("?"); } out++; } } } if (!inst.existsAnyMissingValue()) { file_write.write(row[0]); for (int y = 1; y < nvariables; y++) { file_write.write("," + row[y]); } file_write.write("\n"); } } } catch (Exception e) { System.out.println("Dataset exception = " + e); e.printStackTrace(); System.exit(-1); } file_write.close(); } catch (IOException e) { System.out.println("IO exception = " + e); e.printStackTrace(); System.exit(-1); } /** ************************************************************************************ */ // does a test file associated exist? if (input_train_name.compareTo(input_test_name) != 0) { try { FileWriter file_write = new FileWriter(output_test_name); try { // Load in memory a dataset that contains a classification problem IS.readSet(input_test_name, false); int in = 0; int out = 0; int in2 = 0; int out2 = 0; int lastMissing = -1; boolean fin = false; boolean stepNext = false; ndatos = IS.getNumInstances(); nvariables = Attributes.getNumAttributes(); nentradas = Attributes.getInputNumAttributes(); nsalidas = Attributes.getOutputNumAttributes(); String[] row = null; X = new Vector[ndatos]; // matrix with transformed data for (int i = 0; i < ndatos; i++) X[i] = new Vector(); timesSeen = new FreqList[nvariables]; mostCommon = new String[nvariables]; file_write.write(IS.getHeader()); // now, print the normalized data file_write.write("@data\n"); // now, search for missed data, and replace them with // the most common value for (int i = 0; i < ndatos; i++) { Instance inst = IS.getInstance(i); in = 0; out = 0; row = new String[nvariables]; for (int j = 0; j < nvariables; j++) { Attribute a = Attributes.getAttribute(j); direccion = a.getDirectionAttribute(); tipo = a.getType(); if (direccion == Attribute.INPUT) { if (tipo != Attribute.NOMINAL && !inst.existsAnyMissingValue()) { row[j] = new String(String.valueOf(inst.getInputRealValues(in))); } else { if (!inst.existsAnyMissingValue()) row[j] = inst.getInputNominalValues(in); else { // missing data outputs = inst.getAllOutputValues(); in2 = 0; out2 = 0; for (int attr = 0; attr < nvariables; attr++) { Attribute b = Attributes.getAttribute(attr); direccion = b.getDirectionAttribute(); tipo = b.getType(); if (direccion == Attribute.INPUT) { if (tipo != Attribute.NOMINAL && !inst.getInputMissingValues(in2)) { row[attr] = new String(String.valueOf(inst.getInputRealValues(in2))); } else { if (!inst.getInputMissingValues(in2)) row[attr] = inst.getInputNominalValues(in2); } in2++; } else { if (direccion == Attribute.OUTPUT) { if (tipo != Attribute.NOMINAL && !inst.getOutputMissingValues(out2)) { row[attr] = new String(String.valueOf(inst.getOutputRealValues(out2))); } else { if (!inst.getOutputMissingValues(out2)) row[attr] = inst.getOutputNominalValues(out2); } out2++; } } } // make frecuencies for each attribute for (int attr = 0; attr < nvariables; attr++) { Attribute b = Attributes.getAttribute(attr); direccion = b.getDirectionAttribute(); tipo = b.getType(); if (direccion == Attribute.INPUT && inst.getInputMissingValues(attr)) { lastMissing = attr; timesSeen[attr] = new FreqList(); for (int m = 0; m < ndatos; m++) { Instance inst2 = IS.getInstance(m); outputs2 = inst2.getAllOutputValues(); boolean sameClass = true; // are they same concept instances?? for (int k = 0; k < nsalidas && sameClass; k++) if (outputs[k] != outputs2[k]) sameClass = false; if (sameClass) { if (tipo != Attribute.NOMINAL && !inst2.getInputMissingValues(attr)) { timesSeen[attr].AddElement( new String(String.valueOf(inst2.getInputRealValues(attr)))); } else { if (!inst2.getInputMissingValues(attr)) { timesSeen[attr].AddElement(inst2.getInputNominalValues(attr)); } } } } } } for (int attr = 0; attr < nvariables; attr++) { if (direccion == Attribute.INPUT && inst.getInputMissingValues(attr)) { timesSeen[attr].reset(); } } fin = false; stepNext = false; while (!fin) { in2 = 0; for (int attr = 0; attr < nvariables && !fin; attr++) { Attribute b = Attributes.getAttribute(attr); direccion = b.getDirectionAttribute(); tipo = b.getType(); if (direccion == Attribute.INPUT && inst.getInputMissingValues(in2)) { if (stepNext) { timesSeen[attr].iterate(); stepNext = false; } if (timesSeen[attr].outOfBounds()) { stepNext = true; if (attr == lastMissing) fin = true; timesSeen[attr].reset(); } if (!fin) row[attr] = ((ValueFreq) timesSeen[attr].getCurrent()) .getValue(); // replace missing data } in2++; } if (!fin) { stepNext = true; file_write.write(row[0]); for (int y = 1; y < nvariables; y++) { file_write.write("," + row[y]); } file_write.write("\n"); // X[i].addElement(row); // row = (String[])row.clone(); } } } } in++; } else { if (direccion == Attribute.OUTPUT) { if (tipo != Attribute.NOMINAL && !inst.getOutputMissingValues(out)) { row[j] = new String(String.valueOf(inst.getOutputRealValues(out))); } else { if (!inst.getOutputMissingValues(out)) row[j] = inst.getOutputNominalValues(out); else row[j] = new String("?"); } out++; } } } if (!inst.existsAnyMissingValue()) { file_write.write(row[0]); for (int y = 1; y < nvariables; y++) { file_write.write("," + row[y]); } file_write.write("\n"); } } } catch (Exception e) { System.out.println("Dataset exception = " + e); e.printStackTrace(); System.exit(-1); } file_write.close(); } catch (IOException e) { System.out.println("IO exception = " + e); e.printStackTrace(); System.exit(-1); } } }
/** Process the training and test files provided in the parameters file to the constructor. */ public void process() { // declarations double[] outputs; double[] outputs2; Instance neighbor; double dist, mean; int actual; Randomize rnd = new Randomize(); Instance ex; gCenter kmeans = null; int iterations = 0; double E; double prevE; int totalMissing = 0; boolean allMissing = true; rnd.setSeed(semilla); // PROCESS try { // Load in memory a dataset that contains a classification problem IS.readSet(input_train_name, true); int in = 0; int out = 0; ndatos = IS.getNumInstances(); nvariables = Attributes.getNumAttributes(); nentradas = Attributes.getInputNumAttributes(); nsalidas = Attributes.getOutputNumAttributes(); X = new String[ndatos][nvariables]; // matrix with transformed data kmeans = new gCenter(K, ndatos, nvariables); timesSeen = new FreqList[nvariables]; mostCommon = new String[nvariables]; // first, we choose k 'means' randomly from all // instances totalMissing = 0; for (int i = 0; i < ndatos; i++) { Instance inst = IS.getInstance(i); if (inst.existsAnyMissingValue()) totalMissing++; } if (totalMissing == ndatos) allMissing = true; else allMissing = false; for (int numMeans = 0; numMeans < K; numMeans++) { do { actual = (int) (ndatos * rnd.Rand()); ex = IS.getInstance(actual); } while (ex.existsAnyMissingValue() && !allMissing); kmeans.copyCenter(ex, numMeans); } // now, iterate adjusting clusters' centers and // instances to them prevE = 0; iterations = 0; do { for (int i = 0; i < ndatos; i++) { Instance inst = IS.getInstance(i); kmeans.setClusterOf(inst, i); } // set new centers kmeans.recalculateCenters(IS); // compute RMSE E = 0; for (int i = 0; i < ndatos; i++) { Instance inst = IS.getInstance(i); E += kmeans.distance(inst, kmeans.getClusterOf(i)); } iterations++; // System.out.println(iterations+"\t"+E); if (Math.abs(prevE - E) == 0) iterations = maxIter; else prevE = E; } while (E > minError && iterations < maxIter); for (int i = 0; i < ndatos; i++) { Instance inst = IS.getInstance(i); in = 0; out = 0; for (int j = 0; j < nvariables; j++) { Attribute a = Attributes.getAttribute(j); direccion = a.getDirectionAttribute(); tipo = a.getType(); if (direccion == Attribute.INPUT) { if (tipo != Attribute.NOMINAL && !inst.getInputMissingValues(in)) { X[i][j] = new String(String.valueOf(inst.getInputRealValues(in))); } else { if (!inst.getInputMissingValues(in)) X[i][j] = inst.getInputNominalValues(in); else { actual = kmeans.getClusterOf(i); X[i][j] = new String(kmeans.valueAt(actual, j)); } } in++; } else { if (direccion == Attribute.OUTPUT) { if (tipo != Attribute.NOMINAL && !inst.getOutputMissingValues(out)) { X[i][j] = new String(String.valueOf(inst.getOutputRealValues(out))); } else { if (!inst.getOutputMissingValues(out)) X[i][j] = inst.getOutputNominalValues(out); else { actual = kmeans.getClusterOf(i); X[i][j] = new String(kmeans.valueAt(actual, j)); } } out++; } } } } } catch (Exception e) { System.out.println("Dataset exception = " + e); e.printStackTrace(); System.exit(-1); } write_results(output_train_name); /** ************************************************************************************ */ // does a test file associated exist? if (input_train_name.compareTo(input_test_name) != 0) { try { // Load in memory a dataset that contains a classification problem IStest.readSet(input_test_name, false); int in = 0; int out = 0; ndatos = IStest.getNumInstances(); nvariables = Attributes.getNumAttributes(); nentradas = Attributes.getInputNumAttributes(); nsalidas = Attributes.getOutputNumAttributes(); for (int i = 0; i < ndatos; i++) { Instance inst = IStest.getInstance(i); in = 0; out = 0; for (int j = 0; j < nvariables; j++) { Attribute a = Attributes.getAttribute(j); direccion = a.getDirectionAttribute(); tipo = a.getType(); if (direccion == Attribute.INPUT) { if (tipo != Attribute.NOMINAL && !inst.getInputMissingValues(in)) { X[i][j] = new String(String.valueOf(inst.getInputRealValues(in))); } else { if (!inst.getInputMissingValues(in)) X[i][j] = inst.getInputNominalValues(in); else { actual = kmeans.getClusterOf(i); X[i][j] = new String(kmeans.valueAt(actual, j)); } } in++; } else { if (direccion == Attribute.OUTPUT) { if (tipo != Attribute.NOMINAL && !inst.getOutputMissingValues(out)) { X[i][j] = new String(String.valueOf(inst.getOutputRealValues(out))); } else { if (!inst.getOutputMissingValues(out)) X[i][j] = inst.getOutputNominalValues(out); else { actual = kmeans.getClusterOf(i); X[i][j] = new String(kmeans.valueAt(actual, j)); } } out++; } } } } } catch (Exception e) { System.out.println("Dataset exception = " + e); e.printStackTrace(); System.exit(-1); } write_results(output_test_name); } }
/** Process the training and test files provided in the parameters file to the constructor. */ public void process() { try { // Load in memory a dataset that contains a classification problem IS.readSet(input_train_name, true); int in = 0; int out = 0; ndatos = IS.getNumInstances(); nvariables = Attributes.getNumAttributes(); nentradas = Attributes.getInputNumAttributes(); nsalidas = Attributes.getOutputNumAttributes(); X = new String[ndatos][nvariables]; // matrix with transformed data boolean[] isMissed = new boolean[ndatos]; // vector which points out instances with missed data try { FileWriter file_write = new FileWriter(output_train_name); file_write.write(IS.getHeader()); // now, print the normalized data file_write.write("@data\n"); // file_write.close(); PrintWriter pw = new PrintWriter(file_write); for (int i = 0; i < ndatos; i++) { Instance inst = IS.getInstance(i); if (!inst.existsAnyMissingValue()) { inst.printAsOriginal(pw); // file_write.write(inst.toString()); // DOES NOT WRITE BACK NON-DEF DIRECTION // ATTRIBUTES!!!! file_write.write("\n"); } } pw.close(); file_write.close(); } catch (IOException e) { System.out.println("IO exception = " + e); System.exit(-1); } } catch (Exception e) { System.out.println("Dataset exception = " + e); e.printStackTrace(); System.exit(-1); } // does a test file associated exist? if (input_train_name.compareTo(input_test_name) != 0) { try { // Load in memory a dataset that contains a classification problem IS.readSet(input_test_name, false); int in = 0; int out = 0; ndatos = IS.getNumInstances(); nvariables = Attributes.getNumAttributes(); nentradas = Attributes.getInputNumAttributes(); nsalidas = Attributes.getOutputNumAttributes(); X = new String[ndatos][nvariables]; // matrix with transformed data boolean[] isMissed = new boolean[ndatos]; // vector which points out instances with missed data try { FileWriter file_write = new FileWriter(output_test_name); file_write.write(IS.getHeader()); // now, print the normalized data file_write.write("@data\n"); PrintWriter pw = new PrintWriter(file_write); for (int i = 0; i < ndatos; i++) { Instance inst = IS.getInstance(i); if (!inst.existsAnyMissingValue()) { inst.printAsOriginal(pw); file_write.write("\n"); } } pw.close(); file_write.close(); } catch (IOException e) { System.out.println("IO exception = " + e); System.exit(-1); } } catch (Exception e) { System.out.println("Dataset exception = " + e); e.printStackTrace(); System.exit(-1); } } // write_results(); / since there ins't any data transformation, is not needed }