/**
 * Loads the test set into {@code datosTest} (input values) and {@code clasesTest} (class labels)
 * and rescales every input value.
 *
 * <p>The program is terminated with exit status -1 unless the dataset declares exactly one output
 * attribute and that attribute is not of type {@code Attribute.REAL}.
 *
 * <p>For each instance, missing inputs are set to 0.0 before rescaling. Nominal inputs are
 * divided by (number of nominal values - 1); every other input has the attribute minimum
 * subtracted and is then divided by (maximum - minimum).
 *
 * <p>NOTE(review): an attribute with a single nominal value, or with maximum equal to minimum,
 * makes the divisor zero; no guard exists here - confirm how the training-side normalization
 * handles it before changing this.
 */
private void normalizarTest() {
  /* Check that the dataset corresponds to a classification problem */
  final int outputCount = Attributes.getOutputNumAttributes();
  if (outputCount < 1) {
    System.err.println(
        "This dataset haven´t outputs, so it not corresponding to a classification problem.");
    System.exit(-1);
  } else if (outputCount > 1) {
    System.err.println("This dataset have more of one output.");
    System.exit(-1);
  }

  if (Attributes.getOutputAttribute(0).getType() == Attribute.REAL) {
    System.err.println(
        "This dataset have an input attribute with floating values, so it not corresponding to a classification problem.");
    System.exit(-1);
  }

  final int instanceCount = test.getNumInstances();
  datosTest = new double[instanceCount][Attributes.getInputNumAttributes()];
  clasesTest = new int[instanceCount];

  for (int row = 0; row < instanceCount; row++) {
    Instance current = test.getInstance(row);
    boolean[] missing = current.getInputMissingValues();

    // The row allocated above is replaced by the array returned by the instance.
    datosTest[row] = current.getAllInputValues();
    double[] values = datosTest[row];

    // Missing inputs are zeroed before any rescaling takes place.
    for (int col = 0; col < missing.length; col++) {
      if (missing[col]) {
        values[col] = 0.0;
      }
    }

    // Only the first output value is used as the class label.
    clasesTest[row] = (int) current.getAllOutputValues()[0];

    for (int col = 0; col < values.length; col++) {
      Attribute attribute = Attributes.getInputAttribute(col);
      if (attribute.getType() == Attribute.NOMINAL) {
        values[col] /= attribute.getNominalValuesList().size() - 1;
      } else {
        values[col] -= attribute.getMinAttribute();
        values[col] /= attribute.getMaxAttribute() - attribute.getMinAttribute();
      }
    }
  }
}
private void generateModel() { String salida = new String(""); double max_auc = 0; ArrayList<String> solutions = this.getAllSolutions(); models = new ArrayList<Farchd>(); int nEjemplos = train.getnData(); if (this.instances == this.MAJ) { nEjemplos = train.getMajority(); } boolean[] variables = new boolean[train.getnInputs()]; boolean[] ejemplos = new boolean[nEjemplos]; this.weightsAUC = new double[solutions.size() / 2]; // Hay 2 soluciones FS e IS for (int i = 0, j = 0; i < solutions.size(); i += 2, j++) { int vars, ejs; vars = ejs = 0; variables = decode(solutions.get(i)); ejemplos = decode(solutions.get(i + 1)); for (int l = 0; l < variables.length; l++) { // variables[j] = solution[j]; if (variables[l]) vars++; } for (int l = 0; l < ejemplos.length; l++) { if (ejemplos[l]) ejs++; } try { Farchd model = new Farchd(train, val, test, variables, ejemplos); /** ******** */ // double fit = model.getAUCTr(); double auc_tr = model.execute(true); double auc_tst = model.getAUCTst(); if (auc_tr > max_auc) { max_auc = auc_tr; indexBest = j; } this.weightsAUC[j] = auc_tr; salida += "Solution[" + j + "]:\t" + vars + "\t" + ejs + "\t" + auc_tr + "\t" + auc_tst + "\n"; /** ******** */ models.add(model); } catch (Exception e) { System.err.println("Liada maxima al generar modelo "); e.printStackTrace(System.err); System.exit(-1); } } System.out.print(salida); Files.writeFile(header + "_AUC.txt", salida); }
/**
 * Entry point of the ABB search.
 *
 * <p>Obtains the starting solution, copies it into {@code features}, and hands it to
 * {@code abb} for exploration. If {@code features} is {@code null} once the search returns, an
 * error is printed and the program terminates.
 *
 * <p>NOTE(review): the error path exits with status 0, and the check only tests for
 * {@code null} although {@code System.arraycopy} above already requires {@code features} to be
 * non-null - confirm whether {@code abb} can reset the field and whether a non-zero status was
 * intended.
 */
private void runABB() {
  final boolean[] initial = startSolution();
  System.arraycopy(initial, 0, features, 0, initial.length);

  abb(initial);

  // Nothing more to do when the search left a solution in place.
  if (features != null) {
    return;
  }

  System.err.println("ERROR: It couldn't be possible to find any solution.");
  System.exit(0);
}
/**
 * Creates the algorithm from a script file and loads the reference set.
 *
 * <p>After delegating to the superclass constructor, a new {@code InstanceSet} is read from
 * {@code ficheroReferencia} and normalized through {@code normalizarReferencia()}. Any exception
 * raised while doing so is printed to standard error and terminates the program with exit
 * status 1.
 *
 * @param ficheroScript script file name, passed unchanged to the superclass constructor
 */
public LVQ(String ficheroScript) {

  super(ficheroScript);

  try {
    referencia = new InstanceSet();
    referencia.readSet(ficheroReferencia, false);

    // Normalize the data
    normalizarReferencia();
  } catch (Exception failure) {
    System.err.println(failure);
    System.exit(1);
  }
}
/** * Constructor of the Class Parametros * * @param nombreFileParametros is the pathname of input parameter file */ Parametros(String nombreFileParametros) { try { int i; String fichero, linea, tok; StringTokenizer lineasFile, tokens; /* read the parameter file using Files class */ fichero = Files.readFile(nombreFileParametros); fichero += "\n"; /* remove all \r characters. it is neccesary for a correst use in Windows and UNIX */ fichero = fichero.replace('\r', ' '); /* extracts the differents tokens of the file */ lineasFile = new StringTokenizer(fichero, "\n"); i = 0; while (lineasFile.hasMoreTokens()) { linea = lineasFile.nextToken(); i++; tokens = new StringTokenizer(linea, " ,\t"); if (tokens.hasMoreTokens()) { tok = tokens.nextToken(); if (tok.equalsIgnoreCase("algorithm")) nameAlgorithm = getParamString(tokens); else if (tok.equalsIgnoreCase("inputdata")) getInputFiles(tokens); else if (tok.equalsIgnoreCase("outputdata")) getOutputFiles(tokens); else if (tok.equalsIgnoreCase("seed")) seed = getParamLong(tokens); else throw new java.io.IOException("Syntax error on line " + i + ": [" + tok + "]\n"); } } } catch (java.io.FileNotFoundException e) { System.err.println(e + "Parameter file"); } catch (java.io.IOException e) { System.err.println(e + "Aborting program"); System.exit(-1); } /** show the read parameter in the standard output */ String contents = "-- Parameters echo --- \n"; contents += "Algorithm name: " + nameAlgorithm + "\n"; contents += "Input Train File: " + trainFileNameInput + "\n"; contents += "Input Test File: " + testFileNameInput + "\n"; contents += "Output Train File: " + trainFileNameOutput + "\n"; contents += "Output Test File: " + testFileNameOutput + "\n"; System.out.println(contents); }
// Write data matrix X to disk, in KEEL format private void write_results(String output) { // File OutputFile = new File(output_train_name.substring(1, output_train_name.length()-1)); try { FileWriter file_write = new FileWriter(output); file_write.write(IS.getHeader()); // now, print the normalized data file_write.write("@data\n"); for (int i = 0; i < ndatos; i++) { file_write.write(X[i][0]); for (int j = 1; j < nvariables; j++) { file_write.write("," + X[i][j]); } file_write.write("\n"); } file_write.close(); } catch (IOException e) { System.out.println("IO exception = " + e); System.exit(-1); } }
/** Process the training and test files provided in the parameters file to the constructor. */ public void process() { // declarations double[] outputs; double[] outputs2; Instance neighbor; double dist, mean; int actual; Randomize rnd = new Randomize(); Instance ex; gCenter kmeans = null; int iterations = 0; double E; double prevE; int totalMissing = 0; boolean allMissing = true; rnd.setSeed(semilla); // PROCESS try { // Load in memory a dataset that contains a classification problem IS.readSet(input_train_name, true); int in = 0; int out = 0; ndatos = IS.getNumInstances(); nvariables = Attributes.getNumAttributes(); nentradas = Attributes.getInputNumAttributes(); nsalidas = Attributes.getOutputNumAttributes(); X = new String[ndatos][nvariables]; // matrix with transformed data kmeans = new gCenter(K, ndatos, nvariables); timesSeen = new FreqList[nvariables]; mostCommon = new String[nvariables]; // first, we choose k 'means' randomly from all // instances totalMissing = 0; for (int i = 0; i < ndatos; i++) { Instance inst = IS.getInstance(i); if (inst.existsAnyMissingValue()) totalMissing++; } if (totalMissing == ndatos) allMissing = true; else allMissing = false; for (int numMeans = 0; numMeans < K; numMeans++) { do { actual = (int) (ndatos * rnd.Rand()); ex = IS.getInstance(actual); } while (ex.existsAnyMissingValue() && !allMissing); kmeans.copyCenter(ex, numMeans); } // now, iterate adjusting clusters' centers and // instances to them prevE = 0; iterations = 0; do { for (int i = 0; i < ndatos; i++) { Instance inst = IS.getInstance(i); kmeans.setClusterOf(inst, i); } // set new centers kmeans.recalculateCenters(IS); // compute RMSE E = 0; for (int i = 0; i < ndatos; i++) { Instance inst = IS.getInstance(i); E += kmeans.distance(inst, kmeans.getClusterOf(i)); } iterations++; // System.out.println(iterations+"\t"+E); if (Math.abs(prevE - E) == 0) iterations = maxIter; else prevE = E; } while (E > minError && iterations < maxIter); for (int i = 0; i < ndatos; i++) 
{ Instance inst = IS.getInstance(i); in = 0; out = 0; for (int j = 0; j < nvariables; j++) { Attribute a = Attributes.getAttribute(j); direccion = a.getDirectionAttribute(); tipo = a.getType(); if (direccion == Attribute.INPUT) { if (tipo != Attribute.NOMINAL && !inst.getInputMissingValues(in)) { X[i][j] = new String(String.valueOf(inst.getInputRealValues(in))); } else { if (!inst.getInputMissingValues(in)) X[i][j] = inst.getInputNominalValues(in); else { actual = kmeans.getClusterOf(i); X[i][j] = new String(kmeans.valueAt(actual, j)); } } in++; } else { if (direccion == Attribute.OUTPUT) { if (tipo != Attribute.NOMINAL && !inst.getOutputMissingValues(out)) { X[i][j] = new String(String.valueOf(inst.getOutputRealValues(out))); } else { if (!inst.getOutputMissingValues(out)) X[i][j] = inst.getOutputNominalValues(out); else { actual = kmeans.getClusterOf(i); X[i][j] = new String(kmeans.valueAt(actual, j)); } } out++; } } } } } catch (Exception e) { System.out.println("Dataset exception = " + e); e.printStackTrace(); System.exit(-1); } write_results(output_train_name); /** ************************************************************************************ */ // does a test file associated exist? 
if (input_train_name.compareTo(input_test_name) != 0) { try { // Load in memory a dataset that contains a classification problem IStest.readSet(input_test_name, false); int in = 0; int out = 0; ndatos = IStest.getNumInstances(); nvariables = Attributes.getNumAttributes(); nentradas = Attributes.getInputNumAttributes(); nsalidas = Attributes.getOutputNumAttributes(); for (int i = 0; i < ndatos; i++) { Instance inst = IStest.getInstance(i); in = 0; out = 0; for (int j = 0; j < nvariables; j++) { Attribute a = Attributes.getAttribute(j); direccion = a.getDirectionAttribute(); tipo = a.getType(); if (direccion == Attribute.INPUT) { if (tipo != Attribute.NOMINAL && !inst.getInputMissingValues(in)) { X[i][j] = new String(String.valueOf(inst.getInputRealValues(in))); } else { if (!inst.getInputMissingValues(in)) X[i][j] = inst.getInputNominalValues(in); else { actual = kmeans.getClusterOf(i); X[i][j] = new String(kmeans.valueAt(actual, j)); } } in++; } else { if (direccion == Attribute.OUTPUT) { if (tipo != Attribute.NOMINAL && !inst.getOutputMissingValues(out)) { X[i][j] = new String(String.valueOf(inst.getOutputRealValues(out))); } else { if (!inst.getOutputMissingValues(out)) X[i][j] = inst.getOutputNominalValues(out); else { actual = kmeans.getClusterOf(i); X[i][j] = new String(kmeans.valueAt(actual, j)); } } out++; } } } } } catch (Exception e) { System.out.println("Dataset exception = " + e); e.printStackTrace(); System.exit(-1); } write_results(output_test_name); } }
// Read the pattern file, and parse data into strings private void config_read(String fileParam) { File inputFile = new File(fileParam); if (inputFile == null || !inputFile.exists()) { System.out.println("parameter " + fileParam + " file doesn't exists!"); System.exit(-1); } // begin the configuration read from file try { FileReader file_reader = new FileReader(inputFile); BufferedReader buf_reader = new BufferedReader(file_reader); // FileWriter file_write = new FileWriter(outputFile); String line; do { line = buf_reader.readLine(); } while (line.length() == 0); // avoid empty lines for processing -> produce exec failure String out[] = line.split("algorithm = "); // alg_name = new String(out[1]); //catch the algorithm name // input & output filenames do { line = buf_reader.readLine(); } while (line.length() == 0); out = line.split("inputData = "); out = out[1].split("\\s\""); input_train_name = new String(out[0].substring(1, out[0].length() - 1)); input_test_name = new String(out[1].substring(0, out[1].length() - 1)); if (input_test_name.charAt(input_test_name.length() - 1) == '"') input_test_name = input_test_name.substring(0, input_test_name.length() - 1); do { line = buf_reader.readLine(); } while (line.length() == 0); out = line.split("outputData = "); out = out[1].split("\\s\""); output_train_name = new String(out[0].substring(1, out[0].length() - 1)); output_test_name = new String(out[1].substring(0, out[1].length() - 1)); if (output_test_name.charAt(output_test_name.length() - 1) == '"') output_test_name = output_test_name.substring(0, output_test_name.length() - 1); // parameters do { line = buf_reader.readLine(); } while (line.length() == 0); out = line.split("seed = "); semilla = (new Long(out[1])).longValue(); // parse the string into a integer do { line = buf_reader.readLine(); } while (line.length() == 0); out = line.split("k = "); K = (new Integer(out[1])).intValue(); // parse the string into a integer do { line = buf_reader.readLine(); } while 
(line.length() == 0); out = line.split("error = "); minError = (new Double(out[1])).doubleValue(); // parse the string into a double do { line = buf_reader.readLine(); } while (line.length() == 0); out = line.split("iterations = "); maxIter = (new Integer(out[1])).intValue(); // parse the string into a double file_reader.close(); } catch (IOException e) { System.out.println("IO exception = " + e); e.printStackTrace(); System.exit(-1); } }