private void generateModel() { String salida = new String(""); double max_auc = 0; ArrayList<String> solutions = this.getAllSolutions(); models = new ArrayList<Farchd>(); int nEjemplos = train.getnData(); if (this.instances == this.MAJ) { nEjemplos = train.getMajority(); } boolean[] variables = new boolean[train.getnInputs()]; boolean[] ejemplos = new boolean[nEjemplos]; this.weightsAUC = new double[solutions.size() / 2]; // Hay 2 soluciones FS e IS for (int i = 0, j = 0; i < solutions.size(); i += 2, j++) { int vars, ejs; vars = ejs = 0; variables = decode(solutions.get(i)); ejemplos = decode(solutions.get(i + 1)); for (int l = 0; l < variables.length; l++) { // variables[j] = solution[j]; if (variables[l]) vars++; } for (int l = 0; l < ejemplos.length; l++) { if (ejemplos[l]) ejs++; } try { Farchd model = new Farchd(train, val, test, variables, ejemplos); /** ******** */ // double fit = model.getAUCTr(); double auc_tr = model.execute(true); double auc_tst = model.getAUCTst(); if (auc_tr > max_auc) { max_auc = auc_tr; indexBest = j; } this.weightsAUC[j] = auc_tr; salida += "Solution[" + j + "]:\t" + vars + "\t" + ejs + "\t" + auc_tr + "\t" + auc_tst + "\n"; /** ******** */ models.add(model); } catch (Exception e) { System.err.println("Liada maxima al generar modelo "); e.printStackTrace(System.err); System.exit(-1); } } System.out.print(salida); Files.writeFile(header + "_AUC.txt", salida); }
/** * Process a dataset file for a clustering problem. * * @param nfexamples Name of the dataset file * @param train The dataset file is for training or for test * @throws java.io.IOException if there is any semantical, lexical or sintactical error in the * input file. */ public void processClusterDataset(String nfexamples, boolean train) throws IOException { try { // Load in memory a dataset that contains a classification problem IS.readSet(nfexamples, train); nData = IS.getNumInstances(); nInputs = Attributes.getInputNumAttributes(); nVariables = nInputs + Attributes.getOutputNumAttributes(); if (Attributes.getOutputNumAttributes() != 0) { System.out.println("This algorithm can not process datasets with outputs"); System.out.println("All outputs will be removed"); } // Initialize and fill our own tables X = new double[nData][nInputs]; missing = new boolean[nData][nInputs]; // Maximum and minimum of inputs iMaximum = new double[nInputs]; iMinimum = new double[nInputs]; // Maximum and minimum for output data oMaximum = 0; oMinimum = 0; // All values are casted into double/integer nClasses = 0; for (int i = 0; i < X.length; i++) { Instance inst = IS.getInstance(i); for (int j = 0; j < nInputs; j++) { X[i][j] = IS.getInputNumericValue(i, j); missing[i][j] = inst.getInputMissingValues(j); if (X[i][j] > iMaximum[j] || i == 0) { iMaximum[j] = X[i][j]; } if (X[i][j] < iMinimum[j] || i == 0) { iMinimum[j] = X[i][j]; } } } } catch (Exception e) { System.out.println("DBG: Exception in readSet"); e.printStackTrace(); } }
/** It launches the algorithm */ public void execute() { if (this.somethingWrong) { // We do not execute the program System.err.println("An error was found, either the data-set has missing values."); System.err.println( "Please remove the examples with missing data or apply a MV preprocessing."); System.err.println("Aborting the program"); // We should not use the statement: System.exit(-1); } else { // We do here the algorithm's operations int nClasses = train.getnClasses(); aprioriClassDistribution = new double[nClasses]; for (int i = 0; i < nClasses; i++) { aprioriClassDistribution[i] = 1.0 * val.numberInstances(i) / val.size(); } if (model) { // el modelo no esta generado en fichero previamente NSGA2 search = new NSGA2( train, seed, populationSize, maxTrials, crossover, mutation, instances, fitness); try { search.execute(); } catch (Exception e) { e.printStackTrace(System.err); } } // Finally we should fill the training and test output files this.generateModel(); double avgTr = this.doOutput(val, this.outputTr, false); double aucTr = getAUC(val); double avgTst = this.doOutput(test, this.outputTst, true); double aucTst = getAUC(test); System.out.print("AUC Train: " + aucTr); System.out.println("; AvgAcc Train: " + avgTr); System.out.print("AUC Test: " + aucTst); System.out.println("; AvgAcc Test: " + avgTst); totalTime = System.currentTimeMillis() - startTime; System.out.println("Algorithm Finished: " + totalTime); } }
/** * This private method extract the dataset and the method's parameters from the KEEL environment, * calculates the centroids using the KMeans class and print out the results with the validation * dataset. * * @param tty unused boolean parameter, kept for compatibility * @param pc ProcessConfig object to obtain the train and test datasets and the method's * parameters. */ private void clustering_kmeans(boolean tty, ProcessConfig pc) { try { String linea; ProcessDataset pd = new ProcessDataset(); linea = (String) pc.parInputData.get(ProcessConfig.IndexTrain); if (pc.parNewFormat) pd.processClusterDataset(linea, true); else pd.procesa_clustering_old(linea); int ndatos = pd.getNdata(); // Number of examples int nvariables = pd.getNvariables(); // Number of variables int nentradas = pd.getNinputs(); // Number of inputs pd.showDatasetStatistics(); System.out.println("Number of examples=" + ndatos); System.out.println("Number of inputs=" + nentradas); double[][] X = pd.getX(); // Input data double[] emaximo = pd.getImaximum(); // Maximum and Minimum for input data double[] eminimo = pd.getIminimum(); int[] neparticion = new int[nentradas]; int s; s = pc.parNClusters; KMeans KM = new KMeans(X, s, rand); double fallos = 0; try { for (int i = 0; i < X.length; i++) { int clase = KM.nearestCentroid(X[i]); // System.out.println("pattern="+i+" cluster="+clase); } } catch (Exception e) { System.out.println(e.toString()); } // Clusters in the test set ProcessDataset pdt = new ProcessDataset(); int nprueba, npentradas, npvariables; linea = (String) pc.parInputData.get(ProcessConfig.IndexTestKMeans); if (pc.parNewFormat) pdt.processClusterDataset(linea, false); else pdt.procesa_clustering_old(linea); nprueba = pdt.getNdata(); npvariables = pdt.getNvariables(); npentradas = pdt.getNinputs(); pdt.showDatasetStatistics(); if (npentradas != nentradas) throw new IOException("Error in test file"); double[][] Xp = pdt.getX(); int[] Co = new int[Xp.length]; // Test set is classified try 
{ for (int i = 0; i < Xp.length; i++) { Co[i] = KM.nearestCentroid(Xp[i]); // System.out.println("pattern test="+i+" cluster="+Co[i]); } } catch (Exception e) { System.out.println(e.toString()); } // Output format for clustering algorithms pc.results(Xp, Co); KM.print(); } catch (FileNotFoundException e) { System.err.println(e + " Training data not found"); } catch (IOException e) { System.err.println(e + " Read error"); } }
/** * Process a dataset file for a classification problem. * * @param nfejemplos Name of the dataset file * @param train The dataset file is for training or for test * @throws java.io.IOException if there is any semantical, lexical or sintactical error in the * input file. */ public void processClassifierDataset(String nfejemplos, boolean train) throws IOException { try { // Load in memory a dataset that contains a classification problem IS.readSet(nfejemplos, train); nData = IS.getNumInstances(); nInputs = Attributes.getInputNumAttributes(); nVariables = nInputs + Attributes.getOutputNumAttributes(); // Check that there is only one output variable and // it is nominal if (Attributes.getOutputNumAttributes() > 1) { System.out.println("This algorithm can not process MIMO datasets"); System.out.println("All outputs but the first one will be removed"); } boolean noOutputs = false; if (Attributes.getOutputNumAttributes() < 1) { System.out.println("This algorithm can not process datasets without outputs"); System.out.println("Zero-valued output generated"); noOutputs = true; } // Initialize and fill our own tables X = new double[nData][nInputs]; missing = new boolean[nData][nInputs]; C = new int[nData]; // Maximum and minimum of inputs iMaximum = new double[nInputs]; iMinimum = new double[nInputs]; // Maximum and minimum for output data oMaximum = 0; oMinimum = 0; // All values are casted into double/integer nClasses = 0; for (int i = 0; i < X.length; i++) { Instance inst = IS.getInstance(i); for (int j = 0; j < nInputs; j++) { X[i][j] = IS.getInputNumericValue(i, j); missing[i][j] = inst.getInputMissingValues(j); if (X[i][j] > iMaximum[j] || i == 0) { iMaximum[j] = X[i][j]; } if (X[i][j] < iMinimum[j] || i == 0) { iMinimum[j] = X[i][j]; } } if (noOutputs) { C[i] = 0; } else { C[i] = (int) IS.getOutputNumericValue(i, 0); } if (C[i] > nClasses) { nClasses = C[i]; } } nClasses++; System.out.println("Number of classes=" + nClasses); } catch (Exception e) { 
System.out.println("DBG: Exception in readSet"); e.printStackTrace(); } }
/**
 * Main function: trains a decremental RBF network on the training dataset, evaluates it on the
 * training and test datasets, and writes the result files and the network description.
 *
 * @param args the command line arguments. Only the first one is processed: either a help flag
 *     ({@code --help}, {@code -help}, {@code -h}, {@code -?}) or the name of the file containing
 *     the parameters; when absent, {@code parameters.txt} is used.
 * @throws IOException declared for compatibility; failures inside the run are rethrown as
 *     {@link InternalError} with the original exception attached as the cause.
 */
public static void main(String[] args) throws IOException {
  try {
    // Help required
    if (args.length > 0) {
      String first = args[0];
      if (first.equals("--help")
          || first.equals("-help")
          || first.equals("-h")
          || first.equals("-?")) {
        doHelp();
        return;
      }
    }

    System.out.println("- Executing doRbfnDec " + args.length);

    // Reading parameters
    String paramFile = (args.length > 0) ? args[0] : "parameters.txt";
    setParameters(paramFile);
    System.out.println(" - Parameters file: " + paramFile);

    // Random generator setup
    if (reallySeed) {
      Randomize.setSeed((long) seed);
    }

    // Reading training dataset
    ProcDataset Dtrn = new ProcDataset(trnFile, true);
    System.out.println("Modeling Dataset");
    Dtrn.processModelDataset();
    int nInpt = Dtrn.getninputs();
    int nOutpl = 1; // a single output is modelled
    int ndata = Dtrn.getndata();
    double[][] X = Dtrn.getX();
    double[] auxY = Dtrn.getY();

    // The network expects the targets as a column matrix.
    double[][] Y = new double[ndata][1];
    for (int i = 0; i < ndata; i++) {
      Y[i][0] = auxY[i];
    }

    // Building and training the net
    Rbfn net = new Rbfn(nNeuronsIni, X, ndata, nInpt, nOutpl);
    net.decremental(X, Y, ndata, percent, alfa);
    double[] obtained = new double[ndata];
    net.testModeling(X, ndata, obtained);
    Dtrn.generateResultsModeling(outTrnFile, auxY, obtained);

    // Test dataset: only inputs and expected outputs are needed for evaluation.
    ProcDataset Dtst = new ProcDataset(tstFile, false);
    Dtst.processModelDataset();
    ndata = Dtst.getndata();
    X = Dtst.getX();
    auxY = Dtst.getY();
    obtained = new double[ndata];
    net.testModeling(X, ndata, obtained);
    Dtst.generateResultsModeling(outTstFile, auxY, obtained);

    RBFUtils.createOutputFile(trnFile, outRbfFile);
    net.printRbfn(outRbfFile);

    if (Dtrn.datasetType() == 2) {
      System.out.println("This is not a clustering algorithm");
    }

    System.out.println(
        "- End of doRbfnDec. See results in output files named according to "
            + paramFile
            + " parameters file.");
  } catch (Exception e) {
    // Keep the original exception reachable through getCause() for diagnosis.
    InternalError failure = new InternalError(e.toString());
    failure.initCause(e);
    throw failure;
  }
}
/** Process the training and test files provided in the parameters file to the constructor. */ public void process() { // declarations double[] outputs; double[] outputs2; Instance neighbor; double dist, mean; int actual; Randomize rnd = new Randomize(); Instance ex; gCenter kmeans = null; int iterations = 0; double E; double prevE; int totalMissing = 0; boolean allMissing = true; rnd.setSeed(semilla); // PROCESS try { // Load in memory a dataset that contains a classification problem IS.readSet(input_train_name, true); int in = 0; int out = 0; ndatos = IS.getNumInstances(); nvariables = Attributes.getNumAttributes(); nentradas = Attributes.getInputNumAttributes(); nsalidas = Attributes.getOutputNumAttributes(); X = new String[ndatos][nvariables]; // matrix with transformed data kmeans = new gCenter(K, ndatos, nvariables); timesSeen = new FreqList[nvariables]; mostCommon = new String[nvariables]; // first, we choose k 'means' randomly from all // instances totalMissing = 0; for (int i = 0; i < ndatos; i++) { Instance inst = IS.getInstance(i); if (inst.existsAnyMissingValue()) totalMissing++; } if (totalMissing == ndatos) allMissing = true; else allMissing = false; for (int numMeans = 0; numMeans < K; numMeans++) { do { actual = (int) (ndatos * rnd.Rand()); ex = IS.getInstance(actual); } while (ex.existsAnyMissingValue() && !allMissing); kmeans.copyCenter(ex, numMeans); } // now, iterate adjusting clusters' centers and // instances to them prevE = 0; iterations = 0; do { for (int i = 0; i < ndatos; i++) { Instance inst = IS.getInstance(i); kmeans.setClusterOf(inst, i); } // set new centers kmeans.recalculateCenters(IS); // compute RMSE E = 0; for (int i = 0; i < ndatos; i++) { Instance inst = IS.getInstance(i); E += kmeans.distance(inst, kmeans.getClusterOf(i)); } iterations++; // System.out.println(iterations+"\t"+E); if (Math.abs(prevE - E) == 0) iterations = maxIter; else prevE = E; } while (E > minError && iterations < maxIter); for (int i = 0; i < ndatos; i++) 
{ Instance inst = IS.getInstance(i); in = 0; out = 0; for (int j = 0; j < nvariables; j++) { Attribute a = Attributes.getAttribute(j); direccion = a.getDirectionAttribute(); tipo = a.getType(); if (direccion == Attribute.INPUT) { if (tipo != Attribute.NOMINAL && !inst.getInputMissingValues(in)) { X[i][j] = new String(String.valueOf(inst.getInputRealValues(in))); } else { if (!inst.getInputMissingValues(in)) X[i][j] = inst.getInputNominalValues(in); else { actual = kmeans.getClusterOf(i); X[i][j] = new String(kmeans.valueAt(actual, j)); } } in++; } else { if (direccion == Attribute.OUTPUT) { if (tipo != Attribute.NOMINAL && !inst.getOutputMissingValues(out)) { X[i][j] = new String(String.valueOf(inst.getOutputRealValues(out))); } else { if (!inst.getOutputMissingValues(out)) X[i][j] = inst.getOutputNominalValues(out); else { actual = kmeans.getClusterOf(i); X[i][j] = new String(kmeans.valueAt(actual, j)); } } out++; } } } } } catch (Exception e) { System.out.println("Dataset exception = " + e); e.printStackTrace(); System.exit(-1); } write_results(output_train_name); /** ************************************************************************************ */ // does a test file associated exist? 
if (input_train_name.compareTo(input_test_name) != 0) { try { // Load in memory a dataset that contains a classification problem IStest.readSet(input_test_name, false); int in = 0; int out = 0; ndatos = IStest.getNumInstances(); nvariables = Attributes.getNumAttributes(); nentradas = Attributes.getInputNumAttributes(); nsalidas = Attributes.getOutputNumAttributes(); for (int i = 0; i < ndatos; i++) { Instance inst = IStest.getInstance(i); in = 0; out = 0; for (int j = 0; j < nvariables; j++) { Attribute a = Attributes.getAttribute(j); direccion = a.getDirectionAttribute(); tipo = a.getType(); if (direccion == Attribute.INPUT) { if (tipo != Attribute.NOMINAL && !inst.getInputMissingValues(in)) { X[i][j] = new String(String.valueOf(inst.getInputRealValues(in))); } else { if (!inst.getInputMissingValues(in)) X[i][j] = inst.getInputNominalValues(in); else { actual = kmeans.getClusterOf(i); X[i][j] = new String(kmeans.valueAt(actual, j)); } } in++; } else { if (direccion == Attribute.OUTPUT) { if (tipo != Attribute.NOMINAL && !inst.getOutputMissingValues(out)) { X[i][j] = new String(String.valueOf(inst.getOutputRealValues(out))); } else { if (!inst.getOutputMissingValues(out)) X[i][j] = inst.getOutputNominalValues(out); else { actual = kmeans.getClusterOf(i); X[i][j] = new String(kmeans.valueAt(actual, j)); } } out++; } } } } } catch (Exception e) { System.out.println("Dataset exception = " + e); e.printStackTrace(); System.exit(-1); } write_results(output_test_name); } }