/** * Process a dataset for classification * * @return Nothing */ public void processClassifierDataset() throws IOException { try { ndata = IS.getNumInstances(); ninputs = Attributes.getInputNumAttributes(); nvariables = ninputs + Attributes.getOutputNumAttributes(); // Check that there is only one output variable and // it is nominal if (Attributes.getOutputNumAttributes() > 1) { System.out.println("This algorithm can not process MIMO datasets"); System.out.println("All outputs but the first one will be removed"); } boolean noOutputs = false; if (Attributes.getOutputNumAttributes() < 1) { System.out.println("This algorithm cannot process datasets without outputs"); System.out.println("Zero-valued output generated"); noOutputs = true; } // Initialice and fill our own tables X = new double[ndata][ninputs]; missing = new boolean[ndata][ninputs]; C = new int[ndata]; // Maximum and minimum of inputs imax = new double[ninputs]; imin = new double[ninputs]; // Maximum and minimum for output data omax = 0; omin = 0; // All values are casted into double/integer nclasses = 0; for (int i = 0; i < X.length; i++) { Instance inst = IS.getInstance(i); for (int j = 0; j < ninputs; j++) { X[i][j] = IS.getInputNumericValue(i, j); missing[i][j] = inst.getInputMissingValues(j); if (X[i][j] > imax[j] || i == 0) imax[j] = X[i][j]; if (X[i][j] < imin[j] || i == 0) imin[j] = X[i][j]; } if (noOutputs) C[i] = 0; else C[i] = (int) IS.getOutputNumericValue(i, 0); if (C[i] > nclasses) nclasses = C[i]; } nclasses++; System.out.println("Number of classes=" + nclasses); } catch (Exception e) { System.out.println("DBG: Exception in readSet"); e.printStackTrace(); } }
/** * Process a dataset file for a clustering problem. * * @param nfexamples Name of the dataset file * @param train The dataset file is for training or for test * @throws java.io.IOException if there is any semantical, lexical or sintactical error in the * input file. */ public void processClusterDataset(String nfexamples, boolean train) throws IOException { try { // Load in memory a dataset that contains a classification problem IS.readSet(nfexamples, train); nData = IS.getNumInstances(); nInputs = Attributes.getInputNumAttributes(); nVariables = nInputs + Attributes.getOutputNumAttributes(); if (Attributes.getOutputNumAttributes() != 0) { System.out.println("This algorithm can not process datasets with outputs"); System.out.println("All outputs will be removed"); } // Initialize and fill our own tables X = new double[nData][nInputs]; missing = new boolean[nData][nInputs]; // Maximum and minimum of inputs iMaximum = new double[nInputs]; iMinimum = new double[nInputs]; // Maximum and minimum for output data oMaximum = 0; oMinimum = 0; // All values are casted into double/integer nClasses = 0; for (int i = 0; i < X.length; i++) { Instance inst = IS.getInstance(i); for (int j = 0; j < nInputs; j++) { X[i][j] = IS.getInputNumericValue(i, j); missing[i][j] = inst.getInputMissingValues(j); if (X[i][j] > iMaximum[j] || i == 0) { iMaximum[j] = X[i][j]; } if (X[i][j] < iMinimum[j] || i == 0) { iMinimum[j] = X[i][j]; } } } } catch (Exception e) { System.out.println("DBG: Exception in readSet"); e.printStackTrace(); } }
/** * Process a dataset for modelling * * @return Nothing */ public void processModelDataset() throws IOException { try { // Load in memory a dataset that contains a classification problem // IS.readSet(nfejemplos,train); ndata = IS.getNumInstances(); ninputs = Attributes.getInputNumAttributes(); nvariables = ninputs + Attributes.getOutputNumAttributes(); if (Attributes.getOutputNumAttributes() > 1) { System.out.println("This algorithm can not process MIMO datasets"); System.out.println("All outputs but the first one will be removed"); } boolean noOutputs = false; if (Attributes.getOutputNumAttributes() < 1) { System.out.println("This algorithm can not process datasets without outputs"); System.out.println("Zero-valued output generated"); noOutputs = true; } // Initialice and fill our own tables X = new double[ndata][ninputs]; missing = new boolean[ndata][ninputs]; Y = new double[ndata]; // Maximum and minimum of inputs imax = new double[ninputs]; imin = new double[ninputs]; // Maximum and minimum for output data omax = 0; omin = 0; // All values are casted into double/integer nclasses = 0; for (int i = 0; i < X.length; i++) { Instance inst = IS.getInstance(i); for (int j = 0; j < ninputs; j++) { X[i][j] = IS.getInputNumericValue(i, j); missing[i][j] = inst.getInputMissingValues(j); if (X[i][j] > imax[j] || i == 0) imax[j] = X[i][j]; if (X[i][j] < imin[j] || i == 0) imin[j] = X[i][j]; } if (noOutputs) Y[i] = 0; else Y[i] = IS.getOutputNumericValue(i, 0); if (Y[i] > omax || i == 0) omax = Y[i]; if (Y[i] < omin || i == 0) omin = Y[i]; } } catch (Exception e) { System.out.println("DBG: Exception in readSet"); e.printStackTrace(); } }
/** * Returs the type of the dataset 0-Modelling, 1-Clasiffication, 2-Clustering * * @return type of the dataset 0-Modelling, 1-Clasiffication, 2-Clustering */ public int datasetType() { if (Attributes.getOutputNumAttributes() >= 1) { if (Attributes.hasNominalAttributes()) return (1); else return (0); } else return (2); }
/** Process the training and test files provided in the parameters file to the constructor. */ public void process() { double[] outputs; double[] outputs2; Instance neighbor; double dist, mean; int actual; int[] N = new int[nneigh]; double[] Ndist = new double[nneigh]; boolean allNull; svm_problem SVMp = null; svm_parameter SVMparam = new svm_parameter(); svm_model svr = null; svm_node SVMn[]; double[] outputsCandidate = null; boolean same = true; Vector instancesSelected = new Vector(); Vector instancesSelected2 = new Vector(); // SVM PARAMETERS SVMparam.C = C; SVMparam.cache_size = 10; // 10MB of cache SVMparam.degree = degree; SVMparam.eps = eps; SVMparam.gamma = gamma; SVMparam.nr_weight = 0; SVMparam.nu = nu; SVMparam.p = p; SVMparam.shrinking = shrinking; SVMparam.probability = 0; if (kernelType.compareTo("LINEAR") == 0) { SVMparam.kernel_type = svm_parameter.LINEAR; } else if (kernelType.compareTo("POLY") == 0) { SVMparam.kernel_type = svm_parameter.POLY; } else if (kernelType.compareTo("RBF") == 0) { SVMparam.kernel_type = svm_parameter.RBF; } else if (kernelType.compareTo("SIGMOID") == 0) { SVMparam.kernel_type = svm_parameter.SIGMOID; } SVMparam.svm_type = svm_parameter.EPSILON_SVR; try { // Load in memory a dataset that contains a classification problem IS.readSet(input_train_name, true); int in = 0; int out = 0; ndatos = IS.getNumInstances(); nvariables = Attributes.getNumAttributes(); nentradas = Attributes.getInputNumAttributes(); nsalidas = Attributes.getOutputNumAttributes(); X = new String[ndatos][2]; // matrix with transformed data mostCommon = new String[nvariables]; SVMp = new svm_problem(); SVMp.l = ndatos; SVMp.y = new double[SVMp.l]; SVMp.x = new svm_node[SVMp.l][nentradas + 1]; for (int l = 0; l < SVMp.l; l++) { for (int n = 0; n < Attributes.getInputNumAttributes() + 1; n++) { SVMp.x[l][n] = new svm_node(); } } for (int i = 0; i < ndatos; i++) { Instance inst = IS.getInstance(i); SVMp.y[i] = inst.getAllOutputValues()[0]; for (int n = 0; n < Attributes.getInputNumAttributes(); n++) { SVMp.x[i][n].index = n; SVMp.x[i][n].value = inst.getAllInputValues()[n]; SVMp.y[i] = inst.getAllOutputValues()[0]; } // end of instance SVMp.x[i][nentradas].index = -1; } if (svm.svm_check_parameter(SVMp, SVMparam) != null) { System.out.println("SVM parameter error in training:"); System.out.println(svm.svm_check_parameter(SVMp, SVMparam)); System.exit(-1); } // train the SVM if (ndatos > 0) { svr = svm.svm_train(SVMp, SVMparam); } for (int i = 0; i < ndatos; i++) { Instance inst = IS.getInstance(i); X[i][0] = new String(String.valueOf(inst.getAllOutputValues()[0])); // the values used for regression SVMn = new svm_node[Attributes.getInputNumAttributes() + 1]; for (int n = 0; n < Attributes.getInputNumAttributes(); n++) { SVMn[n] = new svm_node(); SVMn[n].index = n; SVMn[n].value = inst.getAllInputValues()[n]; } SVMn[nentradas] = new svm_node(); SVMn[nentradas].index = -1; // pedict the class X[i][1] = new String(String.valueOf((svm.svm_predict(svr, SVMn)))); } } catch (Exception e) { System.out.println("Dataset exception = " + e); e.printStackTrace(); System.exit(-1); } write_results(output_train_name); /** ************************************************************************************ */ try { // Load in memory a dataset that contains a classification // problem IS.readSet(input_test_name, false); int in = 0; int out = 0; ndatos = IS.getNumInstances(); nvariables = Attributes.getNumAttributes(); nentradas = Attributes.getInputNumAttributes(); nsalidas = Attributes.getOutputNumAttributes(); X = new String[ndatos][2]; // matrix with transformed data // data mostCommon = new String[nvariables]; for (int i = 0; i < ndatos; i++) { Instance inst = IS.getInstance(i); X[i][0] = new String(String.valueOf(inst.getAllOutputValues()[0])); SVMn = new svm_node[Attributes.getInputNumAttributes() + 1]; for (int n = 0; n < Attributes.getInputNumAttributes(); n++) { SVMn[n] = new svm_node(); SVMn[n].index = n; SVMn[n].value = inst.getAllInputValues()[n]; } SVMn[nentradas] = new svm_node(); SVMn[nentradas].index = -1; // pedict the class X[i][1] = new String(String.valueOf(svm.svm_predict(svr, SVMn))); } } catch (Exception e) { System.out.println("Dataset exception = " + e); e.printStackTrace(); System.exit(-1); } System.out.println("escribiendo test"); write_results(output_test_name); }
/** * Process a dataset file for a classification problem. * * @param nfejemplos Name of the dataset file * @param train The dataset file is for training or for test * @throws java.io.IOException if there is any semantical, lexical or sintactical error in the * input file. */ public void processClassifierDataset(String nfejemplos, boolean train) throws IOException { try { // Load in memory a dataset that contains a classification problem IS.readSet(nfejemplos, train); nData = IS.getNumInstances(); nInputs = Attributes.getInputNumAttributes(); nVariables = nInputs + Attributes.getOutputNumAttributes(); // Check that there is only one output variable and // it is nominal if (Attributes.getOutputNumAttributes() > 1) { System.out.println("This algorithm can not process MIMO datasets"); System.out.println("All outputs but the first one will be removed"); } boolean noOutputs = false; if (Attributes.getOutputNumAttributes() < 1) { System.out.println("This algorithm can not process datasets without outputs"); System.out.println("Zero-valued output generated"); noOutputs = true; } // Initialize and fill our own tables X = new double[nData][nInputs]; missing = new boolean[nData][nInputs]; C = new int[nData]; // Maximum and minimum of inputs iMaximum = new double[nInputs]; iMinimum = new double[nInputs]; // Maximum and minimum for output data oMaximum = 0; oMinimum = 0; // All values are casted into double/integer nClasses = 0; for (int i = 0; i < X.length; i++) { Instance inst = IS.getInstance(i); for (int j = 0; j < nInputs; j++) { X[i][j] = IS.getInputNumericValue(i, j); missing[i][j] = inst.getInputMissingValues(j); if (X[i][j] > iMaximum[j] || i == 0) { iMaximum[j] = X[i][j]; } if (X[i][j] < iMinimum[j] || i == 0) { iMinimum[j] = X[i][j]; } } if (noOutputs) { C[i] = 0; } else { C[i] = (int) IS.getOutputNumericValue(i, 0); } if (C[i] > nClasses) { nClasses = C[i]; } } nClasses++; System.out.println("Number of classes=" + nClasses); } catch (Exception e) { System.out.println("DBG: Exception in readSet"); e.printStackTrace(); } }
/** * This method reads all the information in a DB and load it to memory. * * @param fileName is the database file name. * @param isTrain is a flag that indicate if the database is for a train or for a test. * @throws DatasetException if there is any semantical error in the input file. * @throws HeaderFormatException if there is any lexical or sintactical error in the header of the * input file */ public void readSet(String fileName, boolean isTrain) throws DatasetException, HeaderFormatException { String line; System.out.println("Opening the file: " + fileName + "."); // Parsing the header of the DB. errorLogger = new FormatErrorKeeper(); // Declaring an instance parser InstanceParser parser = new InstanceParser(fileName, isTrain); // Reading information in the header, i.e., @relation, @attribute, @inputs and @outputs parseHeader(parser, isTrain); System.out.println( " The number of output attributes is: " + Attributes.getOutputNumAttributes()); // The attributes statistics are init if we are in train mode. if (isTrain && Attributes.getOutputNumAttributes() == 1) { Attributes.initStatistics(); } // A temporal vector is used to store the instances read. System.out.println("\n\n > Reading the data "); Vector tempSet = new Vector(1000, 100000); while ((line = parser.getLine()) != null) { // System.out.println (" > Data line: " + line ); tempSet.addElement(new Instance(line, isTrain, tempSet.size())); } // The vector of instances is converted to an array of instances. int sizeInstance = tempSet.size(); System.out.println(" > Number of instances read: " + tempSet.size()); instanceSet = new Instance[sizeInstance]; for (int i = 0; i < sizeInstance; i++) { instanceSet[i] = (Instance) tempSet.elementAt(i); } // System.out.println("After converting all instances"); // System.out.println("The error logger has any error: "+errorLogger.getNumErrors()); if (errorLogger.getNumErrors() > 0) { System.out.println( "There has been " + errorLogger.getAllErrors().size() + " errors in the Dataset format."); for (int k = 0; k < errorLogger.getNumErrors(); k++) { errorLogger.getError(k).print(); } throw new DatasetException( "There has been " + errorLogger.getAllErrors().size() + " errors in the Dataset format", errorLogger.getAllErrors()); } System.out.println( "\n > Finishing the statistics: (isTrain)" + isTrain + ", (# out attributes)" + Attributes.getOutputNumAttributes()); // If being on a train dataset, the statistics are finished if (isTrain && Attributes.getOutputNumAttributes() == 1) { Attributes.finishStatistics(); } System.out.println(" >> File LOADED CORRECTLY!!"); } // end of InstanceSet constructor.
/** Process the training and test files provided in the parameters file to the constructor. */ public void process() { // declarations double[] outputs; double[] outputs2; Instance neighbor; double dist, mean; int actual; Randomize rnd = new Randomize(); Instance ex; gCenter kmeans = null; int iterations = 0; double E; double prevE; int totalMissing = 0; boolean allMissing = true; rnd.setSeed(semilla); // PROCESS try { // Load in memory a dataset that contains a classification problem IS.readSet(input_train_name, true); int in = 0; int out = 0; ndatos = IS.getNumInstances(); nvariables = Attributes.getNumAttributes(); nentradas = Attributes.getInputNumAttributes(); nsalidas = Attributes.getOutputNumAttributes(); X = new String[ndatos][nvariables]; // matrix with transformed data kmeans = new gCenter(K, ndatos, nvariables); timesSeen = new FreqList[nvariables]; mostCommon = new String[nvariables]; // first, we choose k 'means' randomly from all // instances totalMissing = 0; for (int i = 0; i < ndatos; i++) { Instance inst = IS.getInstance(i); if (inst.existsAnyMissingValue()) totalMissing++; } if (totalMissing == ndatos) allMissing = true; else allMissing = false; for (int numMeans = 0; numMeans < K; numMeans++) { do { actual = (int) (ndatos * rnd.Rand()); ex = IS.getInstance(actual); } while (ex.existsAnyMissingValue() && !allMissing); kmeans.copyCenter(ex, numMeans); } // now, iterate adjusting clusters' centers and // instances to them prevE = 0; iterations = 0; do { for (int i = 0; i < ndatos; i++) { Instance inst = IS.getInstance(i); kmeans.setClusterOf(inst, i); } // set new centers kmeans.recalculateCenters(IS); // compute RMSE E = 0; for (int i = 0; i < ndatos; i++) { Instance inst = IS.getInstance(i); E += kmeans.distance(inst, kmeans.getClusterOf(i)); } iterations++; // System.out.println(iterations+"\t"+E); if (Math.abs(prevE - E) == 0) iterations = maxIter; else prevE = E; } while (E > minError && iterations < maxIter); for (int i = 0; i < ndatos; i++) { Instance inst = IS.getInstance(i); in = 0; out = 0; for (int j = 0; j < nvariables; j++) { Attribute a = Attributes.getAttribute(j); direccion = a.getDirectionAttribute(); tipo = a.getType(); if (direccion == Attribute.INPUT) { if (tipo != Attribute.NOMINAL && !inst.getInputMissingValues(in)) { X[i][j] = new String(String.valueOf(inst.getInputRealValues(in))); } else { if (!inst.getInputMissingValues(in)) X[i][j] = inst.getInputNominalValues(in); else { actual = kmeans.getClusterOf(i); X[i][j] = new String(kmeans.valueAt(actual, j)); } } in++; } else { if (direccion == Attribute.OUTPUT) { if (tipo != Attribute.NOMINAL && !inst.getOutputMissingValues(out)) { X[i][j] = new String(String.valueOf(inst.getOutputRealValues(out))); } else { if (!inst.getOutputMissingValues(out)) X[i][j] = inst.getOutputNominalValues(out); else { actual = kmeans.getClusterOf(i); X[i][j] = new String(kmeans.valueAt(actual, j)); } } out++; } } } } } catch (Exception e) { System.out.println("Dataset exception = " + e); e.printStackTrace(); System.exit(-1); } write_results(output_train_name); /** ************************************************************************************ */ // does a test file associated exist? if (input_train_name.compareTo(input_test_name) != 0) { try { // Load in memory a dataset that contains a classification problem IStest.readSet(input_test_name, false); int in = 0; int out = 0; ndatos = IStest.getNumInstances(); nvariables = Attributes.getNumAttributes(); nentradas = Attributes.getInputNumAttributes(); nsalidas = Attributes.getOutputNumAttributes(); for (int i = 0; i < ndatos; i++) { Instance inst = IStest.getInstance(i); in = 0; out = 0; for (int j = 0; j < nvariables; j++) { Attribute a = Attributes.getAttribute(j); direccion = a.getDirectionAttribute(); tipo = a.getType(); if (direccion == Attribute.INPUT) { if (tipo != Attribute.NOMINAL && !inst.getInputMissingValues(in)) { X[i][j] = new String(String.valueOf(inst.getInputRealValues(in))); } else { if (!inst.getInputMissingValues(in)) X[i][j] = inst.getInputNominalValues(in); else { actual = kmeans.getClusterOf(i); X[i][j] = new String(kmeans.valueAt(actual, j)); } } in++; } else { if (direccion == Attribute.OUTPUT) { if (tipo != Attribute.NOMINAL && !inst.getOutputMissingValues(out)) { X[i][j] = new String(String.valueOf(inst.getOutputRealValues(out))); } else { if (!inst.getOutputMissingValues(out)) X[i][j] = inst.getOutputNominalValues(out); else { actual = kmeans.getClusterOf(i); X[i][j] = new String(kmeans.valueAt(actual, j)); } } out++; } } } } } catch (Exception e) { System.out.println("Dataset exception = " + e); e.printStackTrace(); System.exit(-1); } write_results(output_test_name); } }
/** Process the training and test files provided in the parameters file to the constructor. */ public void process() { try { // Load in memory a dataset that contains a classification problem IS.readSet(input_train_name, true); int in = 0; int out = 0; ndatos = IS.getNumInstances(); nvariables = Attributes.getNumAttributes(); nentradas = Attributes.getInputNumAttributes(); nsalidas = Attributes.getOutputNumAttributes(); X = new String[ndatos][nvariables]; // matrix with transformed data boolean[] isMissed = new boolean[ndatos]; // vector which points out instances with missed data try { FileWriter file_write = new FileWriter(output_train_name); file_write.write(IS.getHeader()); // now, print the normalized data file_write.write("@data\n"); // file_write.close(); PrintWriter pw = new PrintWriter(file_write); for (int i = 0; i < ndatos; i++) { Instance inst = IS.getInstance(i); if (!inst.existsAnyMissingValue()) { inst.printAsOriginal(pw); // file_write.write(inst.toString()); // DOES NOT WRITE BACK NON-DEF DIRECTION // ATTRIBUTES!!!! file_write.write("\n"); } } pw.close(); file_write.close(); } catch (IOException e) { System.out.println("IO exception = " + e); System.exit(-1); } } catch (Exception e) { System.out.println("Dataset exception = " + e); e.printStackTrace(); System.exit(-1); } // does a test file associated exist? if (input_train_name.compareTo(input_test_name) != 0) { try { // Load in memory a dataset that contains a classification problem IS.readSet(input_test_name, false); int in = 0; int out = 0; ndatos = IS.getNumInstances(); nvariables = Attributes.getNumAttributes(); nentradas = Attributes.getInputNumAttributes(); nsalidas = Attributes.getOutputNumAttributes(); X = new String[ndatos][nvariables]; // matrix with transformed data boolean[] isMissed = new boolean[ndatos]; // vector which points out instances with missed data try { FileWriter file_write = new FileWriter(output_test_name); file_write.write(IS.getHeader()); // now, print the normalized data file_write.write("@data\n"); PrintWriter pw = new PrintWriter(file_write); for (int i = 0; i < ndatos; i++) { Instance inst = IS.getInstance(i); if (!inst.existsAnyMissingValue()) { inst.printAsOriginal(pw); file_write.write("\n"); } } pw.close(); file_write.close(); } catch (IOException e) { System.out.println("IO exception = " + e); System.exit(-1); } } catch (Exception e) { System.out.println("Dataset exception = " + e); e.printStackTrace(); System.exit(-1); } } // write_results(); / since there ins't any data transformation, is not needed }
/** Process the training and test files provided in the parameters file to the constructor. */ public void process() { double[] outputs; double[] outputs2; try { FileWriter file_write = new FileWriter(output_train_name); try { // Load in memory a dataset that contains a classification problem IS.readSet(input_train_name, true); int in = 0; int out = 0; int in2 = 0; int out2 = 0; int lastMissing = -1; boolean fin = false; boolean stepNext = false; ndatos = IS.getNumInstances(); nvariables = Attributes.getNumAttributes(); nentradas = Attributes.getInputNumAttributes(); nsalidas = Attributes.getOutputNumAttributes(); String[] row = null; X = new Vector[ndatos]; // matrix with transformed data for (int i = 0; i < ndatos; i++) X[i] = new Vector(); timesSeen = new FreqList[nvariables]; mostCommon = new String[nvariables]; file_write.write(IS.getHeader()); // now, print the normalized data file_write.write("@data\n"); // now, search for missed data, and replace them with // the most common value for (int i = 0; i < ndatos; i++) { Instance inst = IS.getInstance(i); in = 0; out = 0; row = new String[nvariables]; for (int j = 0; j < nvariables; j++) { Attribute a = Attributes.getAttribute(j); direccion = a.getDirectionAttribute(); tipo = a.getType(); if (direccion == Attribute.INPUT) { if (tipo != Attribute.NOMINAL && !inst.existsAnyMissingValue()) { row[j] = new String(String.valueOf(inst.getInputRealValues(in))); } else { if (!inst.existsAnyMissingValue()) row[j] = inst.getInputNominalValues(in); else { // missing data outputs = inst.getAllOutputValues(); in2 = 0; out2 = 0; for (int attr = 0; attr < nvariables; attr++) { Attribute b = Attributes.getAttribute(attr); direccion = b.getDirectionAttribute(); tipo = b.getType(); if (direccion == Attribute.INPUT) { if (tipo != Attribute.NOMINAL && !inst.getInputMissingValues(in2)) { row[attr] = new String(String.valueOf(inst.getInputRealValues(in2))); } else { if (!inst.getInputMissingValues(in2)) row[attr] = inst.getInputNominalValues(in2); } in2++; } else { if (direccion == Attribute.OUTPUT) { if (tipo != Attribute.NOMINAL && !inst.getOutputMissingValues(out2)) { row[attr] = new String(String.valueOf(inst.getOutputRealValues(out2))); } else { if (!inst.getOutputMissingValues(out2)) row[attr] = inst.getOutputNominalValues(out2); } out2++; } } } // make frecuencies for each attribute for (int attr = 0; attr < nvariables; attr++) { Attribute b = Attributes.getAttribute(attr); direccion = b.getDirectionAttribute(); tipo = b.getType(); if (direccion == Attribute.INPUT && inst.getInputMissingValues(attr)) { lastMissing = attr; timesSeen[attr] = new FreqList(); for (int m = 0; m < ndatos; m++) { Instance inst2 = IS.getInstance(m); outputs2 = inst2.getAllOutputValues(); boolean sameClass = true; // are they same concept instances?? for (int k = 0; k < nsalidas && sameClass; k++) if (outputs[k] != outputs2[k]) sameClass = false; if (sameClass) { if (tipo != Attribute.NOMINAL && !inst2.getInputMissingValues(attr)) { timesSeen[attr].AddElement( new String(String.valueOf(inst2.getInputRealValues(attr)))); } else { if (!inst2.getInputMissingValues(attr)) { timesSeen[attr].AddElement(inst2.getInputNominalValues(attr)); } } } } } } for (int attr = 0; attr < nvariables; attr++) { if (direccion == Attribute.INPUT && inst.getInputMissingValues(attr)) { timesSeen[attr].reset(); } } fin = false; stepNext = false; while (!fin) { in2 = 0; for (int attr = 0; attr < nvariables && !fin; attr++) { Attribute b = Attributes.getAttribute(attr); direccion = b.getDirectionAttribute(); tipo = b.getType(); if (direccion == Attribute.INPUT && inst.getInputMissingValues(in2)) { if (stepNext) { timesSeen[attr].iterate(); stepNext = false; } if (timesSeen[attr].outOfBounds()) { stepNext = true; if (attr == lastMissing) fin = true; timesSeen[attr].reset(); } if (!fin) row[attr] = ((ValueFreq) timesSeen[attr].getCurrent()) .getValue(); // replace missing data } in2++; } if (!fin) { stepNext = true; file_write.write(row[0]); for (int y = 1; y < nvariables; y++) { file_write.write("," + row[y]); } file_write.write("\n"); // X[i].addElement(row); // row = (String[])row.clone(); } } } } in++; } else { if (direccion == Attribute.OUTPUT) { if (tipo != Attribute.NOMINAL && !inst.getOutputMissingValues(out)) { row[j] = new String(String.valueOf(inst.getOutputRealValues(out))); } else { if (!inst.getOutputMissingValues(out)) row[j] = inst.getOutputNominalValues(out); else row[j] = new String("?"); } out++; } } } if (!inst.existsAnyMissingValue()) { file_write.write(row[0]); for (int y = 1; y < nvariables; y++) { file_write.write("," + row[y]); } file_write.write("\n"); } } } catch (Exception e) { System.out.println("Dataset exception = " + e); e.printStackTrace(); System.exit(-1); } file_write.close(); } catch (IOException e) { System.out.println("IO exception = " + e); e.printStackTrace(); System.exit(-1); } /** ************************************************************************************ */ // does a test file associated exist? if (input_train_name.compareTo(input_test_name) != 0) { try { FileWriter file_write = new FileWriter(output_test_name); try { // Load in memory a dataset that contains a classification problem IS.readSet(input_test_name, false); int in = 0; int out = 0; int in2 = 0; int out2 = 0; int lastMissing = -1; boolean fin = false; boolean stepNext = false; ndatos = IS.getNumInstances(); nvariables = Attributes.getNumAttributes(); nentradas = Attributes.getInputNumAttributes(); nsalidas = Attributes.getOutputNumAttributes(); String[] row = null; X = new Vector[ndatos]; // matrix with transformed data for (int i = 0; i < ndatos; i++) X[i] = new Vector(); timesSeen = new FreqList[nvariables]; mostCommon = new String[nvariables]; file_write.write(IS.getHeader()); // now, print the normalized data file_write.write("@data\n"); // now, search for missed data, and replace them with // the most common value for (int i = 0; i < ndatos; i++) { Instance inst = IS.getInstance(i); in = 0; out = 0; row = new String[nvariables]; for (int j = 0; j < nvariables; j++) { Attribute a = Attributes.getAttribute(j); direccion = a.getDirectionAttribute(); tipo = a.getType(); if (direccion == Attribute.INPUT) { if (tipo != Attribute.NOMINAL && !inst.existsAnyMissingValue()) { row[j] = new String(String.valueOf(inst.getInputRealValues(in))); } else { if (!inst.existsAnyMissingValue()) row[j] = inst.getInputNominalValues(in); else { // missing data outputs = inst.getAllOutputValues(); in2 = 0; out2 = 0; for (int attr = 0; attr < nvariables; attr++) { Attribute b = Attributes.getAttribute(attr); direccion = b.getDirectionAttribute(); tipo = b.getType(); if (direccion == Attribute.INPUT) { if (tipo != Attribute.NOMINAL && !inst.getInputMissingValues(in2)) { row[attr] = new String(String.valueOf(inst.getInputRealValues(in2))); } else { if (!inst.getInputMissingValues(in2)) row[attr] = inst.getInputNominalValues(in2); } in2++; } else { if (direccion == Attribute.OUTPUT) { if (tipo != Attribute.NOMINAL && !inst.getOutputMissingValues(out2)) { row[attr] = new String(String.valueOf(inst.getOutputRealValues(out2))); } else { if (!inst.getOutputMissingValues(out2)) row[attr] = inst.getOutputNominalValues(out2); } out2++; } } } // make frecuencies for each attribute for (int attr = 0; attr < nvariables; attr++) { Attribute b = Attributes.getAttribute(attr); direccion = b.getDirectionAttribute(); tipo = b.getType(); if (direccion == Attribute.INPUT && inst.getInputMissingValues(attr)) { lastMissing = attr; timesSeen[attr] = new FreqList(); for (int m = 0; m < ndatos; m++) { Instance inst2 = IS.getInstance(m); outputs2 = inst2.getAllOutputValues(); boolean sameClass = true; // are they same concept instances?? for (int k = 0; k < nsalidas && sameClass; k++) if (outputs[k] != outputs2[k]) sameClass = false; if (sameClass) { if (tipo != Attribute.NOMINAL && !inst2.getInputMissingValues(attr)) { timesSeen[attr].AddElement( new String(String.valueOf(inst2.getInputRealValues(attr)))); } else { if (!inst2.getInputMissingValues(attr)) { timesSeen[attr].AddElement(inst2.getInputNominalValues(attr)); } } } } } } for (int attr = 0; attr < nvariables; attr++) { if (direccion == Attribute.INPUT && inst.getInputMissingValues(attr)) { timesSeen[attr].reset(); } } fin = false; stepNext = false; while (!fin) { in2 = 0; for (int attr = 0; attr < nvariables && !fin; attr++) { Attribute b = Attributes.getAttribute(attr); direccion = b.getDirectionAttribute(); tipo = b.getType(); if (direccion == Attribute.INPUT && inst.getInputMissingValues(in2)) { if (stepNext) { timesSeen[attr].iterate(); stepNext = false; } if (timesSeen[attr].outOfBounds()) { stepNext = true; if (attr == lastMissing) fin = true; timesSeen[attr].reset(); } if (!fin) row[attr] = ((ValueFreq) timesSeen[attr].getCurrent()) .getValue(); // replace missing data } in2++; } if (!fin) { stepNext = true; file_write.write(row[0]); for (int y = 1; y < nvariables; y++) { file_write.write("," + row[y]); } file_write.write("\n"); // X[i].addElement(row); // row = (String[])row.clone(); } } } } in++; } else { if (direccion == Attribute.OUTPUT) { if (tipo != Attribute.NOMINAL && !inst.getOutputMissingValues(out)) { row[j] = new String(String.valueOf(inst.getOutputRealValues(out))); } else { if (!inst.getOutputMissingValues(out)) row[j] = inst.getOutputNominalValues(out); else row[j] = new String("?"); } out++; } } } if (!inst.existsAnyMissingValue()) { file_write.write(row[0]); for (int y = 1; y < nvariables; y++) { file_write.write("," + row[y]); } file_write.write("\n"); } } } catch (Exception e) { System.out.println("Dataset exception = " + e); e.printStackTrace(); System.exit(-1); } file_write.close(); } catch (IOException e) { System.out.println("IO exception = " + e); e.printStackTrace(); System.exit(-1); } } }