예제 #1
0
파일: Prism.java 프로젝트: Navieclipse/KEEL
  /**
   * Returns the fraction of correct instances of the instance's set for the rule 'regla'
   *
   * @param i Number of the rule
   * @return Fraction of correct instances of the instance's set for the rule 'regla'
   */
  private double getAccuracy(int i) {

    Instance instancia;
    double Accuracy;
    num_cubiertas = 0;
    num_correctas = 0;
    for (int k = 0; k < instancias.getNumInstances(); k++) {
      instancia = instancias.getInstance(k);
      cubierta = regla.reglaCubreInstancia(instancia);
      if (cubierta) {
        num_cubiertas++;
        clase = instancia.getOutputNominalValuesInt(0);
        if (clase == i) num_correctas++;
      }
    }
    Accuracy = (double) num_correctas / (double) num_cubiertas;
    if (num_cubiertas == 0) Accuracy = 0;
    return Accuracy;
  }
예제 #2
0
파일: Main.java 프로젝트: micyee/granada
  /** @param args the command line arguments */
  public static void main(String[] args) {
    ParserParameters.doParse(args[0]);
    LogManager.initLogManager();

    InstanceSet is = new InstanceSet();
    try {
      is.readSet(Parameters.trainInputFile, true);
    } catch (Exception e) {
      LogManager.printErr(e.toString());
      System.exit(1);
    }
    checkDataset();

    Discretizer dis;
    String name = Parameters.algorithmName;
    dis = new FayyadDiscretizer();
    dis.buildCutPoints(is);
    dis.applyDiscretization(Parameters.trainInputFile, Parameters.trainOutputFile);
    dis.applyDiscretization(Parameters.testInputFile, Parameters.testOutputFile);
    LogManager.closeLog();
  }
예제 #3
0
파일: Prism.java 프로젝트: Navieclipse/KEEL
  /**
   * Removes from the instance's set those instances that matches with the rule
   *
   * @param i Numebr of the rule
   */
  private void removeInstancesCovered(int i) {

    for (int k = 0; k < instancias.getNumInstances(); k++) {

      instancia = instancias.getInstance(k);
      /*System.out.print(k+" ");
         instancia.print();
      System.out.println();*/
      cubierta = regla.reglaCubreInstancia(instancia);
      if (cubierta) {
        //  System.out.println("CUBIERTA");
        clase = instancia.getOutputNominalValuesInt(0);
        // if(clase==i){
        instancias.removeInstance(k);
        instancia.print();
        System.out.println();
        k = k - 1;
        // }
      }
    }
  }
예제 #4
0
  // Write data matrix X to disk, in KEEL format
  private void write_results(String output) {
    // File OutputFile = new File(output_train_name.substring(1, output_train_name.length()-1));
    try {
      FileWriter file_write = new FileWriter(output);

      file_write.write(IS.getHeader());

      // now, print the normalized data
      file_write.write("@data\n");
      for (int i = 0; i < ndatos; i++) {
        file_write.write(X[i][0]);
        for (int j = 1; j < nvariables; j++) {
          file_write.write("," + X[i][j]);
        }
        file_write.write("\n");
      }
      file_write.close();
    } catch (IOException e) {
      System.out.println("IO exception = " + e);
      System.exit(-1);
    }
  }
예제 #5
0
파일: Prism.java 프로젝트: Navieclipse/KEEL
  /**
   * Constructor with all the attributes to initialize
   *
   * @param ficheroTrain Train file
   * @param ficheroTest Test file
   * @param fSalidaTr Out-put train file
   * @param fSalidaTst Out-put test file
   * @param fsalida Out-put file
   * @param semilla seed
   */
  public Prism(
      String ficheroTrain,
      String ficheroTest,
      String fSalidaTr,
      String fSalidaTst,
      String fsalida,
      long semilla) {

    ficheroSalida = fsalida;
    ficheroSalidaTr = fSalidaTr;
    ficheroSalidaTst = fSalidaTst;
    seed = semilla;

    datosTrain = new ConjDatos(); // datosEval = new ConjDatos();
    datosTest = new ConjDatos();

    train = new Dataset();
    test = new Dataset();
    s = new Selector(0, 0, 0.);
    conjunto_reglas = new ConjReglas();

    try {
      Randomize.setSeed(seed);
      System.out.println("la semilla es " + seed);
      train.leeConjunto(ficheroTrain, true);
      test.leeConjunto(ficheroTest, false); //
      if (train.hayAtributosContinuos() /*|| train.hayAtributosDiscretos()*/) {
        System.err.println("\nPrism may not work properly with real or integer attributes.\n");
        // System.exit(-1);
        hayContinuos = true;
      }
      if (!hayContinuos) {
        train.calculaMasComunes(); // eval.calculaMasComunes();
        test.calculaMasComunes();
        datosTrain =
            creaConjunto(
                train); // Leemos los datos de entrenamiento (todos seguidos como un
                        // String)//datosEval = creaConjunto(eval);
        datosTest = creaConjunto(test);

        valores = train.getX2(); // obtengo los valores nominales
        clases = train.getC2();
        clasitas = train.getC();
        /*System.out.println(train.getndatos());
        System.out.println(train.getnentradas());
        for(int i=0;i<train.getndatos();i++){
        	for(int j=0;j<train.getnentradas();j++)
        		System.out.print(valores[i][j]);
        	System.out.print(clases[i]);System.out.println(clasitas[i]);}*/
        // COMENZAMOS EL ALGORITMO PRISM
        // FOR EACH CLASS C
        clases = train.dameClases();
        int unavez = 0, candidato;
        for (int i = 0; i < train.getnclases(); i++) {
          System.out.println("CLASE :" + clases[i] + "\n");
          // initialize E to the instance set
          /*Cuando haya que inicializar de nuevo el conjunto de instancias no es necesario insertar aquellas que se eliminaron, sino que nos va a bastar con inicializar otra vez el conjunto mediante el fichero de entrenamiento. Por eso hay un metodo para insertar una instancia*/
          train.leeConjunto(ficheroTrain, false);
          nombre_atributos = train.dameNombres();
          instancias = train.getInstanceSet();

          // While E contains instances in class C
          while (train.hayInstanciasDeClaseC(i)) {
            // Create a rule R with an empty left-hand side that predicts class C
            regla = new Complejo(train.getnclases());
            regla.setClase(i);
            regla.adjuntaNombreAtributos(nombre_atributos);
            // esto lo hacemos solo aqui pq luego vamos quitando selectores del almacen
            almacen = hazSelectores(train);
            almacen.adjuntaNombreAtributos(nombre_atributos);
            do {
              // FOR EACH ATTRIBUTE A NOT MENTIONED IN R, AND EACH VALUE V
              accuracy_ant = -1.;
              p = 0;
              int seleccionados[] = new int[almacen.size()];
              for (int jj = 0; jj < almacen.size(); jj++) seleccionados[jj] = 0;
              System.out.println();
              for (int j = 0; j < almacen.size(); j++) {
                // tenemos que quitar el selector anterior
                if (j > 0) regla.removeSelector(s);
                s = almacen.getSelector(j);
                // if(i==0)
                s.print();
                // CONSIDER ADDING THE CONDITION A=V TO THE LHS OF R
                regla.addSelector(s);
                accuracy = getAccuracy(i);
                // if(i==0)	{
                System.out.println("correctas " + num_correctas + " cubiertas " + num_cubiertas);
                System.out.println("Acurracy " + accuracy);
                // }

                if ((accuracy > accuracy_ant)
                    || ((accuracy == accuracy_ant) && (num_correctas > p))) {

                  // if((accuracy==accuracy_ant) &&(num_correctas>p)){
                  // System.out.println("atn "+accuracy_ant);
                  // System.out.println("ahora "+accuracy);
                  accuracy_ant = accuracy;
                  seleccionado = j;
                  p = num_correctas;

                  // si se encuentra un superior hay que quitar
                  // todo lo q se hay ido almacenando en esta iteracion
                  for (int jj = 0; jj < almacen.size(); jj++) seleccionados[jj] = 0;
                  // }
                } else {
                  if ((accuracy == accuracy_ant) && (num_correctas == p)) {
                    seleccionados[seleccionado] = 1;
                    seleccionados[j] = 1;
                  }
                }
              }
              // seleccionamos uno de los seleccionados en el caso de empate
              int contador = 0;
              for (int jj = 0; jj < almacen.size(); jj++) {
                if (seleccionados[jj] == 1) {
                  contador++;
                  System.out.println("OPCION " + jj);
                }
              }
              if (contador > 0) {
                candidato = Randomize.RandintClosed(1, contador);
                contador = 0;
                for (int jj = 0; jj < almacen.size(); jj++) {
                  if (seleccionados[jj] == 1) {
                    contador++;
                    if (contador == candidato) seleccionado = jj;
                  }
                }
              }
              System.out.println("ELEGIDO " + seleccionado);

              // antes hay que quitar el q metimos
              regla.removeSelector(s);
              s = almacen.getSelector(seleccionado);
              s.print();
              // ADD A=V TO R
              regla.addSelector(s);
              /*AHORA HAY QUE QUITAR DEL ALMACEN SE SELECTORES AQUELLOS QUE
              HACEN REFERENCIA AL ATRIBUTO SELECCIONADO*/
              // obtener el atributo del selector ganador
              atributo = s.getAtributo();
              // se borran todos los q tengan ese atributo
              // System.out.println("ALMACEN");almacen.print();
              almacen.removeSelectorAtributo(atributo);

              reglaPerfecta = perfectRule(regla, train);
            } while (!reglaPerfecta && (regla.size() < train.getnentradas()));

            System.out.println("\n");
            System.out.println("\nREGLA............................................");
            regla.print();
            System.out.println("\n");
            /*necesitamos evaluar la regla para obtener la salida del metodo
            para compararla con la salida esperada*/
            evaluarComplejo(regla, datosTrain);
            // INCLUIMOS ESTA REGLA YA PARA EL CONJUNTO FINAL DE REGLAS
            conjunto_reglas.addRegla(regla);
            // REMOVE THE INSTANCES COVERED BY R FROM E

            // Instance instancia;
            /*for(int k=0;k<instancias.getNumInstances();k++){
            instancia=instancias.getInstance(k);
            System.out.print(k+" ");
              	instancia.print();
            System.out.println();
              	}*/
            removeInstancesCovered(i);
            for (int k = 0; k < instancias.getNumInstances(); k++) {
              instancia = instancias.getInstance(k);
              clase = instancia.getOutputNominalValuesInt(0);
              if (clase == i) {
                System.out.print(k + " ");
                instancia.print();
                System.out.println();
              }
            }
            // instancias.print();
            System.out.println("\n");
          } // del while
        } // del for de las clases

        // EVALUAMOS LA CALIDAD DE LAS REGLAS
        int[] clasesEval;
        clasesEval = train.getC();
        muestPorClaseEval = new int[train.getnclases()];
        for (int j = 0; j < train.getnclases(); j++) {
          muestPorClaseEval[j] = 0;
          for (int i = 0; i < datosTrain.size(); i++) {
            if (
            /*valorClases[j]*/ j == clasesEval[i]) {
              muestPorClaseEval[j]++;
            }
          }
        }
        conjunto_reglas.eliminaRepetidos(1);
        evReg =
            new EvaluaCalidadReglas(
                conjunto_reglas,
                datosTrain,
                datosTest,
                muestPorClaseEval,
                muestPorClaseEval,
                clases);
        // GENERAMOS LA SALIDA
        generaSalida();
        System.out.println("la semilla es " + seed);
      } // del if
    } catch (IOException e) {
      System.err.println("There was a problem while trying to read the dataset files:");
      System.err.println("-> " + e);
      // System.exit(0);
    }
  }
예제 #6
0
  /** Process the training and test files provided in the parameters file to the constructor. */
  public void process() {
    double[] outputs;
    double[] outputs2;
    Instance neighbor;
    double dist, mean;
    int actual;
    int[] N = new int[nneigh];
    double[] Ndist = new double[nneigh];
    boolean allNull;
    svm_problem SVMp = null;
    svm_parameter SVMparam = new svm_parameter();
    svm_model svr = null;
    svm_node SVMn[];
    double[] outputsCandidate = null;
    boolean same = true;
    Vector instancesSelected = new Vector();
    Vector instancesSelected2 = new Vector();

    // SVM PARAMETERS
    SVMparam.C = C;
    SVMparam.cache_size = 10; // 10MB of cache
    SVMparam.degree = degree;
    SVMparam.eps = eps;
    SVMparam.gamma = gamma;
    SVMparam.nr_weight = 0;
    SVMparam.nu = nu;
    SVMparam.p = p;
    SVMparam.shrinking = shrinking;
    SVMparam.probability = 0;
    if (kernelType.compareTo("LINEAR") == 0) {
      SVMparam.kernel_type = svm_parameter.LINEAR;
    } else if (kernelType.compareTo("POLY") == 0) {
      SVMparam.kernel_type = svm_parameter.POLY;
    } else if (kernelType.compareTo("RBF") == 0) {
      SVMparam.kernel_type = svm_parameter.RBF;
    } else if (kernelType.compareTo("SIGMOID") == 0) {
      SVMparam.kernel_type = svm_parameter.SIGMOID;
    }

    SVMparam.svm_type = svm_parameter.EPSILON_SVR;

    try {

      // Load in memory a dataset that contains a classification problem
      IS.readSet(input_train_name, true);
      int in = 0;
      int out = 0;

      ndatos = IS.getNumInstances();
      nvariables = Attributes.getNumAttributes();
      nentradas = Attributes.getInputNumAttributes();
      nsalidas = Attributes.getOutputNumAttributes();

      X = new String[ndatos][2]; // matrix with transformed data

      mostCommon = new String[nvariables];
      SVMp = new svm_problem();
      SVMp.l = ndatos;
      SVMp.y = new double[SVMp.l];
      SVMp.x = new svm_node[SVMp.l][nentradas + 1];
      for (int l = 0; l < SVMp.l; l++) {
        for (int n = 0; n < Attributes.getInputNumAttributes() + 1; n++) {
          SVMp.x[l][n] = new svm_node();
        }
      }

      for (int i = 0; i < ndatos; i++) {
        Instance inst = IS.getInstance(i);

        SVMp.y[i] = inst.getAllOutputValues()[0];
        for (int n = 0; n < Attributes.getInputNumAttributes(); n++) {
          SVMp.x[i][n].index = n;
          SVMp.x[i][n].value = inst.getAllInputValues()[n];
          SVMp.y[i] = inst.getAllOutputValues()[0];
        }
        // end of instance
        SVMp.x[i][nentradas].index = -1;
      }
      if (svm.svm_check_parameter(SVMp, SVMparam) != null) {
        System.out.println("SVM parameter error in training:");
        System.out.println(svm.svm_check_parameter(SVMp, SVMparam));
        System.exit(-1);
      }
      // train the SVM
      if (ndatos > 0) {
        svr = svm.svm_train(SVMp, SVMparam);
      }
      for (int i = 0; i < ndatos; i++) {
        Instance inst = IS.getInstance(i);
        X[i][0] = new String(String.valueOf(inst.getAllOutputValues()[0]));
        //			the values used for regression
        SVMn = new svm_node[Attributes.getInputNumAttributes() + 1];
        for (int n = 0; n < Attributes.getInputNumAttributes(); n++) {
          SVMn[n] = new svm_node();
          SVMn[n].index = n;
          SVMn[n].value = inst.getAllInputValues()[n];
        }
        SVMn[nentradas] = new svm_node();
        SVMn[nentradas].index = -1;
        // pedict the class
        X[i][1] = new String(String.valueOf((svm.svm_predict(svr, SVMn))));
      }
    } catch (Exception e) {
      System.out.println("Dataset exception = " + e);
      e.printStackTrace();
      System.exit(-1);
    }
    write_results(output_train_name);
    /** ************************************************************************************ */
    try {

      // Load in memory a dataset that contains a classification
      // problem
      IS.readSet(input_test_name, false);
      int in = 0;
      int out = 0;

      ndatos = IS.getNumInstances();
      nvariables = Attributes.getNumAttributes();
      nentradas = Attributes.getInputNumAttributes();
      nsalidas = Attributes.getOutputNumAttributes();

      X = new String[ndatos][2]; // matrix with transformed data
      // data

      mostCommon = new String[nvariables];

      for (int i = 0; i < ndatos; i++) {
        Instance inst = IS.getInstance(i);
        X[i][0] = new String(String.valueOf(inst.getAllOutputValues()[0]));

        SVMn = new svm_node[Attributes.getInputNumAttributes() + 1];
        for (int n = 0; n < Attributes.getInputNumAttributes(); n++) {
          SVMn[n] = new svm_node();
          SVMn[n].index = n;
          SVMn[n].value = inst.getAllInputValues()[n];
        }
        SVMn[nentradas] = new svm_node();
        SVMn[nentradas].index = -1;
        // pedict the class
        X[i][1] = new String(String.valueOf(svm.svm_predict(svr, SVMn)));
      }
    } catch (Exception e) {
      System.out.println("Dataset exception = " + e);
      e.printStackTrace();
      System.exit(-1);
    }
    System.out.println("escribiendo test");
    write_results(output_test_name);
  }
예제 #7
0
  /** Process the training and test files provided in the parameters file to the constructor. */
  public void process() {
    // declarations
    double[] outputs;
    double[] outputs2;
    Instance neighbor;
    double dist, mean;
    int actual;
    Randomize rnd = new Randomize();
    Instance ex;
    gCenter kmeans = null;
    int iterations = 0;
    double E;
    double prevE;
    int totalMissing = 0;
    boolean allMissing = true;

    rnd.setSeed(semilla);
    // PROCESS
    try {

      // Load in memory a dataset that contains a classification problem
      IS.readSet(input_train_name, true);
      int in = 0;
      int out = 0;

      ndatos = IS.getNumInstances();
      nvariables = Attributes.getNumAttributes();
      nentradas = Attributes.getInputNumAttributes();
      nsalidas = Attributes.getOutputNumAttributes();

      X = new String[ndatos][nvariables]; // matrix with transformed data
      kmeans = new gCenter(K, ndatos, nvariables);

      timesSeen = new FreqList[nvariables];
      mostCommon = new String[nvariables];

      // first, we choose k 'means' randomly from all
      // instances
      totalMissing = 0;
      for (int i = 0; i < ndatos; i++) {
        Instance inst = IS.getInstance(i);
        if (inst.existsAnyMissingValue()) totalMissing++;
      }
      if (totalMissing == ndatos) allMissing = true;
      else allMissing = false;
      for (int numMeans = 0; numMeans < K; numMeans++) {
        do {
          actual = (int) (ndatos * rnd.Rand());
          ex = IS.getInstance(actual);
        } while (ex.existsAnyMissingValue() && !allMissing);

        kmeans.copyCenter(ex, numMeans);
      }

      // now, iterate adjusting clusters' centers and
      // instances to them
      prevE = 0;
      iterations = 0;
      do {
        for (int i = 0; i < ndatos; i++) {
          Instance inst = IS.getInstance(i);

          kmeans.setClusterOf(inst, i);
        }
        // set new centers
        kmeans.recalculateCenters(IS);
        // compute RMSE
        E = 0;
        for (int i = 0; i < ndatos; i++) {
          Instance inst = IS.getInstance(i);

          E += kmeans.distance(inst, kmeans.getClusterOf(i));
        }
        iterations++;
        // System.out.println(iterations+"\t"+E);
        if (Math.abs(prevE - E) == 0) iterations = maxIter;
        else prevE = E;
      } while (E > minError && iterations < maxIter);
      for (int i = 0; i < ndatos; i++) {
        Instance inst = IS.getInstance(i);

        in = 0;
        out = 0;

        for (int j = 0; j < nvariables; j++) {
          Attribute a = Attributes.getAttribute(j);

          direccion = a.getDirectionAttribute();
          tipo = a.getType();

          if (direccion == Attribute.INPUT) {
            if (tipo != Attribute.NOMINAL && !inst.getInputMissingValues(in)) {
              X[i][j] = new String(String.valueOf(inst.getInputRealValues(in)));
            } else {
              if (!inst.getInputMissingValues(in)) X[i][j] = inst.getInputNominalValues(in);
              else {
                actual = kmeans.getClusterOf(i);
                X[i][j] = new String(kmeans.valueAt(actual, j));
              }
            }
            in++;
          } else {
            if (direccion == Attribute.OUTPUT) {
              if (tipo != Attribute.NOMINAL && !inst.getOutputMissingValues(out)) {
                X[i][j] = new String(String.valueOf(inst.getOutputRealValues(out)));
              } else {
                if (!inst.getOutputMissingValues(out)) X[i][j] = inst.getOutputNominalValues(out);
                else {
                  actual = kmeans.getClusterOf(i);
                  X[i][j] = new String(kmeans.valueAt(actual, j));
                }
              }
              out++;
            }
          }
        }
      }
    } catch (Exception e) {
      System.out.println("Dataset exception = " + e);
      e.printStackTrace();
      System.exit(-1);
    }
    write_results(output_train_name);
    /** ************************************************************************************ */
    // does a test file associated exist?
    if (input_train_name.compareTo(input_test_name) != 0) {
      try {

        // Load in memory a dataset that contains a classification problem
        IStest.readSet(input_test_name, false);
        int in = 0;
        int out = 0;

        ndatos = IStest.getNumInstances();
        nvariables = Attributes.getNumAttributes();
        nentradas = Attributes.getInputNumAttributes();
        nsalidas = Attributes.getOutputNumAttributes();

        for (int i = 0; i < ndatos; i++) {
          Instance inst = IStest.getInstance(i);

          in = 0;
          out = 0;

          for (int j = 0; j < nvariables; j++) {
            Attribute a = Attributes.getAttribute(j);

            direccion = a.getDirectionAttribute();
            tipo = a.getType();

            if (direccion == Attribute.INPUT) {
              if (tipo != Attribute.NOMINAL && !inst.getInputMissingValues(in)) {
                X[i][j] = new String(String.valueOf(inst.getInputRealValues(in)));
              } else {
                if (!inst.getInputMissingValues(in)) X[i][j] = inst.getInputNominalValues(in);
                else {
                  actual = kmeans.getClusterOf(i);
                  X[i][j] = new String(kmeans.valueAt(actual, j));
                }
              }
              in++;
            } else {
              if (direccion == Attribute.OUTPUT) {
                if (tipo != Attribute.NOMINAL && !inst.getOutputMissingValues(out)) {
                  X[i][j] = new String(String.valueOf(inst.getOutputRealValues(out)));
                } else {
                  if (!inst.getOutputMissingValues(out)) X[i][j] = inst.getOutputNominalValues(out);
                  else {
                    actual = kmeans.getClusterOf(i);
                    X[i][j] = new String(kmeans.valueAt(actual, j));
                  }
                }
                out++;
              }
            }
          }
        }
      } catch (Exception e) {
        System.out.println("Dataset exception = " + e);
        e.printStackTrace();
        System.exit(-1);
      }
      write_results(output_test_name);
    }
  }
예제 #8
0
  /** Process the training and test files provided in the parameters file to the constructor. */
  public void process() {
    try {

      // Load in memory a dataset that contains a classification problem
      IS.readSet(input_train_name, true);
      int in = 0;
      int out = 0;

      ndatos = IS.getNumInstances();
      nvariables = Attributes.getNumAttributes();
      nentradas = Attributes.getInputNumAttributes();
      nsalidas = Attributes.getOutputNumAttributes();

      X = new String[ndatos][nvariables]; // matrix with transformed data
      boolean[] isMissed =
          new boolean[ndatos]; // vector which points out instances with missed data

      try {
        FileWriter file_write = new FileWriter(output_train_name);

        file_write.write(IS.getHeader());

        // now, print the normalized data
        file_write.write("@data\n");
        // file_write.close();
        PrintWriter pw = new PrintWriter(file_write);
        for (int i = 0; i < ndatos; i++) {
          Instance inst = IS.getInstance(i);
          if (!inst.existsAnyMissingValue()) {
            inst.printAsOriginal(pw);
            // file_write.write(inst.toString()); // DOES NOT WRITE BACK NON-DEF DIRECTION
            // ATTRIBUTES!!!!
            file_write.write("\n");
          }
        }
        pw.close();
        file_write.close();
      } catch (IOException e) {
        System.out.println("IO exception = " + e);
        System.exit(-1);
      }

    } catch (Exception e) {
      System.out.println("Dataset exception = " + e);
      e.printStackTrace();
      System.exit(-1);
    }
    // does a test file associated exist?
    if (input_train_name.compareTo(input_test_name) != 0) {
      try {

        // Load in memory a dataset that contains a classification problem
        IS.readSet(input_test_name, false);
        int in = 0;
        int out = 0;

        ndatos = IS.getNumInstances();
        nvariables = Attributes.getNumAttributes();
        nentradas = Attributes.getInputNumAttributes();
        nsalidas = Attributes.getOutputNumAttributes();

        X = new String[ndatos][nvariables]; // matrix with transformed data
        boolean[] isMissed =
            new boolean[ndatos]; // vector which points out instances with missed data

        try {
          FileWriter file_write = new FileWriter(output_test_name);

          file_write.write(IS.getHeader());

          // now, print the normalized data
          file_write.write("@data\n");
          PrintWriter pw = new PrintWriter(file_write);
          for (int i = 0; i < ndatos; i++) {
            Instance inst = IS.getInstance(i);
            if (!inst.existsAnyMissingValue()) {
              inst.printAsOriginal(pw);
              file_write.write("\n");
            }
          }
          pw.close();
          file_write.close();
        } catch (IOException e) {
          System.out.println("IO exception = " + e);
          System.exit(-1);
        }

      } catch (Exception e) {
        System.out.println("Dataset exception = " + e);
        e.printStackTrace();
        System.exit(-1);
      }
    }

    // write_results(); / since there ins't any data transformation, is not needed
  }
  /** Process the training and test files provided in the parameters file to the constructor. */
  public void process() {
    double[] outputs;
    double[] outputs2;
    try {
      FileWriter file_write = new FileWriter(output_train_name);

      try {

        // Load in memory a dataset that contains a classification problem
        IS.readSet(input_train_name, true);
        int in = 0;
        int out = 0;
        int in2 = 0;
        int out2 = 0;
        int lastMissing = -1;
        boolean fin = false;
        boolean stepNext = false;

        ndatos = IS.getNumInstances();
        nvariables = Attributes.getNumAttributes();
        nentradas = Attributes.getInputNumAttributes();
        nsalidas = Attributes.getOutputNumAttributes();

        String[] row = null;
        X = new Vector[ndatos]; // matrix with transformed data
        for (int i = 0; i < ndatos; i++) X[i] = new Vector();

        timesSeen = new FreqList[nvariables];
        mostCommon = new String[nvariables];

        file_write.write(IS.getHeader());

        // now, print the normalized data
        file_write.write("@data\n");

        // now, search for missed data, and replace them with
        // the most common value

        for (int i = 0; i < ndatos; i++) {
          Instance inst = IS.getInstance(i);
          in = 0;
          out = 0;
          row = new String[nvariables];

          for (int j = 0; j < nvariables; j++) {
            Attribute a = Attributes.getAttribute(j);

            direccion = a.getDirectionAttribute();
            tipo = a.getType();

            if (direccion == Attribute.INPUT) {
              if (tipo != Attribute.NOMINAL && !inst.existsAnyMissingValue()) {
                row[j] = new String(String.valueOf(inst.getInputRealValues(in)));
              } else {
                if (!inst.existsAnyMissingValue()) row[j] = inst.getInputNominalValues(in);
                else {
                  // missing data
                  outputs = inst.getAllOutputValues();
                  in2 = 0;
                  out2 = 0;
                  for (int attr = 0; attr < nvariables; attr++) {
                    Attribute b = Attributes.getAttribute(attr);
                    direccion = b.getDirectionAttribute();
                    tipo = b.getType();
                    if (direccion == Attribute.INPUT) {
                      if (tipo != Attribute.NOMINAL && !inst.getInputMissingValues(in2)) {
                        row[attr] = new String(String.valueOf(inst.getInputRealValues(in2)));
                      } else {
                        if (!inst.getInputMissingValues(in2))
                          row[attr] = inst.getInputNominalValues(in2);
                      }
                      in2++;
                    } else {
                      if (direccion == Attribute.OUTPUT) {
                        if (tipo != Attribute.NOMINAL && !inst.getOutputMissingValues(out2)) {
                          row[attr] = new String(String.valueOf(inst.getOutputRealValues(out2)));
                        } else {
                          if (!inst.getOutputMissingValues(out2))
                            row[attr] = inst.getOutputNominalValues(out2);
                        }
                        out2++;
                      }
                    }
                  }
                  // make frecuencies  for each attribute
                  for (int attr = 0; attr < nvariables; attr++) {
                    Attribute b = Attributes.getAttribute(attr);

                    direccion = b.getDirectionAttribute();
                    tipo = b.getType();
                    if (direccion == Attribute.INPUT && inst.getInputMissingValues(attr)) {
                      lastMissing = attr;
                      timesSeen[attr] = new FreqList();
                      for (int m = 0; m < ndatos; m++) {
                        Instance inst2 = IS.getInstance(m);
                        outputs2 = inst2.getAllOutputValues();
                        boolean sameClass = true;
                        // are they same concept instances??
                        for (int k = 0; k < nsalidas && sameClass; k++)
                          if (outputs[k] != outputs2[k]) sameClass = false;
                        if (sameClass) {
                          if (tipo != Attribute.NOMINAL && !inst2.getInputMissingValues(attr)) {
                            timesSeen[attr].AddElement(
                                new String(String.valueOf(inst2.getInputRealValues(attr))));

                          } else {
                            if (!inst2.getInputMissingValues(attr)) {
                              timesSeen[attr].AddElement(inst2.getInputNominalValues(attr));
                            }
                          }
                        }
                      }
                    }
                  }
                  for (int attr = 0; attr < nvariables; attr++) {
                    if (direccion == Attribute.INPUT && inst.getInputMissingValues(attr)) {
                      timesSeen[attr].reset();
                    }
                  }
                  fin = false;
                  stepNext = false;
                  while (!fin) {
                    in2 = 0;
                    for (int attr = 0; attr < nvariables && !fin; attr++) {
                      Attribute b = Attributes.getAttribute(attr);

                      direccion = b.getDirectionAttribute();
                      tipo = b.getType();
                      if (direccion == Attribute.INPUT && inst.getInputMissingValues(in2)) {
                        if (stepNext) {
                          timesSeen[attr].iterate();
                          stepNext = false;
                        }
                        if (timesSeen[attr].outOfBounds()) {
                          stepNext = true;
                          if (attr == lastMissing) fin = true;
                          timesSeen[attr].reset();
                        }
                        if (!fin)
                          row[attr] =
                              ((ValueFreq) timesSeen[attr].getCurrent())
                                  .getValue(); // replace missing data
                      }
                      in2++;
                    }
                    if (!fin) {
                      stepNext = true;
                      file_write.write(row[0]);
                      for (int y = 1; y < nvariables; y++) {
                        file_write.write("," + row[y]);
                      }
                      file_write.write("\n");
                      // X[i].addElement(row);
                      // row = (String[])row.clone();
                    }
                  }
                }
              }
              in++;
            } else {
              if (direccion == Attribute.OUTPUT) {
                if (tipo != Attribute.NOMINAL && !inst.getOutputMissingValues(out)) {
                  row[j] = new String(String.valueOf(inst.getOutputRealValues(out)));
                } else {
                  if (!inst.getOutputMissingValues(out)) row[j] = inst.getOutputNominalValues(out);
                  else row[j] = new String("?");
                }
                out++;
              }
            }
          }
          if (!inst.existsAnyMissingValue()) {
            file_write.write(row[0]);
            for (int y = 1; y < nvariables; y++) {
              file_write.write("," + row[y]);
            }
            file_write.write("\n");
          }
        }
      } catch (Exception e) {
        System.out.println("Dataset exception = " + e);
        e.printStackTrace();
        System.exit(-1);
      }
      file_write.close();
    } catch (IOException e) {
      System.out.println("IO exception = " + e);
      e.printStackTrace();
      System.exit(-1);
    }

    /** ************************************************************************************ */
    // does a test file associated exist?
    if (input_train_name.compareTo(input_test_name) != 0) {
      try {
        FileWriter file_write = new FileWriter(output_test_name);

        try {

          // Load in memory a dataset that contains a classification problem
          IS.readSet(input_test_name, false);
          int in = 0;
          int out = 0;
          int in2 = 0;
          int out2 = 0;
          int lastMissing = -1;
          boolean fin = false;
          boolean stepNext = false;

          ndatos = IS.getNumInstances();
          nvariables = Attributes.getNumAttributes();
          nentradas = Attributes.getInputNumAttributes();
          nsalidas = Attributes.getOutputNumAttributes();

          String[] row = null;
          X = new Vector[ndatos]; // matrix with transformed data
          for (int i = 0; i < ndatos; i++) X[i] = new Vector();

          timesSeen = new FreqList[nvariables];
          mostCommon = new String[nvariables];

          file_write.write(IS.getHeader());

          // now, print the normalized data
          file_write.write("@data\n");

          // now, search for missed data, and replace them with
          // the most common value

          for (int i = 0; i < ndatos; i++) {
            Instance inst = IS.getInstance(i);
            in = 0;
            out = 0;
            row = new String[nvariables];

            for (int j = 0; j < nvariables; j++) {
              Attribute a = Attributes.getAttribute(j);

              direccion = a.getDirectionAttribute();
              tipo = a.getType();

              if (direccion == Attribute.INPUT) {
                if (tipo != Attribute.NOMINAL && !inst.existsAnyMissingValue()) {
                  row[j] = new String(String.valueOf(inst.getInputRealValues(in)));
                } else {
                  if (!inst.existsAnyMissingValue()) row[j] = inst.getInputNominalValues(in);
                  else {
                    // missing data
                    outputs = inst.getAllOutputValues();
                    in2 = 0;
                    out2 = 0;
                    for (int attr = 0; attr < nvariables; attr++) {
                      Attribute b = Attributes.getAttribute(attr);
                      direccion = b.getDirectionAttribute();
                      tipo = b.getType();
                      if (direccion == Attribute.INPUT) {
                        if (tipo != Attribute.NOMINAL && !inst.getInputMissingValues(in2)) {
                          row[attr] = new String(String.valueOf(inst.getInputRealValues(in2)));
                        } else {
                          if (!inst.getInputMissingValues(in2))
                            row[attr] = inst.getInputNominalValues(in2);
                        }
                        in2++;
                      } else {
                        if (direccion == Attribute.OUTPUT) {
                          if (tipo != Attribute.NOMINAL && !inst.getOutputMissingValues(out2)) {
                            row[attr] = new String(String.valueOf(inst.getOutputRealValues(out2)));
                          } else {
                            if (!inst.getOutputMissingValues(out2))
                              row[attr] = inst.getOutputNominalValues(out2);
                          }
                          out2++;
                        }
                      }
                    }
                    // make frecuencies  for each attribute
                    for (int attr = 0; attr < nvariables; attr++) {
                      Attribute b = Attributes.getAttribute(attr);

                      direccion = b.getDirectionAttribute();
                      tipo = b.getType();
                      if (direccion == Attribute.INPUT && inst.getInputMissingValues(attr)) {
                        lastMissing = attr;
                        timesSeen[attr] = new FreqList();
                        for (int m = 0; m < ndatos; m++) {
                          Instance inst2 = IS.getInstance(m);
                          outputs2 = inst2.getAllOutputValues();
                          boolean sameClass = true;
                          // are they same concept instances??
                          for (int k = 0; k < nsalidas && sameClass; k++)
                            if (outputs[k] != outputs2[k]) sameClass = false;
                          if (sameClass) {
                            if (tipo != Attribute.NOMINAL && !inst2.getInputMissingValues(attr)) {
                              timesSeen[attr].AddElement(
                                  new String(String.valueOf(inst2.getInputRealValues(attr))));

                            } else {
                              if (!inst2.getInputMissingValues(attr)) {
                                timesSeen[attr].AddElement(inst2.getInputNominalValues(attr));
                              }
                            }
                          }
                        }
                      }
                    }
                    for (int attr = 0; attr < nvariables; attr++) {
                      if (direccion == Attribute.INPUT && inst.getInputMissingValues(attr)) {
                        timesSeen[attr].reset();
                      }
                    }
                    fin = false;
                    stepNext = false;
                    while (!fin) {
                      in2 = 0;
                      for (int attr = 0; attr < nvariables && !fin; attr++) {
                        Attribute b = Attributes.getAttribute(attr);

                        direccion = b.getDirectionAttribute();
                        tipo = b.getType();
                        if (direccion == Attribute.INPUT && inst.getInputMissingValues(in2)) {
                          if (stepNext) {
                            timesSeen[attr].iterate();
                            stepNext = false;
                          }
                          if (timesSeen[attr].outOfBounds()) {
                            stepNext = true;
                            if (attr == lastMissing) fin = true;
                            timesSeen[attr].reset();
                          }
                          if (!fin)
                            row[attr] =
                                ((ValueFreq) timesSeen[attr].getCurrent())
                                    .getValue(); // replace missing data
                        }
                        in2++;
                      }
                      if (!fin) {
                        stepNext = true;
                        file_write.write(row[0]);
                        for (int y = 1; y < nvariables; y++) {
                          file_write.write("," + row[y]);
                        }
                        file_write.write("\n");
                        // X[i].addElement(row);
                        // row = (String[])row.clone();
                      }
                    }
                  }
                }
                in++;
              } else {
                if (direccion == Attribute.OUTPUT) {
                  if (tipo != Attribute.NOMINAL && !inst.getOutputMissingValues(out)) {
                    row[j] = new String(String.valueOf(inst.getOutputRealValues(out)));
                  } else {
                    if (!inst.getOutputMissingValues(out))
                      row[j] = inst.getOutputNominalValues(out);
                    else row[j] = new String("?");
                  }
                  out++;
                }
              }
            }
            if (!inst.existsAnyMissingValue()) {
              file_write.write(row[0]);
              for (int y = 1; y < nvariables; y++) {
                file_write.write("," + row[y]);
              }
              file_write.write("\n");
            }
          }
        } catch (Exception e) {
          System.out.println("Dataset exception = " + e);
          e.printStackTrace();
          System.exit(-1);
        }
        file_write.close();
      } catch (IOException e) {
        System.out.println("IO exception = " + e);
        e.printStackTrace();
        System.exit(-1);
      }
    }
  }