Пример #1
0
  /**
   * Loads a dataset file for a clustering problem and caches its input values.
   *
   * <p>The file is read through {@code IS}. Every input value is copied into {@code X}, its
   * missing-value flag into {@code missing}, and the smallest / largest value seen for each input
   * attribute into {@code iMinimum} / {@code iMaximum}. Output attributes are not copied; a warning
   * is printed when the dataset declares any.
   *
   * <p>Any exception raised while loading or copying is caught inside this method, reported on
   * standard output together with its stack trace, and not rethrown.
   *
   * @param nfexamples Name of the dataset file
   * @param train Whether the dataset file is for training ({@code true}) or for test
   * @throws java.io.IOException declared for callers; note that the body currently catches every
   *     exception itself.
   */
  public void processClusterDataset(String nfexamples, boolean train) throws IOException {
    try {
      // Parse the file into the shared instance set.
      IS.readSet(nfexamples, train);

      nData = IS.getNumInstances();
      nInputs = Attributes.getInputNumAttributes();
      nVariables = nInputs + Attributes.getOutputNumAttributes();

      boolean hasOutputs = Attributes.getOutputNumAttributes() != 0;
      if (hasOutputs) {
        System.out.println("This algorithm can not process datasets with outputs");
        System.out.println("All outputs will be removed");
      }

      // Local copies of the data and of the missing-value flags.
      X = new double[nData][nInputs];
      missing = new boolean[nData][nInputs];

      // Per-attribute extremes of the inputs.
      iMaximum = new double[nInputs];
      iMinimum = new double[nInputs];

      // There are no outputs in a clustering problem, so their range stays at zero.
      oMaximum = 0;
      oMinimum = 0;
      nClasses = 0;

      for (int row = 0; row < X.length; row++) {
        Instance current = IS.getInstance(row);
        // The first row seeds the extremes, whatever its values are.
        boolean firstRow = (row == 0);
        for (int col = 0; col < nInputs; col++) {
          double value = IS.getInputNumericValue(row, col);
          X[row][col] = value;
          missing[row][col] = current.getInputMissingValues(col);
          if (firstRow || value > iMaximum[col]) {
            iMaximum[col] = value;
          }
          if (firstRow || value < iMinimum[col]) {
            iMinimum[col] = value;
          }
        }
      }
    } catch (Exception e) {
      System.out.println("DBG: Exception in readSet");
      e.printStackTrace();
    }
  }
Пример #2
0
  /**
   * Builds the test data matrix ({@code datosTest}) and class vector ({@code clasesTest}) and
   * rescales every input value.
   *
   * <p>Missing inputs are replaced by 0.0 before rescaling. Nominal inputs are divided by (number
   * of nominal values - 1); all other inputs are shifted by the attribute minimum and divided by
   * the attribute range (maximum - minimum).
   *
   * <p>The process terminates with exit code -1 when the dataset does not have exactly one output
   * attribute or when that output attribute is of real type.
   */
  private void normalizarTest() {

    /* Check that the dataset corresponds to a classification problem */

    if (Attributes.getOutputNumAttributes() < 1) {
      System.err.println(
          "This dataset haven´t outputs, so it not corresponding to a classification problem.");
      System.exit(-1);
    } else if (Attributes.getOutputNumAttributes() > 1) {
      System.err.println("This dataset have more of one output.");
      System.exit(-1);
    }

    // The attribute inspected here is the OUTPUT one, so the message must say so.
    if (Attributes.getOutputAttribute(0).getType() == Attribute.REAL) {
      System.err.println(
          "This dataset have an output attribute with floating values, so it not corresponding to a classification problem.");
      System.exit(-1);
    }

    int numInstances = test.getNumInstances();
    datosTest = new double[numInstances][Attributes.getInputNumAttributes()];
    clasesTest = new int[numInstances];

    for (int i = 0; i < numInstances; i++) {
      Instance instance = test.getInstance(i);
      boolean[] nulls = instance.getInputMissingValues();
      datosTest[i] = instance.getAllInputValues();

      // Missing inputs are treated as 0.0 before the rescaling below.
      for (int j = 0; j < nulls.length; j++) {
        if (nulls[j]) {
          datosTest[i][j] = 0.0;
        }
      }

      // Only the first (and only) output is used as the class label.
      double[] outputs = instance.getAllOutputValues();
      clasesTest[i] = (int) outputs[0];

      // NOTE(review): an attribute with a single nominal value, or with max == min, makes the
      // divisor zero and leaves NaN/Infinity in the matrix - confirm this cannot happen here.
      for (int k = 0; k < datosTest[i].length; k++) {
        Attribute attribute = Attributes.getInputAttribute(k);
        if (attribute.getType() == Attribute.NOMINAL) {
          datosTest[i][k] /= attribute.getNominalValuesList().size() - 1;
        } else {
          datosTest[i][k] -= attribute.getMinAttribute();
          datosTest[i][k] /= attribute.getMaxAttribute() - attribute.getMinAttribute();
        }
      }
    }
  }
Пример #3
0
  /**
   * Builds the reference data matrix ({@code datosReferencia}) and class vector ({@code
   * clasesReferencia}) and rescales every input value.
   *
   * <p>Missing inputs are replaced by 0.0 before rescaling. Nominal inputs are divided by (number
   * of nominal values - 1); all other inputs are shifted by the attribute minimum and divided by
   * the attribute range (maximum - minimum).
   *
   * @throws CheckException if the dataset does not have exactly one output attribute, or if that
   *     output attribute is of real type.
   */
  private void normalizarReferencia() throws CheckException {

    /* Check that the dataset corresponds to a classification problem */

    if (Attributes.getOutputNumAttributes() < 1) {
      throw new CheckException(
          "This dataset haven´t outputs, so it not corresponding to a classification problem.");
    } else if (Attributes.getOutputNumAttributes() > 1) {
      throw new CheckException("This dataset have more of one output.");
    }

    // The attribute inspected here is the OUTPUT one, so the message must say so.
    if (Attributes.getOutputAttribute(0).getType() == Attribute.REAL) {
      throw new CheckException(
          "This dataset have an output attribute with floating values, so it not corresponding to a classification problem.");
    }

    int numInstances = referencia.getNumInstances();
    datosReferencia = new double[numInstances][Attributes.getInputNumAttributes()];
    clasesReferencia = new int[numInstances];

    /* Copy every instance, zero its missing inputs and rescale it */
    for (int i = 0; i < numInstances; i++) {
      Instance instance = referencia.getInstance(i);
      boolean[] nulls = instance.getInputMissingValues();
      datosReferencia[i] = instance.getAllInputValues();

      // Missing inputs are treated as 0.0 before the rescaling below.
      for (int j = 0; j < nulls.length; j++) {
        if (nulls[j]) {
          datosReferencia[i][j] = 0.0;
        }
      }

      // Only the first (and only) output is used as the class label.
      double[] outputs = instance.getAllOutputValues();
      clasesReferencia[i] = (int) outputs[0];

      // NOTE(review): an attribute with a single nominal value, or with max == min, makes the
      // divisor zero and leaves NaN/Infinity in the matrix - confirm this cannot happen here.
      for (int k = 0; k < datosReferencia[i].length; k++) {
        Attribute attribute = Attributes.getInputAttribute(k);
        if (attribute.getType() == Attribute.NOMINAL) {
          datosReferencia[i][k] /= attribute.getNominalValuesList().size() - 1;
        } else {
          datosReferencia[i][k] -= attribute.getMinAttribute();
          datosReferencia[i][k] /= attribute.getMaxAttribute() - attribute.getMinAttribute();
        }
      }
    }
  }
Пример #4
0
  /**
   * Computes the accuracy of the current rule ({@code regla}) over the current instance set
   * ({@code instancias}).
   *
   * <p>As a side effect the fields {@code num_cubiertas} (instances covered by the rule) and {@code
   * num_correctas} (covered instances whose class equals {@code i}) are reset and recomputed, and
   * {@code cubierta} / {@code clase} hold the values of the last instance examined.
   *
   * @param i Class index the covered instances are compared against
   * @return {@code num_correctas / num_cubiertas}, or 0 when the rule covers no instance
   */
  private double getAccuracy(int i) {
    num_cubiertas = 0;
    num_correctas = 0;

    int idx = 0;
    while (idx < instancias.getNumInstances()) {
      Instance candidate = instancias.getInstance(idx);
      cubierta = regla.reglaCubreInstancia(candidate);
      if (cubierta) {
        num_cubiertas++;
        clase = candidate.getOutputNominalValuesInt(0);
        if (clase == i) {
          num_correctas++;
        }
      }
      idx++;
    }

    // A rule that covers nothing is given accuracy 0.
    if (num_cubiertas == 0) {
      return 0;
    }
    return (double) num_correctas / (double) num_cubiertas;
  }
Пример #5
0
  /**
   * Removes from the instance set ({@code instancias}) every instance covered by the current rule
   * ({@code regla}), printing each removed instance to standard output.
   *
   * <p>Instances are removed regardless of their class: the check against {@code i} is disabled in
   * the current implementation.
   *
   * @param i Class index of the rule (currently not used to filter the removal)
   */
  private void removeInstancesCovered(int i) {
    int pos = 0;
    while (pos < instancias.getNumInstances()) {
      instancia = instancias.getInstance(pos);
      cubierta = regla.reglaCubreInstancia(instancia);

      if (!cubierta) {
        pos++;
        continue;
      }

      clase = instancia.getOutputNominalValuesInt(0);
      instancias.removeInstance(pos);
      instancia.print();
      System.out.println();
      // Do not advance: after the removal the next instance sits at the same index.
    }
  }
Пример #6
0
  /**
   * Builds the classifier and immediately runs the whole PRISM learning process.
   *
   * <p>The constructor stores the output file names and the seed, reads the training and test
   * files, and, unless the training set reports continuous attributes, induces rules class by
   * class:
   *
   * <ol>
   *   <li>the training file is re-read to restore the full instance set;
   *   <li>while instances of the class remain, a rule is grown by repeatedly adding the selector
   *       with the best accuracy (ties broken by number of correct instances, then at random)
   *       until {@code perfectRule} accepts the rule or every input attribute has been used;
   *   <li>the rule is stored and the instances it covers are removed.
   * </ol>
   *
   * <p>Finally the rule set is evaluated and the output is generated. When the training set has
   * continuous attributes only a warning is printed and no rule is learned. An {@code
   * IOException} while reading the files is reported on standard error and not rethrown.
   *
   * @param ficheroTrain Train file
   * @param ficheroTest Test file
   * @param fSalidaTr Output train file
   * @param fSalidaTst Output test file
   * @param fsalida Output file
   * @param semilla Seed for the random number generator
   */
  public Prism(
      String ficheroTrain,
      String ficheroTest,
      String fSalidaTr,
      String fSalidaTst,
      String fsalida,
      long semilla) {

    ficheroSalida = fsalida;
    ficheroSalidaTr = fSalidaTr;
    ficheroSalidaTst = fSalidaTst;
    seed = semilla;

    datosTrain = new ConjDatos();
    datosTest = new ConjDatos();

    train = new Dataset();
    test = new Dataset();
    s = new Selector(0, 0, 0.);
    conjunto_reglas = new ConjReglas();

    try {
      Randomize.setSeed(seed);
      System.out.println("la semilla es " + seed);
      train.leeConjunto(ficheroTrain, true);
      test.leeConjunto(ficheroTest, false);
      if (train.hayAtributosContinuos() /*|| train.hayAtributosDiscretos()*/) {
        System.err.println("\nPrism may not work properly with real or integer attributes.\n");
        // The run is not aborted; the flag below simply skips the learning stage.
        hayContinuos = true;
      }
      if (!hayContinuos) {
        train.calculaMasComunes();
        test.calculaMasComunes();
        // Build the internal data sets from the training and test files.
        datosTrain =
            creaConjunto(
                train);
        datosTest = creaConjunto(test);

        valores = train.getX2(); // nominal values
        clases = train.getC2();
        clasitas = train.getC();
        // START OF THE PRISM ALGORITHM
        // FOR EACH CLASS C
        // Note: this overwrites the value assigned to 'clases' a few lines above.
        clases = train.dameClases();
        int unavez = 0, candidato;
        for (int i = 0; i < train.getnclases(); i++) {
          System.out.println("CLASE :" + clases[i] + "\n");
          // Initialize E to the instance set.
          // Instances removed while processing the previous class are not re-inserted one by
          // one; the set is simply rebuilt by reading the training file again.
          train.leeConjunto(ficheroTrain, false);
          nombre_atributos = train.dameNombres();
          instancias = train.getInstanceSet();

          // While E contains instances in class C
          while (train.hayInstanciasDeClaseC(i)) {
            // Create a rule R with an empty left-hand side that predicts class C
            regla = new Complejo(train.getnclases());
            regla.setClase(i);
            regla.adjuntaNombreAtributos(nombre_atributos);
            // The selector store is rebuilt only here, once per rule, because selectors are
            // removed from it while the rule grows.
            almacen = hazSelectores(train);
            almacen.adjuntaNombreAtributos(nombre_atributos);
            do {
              // FOR EACH ATTRIBUTE A NOT MENTIONED IN R, AND EACH VALUE V
              accuracy_ant = -1.;
              p = 0;
              // seleccionados[x] == 1 marks selector x as tied for the best score.
              int seleccionados[] = new int[almacen.size()];
              for (int jj = 0; jj < almacen.size(); jj++) seleccionados[jj] = 0;
              System.out.println();
              for (int j = 0; j < almacen.size(); j++) {
                // Remove the selector tried in the previous iteration before trying a new one.
                if (j > 0) regla.removeSelector(s);
                s = almacen.getSelector(j);
                s.print();
                // CONSIDER ADDING THE CONDITION A=V TO THE LHS OF R
                regla.addSelector(s);
                accuracy = getAccuracy(i);
                System.out.println("correctas " + num_correctas + " cubiertas " + num_cubiertas);
                System.out.println("Acurracy " + accuracy);

                // A candidate wins with a higher accuracy or, at equal accuracy, with more
                // correctly covered instances.
                if ((accuracy > accuracy_ant)
                    || ((accuracy == accuracy_ant) && (num_correctas > p))) {

                  accuracy_ant = accuracy;
                  seleccionado = j;
                  p = num_correctas;

                  // A strictly better candidate invalidates every tie recorded so far in this
                  // iteration.
                  for (int jj = 0; jj < almacen.size(); jj++) seleccionados[jj] = 0;
                } else {
                  // Exact tie with the current best: flag both as candidates.
                  if ((accuracy == accuracy_ant) && (num_correctas == p)) {
                    seleccionados[seleccionado] = 1;
                    seleccionados[j] = 1;
                  }
                }
              }
              // In case of a tie, pick one of the flagged selectors at random.
              int contador = 0;
              for (int jj = 0; jj < almacen.size(); jj++) {
                if (seleccionados[jj] == 1) {
                  contador++;
                  System.out.println("OPCION " + jj);
                }
              }
              if (contador > 0) {
                // Draw a position among the tied selectors and walk to that flagged entry.
                candidato = Randomize.RandintClosed(1, contador);
                contador = 0;
                for (int jj = 0; jj < almacen.size(); jj++) {
                  if (seleccionados[jj] == 1) {
                    contador++;
                    if (contador == candidato) seleccionado = jj;
                  }
                }
              }
              System.out.println("ELEGIDO " + seleccionado);

              // The last trial selector is still in the rule; remove it first.
              regla.removeSelector(s);
              s = almacen.getSelector(seleccionado);
              s.print();
              // ADD A=V TO R
              regla.addSelector(s);
              // Selectors that refer to the chosen attribute must now leave the store:
              // get the attribute of the winning selector and remove every selector on it.
              atributo = s.getAtributo();
              almacen.removeSelectorAtributo(atributo);

              reglaPerfecta = perfectRule(regla, train);
            } while (!reglaPerfecta && (regla.size() < train.getnentradas()));

            System.out.println("\n");
            System.out.println("\nREGLA............................................");
            regla.print();
            System.out.println("\n");
            // Evaluate the rule on the training data so that the output of the method can be
            // compared with the expected output.
            evaluarComplejo(regla, datosTrain);
            // Add this rule to the final rule set.
            conjunto_reglas.addRegla(regla);
            // REMOVE THE INSTANCES COVERED BY R FROM E
            removeInstancesCovered(i);
            // Print the instances of the current class that are still uncovered.
            for (int k = 0; k < instancias.getNumInstances(); k++) {
              instancia = instancias.getInstance(k);
              clase = instancia.getOutputNominalValuesInt(0);
              if (clase == i) {
                System.out.print(k + " ");
                instancia.print();
                System.out.println();
              }
            }
            System.out.println("\n");
          } // end of while
        } // end of the loop over classes

        // EVALUATE THE QUALITY OF THE RULES
        // Count how many training examples belong to each class.
        int[] clasesEval;
        clasesEval = train.getC();
        muestPorClaseEval = new int[train.getnclases()];
        for (int j = 0; j < train.getnclases(); j++) {
          muestPorClaseEval[j] = 0;
          for (int i = 0; i < datosTrain.size(); i++) {
            if (
            /*valorClases[j]*/ j == clasesEval[i]) {
              muestPorClaseEval[j]++;
            }
          }
        }
        conjunto_reglas.eliminaRepetidos(1);
        // NOTE(review): the training per-class counts are passed for both count arguments -
        // confirm whether the second one was meant to be computed from the test set.
        evReg =
            new EvaluaCalidadReglas(
                conjunto_reglas,
                datosTrain,
                datosTest,
                muestPorClaseEval,
                muestPorClaseEval,
                clases);
        // GENERATE THE OUTPUT
        generaSalida();
        System.out.println("la semilla es " + seed);
      } // end of if
    } catch (IOException e) {
      System.err.println("There was a problem while trying to read the dataset files:");
      System.err.println("-> " + e);
      // System.exit(0);
    }
  }
Пример #7
0
  /**
   * Builds the training and test data matrices and normalizes the training input values.
   *
   * <p>For the training set the method fills {@code datosTrain} (rescaled inputs), {@code
   * clasesTrain} (class labels), {@code nulosTrain} (missing-value flags), {@code nominalTrain}
   * (raw nominal values) and {@code realTrain} (raw non-nominal values). Missing inputs are
   * replaced by 0.0 before rescaling. Nominal inputs are divided by (number of nominal values - 1);
   * other inputs are shifted by the attribute minimum and divided by the attribute range, keeping
   * the raw value when the result is NaN.
   *
   * <p>For the test set only {@code datosTest} and {@code clasesTest} are filled: missing inputs
   * are zeroed but the values are not rescaled in this method.
   *
   * @throws keel.Algorithms.Preprocess.Basic.CheckException if the dataset does not have exactly
   *     one output attribute, or if that output attribute is of real type.
   */
  protected void normalizar() throws CheckException {

    /* Check that the dataset corresponds to a classification problem */

    if (Attributes.getOutputNumAttributes() < 1) {
      throw new CheckException(
          "This dataset haven?t outputs, so it not corresponding to a classification problem.");
    } else if (Attributes.getOutputNumAttributes() > 1) {
      throw new CheckException("This dataset have more of one output.");
    }

    // The attribute inspected here is the OUTPUT one, so the message must say so.
    if (Attributes.getOutputAttribute(0).getType() == Attribute.REAL) {
      throw new CheckException(
          "This dataset have an output attribute with floating values, so it not corresponding to a classification problem.");
    }

    entradas = Attributes.getInputAttributes();
    salida = Attributes.getOutputAttribute(0);
    nEntradas = Attributes.getInputNumAttributes();

    // The second whitespace-separated token of the header is stored as the relation name
    // (presumably the first one is the "@relation" keyword - verify against the file format).
    StringTokenizer tokens = new StringTokenizer(training.getHeader(), " \n\r");
    tokens.nextToken();
    relation = tokens.nextToken();

    int numTrain = training.getNumInstances();
    int numInputs = Attributes.getInputNumAttributes();

    datosTrain = new double[numTrain][numInputs];
    clasesTrain = new int[numTrain];

    nulosTrain = new boolean[numTrain][numInputs];
    nominalTrain = new int[numTrain][numInputs];
    realTrain = new double[numTrain][numInputs];

    for (int i = 0; i < numTrain; i++) {
      Instance instance = training.getInstance(i);
      boolean[] nulls = instance.getInputMissingValues();
      datosTrain[i] = instance.getAllInputValues();

      // Missing inputs are recorded and treated as 0.0 before the rescaling below.
      for (int j = 0; j < nulls.length; j++) {
        if (nulls[j]) {
          datosTrain[i][j] = 0.0;
          nulosTrain[i][j] = true;
        }
      }

      // Only the first (and only) output is used as the class label.
      double[] outputs = instance.getAllOutputValues();
      clasesTrain[i] = (int) outputs[0];

      for (int k = 0; k < datosTrain[i].length; k++) {
        Attribute attribute = Attributes.getInputAttribute(k);
        if (attribute.getType() == Attribute.NOMINAL) {
          nominalTrain[i][k] = (int) datosTrain[i][k];
          // NOTE(review): a nominal attribute with a single value makes this divisor zero.
          datosTrain[i][k] /= attribute.getNominalValuesList().size() - 1;
        } else {
          realTrain[i][k] = datosTrain[i][k];
          datosTrain[i][k] -= attribute.getMinAttribute();
          datosTrain[i][k] /= attribute.getMaxAttribute() - attribute.getMinAttribute();
          // 0/0 (value at the minimum of a zero-width range) yields NaN: keep the raw value.
          if (Double.isNaN(datosTrain[i][k])) {
            datosTrain[i][k] = realTrain[i][k];
          }
        }
      }
    }

    int numTest = test.getNumInstances();
    datosTest = new double[numTest][numInputs];
    clasesTest = new int[numTest];

    for (int i = 0; i < numTest; i++) {
      Instance instance = test.getInstance(i);
      boolean[] nulls = instance.getInputMissingValues();
      datosTest[i] = instance.getAllInputValues();
      for (int j = 0; j < nulls.length; j++) {
        if (nulls[j]) {
          datosTest[i][j] = 0.0;
        }
      }
      double[] outputs = instance.getAllOutputValues();
      clasesTest[i] = (int) outputs[0];
    }
  } // end-method
Пример #8
0
  /**
   * Loads a dataset file for a classification problem and caches its values.
   *
   * <p>The file is read through {@code IS}. Every input value is copied into {@code X}, its
   * missing-value flag into {@code missing}, the first output (cast to {@code int}) into {@code
   * C}, and the smallest / largest value seen for each input attribute into {@code iMinimum} /
   * {@code iMaximum}. {@code nClasses} ends up as the largest class label found plus one.
   *
   * <p>A warning is printed when the dataset has more than one output (only the first is used) or
   * no output at all (every class label is then 0).
   *
   * <p>Any exception raised while loading or copying is caught inside this method, reported on
   * standard output together with its stack trace, and not rethrown.
   *
   * @param nfejemplos Name of the dataset file
   * @param train Whether the dataset file is for training ({@code true}) or for test
   * @throws java.io.IOException declared for callers; note that the body currently catches every
   *     exception itself.
   */
  public void processClassifierDataset(String nfejemplos, boolean train) throws IOException {
    try {
      // Parse the file into the shared instance set.
      IS.readSet(nfejemplos, train);

      nData = IS.getNumInstances();
      nInputs = Attributes.getInputNumAttributes();
      nVariables = nInputs + Attributes.getOutputNumAttributes();

      // Exactly one output variable is expected; anything else only triggers a warning.
      if (Attributes.getOutputNumAttributes() > 1) {
        System.out.println("This algorithm can not process MIMO datasets");
        System.out.println("All outputs but the first one will be removed");
      }

      boolean noOutputs = Attributes.getOutputNumAttributes() < 1;
      if (noOutputs) {
        System.out.println("This algorithm can not process datasets without outputs");
        System.out.println("Zero-valued output generated");
      }

      // Local copies of the data, the missing-value flags and the class labels.
      X = new double[nData][nInputs];
      missing = new boolean[nData][nInputs];
      C = new int[nData];

      // Per-attribute extremes of the inputs.
      iMaximum = new double[nInputs];
      iMinimum = new double[nInputs];

      // The output range is not computed for classification problems.
      oMaximum = 0;
      oMinimum = 0;

      nClasses = 0;
      for (int row = 0; row < X.length; row++) {
        Instance current = IS.getInstance(row);
        // The first row seeds the extremes, whatever its values are.
        boolean firstRow = (row == 0);
        for (int col = 0; col < nInputs; col++) {
          double value = IS.getInputNumericValue(row, col);
          X[row][col] = value;
          missing[row][col] = current.getInputMissingValues(col);
          if (firstRow || value > iMaximum[col]) {
            iMaximum[col] = value;
          }
          if (firstRow || value < iMinimum[col]) {
            iMinimum[col] = value;
          }
        }

        C[row] = noOutputs ? 0 : (int) IS.getOutputNumericValue(row, 0);

        // Track the largest label; it becomes the class count after the loop.
        if (C[row] > nClasses) {
          nClasses = C[row];
        }
      }
      nClasses++;
      System.out.println("Number of classes=" + nClasses);

    } catch (Exception e) {
      System.out.println("DBG: Exception in readSet");
      e.printStackTrace();
    }
  }
Пример #9
0
  /**
   * Processes the training and test files provided in the parameters file to the constructor.
   *
   * <p>The training set is clustered with K centers: the initial centers are instances drawn at
   * random (instances with missing values are skipped unless every instance has one), then
   * instances are re-assigned and centers recomputed until the accumulated distance stops
   * changing, drops to {@code minError} or {@code maxIter} iterations are reached. Every value is
   * then written into {@code X} as text, missing values being replaced by the value of the
   * instance's cluster center, and the result is written with {@code write_results}.
   *
   * <p>When the test file name differs from the training one, the test set is transformed the same
   * way, using for each instance the center closest to it.
   *
   * <p>Any exception terminates the process with exit code -1.
   */
  public void process() {
    int actual;
    Randomize rnd = new Randomize();
    Instance ex;
    gCenter kmeans = null;
    int iterations;
    double E;
    double prevE;
    int totalMissing;
    boolean allMissing;

    rnd.setSeed(semilla);
    // PROCESS
    try {

      // Load the training dataset in memory
      IS.readSet(input_train_name, true);

      ndatos = IS.getNumInstances();
      nvariables = Attributes.getNumAttributes();
      nentradas = Attributes.getInputNumAttributes();
      nsalidas = Attributes.getOutputNumAttributes();

      X = new String[ndatos][nvariables]; // matrix with transformed data
      kmeans = new gCenter(K, ndatos, nvariables);

      timesSeen = new FreqList[nvariables];
      mostCommon = new String[nvariables];

      // first, we choose k 'means' randomly from all instances
      totalMissing = 0;
      for (int i = 0; i < ndatos; i++) {
        if (IS.getInstance(i).existsAnyMissingValue()) {
          totalMissing++;
        }
      }
      allMissing = (totalMissing == ndatos);
      for (int numMeans = 0; numMeans < K; numMeans++) {
        // Prefer complete instances as seeds; accept incomplete ones only if nothing else exists.
        do {
          actual = (int) (ndatos * rnd.Rand());
          ex = IS.getInstance(actual);
        } while (ex.existsAnyMissingValue() && !allMissing);

        kmeans.copyCenter(ex, numMeans);
      }

      // now, iterate adjusting clusters' centers and instances to them
      prevE = 0;
      iterations = 0;
      do {
        for (int i = 0; i < ndatos; i++) {
          kmeans.setClusterOf(IS.getInstance(i), i);
        }
        // set new centers
        kmeans.recalculateCenters(IS);
        // accumulate the distance of every instance to its center
        E = 0;
        for (int i = 0; i < ndatos; i++) {
          E += kmeans.distance(IS.getInstance(i), kmeans.getClusterOf(i));
        }
        iterations++;
        // No change since the previous pass: force the loop to end.
        if (Math.abs(prevE - E) == 0) {
          iterations = maxIter;
        } else {
          prevE = E;
        }
      } while (E > minError && iterations < maxIter);

      for (int i = 0; i < ndatos; i++) {
        fillImputedRow(IS.getInstance(i), i, kmeans, true);
      }
    } catch (Exception e) {
      System.out.println("Dataset exception = " + e);
      e.printStackTrace();
      System.exit(-1);
    }
    write_results(output_train_name);
    /** ************************************************************************************ */
    // does a test file associated exist?
    if (input_train_name.compareTo(input_test_name) != 0) {
      try {

        // Load the test dataset in memory
        IStest.readSet(input_test_name, false);

        ndatos = IStest.getNumInstances();
        nvariables = Attributes.getNumAttributes();
        nentradas = Attributes.getInputNumAttributes();
        nsalidas = Attributes.getOutputNumAttributes();

        // X was sized for the training set; a larger test set would overflow it and a smaller
        // one would keep stale training rows, so allocate a matrix of the right size.
        X = new String[ndatos][nvariables];

        for (int i = 0; i < ndatos; i++) {
          fillImputedRow(IStest.getInstance(i), i, kmeans, false);
        }
      } catch (Exception e) {
        System.out.println("Dataset exception = " + e);
        e.printStackTrace();
        System.exit(-1);
      }
      write_results(output_test_name);
    }
  }

  /**
   * Writes row {@code row} of {@code X} from {@code inst}, replacing missing values by the value
   * stored in the instance's cluster center.
   *
   * @param inst Instance to transform
   * @param row Row of {@code X} to fill (the index of the instance in its set)
   * @param kmeans Cluster model built from the training set
   * @param clustered {@code true} when {@code kmeans} already holds a cluster assignment for index
   *     {@code row} (training instances); {@code false} to use the center closest to {@code inst}
   */
  private void fillImputedRow(Instance inst, int row, gCenter kmeans, boolean clustered) {
    int in = 0;
    int out = 0;
    int actual;

    for (int j = 0; j < nvariables; j++) {
      Attribute a = Attributes.getAttribute(j);

      direccion = a.getDirectionAttribute();
      tipo = a.getType();

      if (direccion == Attribute.INPUT) {
        if (inst.getInputMissingValues(in)) {
          actual = clustered ? kmeans.getClusterOf(row) : nearestCluster(inst, kmeans);
          X[row][j] = new String(kmeans.valueAt(actual, j));
        } else if (tipo != Attribute.NOMINAL) {
          X[row][j] = String.valueOf(inst.getInputRealValues(in));
        } else {
          X[row][j] = inst.getInputNominalValues(in);
        }
        in++;
      } else if (direccion == Attribute.OUTPUT) {
        if (inst.getOutputMissingValues(out)) {
          actual = clustered ? kmeans.getClusterOf(row) : nearestCluster(inst, kmeans);
          X[row][j] = new String(kmeans.valueAt(actual, j));
        } else if (tipo != Attribute.NOMINAL) {
          X[row][j] = String.valueOf(inst.getOutputRealValues(out));
        } else {
          X[row][j] = inst.getOutputNominalValues(out);
        }
        out++;
      }
    }
  }

  /**
   * Returns the index of the center of {@code kmeans} closest to {@code inst} (the first one in
   * case of a tie).
   */
  private int nearestCluster(Instance inst, gCenter kmeans) {
    int best = 0;
    double bestDistance = kmeans.distance(inst, 0);
    for (int c = 1; c < K; c++) {
      double d = kmeans.distance(inst, c);
      if (d < bestDistance) {
        bestDistance = d;
        best = c;
      }
    }
    return best;
  }
Пример #10
0
  /**
   * Computes the distance between two instances (without previous normalization).
   *
   * <p>Each attribute that is not missing in {@code i} contributes the squared difference of the
   * two values when it is not nominal, or 1 when it is nominal and the two values differ.
   * Attributes missing in {@code i} contribute nothing. The sum is returned as is: no square root
   * is taken, so the result is a squared distance.
   *
   * <p>NOTE(review): only the missing flags of {@code i} are checked; a value missing in {@code j}
   * is still read - confirm callers always pass the possibly-incomplete instance first.
   *
   * @param i First instance
   * @param j Second instance
   * @return The accumulated (squared) distance between i and j
   */
  private double distance(Instance i, Instance j) {
    double dist = 0;
    int in = 0;
    int out = 0;

    for (int l = 0; l < nvariables; l++) {
      Attribute a = Attributes.getAttribute(l);

      direccion = a.getDirectionAttribute();
      tipo = a.getType();

      if (direccion == Attribute.INPUT) {
        if (!i.getInputMissingValues(in)) {
          if (tipo != Attribute.NOMINAL) {
            // real value, accumulate the squared difference
            double diff = i.getInputRealValues(in) - j.getInputRealValues(in);
            dist += diff * diff;
          } else {
            // Nominal values are strings: compare their contents, not their references.
            String vi = i.getInputNominalValues(in);
            String vj = j.getInputNominalValues(in);
            if (vi == null ? vj != null : !vi.equals(vj)) {
              dist += 1;
            }
          }
        }
        in++;
      } else if (direccion == Attribute.OUTPUT) {
        if (!i.getOutputMissingValues(out)) {
          if (tipo != Attribute.NOMINAL) {
            double diff = i.getOutputRealValues(out) - j.getOutputRealValues(out);
            dist += diff * diff;
          } else {
            String vi = i.getOutputNominalValues(out);
            String vj = j.getOutputNominalValues(out);
            if (vi == null ? vj != null : !vi.equals(vj)) {
              dist += 1;
            }
          }
        }
        out++;
      }
    }
    return dist;
  }