Exemple #1
0
  private void normalizarTest() {

    int i, j, cont = 0, k;
    Instance temp;
    boolean hecho;
    double caja[];
    StringTokenizer tokens;
    boolean nulls[];

    /* Check if dataset corresponding with a classification problem */

    if (Attributes.getOutputNumAttributes() < 1) {
      System.err.println(
          "This dataset haven´t outputs, so it not corresponding to a classification problem.");
      System.exit(-1);
    } else if (Attributes.getOutputNumAttributes() > 1) {
      System.err.println("This dataset have more of one output.");
      System.exit(-1);
    }

    if (Attributes.getOutputAttribute(0).getType() == Attribute.REAL) {
      System.err.println(
          "This dataset have an input attribute with floating values, so it not corresponding to a classification problem.");
      System.exit(-1);
    }

    datosTest = new double[test.getNumInstances()][Attributes.getInputNumAttributes()];
    clasesTest = new int[test.getNumInstances()];
    caja = new double[1];

    for (i = 0; i < test.getNumInstances(); i++) {
      temp = test.getInstance(i);
      nulls = temp.getInputMissingValues();
      datosTest[i] = test.getInstance(i).getAllInputValues();
      for (j = 0; j < nulls.length; j++) if (nulls[j]) datosTest[i][j] = 0.0;
      caja = test.getInstance(i).getAllOutputValues();
      clasesTest[i] = (int) caja[0];
      for (k = 0; k < datosTest[i].length; k++) {
        if (Attributes.getInputAttribute(k).getType() == Attribute.NOMINAL) {
          datosTest[i][k] /= Attributes.getInputAttribute(k).getNominalValuesList().size() - 1;
        } else {
          datosTest[i][k] -= Attributes.getInputAttribute(k).getMinAttribute();
          datosTest[i][k] /=
              Attributes.getInputAttribute(k).getMaxAttribute()
                  - Attributes.getInputAttribute(k).getMinAttribute();
        }
      }
    }
  }
Exemple #2
0
  private void normalizarReferencia() throws CheckException {

    int i, j, cont = 0, k;
    Instance temp;
    boolean hecho;
    double caja[];
    StringTokenizer tokens;
    boolean nulls[];

    /*Check if dataset corresponding with a classification problem*/

    if (Attributes.getOutputNumAttributes() < 1) {
      throw new CheckException(
          "This dataset haven´t outputs, so it not corresponding to a classification problem.");
    } else if (Attributes.getOutputNumAttributes() > 1) {
      throw new CheckException("This dataset have more of one output.");
    }

    if (Attributes.getOutputAttribute(0).getType() == Attribute.REAL) {
      throw new CheckException(
          "This dataset have an input attribute with floating values, so it not corresponding to a classification problem.");
    }

    datosReferencia = new double[referencia.getNumInstances()][Attributes.getInputNumAttributes()];
    clasesReferencia = new int[referencia.getNumInstances()];
    caja = new double[1];

    /*Get the number of instances that have a null value*/
    for (i = 0; i < referencia.getNumInstances(); i++) {
      temp = referencia.getInstance(i);
      nulls = temp.getInputMissingValues();
      datosReferencia[i] = referencia.getInstance(i).getAllInputValues();
      for (j = 0; j < nulls.length; j++) if (nulls[j]) datosReferencia[i][j] = 0.0;
      caja = referencia.getInstance(i).getAllOutputValues();
      clasesReferencia[i] = (int) caja[0];
      for (k = 0; k < datosReferencia[i].length; k++) {
        if (Attributes.getInputAttribute(k).getType() == Attribute.NOMINAL) {
          datosReferencia[i][k] /=
              Attributes.getInputAttribute(k).getNominalValuesList().size() - 1;
        } else {
          datosReferencia[i][k] -= Attributes.getInputAttribute(k).getMinAttribute();
          datosReferencia[i][k] /=
              Attributes.getInputAttribute(k).getMaxAttribute()
                  - Attributes.getInputAttribute(k).getMinAttribute();
        }
      }
    }
  }
Exemple #3
0
  public void ejecutar() {

    int i, j, l, m;
    double alfai;
    int nClases;

    int claseObt;

    boolean marcas[];
    boolean notFound;

    int init;
    int clasSel[];

    int baraje[];

    int pos, tmp;
    String instanciasIN[];
    String instanciasOUT[];

    long tiempo = System.currentTimeMillis();

    /* Getting the number of differents classes */

    nClases = 0;

    for (i = 0; i < clasesTrain.length; i++) if (clasesTrain[i] > nClases) nClases = clasesTrain[i];

    nClases++;

    /* Shuffle the train set */

    baraje = new int[datosTrain.length];

    Randomize.setSeed(semilla);

    for (i = 0; i < datosTrain.length; i++) baraje[i] = i;

    for (i = 0; i < datosTrain.length; i++) {

      pos = Randomize.Randint(i, datosTrain.length - 1);

      tmp = baraje[i];

      baraje[i] = baraje[pos];

      baraje[pos] = tmp;
    }

    /*
     * Inicialization of the flagged instaces vector for a posterior
     * elimination
     */

    marcas = new boolean[datosTrain.length];

    for (i = 0; i < datosTrain.length; i++) marcas[i] = false;

    if (datosTrain.length > 0) {

      // marcas[baraje[0]] = true; //the first instance is included always

      nSel = n_p;
      if (nSel < nClases) nSel = nClases;

    } else {

      System.err.println("Input dataset is empty");

      nSel = 0;
    }
    clasSel = new int[nClases];
    System.out.print("Selecting initial neurons... ");
    // at least, there must be 1 neuron of each class at the beginning
    init = nClases;
    for (i = 0; i < nClases && i < datosTrain.length; i++) {
      pos = Randomize.Randint(0, datosTrain.length - 1);
      tmp = 0;
      while ((clasesTrain[pos] != i || marcas[pos]) && tmp < datosTrain.length) {
        pos = (pos + 1) % datosTrain.length;
        tmp++;
      }
      if (tmp < datosTrain.length) marcas[pos] = true;
      else init--;
      // clasSel[i] = i;
    }
    for (i = init; i < Math.min(nSel, datosTrain.length); i++) {
      tmp = 0;
      pos = Randomize.Randint(0, datosTrain.length - 1);
      while (marcas[pos]) {
        pos = (pos + 1) % datosTrain.length;
        tmp++;
      }
      // if(i<nClases){
      // notFound = true;
      // do{
      // for(j=i-1;j>=0 && notFound;j--){
      // if(clasSel[j] == clasesTrain[pos])
      // notFound = false;
      // }
      // if(!notFound)
      // pos = Randomize.Randint (0, datosTrain.length-1);
      // }while(!notFound);
      // }
      // clasSel[i] = clasesTrain[pos];
      marcas[pos] = true;
      init++;
    }
    nSel = init;
    System.out.println("Initial neurons selected: " + nSel);

    /* Building of the S set from the flags */

    conjS = new double[nSel][datosTrain[0].length];

    clasesS = new int[nSel];

    for (m = 0, l = 0; m < datosTrain.length; m++) {

      if (marcas[m]) { // the instance must be copied to the solution

        for (j = 0; j < datosTrain[0].length; j++) {

          conjS[l][j] = datosTrain[m][j];
        }

        clasesS[l] = clasesTrain[m];

        l++;
      }
    }

    alfai = alpha;
    boolean change = true;
    /* Body of the LVQ algorithm. */

    // Train the network
    for (int it = 0; it < T && change; it++) {
      change = false;
      alpha = alfai;
      for (i = 1; i < datosTrain.length; i++) {
        // search for the nearest neuron to training instance
        pos = NN(nSel, conjS, datosTrain[baraje[i]]);
        // nearest neuron labels correctly the class of training
        // instance?

        if (clasesS[pos] != clasesTrain[baraje[i]]) { // NO - repel
          // the neuron
          for (j = 0; j < conjS[pos].length; j++) {
            conjS[pos][j] = conjS[pos][j] - alpha * (datosTrain[baraje[i]][j] - conjS[pos][j]);
          }
          change = true;
        } else { // YES - migrate the neuron towards the input vector
          for (j = 0; j < conjS[pos].length; j++) {
            conjS[pos][j] = conjS[pos][j] + alpha * (datosTrain[baraje[i]][j] - conjS[pos][j]);
          }
        }
        alpha = nu * alpha;
      }
      // Shuffle again the training partition
      baraje = new int[datosTrain.length];

      for (i = 0; i < datosTrain.length; i++) baraje[i] = i;

      for (i = 0; i < datosTrain.length; i++) {

        pos = Randomize.Randint(i, datosTrain.length - 1);

        tmp = baraje[i];

        baraje[i] = baraje[pos];

        baraje[pos] = tmp;
      }
    }
    System.out.println(
        "LVQ " + relation + " " + (double) (System.currentTimeMillis() - tiempo) / 1000.0 + "s");
    // Classify the train data set
    instanciasIN = new String[datosReferencia.length];
    instanciasOUT = new String[datosReferencia.length];
    for (i = 0; i < datosReferencia.length; i++) {
      /* Classify the instance selected in this iteration */
      Attribute a = Attributes.getOutputAttribute(0);

      int tipo = a.getType();
      claseObt = KNN.evaluacionKNN2(1, conjS, clasesS, datosReferencia[i], nClases);
      if (tipo != Attribute.NOMINAL) {
        instanciasIN[i] = new String(String.valueOf(clasesReferencia[i]));
        instanciasOUT[i] = new String(String.valueOf(claseObt));
      } else {
        instanciasIN[i] = new String(a.getNominalValue(clasesReferencia[i]));
        instanciasOUT[i] = new String(a.getNominalValue(claseObt));
      }
    }

    escribeSalida(
        ficheroSalida[0], instanciasIN, instanciasOUT, entradas, salida, nEntradas, relation);

    // Classify the test data set
    normalizarTest();
    instanciasIN = new String[datosTest.length];
    instanciasOUT = new String[datosTest.length];
    for (i = 0; i < datosTest.length; i++) {
      /* Classify the instance selected in this iteration */
      Attribute a = Attributes.getOutputAttribute(0);

      int tipo = a.getType();

      claseObt = KNN.evaluacionKNN2(1, conjS, clasesS, datosTest[i], nClases);
      if (tipo != Attribute.NOMINAL) {
        instanciasIN[i] = new String(String.valueOf(clasesTest[i]));
        instanciasOUT[i] = new String(String.valueOf(claseObt));
      } else {
        instanciasIN[i] = new String(a.getNominalValue(clasesTest[i]));
        instanciasOUT[i] = new String(a.getNominalValue(claseObt));
      }
    }

    escribeSalida(
        ficheroSalida[1], instanciasIN, instanciasOUT, entradas, salida, nEntradas, relation);

    // Print the network to a file
    printNetworkToFile(ficheroSalida[2], referencia.getHeader());
  }
  /**
   * This function builds the data matrix for reference data and normalizes inputs values
   *
   * @throws keel.Algorithms.Preprocess.Basic.CheckException Can not be normalized.
   */
  protected void normalizar() throws CheckException {

    int i, j, k;
    Instance temp;
    double caja[];
    StringTokenizer tokens;
    boolean nulls[];

    /*Check if dataset corresponding with a classification problem*/

    if (Attributes.getOutputNumAttributes() < 1) {
      throw new CheckException(
          "This dataset haven?t outputs, so it not corresponding to a classification problem.");
    } else if (Attributes.getOutputNumAttributes() > 1) {
      throw new CheckException("This dataset have more of one output.");
    }

    if (Attributes.getOutputAttribute(0).getType() == Attribute.REAL) {
      throw new CheckException(
          "This dataset have an input attribute with floating values, so it not corresponding to a classification problem.");
    }

    entradas = Attributes.getInputAttributes();
    salida = Attributes.getOutputAttribute(0);
    nEntradas = Attributes.getInputNumAttributes();
    tokens = new StringTokenizer(training.getHeader(), " \n\r");
    tokens.nextToken();
    relation = tokens.nextToken();

    datosTrain = new double[training.getNumInstances()][Attributes.getInputNumAttributes()];
    clasesTrain = new int[training.getNumInstances()];
    caja = new double[1];

    nulosTrain = new boolean[training.getNumInstances()][Attributes.getInputNumAttributes()];
    nominalTrain = new int[training.getNumInstances()][Attributes.getInputNumAttributes()];
    realTrain = new double[training.getNumInstances()][Attributes.getInputNumAttributes()];

    for (i = 0; i < training.getNumInstances(); i++) {
      temp = training.getInstance(i);
      nulls = temp.getInputMissingValues();
      datosTrain[i] = training.getInstance(i).getAllInputValues();
      for (j = 0; j < nulls.length; j++)
        if (nulls[j]) {
          datosTrain[i][j] = 0.0;
          nulosTrain[i][j] = true;
        }
      caja = training.getInstance(i).getAllOutputValues();
      clasesTrain[i] = (int) caja[0];
      for (k = 0; k < datosTrain[i].length; k++) {
        if (Attributes.getInputAttribute(k).getType() == Attribute.NOMINAL) {
          nominalTrain[i][k] = (int) datosTrain[i][k];
          datosTrain[i][k] /= Attributes.getInputAttribute(k).getNominalValuesList().size() - 1;
        } else {
          realTrain[i][k] = datosTrain[i][k];
          datosTrain[i][k] -= Attributes.getInputAttribute(k).getMinAttribute();
          datosTrain[i][k] /=
              Attributes.getInputAttribute(k).getMaxAttribute()
                  - Attributes.getInputAttribute(k).getMinAttribute();
          if (Double.isNaN(datosTrain[i][k])) {
            datosTrain[i][k] = realTrain[i][k];
          }
        }
      }
    }

    datosTest = new double[test.getNumInstances()][Attributes.getInputNumAttributes()];
    clasesTest = new int[test.getNumInstances()];
    caja = new double[1];

    for (i = 0; i < test.getNumInstances(); i++) {
      temp = test.getInstance(i);
      nulls = temp.getInputMissingValues();
      datosTest[i] = test.getInstance(i).getAllInputValues();
      for (j = 0; j < nulls.length; j++)
        if (nulls[j]) {
          datosTest[i][j] = 0.0;
        }
      caja = test.getInstance(i).getAllOutputValues();
      clasesTest[i] = (int) caja[0];
    }
  } // end-method