예제 #1
0
  /**
   * Balances the training set by random oversampling and writes the results to the output files.
   *
   * <p>Instances labelled 0 are counted against all the others. The smaller of the two groups is
   * treated as the minority: its label is 0, unless the 0-labelled group is strictly the larger
   * one, in which case the label 1 is used. Indices of minority instances are then picked through
   * {@code Randomize.Randint} (seeded with {@code semilla}) and the picked rows are appended to a
   * copy of the training data until it holds twice as many rows as the larger group. The enlarged
   * set is written to {@code ficheroSalida[0]} and the test set to {@code ficheroSalida[1]}.
   */
  public void run() {

    long inicio = System.currentTimeMillis();

    /* Tally the 0-labelled instances against every other label */
    int ceros = 0;
    int resto = 0;
    for (int etiqueta : clasesTrain) {
      if (etiqueta == 0) {
        ceros++;
      } else {
        resto++;
      }
    }

    /* The smaller group is the minority; label 0 wins ties */
    int etiquetaMin;
    int nMin;
    int nMaj;
    if (ceros > resto) {
      etiquetaMin = 1;
      nMin = resto;
      nMaj = ceros;
    } else {
      etiquetaMin = 0;
      nMin = ceros;
      nMaj = resto;
    }

    /* Remember where the minority instances live in the training set */
    int[] indicesMin = new int[nMin];
    int ocupados = 0;
    for (int idx = 0; idx < clasesTrain.length; idx++) {
      if (clasesTrain[idx] == etiquetaMin) {
        indicesMin[ocupados++] = idx;
      }
    }

    /* Draw as many minority instances as are needed to even out both groups */
    int[] extras = new int[nMaj - nMin];
    Randomize.setSeed(semilla);
    for (int e = 0; e < extras.length; e++) {
      int elegido = Randomize.Randint(0, nMin - 1);
      extras[e] = indicesMin[elegido];
    }

    /* Output set: the untouched training data followed by the drawn copies */
    int total = 2 * nMaj;
    int nAtributos = datosTrain[0].length;
    double[][] conjS = new double[total][nAtributos];
    int[] clasesS = new int[total];

    int fila = 0;
    while (fila < datosTrain.length) {
      System.arraycopy(datosTrain[fila], 0, conjS[fila], 0, nAtributos);
      clasesS[fila] = clasesTrain[fila];
      fila++;
    }
    for (int m = 0; fila < total; fila++, m++) {
      int origen = extras[m];
      System.arraycopy(datosTrain[origen], 0, conjS[fila], 0, nAtributos);
      clasesS[fila] = clasesTrain[origen];
    }

    double segundos = (double) (System.currentTimeMillis() - inicio) / 1000.0;
    System.out.println("RandomOverSampling " + relation + " " + segundos + "s");

    OutputIS.escribeSalida(ficheroSalida[0], conjS, clasesS, entradas, salida, nEntradas, relation);
    OutputIS.escribeSalida(ficheroSalida[1], test, entradas, salida, nEntradas, relation);
  }
예제 #2
0
파일: SGA.java 프로젝트: Navieclipse/KEEL
  /**
   * Executes the steady-state genetic algorithm and writes the selected instances to the output
   * files.
   *
   * <p>A population of {@code tamPoblacion} chromosomes is created and evaluated. Until
   * {@code nEval} evaluations have been spent, two parents are chosen (binary tournament when
   * {@code torneo} is set, linear ranking otherwise), crossed with probability {@code pCruce} or
   * copied, mutated, evaluated if needed, and written over the last two slots of the sorted
   * population. The instances flagged by the chromosome at index 0 of the sorted population are
   * written to {@code ficheroSalida[0]}; the test set is written to {@code ficheroSalida[1]}.
   *
   * <p>NOTE(review): this method relies on {@code Arrays.sort(poblacion)} placing the best
   * chromosome first and the worst last, as the "replace the two worst" step already assumes.
   * The tournament branch needs {@code tamPoblacion >= 2}; with a single individual the
   * distinct-competitor loop would never finish.
   */
  public void ejecutar() {

    int i, j, l;
    int nClases;
    double conjR[][];
    int conjN[][];
    boolean conjM[][];
    int clasesS[];
    int nSel;
    Cromosoma poblacion[];
    int ev = 0;
    double prob[];
    double NUmax = 1.5;
    double NUmin = 0.5; // used for lineal ranking
    double aux;
    double pos1, pos2;
    int sel1, sel2, comp1, comp2;
    Cromosoma newPob[];

    long tiempo = System.currentTimeMillis();

    /*Getting the number of different clases (largest label + 1)*/
    nClases = 0;
    for (i = 0; i < clasesTrain.length; i++) if (clasesTrain[i] > nClases) nClases = clasesTrain[i];
    nClases++;

    /*Random inicialization of the population*/
    Randomize.setSeed(semilla);
    poblacion = new Cromosoma[tamPoblacion];
    for (i = 0; i < tamPoblacion; i++) poblacion[i] = new Cromosoma(datosTrain.length);

    /*Initial evaluation of the population*/
    for (i = 0; i < tamPoblacion; i++)
      poblacion[i].evalua(
          datosTrain,
          realTrain,
          nominalTrain,
          nulosTrain,
          clasesTrain,
          alfa,
          kNeigh,
          nClases,
          distanceEu);

    if (torneo) {
      while (ev < nEval) {
        newPob = new Cromosoma[2];

        /*Binary tournament selection: two distinct competitors per parent, the fitter one wins*/
        comp1 = Randomize.Randint(0, tamPoblacion - 1);
        do {
          comp2 = Randomize.Randint(0, tamPoblacion - 1);
        } while (comp2 == comp1);
        if (poblacion[comp1].getCalidad() > poblacion[comp2].getCalidad()) sel1 = comp1;
        else sel1 = comp2;
        comp1 = Randomize.Randint(0, tamPoblacion - 1);
        do {
          comp2 = Randomize.Randint(0, tamPoblacion - 1);
        } while (comp2 == comp1);
        if (poblacion[comp1].getCalidad() > poblacion[comp2].getCalidad()) sel2 = comp1;
        else sel2 = comp2;

        if (Randomize.Rand() < pCruce) { // there is cross
          crucePMX(poblacion, newPob, sel1, sel2);
        } else { // there is not cross
          newPob[0] = new Cromosoma(datosTrain.length, poblacion[sel1]);
          newPob[1] = new Cromosoma(datosTrain.length, poblacion[sel2]);
        }

        /*Mutation of the cromosomes*/
        for (i = 0; i < 2; i++) newPob[i].mutacion(pMutacion1to0, pMutacion0to1);

        /*Evaluation of the offspring; only real evaluations consume budget*/
        for (i = 0; i < 2; i++)
          if (!(newPob[i].estaEvaluado())) {
            newPob[i].evalua(
                datosTrain,
                realTrain,
                nominalTrain,
                nulosTrain,
                clasesTrain,
                alfa,
                kNeigh,
                nClases,
                distanceEu);
            ev++;
          }

        /*Replace the two worst*/
        Arrays.sort(poblacion);
        poblacion[tamPoblacion - 2] = new Cromosoma(datosTrain.length, newPob[0]);
        poblacion[tamPoblacion - 1] = new Cromosoma(datosTrain.length, newPob[1]);
      }
    } else {
      /*Get the cumulative probabilities of lineal ranking in case of not use binary tournament*/
      prob = new double[tamPoblacion];
      for (i = 0; i < tamPoblacion; i++) {
        aux = (double) (NUmax - NUmin) * ((double) i / (tamPoblacion - 1));
        prob[i] = (double) (1.0 / (tamPoblacion)) * (NUmax - aux);
      }
      for (i = 1; i < tamPoblacion; i++) prob[i] = prob[i] + prob[i - 1];

      while (ev < nEval) {
        /*Sort the population by quality criterion*/
        Arrays.sort(poblacion);

        newPob = new Cromosoma[2];
        pos1 = Randomize.Rand();
        pos2 = Randomize.Rand();
        /* The scan stops at the last index: the cumulative sum may end slightly below 1.0
        because of rounding, and a draw above it must still map to a valid individual. */
        for (j = 0; j < tamPoblacion - 1 && prob[j] < pos1; j++) ;
        sel1 = j;
        for (j = 0; j < tamPoblacion - 1 && prob[j] < pos2; j++) ;
        sel2 = j;

        if (Randomize.Rand() < pCruce) { // there is cross
          crucePMX(poblacion, newPob, sel1, sel2);
        } else { // there is not cross
          newPob[0] = new Cromosoma(datosTrain.length, poblacion[sel1]);
          newPob[1] = new Cromosoma(datosTrain.length, poblacion[sel2]);
        }

        /*Mutation of the cromosomes*/
        for (i = 0; i < 2; i++) newPob[i].mutacion(pMutacion1to0, pMutacion0to1);

        /*Evaluation of the offspring; only real evaluations consume budget*/
        for (i = 0; i < 2; i++)
          if (!(newPob[i].estaEvaluado())) {
            newPob[i].evalua(
                datosTrain,
                realTrain,
                nominalTrain,
                nulosTrain,
                clasesTrain,
                alfa,
                kNeigh,
                nClases,
                distanceEu);
            ev++;
          }

        /*Replace the two worst*/
        poblacion[tamPoblacion - 2] = new Cromosoma(datosTrain.length, newPob[0]);
        poblacion[tamPoblacion - 1] = new Cromosoma(datosTrain.length, newPob[1]);
      }
    }

    /* The last two offspring were written into the tail after the most recent sort, so the
    population is ordered once more before index 0 is taken as the best chromosome. */
    Arrays.sort(poblacion);
    nSel = poblacion[0].genesActivos();

    /*Building of S set from the best cromosome obtained*/
    conjR = new double[nSel][datosTrain[0].length];
    conjN = new int[nSel][datosTrain[0].length];
    conjM = new boolean[nSel][datosTrain[0].length];
    clasesS = new int[nSel];
    for (i = 0, l = 0; i < datosTrain.length; i++) {
      if (poblacion[0].getGen(i)) { // the instance must be copied to the solution
        for (j = 0; j < datosTrain[0].length; j++) {
          conjR[l][j] = realTrain[i][j];
          conjN[l][j] = nominalTrain[i][j];
          conjM[l][j] = nulosTrain[i][j];
        }
        clasesS[l] = clasesTrain[i];
        l++;
      }
    }

    System.out.println(
        "SGA " + relation + " " + (double) (System.currentTimeMillis() - tiempo) / 1000.0 + "s");

    OutputIS.escribeSalida(
        ficheroSalida[0], conjR, conjN, conjM, clasesS, entradas, salida, nEntradas, relation);
    OutputIS.escribeSalida(ficheroSalida[1], test, entradas, salida, nEntradas, relation);
  } // end-method
예제 #3
0
  /**
   * Runs CNN followed by a Tomek-links style cleaning and writes the results to the output files.
   *
   * <p>CNN part: the minority label is 0 unless the 0-labelled instances outnumber the rest, in
   * which case it is 1. The candidate set starts with one instance whose label differs from the
   * minority label plus every minority instance. The training indices are then shuffled and each
   * shuffled instance that is not of the minority class is classified with {@code
   * KNN.evaluacionKNN} against the current candidate set; misclassified ones are added to it.
   *
   * <p>Cleaning part: for every selected instance the closest other selected instance is located
   * with {@code KNN.distancia}. When the two labels differ, the non-minority member of the pair
   * is dropped. The surviving instances are written to {@code ficheroSalida[0]} and the test set
   * to {@code ficheroSalida[1]}.
   */
  public void run() {

    int S[];
    int i, j, l, m;
    int nPos = 0, nNeg = 0;
    int posID;
    int pos;
    int baraje[];
    int tmp;
    double conjS[][];
    int clasesS[];
    int tamS = 0;
    int claseObt;
    int busq;
    boolean marcas[];
    int nSel;
    double conjS2[][];
    int clasesS2[];
    double minDist, dist;

    long tiempo = System.currentTimeMillis();

    /*CNN PART*/

    /*Count of number of positive and negative examples*/
    for (i = 0; i < clasesTrain.length; i++) {
      if (clasesTrain[i] == 0) nPos++;
      else nNeg++;
    }
    if (nPos > nNeg) {
      tmp = nPos;
      nPos = nNeg;
      nNeg = tmp;
      posID = 1;
    } else {
      posID = 0;
    }

    /*Inicialization of the candidates set. Free slots hold Integer.MAX_VALUE so that they
    stay at the end whenever S is sorted.*/
    S = new int[datosTrain.length];
    for (i = 0; i < S.length; i++) S[i] = Integer.MAX_VALUE;

    /*Inserting an element of mayority class: start at a random index and walk forward
    (wrapping around) to the first instance that is not of the minority class*/
    Randomize.setSeed(semilla);
    pos = Randomize.Randint(0, clasesTrain.length - 1);
    while (clasesTrain[pos] == posID) pos = (pos + 1) % clasesTrain.length;
    S[tamS] = pos;
    tamS++;

    /*Insert all subset of minority class*/
    for (i = 0; i < clasesTrain.length; i++) {
      if (clasesTrain[i] == posID) {
        S[tamS] = i;
        tamS++;
      }
    }

    /*Algorithm body. We resort randomly the instances of T and compare with the rest of S.
    If an instance is not classified correctly, it is inserted in S*/
    baraje = new int[datosTrain.length];
    for (i = 0; i < datosTrain.length; i++) baraje[i] = i;
    for (i = 0; i < datosTrain.length; i++) {
      pos = Randomize.Randint(i, clasesTrain.length - 1);
      tmp = baraje[i];
      baraje[i] = baraje[pos];
      baraje[pos] = tmp;
    }

    for (i = 0; i < datosTrain.length; i++) {
      /* Only majority class instances are candidates. The test must look at the shuffled
      instance baraje[i], which is the one classified and inserted below, not at instance i. */
      if (clasesTrain[baraje[i]] != posID) {
        /*Construction of the S set from the previous vector S*/
        conjS = new double[tamS][datosTrain[0].length];
        clasesS = new int[tamS];
        for (j = 0; j < tamS; j++) {
          for (l = 0; l < datosTrain[0].length; l++) conjS[j][l] = datosTrain[S[j]][l];
          clasesS[j] = clasesTrain[S[j]];
        }

        /*Do KNN to the instance*/
        claseObt = KNN.evaluacionKNN(k, conjS, clasesS, datosTrain[baraje[i]], 2);
        if (claseObt != clasesTrain[baraje[i]]) { // fail in the class, it is included in S
          /* Sorting lets binarySearch tell whether the instance is already selected */
          Arrays.sort(S);
          busq = Arrays.binarySearch(S, baraje[i]);
          if (busq < 0) {
            S[tamS] = baraje[i];
            tamS++;
          }
        }
      }
    }

    /*Construction of the S set from the previous vector S*/
    conjS = new double[tamS][datosTrain[0].length];
    clasesS = new int[tamS];
    for (j = 0; j < tamS; j++) {
      for (l = 0; l < datosTrain[0].length; l++) conjS[j][l] = datosTrain[S[j]][l];
      clasesS[j] = clasesTrain[S[j]];
    }

    /*TOMEK LINKS PART*/

    /*Inicialization of the instance flagged vector of the S set (true = keep)*/
    marcas = new boolean[conjS.length];
    for (i = 0; i < conjS.length; i++) {
      marcas[i] = true;
    }
    nSel = conjS.length;

    for (i = 0; i < conjS.length; i++) {
      /* Locate the closest other instance of the selected set */
      minDist = Double.POSITIVE_INFINITY;
      pos = 0;
      for (j = 0; j < conjS.length; j++) {
        if (i != j) {
          dist = KNN.distancia(conjS[i], conjS[j]);
          if (dist < minDist) {
            minDist = dist;
            pos = j;
          }
        }
      }
      /* Labels differ: unflag whichever member of the pair is not of the minority class */
      if (clasesS[i] != clasesS[pos]) {
        if (clasesS[i] != posID) {
          if (marcas[i]) {
            marcas[i] = false;
            nSel--;
          }
        } else {
          if (marcas[pos]) {
            marcas[pos] = false;
            nSel--;
          }
        }
      }
    }

    /*Construction of the S set from the flags*/
    conjS2 = new double[nSel][conjS[0].length];
    clasesS2 = new int[nSel];
    for (m = 0, l = 0; m < conjS.length; m++) {
      if (marcas[m]) { // the instance survives the cleaning
        for (j = 0; j < conjS[0].length; j++) {
          conjS2[l][j] = conjS[m][j];
        }
        clasesS2[l] = clasesS[m];
        l++;
      }
    }

    System.out.println(
        "CNN_TomekLinks "
            + relation
            + " "
            + (double) (System.currentTimeMillis() - tiempo) / 1000.0
            + "s");

    OutputIS.escribeSalida(
        ficheroSalida[0], conjS2, clasesS2, entradas, salida, nEntradas, relation);
    OutputIS.escribeSalida(ficheroSalida[1], test, entradas, salida, nEntradas, relation);
  }
예제 #4
0
파일: Depur.java 프로젝트: Navieclipse/KEEL
  /**
   * Executes the depuration of the training set and writes the results to the output files.
   *
   * <p>For every training instance the classes of its nearest neighbours in the rest of the
   * training set are obtained through {@code KNN.evaluacionKNN3} (called with {@code k}). If some
   * class is held by at least {@code k2} of the returned neighbours, the instance is kept and
   * labelled with that class; otherwise it is discarded. The kept instances are written to
   * {@code ficheroSalida[0]} and the test set to {@code ficheroSalida[1]}.
   *
   * <p>NOTE(review): labels are rewritten in {@code clasesTrain} while the loop is still running,
   * so later instances see the already relabelled neighbours - confirm this is intended.
   */
  public void ejecutar() {

    double conjS[][];
    double conjR[][];
    int conjN[][];
    boolean conjM[][];
    int clasesS[];

    int S[]; /* Binary Vector, to decide if the instance will be included*/
    int i, j, l, cont;
    int nClases;
    int tamS;
    int transformations;

    int claseObt[];
    int clasePredominante;

    long tiempo = System.currentTimeMillis();

    transformations = 0;

    /*Getting the number of different classes (largest label + 1)*/
    nClases = 0;
    for (i = 0; i < clasesTrain.length; i++) if (clasesTrain[i] > nClases) nClases = clasesTrain[i];
    nClases++;

    if (nClases < 2) {
      System.err.println("Input dataset is empty");
      nClases = 0;
    }

    /*Algorithm body.
      First, S=TS.
      Then, for each instance of TS, the first step is to repeat the aplication of the k-nn, and then
      we decide if we need to change the label of the instance or we don't need it.
    */

    /*Inicialization of the candidates set, S=X, where X is the original Training Set*/
    S = new int[datosTrain.length];
    for (i = 0; i < S.length; i++) S[i] = 1; /* All included*/

    tamS = datosTrain.length;

    System.out.print("K= " + k + "\n");
    System.out.print("K'= " + k2 + "\n");

    for (i = 0; i < datosTrain.length; i++) {

      /* The neighbours of i are searched in X - {i}, so every set is rebuilt without i */
      conjS = new double[datosTrain.length - 1][datosTrain[0].length];
      conjR = new double[datosTrain.length - 1][datosTrain[0].length];
      conjN = new int[datosTrain.length - 1][datosTrain[0].length];
      conjM = new boolean[datosTrain.length - 1][datosTrain[0].length];
      clasesS = new int[datosTrain.length - 1];

      cont = 0;
      for (j = 0; j < datosTrain.length; j++) {

        if (i != j) {
          for (l = 0; l < datosTrain[0].length; l++) {

            conjS[cont][l] = datosTrain[j][l];
            conjR[cont][l] = realTrain[j][l];
            conjN[cont][l] = nominalTrain[j][l];
            conjM[cont][l] = nulosTrain[j][l];
          }

          clasesS[cont] = clasesTrain[j];
          cont++;
        }
      }

      /*Do KNN to the instance*/
      claseObt =
          KNN.evaluacionKNN3(
              k,
              conjS,
              conjR,
              conjN,
              conjM,
              clasesS,
              datosTrain[i],
              realTrain[i],
              nominalTrain[i],
              nulosTrain[i],
              nClases,
              distanceEu);

      /* Find the class with the most votes among the returned neighbours. Every neighbour
      counts itself, so max is the real number of neighbours holding the winning class. The
      first class to reach the highest count wins ties. */
      int max = 0;
      clasePredominante = 0;

      for (int m = 0; m < claseObt.length; m++) {
        int votos = 0;

        for (j = 0; j < claseObt.length; j++) {
          if (claseObt[j] == claseObt[m]) {
            votos++;
          }
        }

        if (votos > max) {
          max = votos;
          clasePredominante = claseObt[m];
        }
      }

      if (max >= k2) {
        /* At least k2 neighbours agree: keep the instance with the winning class */

        if (clasePredominante != clasesTrain[i]) transformations++;

        clasesTrain[i] = clasePredominante;
        S[i] = 1;

      } else {
        /* Discard.*/
        tamS--;
        S[i] = 0;
      }
    }

    System.out.print("S size resultante= " + tamS + "\n");
    System.out.print("Transformations = " + transformations + "\n");

    /*Construction of the output sets from the previous vector S*/
    conjR = new double[tamS][datosTrain[0].length];
    conjN = new int[tamS][datosTrain[0].length];
    conjM = new boolean[tamS][datosTrain[0].length];
    clasesS = new int[tamS];

    cont = 0; /* Next free row of the output sets */
    for (j = 0; j < datosTrain.length; j++) {

      if (S[j] == 1) {
        /* The instance is included*/
        for (l = 0; l < datosTrain[0].length; l++) {

          conjR[cont][l] = realTrain[j][l];
          conjN[cont][l] = nominalTrain[j][l];
          conjM[cont][l] = nulosTrain[j][l];
        }

        clasesS[cont] = clasesTrain[j];
        cont++;
      }
    }

    System.out.println(
        "Time elapse: " + (double) (System.currentTimeMillis() - tiempo) / 1000.0 + "s");

    OutputIS.escribeSalida(
        ficheroSalida[0], conjR, conjN, conjM, clasesS, entradas, salida, nEntradas, relation);
    OutputIS.escribeSalida(ficheroSalida[1], test, entradas, salida, nEntradas, relation);
  }