예제 #1
0
파일: ABB.java 프로젝트: Navieclipse/KEEL
  /**
   * Entry point of the ABB search: copies the starting solution into {@code features} (the best
   * subset kept so far) and then explores the search space recursively from that solution.
   *
   * <p>If {@code features} is {@code null} once the search has finished, an error is printed and
   * the JVM is terminated.
   */
  private void runABB() {
    final boolean[] initial = startSolution();
    // the starting solution is the best subset known before the search begins
    System.arraycopy(initial, 0, features, 0, initial.length);

    abb(initial);

    /*
     * NOTE(review): 'features' has already been dereferenced by System.arraycopy above, so this
     * branch looks unreachable; confirm whether a "no feature selected" check was intended.
     */
    if (features != null) {
      return;
    }
    System.err.println("ERROR: It couldn't be possible to find any solution.");
    System.exit(0);
  }
예제 #2
0
파일: LVQ.java 프로젝트: triguero/Keel3.0
  /**
   * Loads the test set into {@code datosTest} (input values) and {@code clasesTest} (class labels),
   * rescaling every input value.
   *
   * <p>The data set must have exactly one output attribute and that attribute must not be of type
   * {@code Attribute.REAL}; otherwise an error is printed and the JVM exits with status -1.
   *
   * <p>Missing input values are replaced by 0.0 before scaling. Nominal inputs are divided by
   * (number of nominal values - 1); every other input is shifted by the attribute minimum and
   * divided by (maximum - minimum). No guard exists for a zero divisor, so an attribute with a
   * single nominal value or with maximum equal to minimum yields NaN or infinite values.
   */
  private void normalizarTest() {

    /* Check if dataset corresponding with a classification problem */

    if (Attributes.getOutputNumAttributes() < 1) {
      System.err.println(
          "This dataset haven´t outputs, so it not corresponding to a classification problem.");
      System.exit(-1);
    } else if (Attributes.getOutputNumAttributes() > 1) {
      System.err.println("This dataset have more of one output.");
      System.exit(-1);
    }

    /* the class attribute is the (only) output attribute, so the message must refer to it */
    if (Attributes.getOutputAttribute(0).getType() == Attribute.REAL) {
      System.err.println(
          "This dataset have an output attribute with floating values, so it not corresponding to a classification problem.");
      System.exit(-1);
    }

    datosTest = new double[test.getNumInstances()][Attributes.getInputNumAttributes()];
    clasesTest = new int[test.getNumInstances()];

    for (int i = 0; i < test.getNumInstances(); i++) {
      Instance temp = test.getInstance(i);
      boolean[] nulls = temp.getInputMissingValues();

      /* raw input values; missing ones are zeroed before scaling */
      datosTest[i] = temp.getAllInputValues();
      for (int j = 0; j < nulls.length; j++) {
        if (nulls[j]) {
          datosTest[i][j] = 0.0;
        }
      }

      /* the class label is the first (and only) output value */
      clasesTest[i] = (int) temp.getAllOutputValues()[0];

      for (int k = 0; k < datosTest[i].length; k++) {
        if (Attributes.getInputAttribute(k).getType() == Attribute.NOMINAL) {
          datosTest[i][k] /= Attributes.getInputAttribute(k).getNominalValuesList().size() - 1;
        } else {
          datosTest[i][k] -= Attributes.getInputAttribute(k).getMinAttribute();
          datosTest[i][k] /=
              Attributes.getInputAttribute(k).getMaxAttribute()
                  - Attributes.getInputAttribute(k).getMinAttribute();
        }
      }
    }
  }
예제 #3
0
  /**
   * Builds one {@code Farchd} model per stored solution pair and records its training AUC.
   *
   * <p>The list returned by {@code getAllSolutions()} is read two entries at a time: the first
   * entry of a pair is decoded as the variable mask and the second as the example mask. For every
   * pair a model is created and executed, its training AUC is stored in {@code weightsAUC}, the
   * index of the model with the highest training AUC is kept in {@code indexBest}, and a summary
   * line is collected. The summary is printed to stdout and written to {@code header + "_AUC.txt"}.
   * Any exception raised while building or running a model is reported on stderr and terminates
   * the JVM with status -1.
   */
  private void generateModel() {
    StringBuilder report = new StringBuilder();
    double bestAuc = 0;
    ArrayList<String> solutions = this.getAllSolutions();
    models = new ArrayList<Farchd>();

    int nEjemplos = train.getnData();
    if (this.instances == this.MAJ) {
      nEjemplos = train.getMajority();
    }
    // initial masks; both are replaced by the decoded solutions inside the loop
    boolean[] variables = new boolean[train.getnInputs()];
    boolean[] ejemplos = new boolean[nEjemplos];
    // solutions come in pairs (feature selection, instance selection)
    this.weightsAUC = new double[solutions.size() / 2];

    int modelIdx = 0;
    for (int s = 0; s < solutions.size(); s += 2) {
      variables = decode(solutions.get(s));
      ejemplos = decode(solutions.get(s + 1));

      int vars = 0;
      for (boolean selected : variables) {
        if (selected) {
          vars++;
        }
      }
      int ejs = 0;
      for (boolean selected : ejemplos) {
        if (selected) {
          ejs++;
        }
      }

      try {
        Farchd model = new Farchd(train, val, test, variables, ejemplos);

        double aucTr = model.execute(true);
        double aucTst = model.getAUCTst();
        if (aucTr > bestAuc) {
          bestAuc = aucTr;
          indexBest = modelIdx;
        }
        this.weightsAUC[modelIdx] = aucTr;

        report
            .append("Solution[")
            .append(modelIdx)
            .append("]:\t")
            .append(vars)
            .append("\t")
            .append(ejs)
            .append("\t")
            .append(aucTr)
            .append("\t")
            .append(aucTst)
            .append("\n");

        models.add(model);
      } catch (Exception e) {
        System.err.println("Liada maxima al generar modelo ");
        e.printStackTrace(System.err);
        System.exit(-1);
      }
      modelIdx++;
    }

    String salida = report.toString();
    System.out.print(salida);
    Files.writeFile(header + "_AUC.txt", salida);
  }
예제 #4
0
파일: LVQ.java 프로젝트: triguero/Keel3.0
  /**
   * Creates the LVQ algorithm from its configuration script.
   *
   * <p>The script is handed to the superclass constructor; afterwards the reference set is read
   * from {@code ficheroReferencia} into {@code referencia} and normalized. Any exception raised
   * while doing so is printed to stderr and terminates the JVM with status 1.
   *
   * @param ficheroScript path of the configuration script
   */
  public LVQ(String ficheroScript) {
    super(ficheroScript);
    try {
      referencia = new InstanceSet();
      referencia.readSet(ficheroReferencia, false);

      /* scale the freshly loaded reference data */
      normalizarReferencia();
    } catch (Exception failure) {
      System.err.println(failure);
      System.exit(1);
    }
  }
예제 #5
0
파일: ABB.java 프로젝트: Navieclipse/KEEL
    /**
     * Constructor of the Class Parametros
     *
     * @param nombreFileParametros is the pathname of input parameter file
     */
    Parametros(String nombreFileParametros) {

      try {
        int i;
        String fichero, linea, tok;
        StringTokenizer lineasFile, tokens;

        /* read the parameter file using Files class */
        fichero = Files.readFile(nombreFileParametros);
        fichero += "\n";

        /* remove all \r characters. it is neccesary for a correst use in Windows and UNIX  */
        fichero = fichero.replace('\r', ' ');

        /* extracts the differents tokens of the file */
        lineasFile = new StringTokenizer(fichero, "\n");

        i = 0;
        while (lineasFile.hasMoreTokens()) {

          linea = lineasFile.nextToken();
          i++;
          tokens = new StringTokenizer(linea, " ,\t");
          if (tokens.hasMoreTokens()) {

            tok = tokens.nextToken();
            if (tok.equalsIgnoreCase("algorithm")) nameAlgorithm = getParamString(tokens);
            else if (tok.equalsIgnoreCase("inputdata")) getInputFiles(tokens);
            else if (tok.equalsIgnoreCase("outputdata")) getOutputFiles(tokens);
            else if (tok.equalsIgnoreCase("seed")) seed = getParamLong(tokens);
            else throw new java.io.IOException("Syntax error on line " + i + ": [" + tok + "]\n");
          }
        }

      } catch (java.io.FileNotFoundException e) {
        System.err.println(e + "Parameter file");
      } catch (java.io.IOException e) {
        System.err.println(e + "Aborting program");
        System.exit(-1);
      }

      /** show the read parameter in the standard output */
      String contents = "-- Parameters echo --- \n";
      contents += "Algorithm name: " + nameAlgorithm + "\n";
      contents += "Input Train File: " + trainFileNameInput + "\n";
      contents += "Input Test File: " + testFileNameInput + "\n";
      contents += "Output Train File: " + trainFileNameOutput + "\n";
      contents += "Output Test File: " + testFileNameOutput + "\n";
      System.out.println(contents);
    }
예제 #6
0
파일: ABB.java 프로젝트: Navieclipse/KEEL
  /**
   * Builds a neighbor of a solution by switching off one of its selected features.
   *
   * <p>Selected features are counted from the right end of the vector, starting at 1; the one
   * whose count equals {@code which} is cleared. When {@code which} is smaller than 1 or larger
   * than the number of selected features, nothing is cleared and an unchanged copy is returned.
   * The input vector is never modified.
   *
   * @param featuresVector solution to generate its neighbor
   * @param which 1-based position, among the selected features counted from the right, of the
   *     feature to remove
   * @return a new array holding the neighbor solution
   */
  private static boolean[] removeOne(boolean featuresVector[], int which) {
    boolean[] neighbor = new boolean[featuresVector.length];
    System.arraycopy(featuresVector, 0, neighbor, 0, neighbor.length);

    int selectedSeen = 0;
    for (int idx = neighbor.length - 1; idx >= 0; idx--) {
      if (!neighbor[idx]) {
        continue;
      }
      selectedSeen++;
      if (selectedSeen == which) {
        neighbor[idx] = false;
        break;
      }
    }

    return neighbor;
  }
예제 #7
0
  /**
   * Launches the algorithm.
   *
   * <p>When {@code somethingWrong} is set, only an error is reported on stderr and nothing else
   * is done (the JVM is deliberately not terminated). Otherwise the a-priori class distribution
   * is computed from the validation set, the NSGA2 search is run when {@code model} is set, the
   * models are generated, and finally the outputs for the validation and test sets are produced
   * and their AUC / average accuracy printed together with the elapsed time.
   */
  public void execute() {
    if (this.somethingWrong) {
      // the program is not executed; System.exit(-1) must not be used here
      System.err.println("An error was found, either the data-set has missing values.");
      System.err.println(
          "Please remove the examples with missing data or apply a MV preprocessing.");
      System.err.println("Aborting the program");
      return;
    }

    // relative frequency of every class in the validation set
    final int nClasses = train.getnClasses();
    aprioriClassDistribution = new double[nClasses];
    for (int c = 0; c < nClasses; c++) {
      aprioriClassDistribution[c] = 1.0 * val.numberInstances(c) / val.size();
    }

    if (model) {
      // the model has not been generated into a file beforehand: run the search now
      NSGA2 search =
          new NSGA2(
              train, seed, populationSize, maxTrials, crossover, mutation, instances, fitness);
      try {
        search.execute();
      } catch (Exception searchFailure) {
        // the failure is only logged; model generation below still runs
        searchFailure.printStackTrace(System.err);
      }
    }

    this.generateModel();

    // fill the training (validation set) and test output files
    double avgTr = this.doOutput(val, this.outputTr, false);
    double aucTr = getAUC(val);
    double avgTst = this.doOutput(test, this.outputTst, true);
    double aucTst = getAUC(test);
    System.out.println("AUC Train: " + aucTr + "; AvgAcc Train: " + avgTr);
    System.out.println("AUC Test: " + aucTst + "; AvgAcc Test: " + avgTst);

    totalTime = System.currentTimeMillis() - startTime;
    System.out.println("Algorithm Finished: " + totalTime);
  }
예제 #8
0
파일: ABB.java 프로젝트: Navieclipse/KEEL
  /**
   * Recursive step of ABB.
   *
   * <p>Sets {@code threshold} to the measure of {@code feat}, then generates one child per
   * selected feature of {@code feat} through {@code removeOne}. A child that is legitimate and
   * whose measure is below {@code threshold} is explored recursively (and copied into
   * {@code features} when it also beats the measure of the current {@code features}); every
   * other child is added to {@code pruned}.
   *
   * <p>{@code threshold} is not local to this method and is overwritten by each recursive call,
   * so after {@code abb(child)} returns the remaining children are compared against the value
   * left by that call.
   *
   * <p>NOTE(review): the loop passes 0..cardinality-1 to {@code removeOne}, whose counter starts
   * matching at 1; index 0 therefore yields an unchanged copy of {@code feat} and the leftmost
   * selected feature is never removed. Confirm whether 1..cardinality was intended.
   *
   * @param feat solution whose children are explored
   */
  private void abb(boolean feat[]) {
    threshold = data.measureIEP(feat);

    for (int which = 0; which < cardinalidadCto(feat); which++) {
      final boolean[] candidate = removeOne(feat, which);
      final double candidateMeasure = data.measureIEP(candidate);

      final boolean promising = legitimate(candidate) && candidateMeasure < threshold;
      if (!promising) {
        // this node is pruned
        pruned.add(candidate);
        continue;
      }

      if (candidateMeasure < data.measureIEP(features)) {
        // the best subset found so far is kept in 'features'
        System.arraycopy(candidate, 0, features, 0, candidate.length);
      }
      abb(candidate);
    }
  }
예제 #9
0
  /**
   * Writes data matrix X to disk, in KEEL format: the header returned by {@code IS.getHeader()},
   * an {@code @data} line, and then one comma-separated line per row of {@code X}
   * ({@code ndatos} rows, {@code nvariables} columns).
   *
   * <p>The writer is opened in a try-with-resources statement so the file is closed even when a
   * write fails. An {@link IOException} is reported on stdout and terminates the JVM with
   * status -1.
   *
   * @param output path of the file to create
   */
  private void write_results(String output) {
    try (FileWriter file_write = new FileWriter(output)) {

      file_write.write(IS.getHeader());

      // now, print the data rows
      file_write.write("@data\n");
      for (int i = 0; i < ndatos; i++) {
        file_write.write(X[i][0]);
        for (int j = 1; j < nvariables; j++) {
          file_write.write("," + X[i][j]);
        }
        file_write.write("\n");
      }
    } catch (IOException e) {
      System.out.println("IO exception = " + e);
      System.exit(-1);
    }
  }
예제 #10
0
파일: ENNRS.java 프로젝트: TheMurderer/keel
  /**
   * Runs the ENNRS instance selection and writes the classification outputs.
   *
   * <p>A random subset S holding {@code porcentaje} percent of the training instances is drawn;
   * the remaining instances form T. During {@code n} iterations the members of S are exchanged
   * with randomly chosen members of T; an exchange is kept only when the resulting S classifies
   * strictly more training instances correctly (via {@code KNN.evaluacionKNN2}) than the best S
   * seen so far, otherwise it is undone. Finally the training and test sets are classified with
   * the selected S and both results are written through {@code KNN.writeOutput}.
   */
  public void ejecutar() {

    final long tiempo = System.currentTimeMillis();

    /* number of classes = highest class label + 1 */
    int nClases = 0;
    for (int c = 0; c < clasesTrain.length; c++) {
      if (clasesTrain[c] > nClases) {
        nClases = clasesTrain[c];
      }
    }
    nClases++;

    final int total = datosTrain.length;

    /* flags of the instances that currently belong to S (a new array starts all false) */
    boolean[] marcas = new boolean[total];

    /* sizes of S and T, plus the identity permutation used for the random draw */
    int m = (int) ((porcentaje * datosTrain.length) / 100.0);
    int[] eleS = new int[m];
    int[] eleT = new int[total - m];
    int[] temp = new int[total];
    for (int idx = 0; idx < total; idx++) {
      temp[idx] = idx;
    }

    /* random distribution of the instances between S and T */
    Randomize.setSeed(semilla);
    for (int i = 0; i < eleS.length; i++) {
      int pos = Randomize.Randint(i, total - 1);
      int tmp = temp[i];
      temp[i] = temp[pos];
      temp[pos] = tmp;
      eleS[i] = temp[i];
    }
    for (int i = 0; i < eleT.length; i++) {
      int slot = m + i;
      int pos = Randomize.Randint(slot, total - 1);
      int tmp = temp[slot];
      temp[slot] = temp[pos];
      temp[pos] = tmp;
      eleT[i] = temp[slot];
    }
    for (int i = 0; i < eleS.length; i++) {
      marcas[eleS[i]] = true;
    }

    /* first S set and its accuracy over the whole training set */
    double[][] conjS = new double[m][datosTrain[0].length];
    int[] clasesS = new int[m];
    ennrsVolcarSeleccion(marcas, conjS, clasesS);
    int bestAc = ennrsContarAciertos(conjS, clasesS, nClases);

    /* body of ENNRS: try to replace S by instances of T on every iteration */
    for (int iter = 0; iter < n; iter++) {

      /* shuffle the head of T so the instances entering S are random */
      for (int j = 0; j < eleS.length; j++) {
        int pos = Randomize.Randint(j, eleT.length - 1);
        int tmp = eleT[j];
        eleT[j] = eleT[pos];
        eleT[pos] = tmp;
      }

      ennrsIntercambiar(eleS, eleT, marcas);
      ennrsVolcarSeleccion(marcas, conjS, clasesS);

      int aciertos = ennrsContarAciertos(conjS, clasesS, nClases);
      if (aciertos > bestAc) {
        bestAc = aciertos; // keep the new S
      } else {
        ennrsIntercambiar(eleS, eleT, marcas); // undo the exchange
      }
    }

    /* conjS may still hold a rejected candidate: rebuild it from the final flags */
    ennrsVolcarSeleccion(marcas, conjS, clasesS);

    System.out.println(
        "ENNRS " + relation + " " + (double) (System.currentTimeMillis() - tiempo) / 1000.0 + "s");

    /* classification of the training set with the selected S */
    int[][] trainRealClass = new int[total][1];
    int[][] trainPrediction = new int[total][1];
    for (int i = 0; i < total; i++) {
      trainRealClass[i][0] = clasesTrain[i];
      trainPrediction[i][0] = KNN.evaluate(datosTrain[i], conjS, nClases, clasesS, this.k);
    }
    KNN.writeOutput(ficheroSalida[0], trainRealClass, trainPrediction, entradas, salida, relation);

    /* classification of the test set with the selected S */
    int[][] realClass = new int[datosTest.length][1];
    int[][] prediction = new int[datosTest.length][1];
    for (int i = 0; i < realClass.length; i++) {
      realClass[i][0] = clasesTest[i];
      prediction[i][0] = KNN.evaluate(datosTest[i], conjS, nClases, clasesS, this.k);
    }
    KNN.writeOutput(ficheroSalida[1], realClass, prediction, entradas, salida, relation);
  }

  /** Copies every flagged training instance and its class into conjS / clasesS, in index order. */
  private void ennrsVolcarSeleccion(boolean[] marcas, double[][] conjS, int[] clasesS) {
    int fila = 0;
    for (int o = 0; o < datosTrain.length; o++) {
      if (!marcas[o]) {
        continue;
      }
      for (int j = 0; j < datosTrain[0].length; j++) {
        conjS[fila][j] = datosTrain[o][j];
      }
      clasesS[fila] = clasesTrain[o];
      fila++;
    }
  }

  /** Counts the training instances that KNN.evaluacionKNN2 classifies correctly using conjS. */
  private int ennrsContarAciertos(double[][] conjS, int[] clasesS, int nClases) {
    int aciertos = 0;
    for (int i = 0; i < datosTrain.length; i++) {
      int claseObt = KNN.evaluacionKNN2(k, conjS, clasesS, datosTrain[i], nClases);
      if (claseObt == clasesTrain[i]) {
        aciertos++;
      }
    }
    return aciertos;
  }

  /** Swaps eleS[j] with eleT[j] for every position of eleS and updates the S-membership flags. */
  private static void ennrsIntercambiar(int[] eleS, int[] eleT, boolean[] marcas) {
    for (int j = 0; j < eleS.length; j++) {
      int tmp = eleS[j];
      eleS[j] = eleT[j];
      eleT[j] = tmp;
      marcas[eleS[j]] = true;
      marcas[eleT[j]] = false;
    }
  }
예제 #11
0
  /**
   * It reads the data from the input files (training, validation and test) and parse all the
   * parameters from the parameters array.
   *
   * <p>An {@link IOException} while reading the data sets, or missing values in the training set,
   * sets {@code somethingWrong}. Parameters are read by position: 0 seed, 1 population size
   * (rounded up to an even number), 2 maximum trials, 3 crossover probability, 4 mutation,
   * 5 fitness ("AUC_TR", "GM_VAL", anything else selects {@code aucVal}), 6 instances ("MAJ" or
   * all), 7 ensemble ("WV", "WTA", "VOTE", anything else none), 8 model flag, 9 weighting flag.
   *
   * <p>{@code header} is derived from the test input file name: the text between the last two
   * dots, reduced to its last '/'-separated segment. A name without any dot is used as a whole
   * instead of failing with an index error.
   *
   * @param parameters parseParameters It contains the input files, output files and parameters
   */
  public Wrapper(parseParameters parameters) {
    this.startTime = System.currentTimeMillis();

    this.train = new myDataset();
    this.val = new myDataset();
    this.test = new myDataset();
    try {
      System.out.println("\nReading the training set: " + parameters.getTrainingInputFile());
      this.train.readClassificationSet(parameters.getTrainingInputFile(), true);
      System.out.println("\nReading the validation set: " + parameters.getValidationInputFile());
      this.val.readClassificationSet(parameters.getValidationInputFile(), false);
      System.out.println("\nReading the test set: " + parameters.getTestInputFile());
      this.test.readClassificationSet(parameters.getTestInputFile(), false);
    } catch (IOException e) {
      System.err.println("There was a problem while reading the input data-sets: " + e);
      this.somethingWrong = true;
    }

    // The algorithm cannot handle missing values, so they are flagged as an error condition
    // (hasMissingAttributes is only consulted when reading succeeded)
    this.somethingWrong = this.somethingWrong || this.train.hasMissingAttributes();

    this.outputTr = parameters.getTrainingOutputFile();
    this.outputTst = parameters.getTestOutputFile();

    this.fileRB = parameters.getOutputFile(0);
    this.data = parameters.getTrainingInputFile();

    // Now we parse the parameters
    seed = Long.parseLong(parameters.getParameter(0));

    this.populationSize = Integer.parseInt(parameters.getParameter(1));
    this.maxTrials = Integer.parseInt(parameters.getParameter(2));
    if (this.populationSize % 2 > 0) this.populationSize++; // keep the population size even
    this.crossover = Double.parseDouble(parameters.getParameter(3)); // crossover probability
    this.mutation = Double.parseDouble(parameters.getParameter(4));

    String aux2 = parameters.getParameter(5);

    this.fitness = this.aucVal;
    if (aux2.equalsIgnoreCase("AUC_TR")) {
      fitness = this.aucTrain;
    } else if (aux2.equalsIgnoreCase("GM_VAL")) {
      fitness = this.gmVal;
    }
    aux2 = parameters.getParameter(6);
    this.instances = this.ALL;
    if (aux2.equalsIgnoreCase("MAJ")) {
      instances = this.MAJ;
    }
    aux2 = parameters.getParameter(7);
    this.ensemble = this.NONE;
    if (aux2.equalsIgnoreCase("WV")) {
      ensemble = this.WV;
    } else if (aux2.equalsIgnoreCase("WTA")) {
      ensemble = this.WTA;
    } else if (aux2.equalsIgnoreCase("VOTE")) {
      ensemble = this.VOTE;
    }

    aux2 = parameters.getParameter(8);
    model = aux2.equalsIgnoreCase("true");

    aux2 = parameters.getParameter(9);
    this.weighting = aux2.equalsIgnoreCase("true");

    header = parameters.getTestInputFile();
    String[] aux = header.split("\\.");
    if (aux.length >= 2) {
      header = aux[aux.length - 2]; // aux.length-1 is the extension
    } else if (aux.length == 1) {
      header = aux[0]; // no extension present: keep the whole name
    }
    aux = header.split("/");
    if (aux.length > 0) {
      header = aux[aux.length - 1]; // To be run in SGE
    }

    Randomize.setSeed(seed);
  }
예제 #12
0
  /**
   * Runs random over-sampling and writes the resulting data sets.
   *
   * <p>Instances of class 0 are counted against all the others; the smaller group is treated as
   * the positive (minority) class. Minority instances are then drawn at random, with
   * replacement, until both groups have the same size. The output training set holds the whole
   * original training set followed by the drawn copies; the test set is written unchanged.
   */
  public void run() {

    final long inicio = System.currentTimeMillis();

    /* count the examples of class 0 and the rest */
    int nPos = 0;
    int nNeg = 0;
    for (int c = 0; c < clasesTrain.length; c++) {
      if (clasesTrain[c] == 0) {
        nPos++;
      } else {
        nNeg++;
      }
    }

    /* the positive class is the smaller group: swap the roles when class 0 is the larger one */
    int posID = 0;
    if (nPos > nNeg) {
      int swap = nPos;
      nPos = nNeg;
      nNeg = swap;
      posID = 1;
    }

    /* indexes of the positive instances */
    int[] positives = new int[nPos];
    int found = 0;
    for (int i = 0; i < clasesTrain.length; i++) {
      if (clasesTrain[i] == posID) {
        positives[found++] = i;
      }
    }

    /* random draw (with replacement) of the positive instances to replicate */
    int[] overs = new int[nNeg - nPos];
    Randomize.setSeed(semilla);
    for (int i = 0; i < overs.length; i++) {
      overs[i] = positives[Randomize.Randint(0, nPos - 1)];
    }

    /* S = original training set followed by the replicated instances */
    final int tamS = 2 * nNeg;
    double[][] conjS = new double[tamS][datosTrain[0].length];
    int[] clasesS = new int[tamS];
    int row = 0;
    for (; row < datosTrain.length; row++) {
      for (int col = 0; col < datosTrain[0].length; col++) {
        conjS[row][col] = datosTrain[row][col];
      }
      clasesS[row] = clasesTrain[row];
    }
    for (int extra = 0; row < tamS; row++, extra++) {
      int source = overs[extra];
      for (int col = 0; col < datosTrain[0].length; col++) {
        conjS[row][col] = datosTrain[source][col];
      }
      clasesS[row] = clasesTrain[source];
    }

    System.out.println(
        "RandomOverSampling "
            + relation
            + " "
            + (double) (System.currentTimeMillis() - inicio) / 1000.0
            + "s");

    OutputIS.escribeSalida(ficheroSalida[0], conjS, clasesS, entradas, salida, nEntradas, relation);
    OutputIS.escribeSalida(ficheroSalida[1], test, entradas, salida, nEntradas, relation);
  }
예제 #13
0
  /**
   * Runs CNN followed by Tomek-link cleaning and writes the resulting data sets.
   *
   * <p>CNN part: the candidate set S starts with one randomly chosen majority instance plus all
   * the minority instances. The training instances are then visited in random order; every
   * majority instance that the current S misclassifies (via {@code KNN.evaluacionKNN}) is added
   * to S.
   *
   * <p>Tomek-links part: for every instance of S its nearest neighbor inside S is located; when
   * both belong to different classes, the majority-class member of the pair is discarded.
   *
   * <p>The filtered training set and the unchanged test set are written through
   * {@code OutputIS.escribeSalida}. Class 0 is counted against all the others and the smaller
   * group is treated as the minority class.
   */
  public void run() {

    int S[];
    int i, j, l, m;
    int nPos = 0, nNeg = 0;
    int posID;
    int pos;
    int baraje[];
    int tmp;
    double conjS[][];
    int clasesS[];
    int tamS = 0;
    int claseObt;
    int busq;
    boolean marcas[];
    int nSel;
    double conjS2[][];
    int clasesS2[];
    double minDist, dist;

    long tiempo = System.currentTimeMillis();

    /*CNN PART*/

    /*Count of number of positive and negative examples*/
    for (i = 0; i < clasesTrain.length; i++) {
      if (clasesTrain[i] == 0) nPos++;
      else nNeg++;
    }
    if (nPos > nNeg) {
      tmp = nPos;
      nPos = nNeg;
      nNeg = tmp;
      posID = 1;
    } else {
      posID = 0;
    }

    /*Initialization of the candidates set; unused slots hold MAX_VALUE so they stay at the end
    when S is sorted*/
    S = new int[datosTrain.length];
    for (i = 0; i < S.length; i++) S[i] = Integer.MAX_VALUE;

    /*Inserting an element of majority class: start at a random position and walk forward
    (wrapping around) until a non-minority instance is found*/
    Randomize.setSeed(semilla);
    pos = Randomize.Randint(0, clasesTrain.length - 1);
    while (clasesTrain[pos] == posID) pos = (pos + 1) % clasesTrain.length;
    S[tamS] = pos;
    tamS++;

    /*Insert all subset of minority class*/
    for (i = 0; i < clasesTrain.length; i++) {
      if (clasesTrain[i] == posID) {
        S[tamS] = i;
        tamS++;
      }
    }

    /*Algorithm body. The instances of T are visited in random order and compared with S.
    If an instance is not classified correctly, it is inserted in S*/
    baraje = new int[datosTrain.length];
    for (i = 0; i < datosTrain.length; i++) baraje[i] = i;
    for (i = 0; i < datosTrain.length; i++) {
      pos = Randomize.Randint(i, clasesTrain.length - 1);
      tmp = baraje[i];
      baraje[i] = baraje[pos];
      baraje[pos] = tmp;
    }

    for (i = 0; i < datosTrain.length; i++) {
      /* only for majority class instances: the filter must look at the shuffled instance
      baraje[i], which is the one classified and inserted below, not at position i */
      if (clasesTrain[baraje[i]] != posID) {
        /*Construction of the S set from the previous vector S*/
        conjS = new double[tamS][datosTrain[0].length];
        clasesS = new int[tamS];
        for (j = 0; j < tamS; j++) {
          for (l = 0; l < datosTrain[0].length; l++) conjS[j][l] = datosTrain[S[j]][l];
          clasesS[j] = clasesTrain[S[j]];
        }

        /*Do KNN to the instance*/
        claseObt = KNN.evaluacionKNN(k, conjS, clasesS, datosTrain[baraje[i]], 2);
        if (claseObt != clasesTrain[baraje[i]]) { // fail in the class, it is included in S
          /* S is sorted in place so the membership test can use binary search */
          Arrays.sort(S);
          busq = Arrays.binarySearch(S, baraje[i]);
          if (busq < 0) {
            S[tamS] = baraje[i];
            tamS++;
          }
        }
      }
    }

    /*Construction of the S set from the previous vector S*/
    conjS = new double[tamS][datosTrain[0].length];
    clasesS = new int[tamS];
    for (j = 0; j < tamS; j++) {
      for (l = 0; l < datosTrain[0].length; l++) conjS[j][l] = datosTrain[S[j]][l];
      clasesS[j] = clasesTrain[S[j]];
    }

    /*TOMEK LINKS PART*/

    /*Initialization of the instance flagged vector of the S set: every instance starts selected*/
    marcas = new boolean[conjS.length];
    for (i = 0; i < conjS.length; i++) {
      marcas[i] = true;
    }
    nSel = conjS.length;

    for (i = 0; i < conjS.length; i++) {
      /* nearest neighbor of instance i inside S */
      minDist = Double.POSITIVE_INFINITY;
      pos = 0;
      for (j = 0; j < conjS.length; j++) {
        if (i != j) {
          dist = KNN.distancia(conjS[i], conjS[j]);
          if (dist < minDist) {
            minDist = dist;
            pos = j;
          }
        }
      }
      /* classes differ: unflag the member of the pair that is not of the minority class */
      if (clasesS[i] != clasesS[pos]) {
        if (clasesS[i] != posID) {
          if (marcas[i] == true) {
            marcas[i] = false;
            nSel--;
          }
        } else {
          if (marcas[pos] == true) {
            marcas[pos] = false;
            nSel--;
          }
        }
      }
    }

    /*Construction of the final set from the flags*/
    conjS2 = new double[nSel][conjS[0].length];
    clasesS2 = new int[nSel];
    for (m = 0, l = 0; m < conjS.length; m++) {
      if (marcas[m]) { // the instance is kept
        for (j = 0; j < conjS[0].length; j++) {
          conjS2[l][j] = conjS[m][j];
        }
        clasesS2[l] = clasesS[m];
        l++;
      }
    }

    System.out.println(
        "CNN_TomekLinks "
            + relation
            + " "
            + (double) (System.currentTimeMillis() - tiempo) / 1000.0
            + "s");

    OutputIS.escribeSalida(
        ficheroSalida[0], conjS2, clasesS2, entradas, salida, nEntradas, relation);
    OutputIS.escribeSalida(ficheroSalida[1], test, entradas, salida, nEntradas, relation);
  }
예제 #14
0
  /** It runs the Qstatistic */
  public void runAlgorithm() {

    int i, j, l, h;
    double conjS[][];
    double conjR[][];
    int conjN[][];
    boolean conjM[][];
    int clasesS[];
    int nSel = 0;
    Chromosome poblacion[];
    int ev = 0;
    Chromosome C[];
    int baraje[];
    int pos, tmp;
    Chromosome newPob[];
    int d;
    int tamC;
    Chromosome pobTemp[];
    int nPos = 0, nNeg = 0, posID, negID;
    double datosArt[][];
    double realArt[][];
    int nominalArt[][];
    boolean nulosArt[][];
    int clasesArt[];
    int tamS;

    long tiempo = System.currentTimeMillis();

    // Randomize.setSeed (semilla);
    posID = clasesTrain[0];
    negID = -1;
    for (i = 0; i < clasesTrain.length; i++) {
      if (clasesTrain[i] != posID) {
        negID = clasesTrain[i];
        break;
      }
    }
    /* Count of number of positive and negative examples */
    for (i = 0; i < clasesTrain.length; i++) {
      if (clasesTrain[i] == posID) nPos++;
      else nNeg++;
    }
    if (nPos > nNeg) {
      tmp = nPos;
      nPos = nNeg;
      nNeg = tmp;
      tmp = posID;
      posID = negID;
      negID = tmp;
    } else {
      /*
       * tmp = posID; posID = negID; negID = tmp;
       */
    }

    if (hybrid.equalsIgnoreCase("smote + eus")) {
      if (balance) {
        tamS = 2 * nNeg;
      } else {
        tamS = nNeg + nPos + (int) (nPos * smoting);
      }
      datosArt = new double[tamS][datosTrain[0].length];
      realArt = new double[tamS][datosTrain[0].length];
      nominalArt = new int[tamS][datosTrain[0].length];
      nulosArt = new boolean[tamS][datosTrain[0].length];
      clasesArt = new int[tamS];

      SMOTE(
          datosTrain,
          realTrain,
          nominalTrain,
          nulosTrain,
          clasesTrain,
          datosArt,
          realArt,
          nominalArt,
          nulosArt,
          clasesArt,
          kSMOTE,
          ASMO,
          smoting,
          balance,
          nPos,
          posID,
          nNeg,
          negID,
          distanceEu);
    } else {
      datosArt = new double[datosTrain.length][datosTrain[0].length];
      realArt = new double[datosTrain.length][datosTrain[0].length];
      nominalArt = new int[datosTrain.length][datosTrain[0].length];
      nulosArt = new boolean[datosTrain.length][datosTrain[0].length];
      clasesArt = new int[clasesTrain.length];
      for (i = 0; i < datosTrain.length; i++) {
        for (j = 0; j < datosTrain[i].length; j++) {
          datosArt[i][j] = datosTrain[i][j];
          realArt[i][j] = realTrain[i][j];
          nominalArt[i][j] = nominalTrain[i][j];
          nulosArt[i][j] = nulosTrain[i][j];
        }
        clasesArt[i] = clasesTrain[i];
      }
    }

    /* Count of number of positive and negative examples */
    nPos = nNeg = 0;
    for (i = 0; i < clasesArt.length; i++) {
      if (clasesArt[i] == posID) nPos++;
      else nNeg++;
    }

    if (majSelection) d = nNeg / 4;
    else d = datosArt.length / 4;

    /* Random initialization of the population */
    poblacion = new Chromosome[popSize];
    baraje = new int[popSize];
    for (i = 0; i < popSize; i++)
      if (majSelection) poblacion[i] = new Chromosome(nNeg);
      else poblacion[i] = new Chromosome(datosArt.length);

    /* Initial evaluation of the population */
    for (i = 0; i < popSize; i++)
      poblacion[i].evalua(
          datosTrain,
          realTrain,
          nominalTrain,
          nulosTrain,
          clasesTrain,
          datosArt,
          realArt,
          nominalArt,
          nulosArt,
          clasesArt,
          wrapper,
          k,
          evMeas,
          majSelection,
          pFactor,
          P,
          posID,
          nPos,
          distanceEu,
          entradas,
          anteriores,
          salidasAnteriores);

    /* Until stop condition */
    while (ev < nEval) {
      C = new Chromosome[popSize];

      /* Selection(r) of C(t) from P(t) */
      for (i = 0; i < popSize; i++) baraje[i] = i;
      for (i = 0; i < popSize; i++) {
        pos = Randomize.Randint(i, popSize - 1);
        tmp = baraje[i];
        baraje[i] = baraje[pos];
        baraje[pos] = tmp;
      }
      for (i = 0; i < popSize; i++)
        if (majSelection) C[i] = new Chromosome(nNeg, poblacion[baraje[i]]);
        else C[i] = new Chromosome(datosArt.length, poblacion[baraje[i]]);

      /* Structure recombination in C(t) constructing C'(t) */
      tamC = recombinar(C, d, nNeg, nPos, majSelection);
      newPob = new Chromosome[tamC];
      for (i = 0, l = 0; i < C.length; i++) {
        if (C[i].esValido()) { // the cromosome must be copied to the
          // new poblation C'(t)
          if (majSelection) newPob[l] = new Chromosome(nNeg, C[i]);
          else newPob[l] = new Chromosome(datosArt.length, C[i]);
          l++;
        }
      }

      /* Structure evaluation in C'(t) */
      for (i = 0; i < newPob.length; i++) {
        newPob[i].evalua(
            datosTrain,
            realTrain,
            nominalTrain,
            nulosTrain,
            clasesTrain,
            datosArt,
            realArt,
            nominalArt,
            nulosArt,
            clasesArt,
            wrapper,
            k,
            evMeas,
            majSelection,
            pFactor,
            P,
            posID,
            nPos,
            distanceEu,
            entradas,
            anteriores,
            salidasAnteriores);
        ev++;
      }

      /* Selection(s) of P(t) from C'(t) and P(t-1) */
      Arrays.sort(poblacion);
      Arrays.sort(newPob);
      /*
       * If the best of C' is worse than the worst of P(t-1), then there
       * will no changes
       */
      if (tamC == 0 || newPob[0].getCalidad() < poblacion[popSize - 1].getCalidad()) {
        d--;
      } else {
        pobTemp = new Chromosome[popSize];
        for (i = 0, j = 0, l = 0; i < popSize && l < tamC; i++) {
          if (poblacion[j].getCalidad() > newPob[l].getCalidad()) {
            if (majSelection) pobTemp[i] = new Chromosome(nNeg, poblacion[j]);
            else pobTemp[i] = new Chromosome(datosArt.length, poblacion[j]);
            j++;
          } else {
            if (majSelection) pobTemp[i] = new Chromosome(nNeg, newPob[l]);
            else pobTemp[i] = new Chromosome(datosArt.length, newPob[l]);
            l++;
          }
        }
        if (l == tamC) { // there are cromosomes for copying
          for (; i < popSize; i++) {
            if (majSelection) pobTemp[i] = new Chromosome(nNeg, poblacion[j]);
            else pobTemp[i] = new Chromosome(datosArt.length, poblacion[j]);
            j++;
          }
        }
        poblacion = pobTemp;
      }

      /* Last step of the algorithm */
      if (d <= 0) {
        for (i = 1; i < popSize; i++) {
          poblacion[i].divergeCHC(r, poblacion[0], prob0to1Div);
        }
        for (i = 0; i < popSize; i++)
          if (!(poblacion[i].estaEvaluado())) {
            poblacion[i].evalua(
                datosTrain,
                realTrain,
                nominalTrain,
                nulosTrain,
                clasesTrain,
                datosArt,
                realArt,
                nominalArt,
                nulosArt,
                clasesArt,
                wrapper,
                k,
                evMeas,
                majSelection,
                pFactor,
                P,
                posID,
                nPos,
                distanceEu,
                entradas,
                anteriores,
                salidasAnteriores);
            ev++;
          }

        /* Reinicialization of d value */
        if (majSelection) d = (int) (r * (1.0 - r) * (double) nNeg);
        else d = (int) (r * (1.0 - r) * (double) datosArt.length);
      }
    }

    Arrays.sort(poblacion);

    if (majSelection) {
      nSel = poblacion[0].genesActivos() + nPos;

      /* Construction of S set from the best cromosome */
      conjS = new double[nSel][datosArt[0].length];
      conjR = new double[nSel][datosArt[0].length];
      conjN = new int[nSel][datosArt[0].length];
      conjM = new boolean[nSel][datosArt[0].length];
      clasesS = new int[nSel];
      h = 0;
      for (i = 0, l = 0; i < nNeg; i++, h++) {
        for (; clasesArt[h] == posID && h < clasesArt.length; h++) ;
        if (poblacion[0].getGen(i)) { // the instance must be copied to
          // the solution
          for (j = 0; j < datosArt[h].length; j++) {
            conjS[l][j] = datosArt[h][j];
            conjR[l][j] = realArt[h][j];
            conjN[l][j] = nominalArt[h][j];
            conjM[l][j] = nulosArt[h][j];
          }
          clasesS[l] = clasesArt[h];
          l++;
        }
      }
      for (i = 0; i < datosArt.length; i++) {
        if (clasesArt[i] == posID) {
          for (j = 0; j < datosArt[i].length; j++) {
            conjS[l][j] = datosArt[i][j];
            conjR[l][j] = realArt[i][j];
            conjN[l][j] = nominalArt[i][j];
            conjM[l][j] = nulosArt[i][j];
          }
          clasesS[l] = clasesArt[i];
          l++;
        }
      }
    } else {
      nSel = poblacion[0].genesActivos();

      /* Construction of S set from the best cromosome */
      conjS = new double[nSel][datosArt[0].length];
      conjR = new double[nSel][datosArt[0].length];
      conjN = new int[nSel][datosArt[0].length];
      conjM = new boolean[nSel][datosArt[0].length];
      clasesS = new int[nSel];
      for (i = 0, l = 0; i < datosArt.length; i++) {
        if (poblacion[0].getGen(i)) { // the instance must be copied to
          // the solution
          for (j = 0; j < datosArt[i].length; j++) {
            conjS[l][j] = datosArt[i][j];
            conjR[l][j] = realArt[i][j];
            conjN[l][j] = nominalArt[i][j];
            conjM[l][j] = nulosArt[i][j];
          }
          clasesS[l] = clasesArt[i];
          l++;
        }
      }
    }

    if (hybrid.equalsIgnoreCase("eus + smote")) {
      nPos = nNeg = 0;
      for (i = 0; i < clasesS.length; i++) {
        if (clasesS[i] == posID) nPos++;
        else nNeg++;
      }
      if (nPos < nNeg) {
        if (balance) {
          tamS = 2 * nNeg;
        } else {
          tamS = nNeg + nPos + (int) (nPos * smoting);
        }
        datosArt = new double[tamS][datosTrain[0].length];
        realArt = new double[tamS][datosTrain[0].length];
        nominalArt = new int[tamS][datosTrain[0].length];
        nulosArt = new boolean[tamS][datosTrain[0].length];
        clasesArt = new int[tamS];

        SMOTE(
            conjS,
            conjR,
            conjN,
            conjM,
            clasesS,
            datosArt,
            realArt,
            nominalArt,
            nulosArt,
            clasesArt,
            kSMOTE,
            ASMO,
            smoting,
            balance,
            nPos,
            posID,
            nNeg,
            negID,
            distanceEu);

        nSel = datosArt.length;

        /* Construction of S set from the best cromosome */
        conjS = new double[nSel][datosArt[0].length];
        conjR = new double[nSel][datosArt[0].length];
        conjN = new int[nSel][datosArt[0].length];
        conjM = new boolean[nSel][datosArt[0].length];
        clasesS = new int[nSel];
        for (i = 0; i < datosArt.length; i++) {
          for (j = 0; j < datosArt[i].length; j++) {
            conjS[i][j] = datosArt[i][j];
            conjR[i][j] = realArt[i][j];
            conjN[i][j] = nominalArt[i][j];
            conjM[i][j] = nulosArt[i][j];
          }
          clasesS[i] = clasesArt[i];
        }
      }
    }

    /*
     * for (i = 0; i < poblacion.length; i++){ for (j = 0; j <
     * poblacion[0].cuerpo.length; j++){
     * System.out.print((poblacion[i].cuerpo[j] ? 1 : 0)); }
     * System.out.println(" Calidad: " + poblacion[i].calidad); }
     */
    best = poblacion[0].cuerpo.clone();
    bestOutputs = poblacion[0].prediction.clone();
    System.out.println(
        "QstatEUSCHC "
            + relation
            + " "
            + (double) (System.currentTimeMillis() - tiempo) / 1000.0
            + "s");

    OutputIS.escribeSalida(
        ficheroSalida[0], conjR, conjN, conjM, clasesS, entradas, salida, nEntradas, relation);
    // OutputIS.escribeSalida(ficheroSalida[1], test, entradas, salida,
    // nEntradas, relation);
  }
예제 #15
0
파일: Depur.java 프로젝트: Navieclipse/KEEL
  /**
   * Runs the Depuration editing procedure over the training set and writes the results.
   *
   * <p>For each training instance the classes of its {@code k} nearest neighbours among the
   * remaining instances are requested from {@code KNN.evaluacionKNN3}. When the most frequent class
   * among them appears at least {@code k2} times, the instance is kept and relabelled with that
   * class; otherwise it is discarded. Labels are rewritten in {@code clasesTrain} while the scan
   * advances, so later instances are compared against already corrected labels.
   *
   * <p>The edited training set is written to {@code ficheroSalida[0]} and the test set to {@code
   * ficheroSalida[1]}.
   */
  public void ejecutar() {

    double conjS[][];
    double conjR[][];
    int conjN[][];
    boolean conjM[][];
    int clasesS[];

    int S[]; /* Binary vector: 1 when the instance is kept, 0 when it is discarded */
    int i, j, cont;
    int nClases;
    int tamS;
    int transformations;

    int claseObt[];
    int clasePredominante;

    long tiempo = System.currentTimeMillis();

    final int nInst = datosTrain.length;
    /* An empty training set has no row 0, so never index it to obtain the attribute count */
    final int nAtts = (nInst > 0) ? datosTrain[0].length : 0;

    transformations = 0;

    /* Getting the number of different classes */
    nClases = 0;
    for (i = 0; i < clasesTrain.length; i++) if (clasesTrain[i] > nClases) nClases = clasesTrain[i];
    nClases++;

    if (nClases < 2) {
      System.err.println("Input dataset is empty");
      nClases = 0;
    }

    /* Initialization of the candidate set: S = X, the original training set */
    S = new int[nInst];
    for (i = 0; i < S.length; i++) S[i] = 1;

    tamS = nInst;

    System.out.print("K= " + k + "\n");
    System.out.print("K'= " + k2 + "\n");

    /*
     * Leave-one-out buffers (X - {i}). They are allocated once and completely refilled for every
     * instance instead of being re-created on each iteration.
     */
    final int nResto = Math.max(nInst - 1, 0);
    conjS = new double[nResto][nAtts];
    conjR = new double[nResto][nAtts];
    conjN = new int[nResto][nAtts];
    conjM = new boolean[nResto][nAtts];
    clasesS = new int[nResto];

    for (i = 0; i < nInst; i++) {

      cont = 0;
      for (j = 0; j < nInst; j++) {
        if (i != j) {
          System.arraycopy(datosTrain[j], 0, conjS[cont], 0, nAtts);
          System.arraycopy(realTrain[j], 0, conjR[cont], 0, nAtts);
          System.arraycopy(nominalTrain[j], 0, conjN[cont], 0, nAtts);
          System.arraycopy(nulosTrain[j], 0, conjM[cont], 0, nAtts);
          clasesS[cont] = clasesTrain[j];
          cont++;
        }
      }

      /* Do KNN to the instance */
      claseObt =
          KNN.evaluacionKNN3(
              k,
              conjS,
              conjR,
              conjN,
              conjM,
              clasesS,
              datosTrain[i],
              realTrain[i],
              nominalTrain[i],
              nulosTrain[i],
              nClases,
              distanceEu);

      /*
       * Find the class with most representatives among the neighbours. 'max' is the full number of
       * neighbours carrying that class (the neighbour under inspection is counted as well), so it
       * can be compared with k2 directly. Ties are resolved in favour of the class met first.
       */
      int max = 0;
      clasePredominante = 0;

      for (int m = 0; m < claseObt.length; m++) {
        int iguales = 0;
        for (j = 0; j < claseObt.length; j++) {
          if (claseObt[j] == claseObt[m]) {
            iguales++;
          }
        }
        if (iguales > max) {
          max = iguales;
          clasePredominante = claseObt[m];
        }
      }

      if (max >= k2) {
        /* At least k2 neighbours agree: keep the instance with the predominant label */
        if (clasePredominante != clasesTrain[i]) transformations++;

        clasesTrain[i] = clasePredominante;
        S[i] = 1;

      } else {
        /* Discard. */
        tamS--;
        S[i] = 0;
      }
    }

    System.out.print("S size resultante= " + tamS + "\n");
    System.out.print("Transformations = " + transformations + "\n");

    /* Construction of the S set from the previous vector S */
    conjS = new double[tamS][nAtts];
    conjR = new double[tamS][nAtts];
    conjN = new int[tamS][nAtts];
    conjM = new boolean[tamS][nAtts];
    clasesS = new int[tamS];

    cont = 0; /* Next free row of the result */
    for (j = 0; j < nInst; j++) {
      if (S[j] == 1) {
        System.arraycopy(datosTrain[j], 0, conjS[cont], 0, nAtts);
        System.arraycopy(realTrain[j], 0, conjR[cont], 0, nAtts);
        System.arraycopy(nominalTrain[j], 0, conjN[cont], 0, nAtts);
        System.arraycopy(nulosTrain[j], 0, conjM[cont], 0, nAtts);
        clasesS[cont] = clasesTrain[j];
        cont++;
      }
    }

    System.out.println(
        "Time elapse: " + (double) (System.currentTimeMillis() - tiempo) / 1000.0 + "s");

    OutputIS.escribeSalida(
        ficheroSalida[0], conjR, conjN, conjM, clasesS, entradas, salida, nEntradas, relation);
    OutputIS.escribeSalida(ficheroSalida[1], test, entradas, salida, nEntradas, relation);
  }
예제 #16
0
  /**
   * Runs the Reconsistent selection over the training set and writes the classification results.
   *
   * <p>Steps, as implemented here:
   *
   * <ol>
   *   <li>For every instance, collect the consecutive same-class neighbours returned by {@code
   *       nextNeighbour}; then repeatedly let the instance with the largest list absorb (unflag)
   *       all of its neighbours.
   *   <li>Classify every training instance with 1-NN over the condensed set and record the
   *       misclassified ones.
   *   <li>Repeat the neighbour collection and absorption for the misclassified instances only, and
   *       add the surviving misclassified instances back to the selection.
   *   <li>Classify training and test data with 1-NN over the final set and write both outputs to
   *       {@code ficheroSalida[0]} and {@code ficheroSalida[1]}.
   * </ol>
   */
  public void ejecutar() {

    int i, l;
    boolean marcas[];
    boolean marcas2[];
    boolean marcastmp[];
    boolean incorrect[];
    int nSel;
    int sel[];
    double conjS[][];
    double conjR[][];
    int conjN[][];
    boolean conjM[][];
    int clasesS[];
    int claseObt;
    int nClases;

    long tiempo = System.currentTimeMillis();

    final int nInst = datosTrain.length;
    final int nAtts = datosTrain[0].length;

    /* Getting the number of different classes */
    nClases = 0;
    for (i = 0; i < clasesTrain.length; i++) if (clasesTrain[i] > nClases) nClases = clasesTrain[i];
    nClases++;

    /* Initialization of the flag vectors (incorrect[] starts all false by default) */
    marcas = new boolean[nInst];
    marcas2 = new boolean[nInst];
    incorrect = new boolean[nInst];
    marcastmp = new boolean[nInst];
    Arrays.fill(marcas, true);
    Arrays.fill(marcas2, true);
    Arrays.fill(marcastmp, true);

    @SuppressWarnings("unchecked") // generic array creation; only Vector<Integer> is ever stored
    Vector<Integer>[] vecinos = new Vector[nInst];
    for (i = 0; i < nInst; i++) vecinos[i] = new Vector<Integer>();

    /* First pass: neighbourhoods of every instance, then greedy absorption */
    for (i = 0; i < nInst; i++) {
      recogeVecinos(i, marcas, marcastmp, vecinos[i]);
    }
    absorbeVecinos(vecinos, marcas);

    /* Building of the S set from the flags */
    sel = indicesMarcados(marcas);
    nSel = sel.length;
    conjS = new double[nSel][nAtts];
    conjR = new double[nSel][nAtts];
    conjN = new int[nSel][nAtts];
    conjM = new boolean[nSel][nAtts];
    clasesS = new int[nSel];
    for (l = 0; l < nSel; l++) {
      i = sel[l];
      System.arraycopy(datosTrain[i], 0, conjS[l], 0, nAtts);
      System.arraycopy(realTrain[i], 0, conjR[l], 0, nAtts);
      System.arraycopy(nominalTrain[i], 0, conjN[l], 0, nAtts);
      System.arraycopy(nulosTrain[i], 0, conjM[l], 0, nAtts);
      clasesS[l] = clasesTrain[i];
    }

    for (i = 0; i < nInst; i++) {
      /*
       * Apply 1-NN to the instance. The labels must be those of the condensed set (clasesS):
       * positions in conjS do not correspond to positions in clasesTrain.
       */
      claseObt =
          KNN.evaluacionKNN2(
              1,
              conjS,
              conjR,
              conjN,
              conjM,
              clasesS,
              datosTrain[i],
              realTrain[i],
              nominalTrain[i],
              nulosTrain[i],
              nClases,
              true);
      if (claseObt != clasesTrain[i]) {
        incorrect[i] = true;
      }
    }

    /* Second pass: same procedure restricted to the misclassified instances */
    for (i = 0; i < nInst; i++) vecinos[i] = new Vector<Integer>();

    for (i = 0; i < nInst; i++) {
      if (incorrect[i]) {
        recogeVecinos(i, marcas2, marcastmp, vecinos[i]);
      }
    }
    absorbeVecinos(vecinos, marcas2);

    for (i = 0; i < marcas.length; i++) marcas[i] |= (marcas2[i] & incorrect[i]);

    /* Building of the S set from the flags */
    sel = indicesMarcados(marcas);
    nSel = sel.length;
    conjS = new double[nSel][nAtts];
    conjR = new double[nSel][nAtts];
    conjN = new int[nSel][nAtts];
    conjM = new boolean[nSel][nAtts];
    clasesS = new int[nSel];
    for (l = 0; l < nSel; l++) {
      i = sel[l];
      System.arraycopy(datosTrain[i], 0, conjS[l], 0, nAtts);
      System.arraycopy(realTrain[i], 0, conjR[l], 0, nAtts);
      System.arraycopy(nominalTrain[i], 0, conjN[l], 0, nAtts);
      System.arraycopy(nulosTrain[i], 0, conjM[l], 0, nAtts);
      clasesS[l] = clasesTrain[i];
    }

    System.out.println(
        "Reconsistent "
            + relation
            + " "
            + (double) (System.currentTimeMillis() - tiempo) / 1000.0
            + "s");

    // conjS is enough for the classification below.
    int trainRealClass[][];
    int trainPrediction[][];

    trainRealClass = new int[nInst][1];
    trainPrediction = new int[nInst][1];

    // Working on training
    for (i = 0; i < nInst; i++) {
      trainRealClass[i][0] = clasesTrain[i];
      trainPrediction[i][0] = KNN.evaluate(datosTrain[i], conjS, nClases, clasesS, 1);
    }

    KNN.writeOutput(ficheroSalida[0], trainRealClass, trainPrediction, entradas, salida, relation);

    // Working on test
    int realClass[][] = new int[datosTest.length][1];
    int prediction[][] = new int[datosTest.length][1];

    for (i = 0; i < realClass.length; i++) {
      realClass[i][0] = clasesTest[i];
      prediction[i][0] = KNN.evaluate(datosTest[i], conjS, nClases, clasesS, 1);
    }

    KNN.writeOutput(ficheroSalida[1], realClass, prediction, entradas, salida, relation);
  }

  /**
   * Fills {@code lista} with the run of same-class neighbours of instance {@code i}: neighbours are
   * requested from {@code nextNeighbour} one at a time and appended until it returns a negative
   * index or an instance of a different class.
   *
   * @param i index of the instance whose neighbourhood is built
   * @param disponibles flags handed to the first {@code nextNeighbour} call; not modified
   * @param marcastmp scratch vector, overwritten with a copy of {@code disponibles} and then
   *     cleared for each accepted neighbour
   * @param lista receives the accepted neighbour indices
   */
  private void recogeVecinos(
      int i, boolean[] disponibles, boolean[] marcastmp, Vector<Integer> lista) {
    int next = nextNeighbour(disponibles, datosTrain, i, lista);
    System.arraycopy(disponibles, 0, marcastmp, 0, disponibles.length);
    while (next >= 0 && clasesTrain[next] == clasesTrain[i]) {
      lista.add(Integer.valueOf(next));
      marcastmp[next] = false;
      next = nextNeighbour(marcastmp, datosTrain, i, lista);
    }
  }

  /**
   * Greedy absorption: while some neighbourhood is non-empty, the instance owning the largest one
   * unflags every member of it, and each absorbed instance is erased from all lists.
   *
   * @param vecinos neighbour lists, one per instance; emptied as a side effect
   * @param flags selection flags; entries of absorbed instances are set to {@code false}
   */
  private static void absorbeVecinos(Vector<Integer>[] vecinos, boolean[] flags) {
    int pos = mayorVecindario(vecinos);
    while (vecinos[pos].size() > 0) {
      /*
       * Iterate over a snapshot: the inner removal also shrinks vecinos[pos] itself, and walking
       * the live list by index would skip every second neighbour.
       */
      Integer[] absorbidos = vecinos[pos].toArray(new Integer[0]);
      for (Integer borrado : absorbidos) {
        flags[borrado.intValue()] = false;
        for (int j = 0; j < vecinos.length; j++) {
          vecinos[j].removeElement(borrado);
        }
        vecinos[borrado.intValue()].clear();
      }
      vecinos[pos].clear();
      pos = mayorVecindario(vecinos);
    }
  }

  /** Returns the index of the longest list in {@code vecinos}; the first one wins on ties. */
  private static int mayorVecindario(Vector<Integer>[] vecinos) {
    int pos = 0;
    for (int i = 1; i < vecinos.length; i++) {
      if (vecinos[i].size() > vecinos[pos].size()) {
        pos = i;
      }
    }
    return pos;
  }

  /** Returns, in ascending order, the positions of {@code flags} that are {@code true}. */
  private static int[] indicesMarcados(boolean[] flags) {
    int total = 0;
    for (int i = 0; i < flags.length; i++) if (flags[i]) total++;

    int[] indices = new int[total];
    for (int i = 0, l = 0; i < flags.length; i++) {
      if (flags[i]) indices[l++] = i;
    }
    return indices;
  }
예제 #17
0
  /**
   * Process the training and test files provided in the parameters file to the constructor.
   *
   * <p>The training set is clustered with k-means ({@code K} centres picked at random, preferring
   * instances without missing values), iterating until the accumulated distance drops to {@code
   * minError}, stops changing, or {@code maxIter} iterations are reached. Every missing value is
   * then replaced by the value of the instance's cluster centre for that attribute, and the result
   * is written with {@code write_results}. If a distinct test file is configured, it is imputed
   * with the centres learnt from the training set and written as well.
   */
  public void process() {
    // declarations
    int actual;
    Randomize rnd = new Randomize();
    Instance ex;
    gCenter kmeans = null;
    int iterations = 0;
    double E;
    double prevE;
    int totalMissing = 0;
    boolean allMissing = true;

    rnd.setSeed(semilla);
    // PROCESS
    try {

      // Load in memory a dataset that contains a classification problem
      IS.readSet(input_train_name, true);

      ndatos = IS.getNumInstances();
      nvariables = Attributes.getNumAttributes();
      nentradas = Attributes.getInputNumAttributes();
      nsalidas = Attributes.getOutputNumAttributes();

      X = new String[ndatos][nvariables]; // matrix with transformed data
      kmeans = new gCenter(K, ndatos, nvariables);

      timesSeen = new FreqList[nvariables];
      mostCommon = new String[nvariables];

      // first, we choose k 'means' randomly from all instances; instances with
      // missing values are only accepted when every instance has some
      totalMissing = 0;
      for (int i = 0; i < ndatos; i++) {
        if (IS.getInstance(i).existsAnyMissingValue()) totalMissing++;
      }
      allMissing = (totalMissing == ndatos);
      for (int numMeans = 0; numMeans < K; numMeans++) {
        do {
          actual = (int) (ndatos * rnd.Rand());
          ex = IS.getInstance(actual);
        } while (ex.existsAnyMissingValue() && !allMissing);

        kmeans.copyCenter(ex, numMeans);
      }

      // now, iterate adjusting clusters' centers and instances to them
      prevE = 0;
      iterations = 0;
      do {
        for (int i = 0; i < ndatos; i++) {
          kmeans.setClusterOf(IS.getInstance(i), i);
        }
        // set new centers
        kmeans.recalculateCenters(IS);
        // accumulate the distance of every instance to its own centre
        E = 0;
        for (int i = 0; i < ndatos; i++) {
          E += kmeans.distance(IS.getInstance(i), kmeans.getClusterOf(i));
        }
        iterations++;
        // no change with respect to the previous iteration: force the loop to end
        if (Math.abs(prevE - E) == 0) iterations = maxIter;
        else prevE = E;
      } while (E > minError && iterations < maxIter);

      for (int i = 0; i < ndatos; i++) {
        imputaFila(kmeans, IS.getInstance(i), i, kmeans.getClusterOf(i));
      }
    } catch (Exception e) {
      System.out.println("Dataset exception = " + e);
      e.printStackTrace();
      System.exit(-1);
    }
    write_results(output_train_name);
    /** ************************************************************************************ */
    // does a test file associated exist?
    if (input_train_name.compareTo(input_test_name) != 0) {
      try {

        // Load in memory a dataset that contains a classification problem
        IStest.readSet(input_test_name, false);

        ndatos = IStest.getNumInstances();
        nvariables = Attributes.getNumAttributes();
        nentradas = Attributes.getInputNumAttributes();
        nsalidas = Attributes.getOutputNumAttributes();

        // The test partition may hold more rows than the training one: size X for it
        X = new String[ndatos][nvariables];

        for (int i = 0; i < ndatos; i++) {
          Instance inst = IStest.getInstance(i);
          // Cluster assignments stored in kmeans belong to training rows, so the
          // cluster of a test instance is the one whose centre is closest to it
          imputaFila(kmeans, inst, i, clusterMasCercano(kmeans, inst));
        }
      } catch (Exception e) {
        System.out.println("Dataset exception = " + e);
        e.printStackTrace();
        System.exit(-1);
      }
      write_results(output_test_name);
    }
  }

  /**
   * Writes row {@code i} of {@code X} from {@code inst}: present values are copied (numeric ones
   * through {@code String.valueOf}), missing values are taken from the centre of {@code cluster}.
   * Attributes that are neither input nor output are left untouched.
   *
   * @param kmeans clustering that provides the centre values
   * @param inst instance to transform
   * @param i row of {@code X} to fill
   * @param cluster index of the cluster used for the missing values of this instance
   */
  private void imputaFila(gCenter kmeans, Instance inst, int i, int cluster) {
    int in = 0;
    int out = 0;

    for (int j = 0; j < nvariables; j++) {
      Attribute a = Attributes.getAttribute(j);

      direccion = a.getDirectionAttribute();
      tipo = a.getType();

      if (direccion == Attribute.INPUT) {
        if (inst.getInputMissingValues(in)) {
          X[i][j] = new String(kmeans.valueAt(cluster, j));
        } else if (tipo != Attribute.NOMINAL) {
          X[i][j] = String.valueOf(inst.getInputRealValues(in));
        } else {
          X[i][j] = inst.getInputNominalValues(in);
        }
        in++;
      } else if (direccion == Attribute.OUTPUT) {
        if (inst.getOutputMissingValues(out)) {
          X[i][j] = new String(kmeans.valueAt(cluster, j));
        } else if (tipo != Attribute.NOMINAL) {
          X[i][j] = String.valueOf(inst.getOutputRealValues(out));
        } else {
          X[i][j] = inst.getOutputNominalValues(out);
        }
        out++;
      }
    }
  }

  /**
   * Returns the index in {@code [0, K)} of the centre with the smallest {@code kmeans.distance} to
   * {@code inst}; the lowest index wins on ties, and 0 is returned when no centre compares smaller.
   */
  private int clusterMasCercano(gCenter kmeans, Instance inst) {
    int mejor = 0;
    double mejorDist = Double.POSITIVE_INFINITY;
    for (int c = 0; c < K; c++) {
      double d = kmeans.distance(inst, c);
      if (d < mejorDist) {
        mejorDist = d;
        mejor = c;
      }
    }
    return mejor;
  }
예제 #18
0
  /**
   * Reads the parameter file and stores its values in the configuration fields.
   *
   * <p>Blank lines are skipped. The remaining lines are expected in this order: {@code algorithm =
   * }, {@code inputData = "train" "test"}, {@code outputData = "train" "test"}, {@code seed = },
   * {@code k = }, {@code error = } and {@code iterations = }. The algorithm line is consumed but
   * its value is not used.
   *
   * <p>If the file does not exist, cannot be read, or ends before all parameters were found, a
   * message is printed and the JVM exits with status -1.
   *
   * @param fileParam path of the parameter file
   */
  private void config_read(String fileParam) {
    File inputFile = new File(fileParam);

    if (!inputFile.exists()) {
      System.out.println("parameter " + fileParam + " file doesn't exists!");
      System.exit(-1);
    }

    BufferedReader buf_reader = null;
    // begin the configuration read from file
    try {
      buf_reader = new BufferedReader(new FileReader(inputFile));
      String out[];

      // algorithm name: present in the file but not needed here
      readNonEmptyLine(buf_reader);

      // input & output filenames
      out = parseQuotedPair(readNonEmptyLine(buf_reader), "inputData = ");
      input_train_name = out[0];
      input_test_name = out[1];

      out = parseQuotedPair(readNonEmptyLine(buf_reader), "outputData = ");
      output_train_name = out[0];
      output_test_name = out[1];

      // parameters
      semilla = Long.parseLong(readNonEmptyLine(buf_reader).split("seed = ")[1]);
      K = Integer.parseInt(readNonEmptyLine(buf_reader).split("k = ")[1]);
      minError = Double.parseDouble(readNonEmptyLine(buf_reader).split("error = ")[1]);
      maxIter = Integer.parseInt(readNonEmptyLine(buf_reader).split("iterations = ")[1]);

    } catch (IOException e) {
      System.out.println("IO exception = " + e);
      e.printStackTrace();
      System.exit(-1);
    } finally {
      // release the file handle on every path, including parse failures
      if (buf_reader != null) {
        try {
          buf_reader.close();
        } catch (IOException ignored) {
          // nothing useful can be done if closing a read-only stream fails
        }
      }
    }
  }

  /**
   * Returns the next line of {@code reader} that is not empty.
   *
   * @throws IOException if reading fails or the end of the file is reached first
   */
  private static String readNonEmptyLine(BufferedReader reader) throws IOException {
    String line;
    do {
      line = reader.readLine();
      if (line == null) {
        // readLine() signals end of stream with null; report it instead of dereferencing it
        throw new IOException("unexpected end of parameter file");
      }
    } while (line.length() == 0); // avoid empty lines for processing -> produce exec failure
    return line;
  }

  /**
   * Extracts the two quoted file names from a line of the form {@code <key>"first" "second"}.
   *
   * @param line full configuration line
   * @param key text preceding the first quote, e.g. {@code "inputData = "}
   * @return a two-element array: first name, second name (both without quotes)
   */
  private static String[] parseQuotedPair(String line, String key) {
    String parts[] = line.split(key)[1].split("\\s\"");
    // parts[0] still carries both of its quotes; parts[1] lost its opening quote in the split
    String first = parts[0].substring(1, parts[0].length() - 1);
    String second = parts[1].substring(0, parts[1].length() - 1);
    // a further quoted token on the line leaves one more closing quote behind
    if (second.charAt(second.length() - 1) == '"') {
      second = second.substring(0, second.length() - 1);
    }
    return new String[] {first, second};
  }
예제 #19
0
파일: LVQ.java 프로젝트: triguero/Keel3.0
  public void ejecutar() {

    int i, j, l, m;
    double alfai;
    int nClases;

    int claseObt;

    boolean marcas[];
    boolean notFound;

    int init;
    int clasSel[];

    int baraje[];

    int pos, tmp;
    String instanciasIN[];
    String instanciasOUT[];

    long tiempo = System.currentTimeMillis();

    /* Getting the number of differents classes */

    nClases = 0;

    for (i = 0; i < clasesTrain.length; i++) if (clasesTrain[i] > nClases) nClases = clasesTrain[i];

    nClases++;

    /* Shuffle the train set */

    baraje = new int[datosTrain.length];

    Randomize.setSeed(semilla);

    for (i = 0; i < datosTrain.length; i++) baraje[i] = i;

    for (i = 0; i < datosTrain.length; i++) {

      pos = Randomize.Randint(i, datosTrain.length - 1);

      tmp = baraje[i];

      baraje[i] = baraje[pos];

      baraje[pos] = tmp;
    }

    /*
     * Inicialization of the flagged instaces vector for a posterior
     * elimination
     */

    marcas = new boolean[datosTrain.length];

    for (i = 0; i < datosTrain.length; i++) marcas[i] = false;

    if (datosTrain.length > 0) {

      // marcas[baraje[0]] = true; //the first instance is included always

      nSel = n_p;
      if (nSel < nClases) nSel = nClases;

    } else {

      System.err.println("Input dataset is empty");

      nSel = 0;
    }
    clasSel = new int[nClases];
    System.out.print("Selecting initial neurons... ");
    // at least, there must be 1 neuron of each class at the beginning
    init = nClases;
    for (i = 0; i < nClases && i < datosTrain.length; i++) {
      pos = Randomize.Randint(0, datosTrain.length - 1);
      tmp = 0;
      while ((clasesTrain[pos] != i || marcas[pos]) && tmp < datosTrain.length) {
        pos = (pos + 1) % datosTrain.length;
        tmp++;
      }
      if (tmp < datosTrain.length) marcas[pos] = true;
      else init--;
      // clasSel[i] = i;
    }
    for (i = init; i < Math.min(nSel, datosTrain.length); i++) {
      tmp = 0;
      pos = Randomize.Randint(0, datosTrain.length - 1);
      while (marcas[pos]) {
        pos = (pos + 1) % datosTrain.length;
        tmp++;
      }
      // if(i<nClases){
      // notFound = true;
      // do{
      // for(j=i-1;j>=0 && notFound;j--){
      // if(clasSel[j] == clasesTrain[pos])
      // notFound = false;
      // }
      // if(!notFound)
      // pos = Randomize.Randint (0, datosTrain.length-1);
      // }while(!notFound);
      // }
      // clasSel[i] = clasesTrain[pos];
      marcas[pos] = true;
      init++;
    }
    nSel = init;
    System.out.println("Initial neurons selected: " + nSel);

    /* Building of the S set from the flags */

    conjS = new double[nSel][datosTrain[0].length];

    clasesS = new int[nSel];

    for (m = 0, l = 0; m < datosTrain.length; m++) {

      if (marcas[m]) { // the instance must be copied to the solution

        for (j = 0; j < datosTrain[0].length; j++) {

          conjS[l][j] = datosTrain[m][j];
        }

        clasesS[l] = clasesTrain[m];

        l++;
      }
    }

    alfai = alpha;
    boolean change = true;
    /* Body of the LVQ algorithm. */

    // Train the network
    for (int it = 0; it < T && change; it++) {
      change = false;
      alpha = alfai;
      for (i = 1; i < datosTrain.length; i++) {
        // search for the nearest neuron to training instance
        pos = NN(nSel, conjS, datosTrain[baraje[i]]);
        // nearest neuron labels correctly the class of training
        // instance?

        if (clasesS[pos] != clasesTrain[baraje[i]]) { // NO - repel
          // the neuron
          for (j = 0; j < conjS[pos].length; j++) {
            conjS[pos][j] = conjS[pos][j] - alpha * (datosTrain[baraje[i]][j] - conjS[pos][j]);
          }
          change = true;
        } else { // YES - migrate the neuron towards the input vector
          for (j = 0; j < conjS[pos].length; j++) {
            conjS[pos][j] = conjS[pos][j] + alpha * (datosTrain[baraje[i]][j] - conjS[pos][j]);
          }
        }
        alpha = nu * alpha;
      }
      // Shuffle again the training partition
      baraje = new int[datosTrain.length];

      for (i = 0; i < datosTrain.length; i++) baraje[i] = i;

      for (i = 0; i < datosTrain.length; i++) {

        pos = Randomize.Randint(i, datosTrain.length - 1);

        tmp = baraje[i];

        baraje[i] = baraje[pos];

        baraje[pos] = tmp;
      }
    }
    System.out.println(
        "LVQ " + relation + " " + (double) (System.currentTimeMillis() - tiempo) / 1000.0 + "s");
    // Classify the train data set
    instanciasIN = new String[datosReferencia.length];
    instanciasOUT = new String[datosReferencia.length];
    for (i = 0; i < datosReferencia.length; i++) {
      /* Classify the instance selected in this iteration */
      Attribute a = Attributes.getOutputAttribute(0);

      int tipo = a.getType();
      claseObt = KNN.evaluacionKNN2(1, conjS, clasesS, datosReferencia[i], nClases);
      if (tipo != Attribute.NOMINAL) {
        instanciasIN[i] = new String(String.valueOf(clasesReferencia[i]));
        instanciasOUT[i] = new String(String.valueOf(claseObt));
      } else {
        instanciasIN[i] = new String(a.getNominalValue(clasesReferencia[i]));
        instanciasOUT[i] = new String(a.getNominalValue(claseObt));
      }
    }

    escribeSalida(
        ficheroSalida[0], instanciasIN, instanciasOUT, entradas, salida, nEntradas, relation);

    // Classify the test data set
    normalizarTest();
    instanciasIN = new String[datosTest.length];
    instanciasOUT = new String[datosTest.length];
    for (i = 0; i < datosTest.length; i++) {
      /* Classify the instance selected in this iteration */
      Attribute a = Attributes.getOutputAttribute(0);

      int tipo = a.getType();

      claseObt = KNN.evaluacionKNN2(1, conjS, clasesS, datosTest[i], nClases);
      if (tipo != Attribute.NOMINAL) {
        instanciasIN[i] = new String(String.valueOf(clasesTest[i]));
        instanciasOUT[i] = new String(String.valueOf(claseObt));
      } else {
        instanciasIN[i] = new String(a.getNominalValue(clasesTest[i]));
        instanciasOUT[i] = new String(a.getNominalValue(claseObt));
      }
    }

    escribeSalida(
        ficheroSalida[1], instanciasIN, instanciasOUT, entradas, salida, nEntradas, relation);

    // Print the network to a file
    printNetworkToFile(ficheroSalida[2], referencia.getHeader());
  }
예제 #20
0
파일: SGA.java 프로젝트: Navieclipse/KEEL
  /**
   * Executes the algorithm.
   *
   * <p>Seeds the random generator with {@code semilla}, creates and evaluates {@code tamPoblacion}
   * chromosomes over the training instances and then, until {@code nEval} offspring evaluations
   * have been spent, repeatedly picks two parents (binary tournament when {@code torneo} is set,
   * linear ranking otherwise), produces two offspring by crossover or plain copy, mutates and
   * evaluates them, and overwrites the last two slots of the sorted population with them.
   *
   * <p>When the search ends, the instances flagged by the first chromosome of the sorted
   * population are written to {@code ficheroSalida[0]} and the test set to {@code
   * ficheroSalida[1]}.
   */
  public void ejecutar() {

    int i, j, l;
    int nClases;
    double conjR[][];
    int conjN[][];
    boolean conjM[][];
    int clasesS[];
    int nSel;
    Cromosoma poblacion[];
    int ev = 0;
    double prob[];
    double NUmax = 1.5;
    double NUmin = 0.5; // used for lineal ranking
    double aux;
    double pos1, pos2;
    int sel1, sel2;
    Cromosoma newPob[];

    long tiempo = System.currentTimeMillis();

    /* Number of classes: highest class label found in the training set, plus one */
    nClases = 0;
    for (i = 0; i < clasesTrain.length; i++) {
      if (clasesTrain[i] > nClases) {
        nClases = clasesTrain[i];
      }
    }
    nClases++;

    /* Random initialization of the population */
    Randomize.setSeed(semilla);
    poblacion = new Cromosoma[tamPoblacion];
    for (i = 0; i < tamPoblacion; i++) {
      poblacion[i] = new Cromosoma(datosTrain.length);
    }

    /* Initial evaluation of the population (not charged against nEval) */
    for (i = 0; i < tamPoblacion; i++) {
      poblacion[i].evalua(
          datosTrain,
          realTrain,
          nominalTrain,
          nulosTrain,
          clasesTrain,
          alfa,
          kNeigh,
          nClases,
          distanceEu);
    }

    if (torneo) {
      while (ev < nEval) {
        /* Binary tournament selection of both parents */
        sel1 = torneoBinario(poblacion);
        sel2 = torneoBinario(poblacion);

        newPob = generaDescendientes(poblacion, sel1, sel2);
        ev += evaluaPendientes(newPob, nClases);

        /* Replace the two worst: sort first so that they sit in the last two slots */
        Arrays.sort(poblacion);
        poblacion[tamPoblacion - 2] = new Cromosoma(datosTrain.length, newPob[0]);
        poblacion[tamPoblacion - 1] = new Cromosoma(datosTrain.length, newPob[1]);
      }
    } else {
      /* Cumulative selection probabilities of linear ranking, indexed by rank */
      prob = new double[tamPoblacion];
      for (i = 0; i < tamPoblacion; i++) {
        aux = (NUmax - NUmin) * ((double) i / (tamPoblacion - 1));
        prob[i] = (1.0 / tamPoblacion) * (NUmax - aux);
      }
      for (i = 1; i < tamPoblacion; i++) {
        prob[i] = prob[i] + prob[i - 1];
      }

      while (ev < nEval) {
        /* Sort the population by quality criterion so that index equals rank */
        Arrays.sort(poblacion);

        pos1 = Randomize.Rand();
        pos2 = Randomize.Rand();
        sel1 = seleccionRanking(prob, pos1);
        sel2 = seleccionRanking(prob, pos2);

        newPob = generaDescendientes(poblacion, sel1, sel2);
        ev += evaluaPendientes(newPob, nClases);

        /* Replace the two worst */
        poblacion[tamPoblacion - 2] = new Cromosoma(datosTrain.length, newPob[0]);
        poblacion[tamPoblacion - 1] = new Cromosoma(datosTrain.length, newPob[1]);
      }
    }

    /*
     * The last replacement leaves two unsorted offspring at the tail (and, when no iteration ran
     * in tournament mode, the population was never sorted), so sort once more before reading the
     * best chromosome from the front. This relies on the same ordering the replacement step
     * already assumes: best first, worst last.
     */
    Arrays.sort(poblacion);
    nSel = poblacion[0].genesActivos();

    /* Building of S set from the best cromosome obtained */
    conjR = new double[nSel][datosTrain[0].length];
    conjN = new int[nSel][datosTrain[0].length];
    conjM = new boolean[nSel][datosTrain[0].length];
    clasesS = new int[nSel];
    for (i = 0, l = 0; i < datosTrain.length; i++) {
      if (poblacion[0].getGen(i)) { // the instance must be copied to the solution
        for (j = 0; j < datosTrain[0].length; j++) {
          conjR[l][j] = realTrain[i][j];
          conjN[l][j] = nominalTrain[i][j];
          conjM[l][j] = nulosTrain[i][j];
        }
        clasesS[l] = clasesTrain[i];
        l++;
      }
    }

    System.out.println(
        "SGA " + relation + " " + (double) (System.currentTimeMillis() - tiempo) / 1000.0 + "s");

    OutputIS.escribeSalida(
        ficheroSalida[0], conjR, conjN, conjM, clasesS, entradas, salida, nEntradas, relation);
    OutputIS.escribeSalida(ficheroSalida[1], test, entradas, salida, nEntradas, relation);
  } // end-method

  /**
   * Binary tournament: draws two distinct random indexes and keeps the one whose chromosome has
   * the strictly higher quality (the second one on ties).
   *
   * @param poblacion current population
   * @return index of the tournament winner
   */
  private int torneoBinario(Cromosoma[] poblacion) {
    int comp1 = Randomize.Randint(0, tamPoblacion - 1);
    int comp2;
    do {
      comp2 = Randomize.Randint(0, tamPoblacion - 1);
    } while (comp2 == comp1);
    if (poblacion[comp1].getCalidad() > poblacion[comp2].getCalidad()) {
      return comp1;
    }
    return comp2;
  }

  /**
   * Maps a random draw onto a rank using the cumulative ranking probabilities.
   *
   * <p>Returns the first index whose cumulative probability is not below {@code pos}. The result
   * is clamped to the last index: the cumulative sum is built with floating-point additions and
   * may end marginally below the draw, which would otherwise yield an index past the array.
   *
   * @param prob cumulative selection probabilities, one entry per rank
   * @param pos random draw to locate
   * @return selected index, always a valid position of {@code prob} when it is not empty
   */
  private int seleccionRanking(double[] prob, double pos) {
    int idx = 0;
    while (idx < prob.length - 1 && prob[idx] < pos) {
      idx++;
    }
    return idx;
  }

  /**
   * Builds two offspring from the selected parents: with probability {@code pCruce} through
   * {@code crucePMX}, otherwise as copies of the parents; both are then mutated.
   *
   * @param poblacion current population
   * @param sel1 index of the first parent
   * @param sel2 index of the second parent
   * @return the two mutated offspring
   */
  private Cromosoma[] generaDescendientes(Cromosoma[] poblacion, int sel1, int sel2) {
    Cromosoma[] hijos = new Cromosoma[2];

    if (Randomize.Rand() < pCruce) { // there is cross
      crucePMX(poblacion, hijos, sel1, sel2);
    } else { // there is not cross
      hijos[0] = new Cromosoma(datosTrain.length, poblacion[sel1]);
      hijos[1] = new Cromosoma(datosTrain.length, poblacion[sel2]);
    }

    /* Mutation of the cromosomes */
    for (int h = 0; h < hijos.length; h++) {
      hijos[h].mutacion(pMutacion1to0, pMutacion0to1);
    }
    return hijos;
  }

  /**
   * Evaluates every offspring that reports itself as not yet evaluated.
   *
   * @param hijos offspring to check
   * @param nClases number of classes of the problem
   * @return how many evaluations were actually performed
   */
  private int evaluaPendientes(Cromosoma[] hijos, int nClases) {
    int gastadas = 0;
    for (int h = 0; h < hijos.length; h++) {
      if (!hijos[h].estaEvaluado()) {
        hijos[h].evalua(
            datosTrain,
            realTrain,
            nominalTrain,
            nulosTrain,
            clasesTrain,
            alfa,
            kNeigh,
            nClases,
            distanceEu);
        gastadas++;
      }
    }
    return gastadas;
  }