Exemplo n.º 1
0
  private void generateModel() {
    String salida = new String("");
    double max_auc = 0;
    ArrayList<String> solutions = this.getAllSolutions();
    models = new ArrayList<Farchd>();

    int nEjemplos = train.getnData();
    if (this.instances == this.MAJ) {
      nEjemplos = train.getMajority();
    }
    boolean[] variables = new boolean[train.getnInputs()];
    boolean[] ejemplos = new boolean[nEjemplos];
    this.weightsAUC = new double[solutions.size() / 2]; // Hay 2 soluciones FS e IS

    for (int i = 0, j = 0; i < solutions.size(); i += 2, j++) {
      int vars, ejs;
      vars = ejs = 0;
      variables = decode(solutions.get(i));
      ejemplos = decode(solutions.get(i + 1));
      for (int l = 0; l < variables.length; l++) {
        // variables[j] = solution[j];
        if (variables[l]) vars++;
      }
      for (int l = 0; l < ejemplos.length; l++) {
        if (ejemplos[l]) ejs++;
      }
      try {
        Farchd model = new Farchd(train, val, test, variables, ejemplos);

        /** ******** */
        // double fit = model.getAUCTr();
        double auc_tr = model.execute(true);
        double auc_tst = model.getAUCTst();
        if (auc_tr > max_auc) {
          max_auc = auc_tr;
          indexBest = j;
        }
        this.weightsAUC[j] = auc_tr;

        salida +=
            "Solution[" + j + "]:\t" + vars + "\t" + ejs + "\t" + auc_tr + "\t" + auc_tst + "\n";

        /** ******** */
        models.add(model);
      } catch (Exception e) {
        System.err.println("Liada maxima al generar modelo ");
        e.printStackTrace(System.err);
        System.exit(-1);
      }
    }
    System.out.print(salida);
    Files.writeFile(header + "_AUC.txt", salida);
  }
Exemplo n.º 2
0
  /**
   * Process a dataset file for a clustering problem.
   *
   * @param nfexamples Name of the dataset file
   * @param train The dataset file is for training or for test
   * @throws java.io.IOException if there is any semantical, lexical or sintactical error in the
   *     input file.
   */
  public void processClusterDataset(String nfexamples, boolean train) throws IOException {

    try {

      // Load in memory a dataset that contains a classification problem
      IS.readSet(nfexamples, train);

      nData = IS.getNumInstances();
      nInputs = Attributes.getInputNumAttributes();
      nVariables = nInputs + Attributes.getOutputNumAttributes();

      if (Attributes.getOutputNumAttributes() != 0) {
        System.out.println("This algorithm can not process datasets with outputs");
        System.out.println("All outputs will be removed");
      }

      // Initialize and fill our own tables
      X = new double[nData][nInputs];
      missing = new boolean[nData][nInputs];

      // Maximum and minimum of inputs
      iMaximum = new double[nInputs];
      iMinimum = new double[nInputs];

      // Maximum and minimum for output data
      oMaximum = 0;
      oMinimum = 0;

      // All values are casted into double/integer
      nClasses = 0;
      for (int i = 0; i < X.length; i++) {
        Instance inst = IS.getInstance(i);
        for (int j = 0; j < nInputs; j++) {
          X[i][j] = IS.getInputNumericValue(i, j);
          missing[i][j] = inst.getInputMissingValues(j);
          if (X[i][j] > iMaximum[j] || i == 0) {
            iMaximum[j] = X[i][j];
          }
          if (X[i][j] < iMinimum[j] || i == 0) {
            iMinimum[j] = X[i][j];
          }
        }
      }

    } catch (Exception e) {
      System.out.println("DBG: Exception in readSet");
      e.printStackTrace();
    }
  }
Exemplo n.º 3
0
  /** It launches the algorithm */
  public void execute() {
    if (this.somethingWrong) { // We do not execute the program
      System.err.println("An error was found, either the data-set has missing values.");
      System.err.println(
          "Please remove the examples with missing data or apply a MV preprocessing.");
      System.err.println("Aborting the program");
      // We should not use the statement: System.exit(-1);
    } else {
      // We do here the algorithm's operations

      int nClasses = train.getnClasses();
      aprioriClassDistribution = new double[nClasses];
      for (int i = 0; i < nClasses; i++) {
        aprioriClassDistribution[i] = 1.0 * val.numberInstances(i) / val.size();
      }

      if (model) { // el modelo no esta generado en fichero previamente
        NSGA2 search =
            new NSGA2(
                train, seed, populationSize, maxTrials, crossover, mutation, instances, fitness);
        try {
          search.execute();
        } catch (Exception e) {
          e.printStackTrace(System.err);
        }
      }

      // Finally we should fill the training and test output files

      this.generateModel();

      double avgTr = this.doOutput(val, this.outputTr, false);
      double aucTr = getAUC(val);
      double avgTst = this.doOutput(test, this.outputTst, true);
      double aucTst = getAUC(test);
      System.out.print("AUC Train: " + aucTr);
      System.out.println("; AvgAcc Train: " + avgTr);
      System.out.print("AUC Test: " + aucTst);
      System.out.println("; AvgAcc Test: " + avgTst);

      totalTime = System.currentTimeMillis() - startTime;
      System.out.println("Algorithm Finished: " + totalTime);
    }
  }
Exemplo n.º 4
0
  /**
   * Process a dataset file for a classification problem.
   *
   * @param nfejemplos Name of the dataset file
   * @param train The dataset file is for training or for test
   * @throws java.io.IOException if there is any semantical, lexical or sintactical error in the
   *     input file.
   */
  public void processClassifierDataset(String nfejemplos, boolean train) throws IOException {

    try {

      // Load in memory a dataset that contains a classification problem
      IS.readSet(nfejemplos, train);

      nData = IS.getNumInstances();
      nInputs = Attributes.getInputNumAttributes();
      nVariables = nInputs + Attributes.getOutputNumAttributes();

      // Check that there is only one output variable and
      // it is nominal

      if (Attributes.getOutputNumAttributes() > 1) {
        System.out.println("This algorithm can not process MIMO datasets");
        System.out.println("All outputs but the first one will be removed");
      }

      boolean noOutputs = false;
      if (Attributes.getOutputNumAttributes() < 1) {
        System.out.println("This algorithm can not process datasets without outputs");
        System.out.println("Zero-valued output generated");
        noOutputs = true;
      }

      // Initialize and fill our own tables
      X = new double[nData][nInputs];
      missing = new boolean[nData][nInputs];
      C = new int[nData];

      // Maximum and minimum of inputs
      iMaximum = new double[nInputs];
      iMinimum = new double[nInputs];

      // Maximum and minimum for output data
      oMaximum = 0;
      oMinimum = 0;

      // All values are casted into double/integer
      nClasses = 0;
      for (int i = 0; i < X.length; i++) {
        Instance inst = IS.getInstance(i);
        for (int j = 0; j < nInputs; j++) {
          X[i][j] = IS.getInputNumericValue(i, j);
          missing[i][j] = inst.getInputMissingValues(j);
          if (X[i][j] > iMaximum[j] || i == 0) {
            iMaximum[j] = X[i][j];
          }
          if (X[i][j] < iMinimum[j] || i == 0) {
            iMinimum[j] = X[i][j];
          }
        }

        if (noOutputs) {
          C[i] = 0;
        } else {
          C[i] = (int) IS.getOutputNumericValue(i, 0);
        }
        if (C[i] > nClasses) {
          nClasses = C[i];
        }
      }
      nClasses++;
      System.out.println("Number of classes=" + nClasses);

    } catch (Exception e) {
      System.out.println("DBG: Exception in readSet");
      e.printStackTrace();
    }
  }
Exemplo n.º 5
0
  /** Process the training and test files provided in the parameters file to the constructor. */
  public void process() {
    // declarations
    double[] outputs;
    double[] outputs2;
    Instance neighbor;
    double dist, mean;
    int actual;
    Randomize rnd = new Randomize();
    Instance ex;
    gCenter kmeans = null;
    int iterations = 0;
    double E;
    double prevE;
    int totalMissing = 0;
    boolean allMissing = true;

    rnd.setSeed(semilla);
    // PROCESS
    try {

      // Load in memory a dataset that contains a classification problem
      IS.readSet(input_train_name, true);
      int in = 0;
      int out = 0;

      ndatos = IS.getNumInstances();
      nvariables = Attributes.getNumAttributes();
      nentradas = Attributes.getInputNumAttributes();
      nsalidas = Attributes.getOutputNumAttributes();

      X = new String[ndatos][nvariables]; // matrix with transformed data
      kmeans = new gCenter(K, ndatos, nvariables);

      timesSeen = new FreqList[nvariables];
      mostCommon = new String[nvariables];

      // first, we choose k 'means' randomly from all
      // instances
      totalMissing = 0;
      for (int i = 0; i < ndatos; i++) {
        Instance inst = IS.getInstance(i);
        if (inst.existsAnyMissingValue()) totalMissing++;
      }
      if (totalMissing == ndatos) allMissing = true;
      else allMissing = false;
      for (int numMeans = 0; numMeans < K; numMeans++) {
        do {
          actual = (int) (ndatos * rnd.Rand());
          ex = IS.getInstance(actual);
        } while (ex.existsAnyMissingValue() && !allMissing);

        kmeans.copyCenter(ex, numMeans);
      }

      // now, iterate adjusting clusters' centers and
      // instances to them
      prevE = 0;
      iterations = 0;
      do {
        for (int i = 0; i < ndatos; i++) {
          Instance inst = IS.getInstance(i);

          kmeans.setClusterOf(inst, i);
        }
        // set new centers
        kmeans.recalculateCenters(IS);
        // compute RMSE
        E = 0;
        for (int i = 0; i < ndatos; i++) {
          Instance inst = IS.getInstance(i);

          E += kmeans.distance(inst, kmeans.getClusterOf(i));
        }
        iterations++;
        // System.out.println(iterations+"\t"+E);
        if (Math.abs(prevE - E) == 0) iterations = maxIter;
        else prevE = E;
      } while (E > minError && iterations < maxIter);
      for (int i = 0; i < ndatos; i++) {
        Instance inst = IS.getInstance(i);

        in = 0;
        out = 0;

        for (int j = 0; j < nvariables; j++) {
          Attribute a = Attributes.getAttribute(j);

          direccion = a.getDirectionAttribute();
          tipo = a.getType();

          if (direccion == Attribute.INPUT) {
            if (tipo != Attribute.NOMINAL && !inst.getInputMissingValues(in)) {
              X[i][j] = new String(String.valueOf(inst.getInputRealValues(in)));
            } else {
              if (!inst.getInputMissingValues(in)) X[i][j] = inst.getInputNominalValues(in);
              else {
                actual = kmeans.getClusterOf(i);
                X[i][j] = new String(kmeans.valueAt(actual, j));
              }
            }
            in++;
          } else {
            if (direccion == Attribute.OUTPUT) {
              if (tipo != Attribute.NOMINAL && !inst.getOutputMissingValues(out)) {
                X[i][j] = new String(String.valueOf(inst.getOutputRealValues(out)));
              } else {
                if (!inst.getOutputMissingValues(out)) X[i][j] = inst.getOutputNominalValues(out);
                else {
                  actual = kmeans.getClusterOf(i);
                  X[i][j] = new String(kmeans.valueAt(actual, j));
                }
              }
              out++;
            }
          }
        }
      }
    } catch (Exception e) {
      System.out.println("Dataset exception = " + e);
      e.printStackTrace();
      System.exit(-1);
    }
    write_results(output_train_name);
    /** ************************************************************************************ */
    // does a test file associated exist?
    if (input_train_name.compareTo(input_test_name) != 0) {
      try {

        // Load in memory a dataset that contains a classification problem
        IStest.readSet(input_test_name, false);
        int in = 0;
        int out = 0;

        ndatos = IStest.getNumInstances();
        nvariables = Attributes.getNumAttributes();
        nentradas = Attributes.getInputNumAttributes();
        nsalidas = Attributes.getOutputNumAttributes();

        for (int i = 0; i < ndatos; i++) {
          Instance inst = IStest.getInstance(i);

          in = 0;
          out = 0;

          for (int j = 0; j < nvariables; j++) {
            Attribute a = Attributes.getAttribute(j);

            direccion = a.getDirectionAttribute();
            tipo = a.getType();

            if (direccion == Attribute.INPUT) {
              if (tipo != Attribute.NOMINAL && !inst.getInputMissingValues(in)) {
                X[i][j] = new String(String.valueOf(inst.getInputRealValues(in)));
              } else {
                if (!inst.getInputMissingValues(in)) X[i][j] = inst.getInputNominalValues(in);
                else {
                  actual = kmeans.getClusterOf(i);
                  X[i][j] = new String(kmeans.valueAt(actual, j));
                }
              }
              in++;
            } else {
              if (direccion == Attribute.OUTPUT) {
                if (tipo != Attribute.NOMINAL && !inst.getOutputMissingValues(out)) {
                  X[i][j] = new String(String.valueOf(inst.getOutputRealValues(out)));
                } else {
                  if (!inst.getOutputMissingValues(out)) X[i][j] = inst.getOutputNominalValues(out);
                  else {
                    actual = kmeans.getClusterOf(i);
                    X[i][j] = new String(kmeans.valueAt(actual, j));
                  }
                }
                out++;
              }
            }
          }
        }
      } catch (Exception e) {
        System.out.println("Dataset exception = " + e);
        e.printStackTrace();
        System.exit(-1);
      }
      write_results(output_test_name);
    }
  }