예제 #1
0
  private void generateModel() {
    String salida = new String("");
    double max_auc = 0;
    ArrayList<String> solutions = this.getAllSolutions();
    models = new ArrayList<Farchd>();

    int nEjemplos = train.getnData();
    if (this.instances == this.MAJ) {
      nEjemplos = train.getMajority();
    }
    boolean[] variables = new boolean[train.getnInputs()];
    boolean[] ejemplos = new boolean[nEjemplos];
    this.weightsAUC = new double[solutions.size() / 2]; // Hay 2 soluciones FS e IS

    for (int i = 0, j = 0; i < solutions.size(); i += 2, j++) {
      int vars, ejs;
      vars = ejs = 0;
      variables = decode(solutions.get(i));
      ejemplos = decode(solutions.get(i + 1));
      for (int l = 0; l < variables.length; l++) {
        // variables[j] = solution[j];
        if (variables[l]) vars++;
      }
      for (int l = 0; l < ejemplos.length; l++) {
        if (ejemplos[l]) ejs++;
      }
      try {
        Farchd model = new Farchd(train, val, test, variables, ejemplos);

        /** ******** */
        // double fit = model.getAUCTr();
        double auc_tr = model.execute(true);
        double auc_tst = model.getAUCTst();
        if (auc_tr > max_auc) {
          max_auc = auc_tr;
          indexBest = j;
        }
        this.weightsAUC[j] = auc_tr;

        salida +=
            "Solution[" + j + "]:\t" + vars + "\t" + ejs + "\t" + auc_tr + "\t" + auc_tst + "\n";

        /** ******** */
        models.add(model);
      } catch (Exception e) {
        System.err.println("Liada maxima al generar modelo ");
        e.printStackTrace(System.err);
        System.exit(-1);
      }
    }
    System.out.print(salida);
    Files.writeFile(header + "_AUC.txt", salida);
  }
예제 #2
0
  /**
   * Process a dataset file for a clustering problem.
   *
   * @param nfexamples Name of the dataset file
   * @param train The dataset file is for training or for test
   * @throws java.io.IOException if there is any semantical, lexical or sintactical error in the
   *     input file.
   */
  public void processClusterDataset(String nfexamples, boolean train) throws IOException {

    try {

      // Load in memory a dataset that contains a classification problem
      IS.readSet(nfexamples, train);

      nData = IS.getNumInstances();
      nInputs = Attributes.getInputNumAttributes();
      nVariables = nInputs + Attributes.getOutputNumAttributes();

      if (Attributes.getOutputNumAttributes() != 0) {
        System.out.println("This algorithm can not process datasets with outputs");
        System.out.println("All outputs will be removed");
      }

      // Initialize and fill our own tables
      X = new double[nData][nInputs];
      missing = new boolean[nData][nInputs];

      // Maximum and minimum of inputs
      iMaximum = new double[nInputs];
      iMinimum = new double[nInputs];

      // Maximum and minimum for output data
      oMaximum = 0;
      oMinimum = 0;

      // All values are casted into double/integer
      nClasses = 0;
      for (int i = 0; i < X.length; i++) {
        Instance inst = IS.getInstance(i);
        for (int j = 0; j < nInputs; j++) {
          X[i][j] = IS.getInputNumericValue(i, j);
          missing[i][j] = inst.getInputMissingValues(j);
          if (X[i][j] > iMaximum[j] || i == 0) {
            iMaximum[j] = X[i][j];
          }
          if (X[i][j] < iMinimum[j] || i == 0) {
            iMinimum[j] = X[i][j];
          }
        }
      }

    } catch (Exception e) {
      System.out.println("DBG: Exception in readSet");
      e.printStackTrace();
    }
  }
예제 #3
0
  /** It launches the algorithm */
  public void execute() {
    if (this.somethingWrong) { // We do not execute the program
      System.err.println("An error was found, either the data-set has missing values.");
      System.err.println(
          "Please remove the examples with missing data or apply a MV preprocessing.");
      System.err.println("Aborting the program");
      // We should not use the statement: System.exit(-1);
    } else {
      // We do here the algorithm's operations

      int nClasses = train.getnClasses();
      aprioriClassDistribution = new double[nClasses];
      for (int i = 0; i < nClasses; i++) {
        aprioriClassDistribution[i] = 1.0 * val.numberInstances(i) / val.size();
      }

      if (model) { // el modelo no esta generado en fichero previamente
        NSGA2 search =
            new NSGA2(
                train, seed, populationSize, maxTrials, crossover, mutation, instances, fitness);
        try {
          search.execute();
        } catch (Exception e) {
          e.printStackTrace(System.err);
        }
      }

      // Finally we should fill the training and test output files

      this.generateModel();

      double avgTr = this.doOutput(val, this.outputTr, false);
      double aucTr = getAUC(val);
      double avgTst = this.doOutput(test, this.outputTst, true);
      double aucTst = getAUC(test);
      System.out.print("AUC Train: " + aucTr);
      System.out.println("; AvgAcc Train: " + avgTr);
      System.out.print("AUC Test: " + aucTst);
      System.out.println("; AvgAcc Test: " + avgTst);

      totalTime = System.currentTimeMillis() - startTime;
      System.out.println("Algorithm Finished: " + totalTime);
    }
  }
예제 #4
0
  /**
   * This private method extract the dataset and the method's parameters from the KEEL environment,
   * calculates the centroids using the KMeans class and print out the results with the validation
   * dataset.
   *
   * @param tty unused boolean parameter, kept for compatibility
   * @param pc ProcessConfig object to obtain the train and test datasets and the method's
   *     parameters.
   */
  private void clustering_kmeans(boolean tty, ProcessConfig pc) {

    try {

      String linea;
      ProcessDataset pd = new ProcessDataset();

      linea = (String) pc.parInputData.get(ProcessConfig.IndexTrain);

      if (pc.parNewFormat) pd.processClusterDataset(linea, true);
      else pd.procesa_clustering_old(linea);

      int ndatos = pd.getNdata(); // Number of examples
      int nvariables = pd.getNvariables(); // Number of variables
      int nentradas = pd.getNinputs(); // Number of inputs
      pd.showDatasetStatistics();

      System.out.println("Number of examples=" + ndatos);
      System.out.println("Number of inputs=" + nentradas);

      double[][] X = pd.getX(); // Input data

      double[] emaximo = pd.getImaximum(); // Maximum and Minimum for input data
      double[] eminimo = pd.getIminimum();
      int[] neparticion = new int[nentradas];

      int s;
      s = pc.parNClusters;

      KMeans KM = new KMeans(X, s, rand);
      double fallos = 0;
      try {
        for (int i = 0; i < X.length; i++) {
          int clase = KM.nearestCentroid(X[i]);
          // System.out.println("pattern="+i+" cluster="+clase);
        }
      } catch (Exception e) {
        System.out.println(e.toString());
      }

      // Clusters in the test set
      ProcessDataset pdt = new ProcessDataset();
      int nprueba, npentradas, npvariables;
      linea = (String) pc.parInputData.get(ProcessConfig.IndexTestKMeans);

      if (pc.parNewFormat) pdt.processClusterDataset(linea, false);
      else pdt.procesa_clustering_old(linea);

      nprueba = pdt.getNdata();
      npvariables = pdt.getNvariables();
      npentradas = pdt.getNinputs();
      pdt.showDatasetStatistics();

      if (npentradas != nentradas) throw new IOException("Error in test file");

      double[][] Xp = pdt.getX();
      int[] Co = new int[Xp.length];

      // Test set is classified
      try {
        for (int i = 0; i < Xp.length; i++) {
          Co[i] = KM.nearestCentroid(Xp[i]);
          // System.out.println("pattern test="+i+" cluster="+Co[i]);
        }

      } catch (Exception e) {
        System.out.println(e.toString());
      }

      // Output format for clustering algorithms
      pc.results(Xp, Co);
      KM.print();

    } catch (FileNotFoundException e) {
      System.err.println(e + " Training data not found");
    } catch (IOException e) {
      System.err.println(e + " Read error");
    }
  }
예제 #5
0
  /**
   * Process a dataset file for a classification problem.
   *
   * @param nfejemplos Name of the dataset file
   * @param train The dataset file is for training or for test
   * @throws java.io.IOException if there is any semantical, lexical or sintactical error in the
   *     input file.
   */
  public void processClassifierDataset(String nfejemplos, boolean train) throws IOException {

    try {

      // Load in memory a dataset that contains a classification problem
      IS.readSet(nfejemplos, train);

      nData = IS.getNumInstances();
      nInputs = Attributes.getInputNumAttributes();
      nVariables = nInputs + Attributes.getOutputNumAttributes();

      // Check that there is only one output variable and
      // it is nominal

      if (Attributes.getOutputNumAttributes() > 1) {
        System.out.println("This algorithm can not process MIMO datasets");
        System.out.println("All outputs but the first one will be removed");
      }

      boolean noOutputs = false;
      if (Attributes.getOutputNumAttributes() < 1) {
        System.out.println("This algorithm can not process datasets without outputs");
        System.out.println("Zero-valued output generated");
        noOutputs = true;
      }

      // Initialize and fill our own tables
      X = new double[nData][nInputs];
      missing = new boolean[nData][nInputs];
      C = new int[nData];

      // Maximum and minimum of inputs
      iMaximum = new double[nInputs];
      iMinimum = new double[nInputs];

      // Maximum and minimum for output data
      oMaximum = 0;
      oMinimum = 0;

      // All values are casted into double/integer
      nClasses = 0;
      for (int i = 0; i < X.length; i++) {
        Instance inst = IS.getInstance(i);
        for (int j = 0; j < nInputs; j++) {
          X[i][j] = IS.getInputNumericValue(i, j);
          missing[i][j] = inst.getInputMissingValues(j);
          if (X[i][j] > iMaximum[j] || i == 0) {
            iMaximum[j] = X[i][j];
          }
          if (X[i][j] < iMinimum[j] || i == 0) {
            iMinimum[j] = X[i][j];
          }
        }

        if (noOutputs) {
          C[i] = 0;
        } else {
          C[i] = (int) IS.getOutputNumericValue(i, 0);
        }
        if (C[i] > nClasses) {
          nClasses = C[i];
        }
      }
      nClasses++;
      System.out.println("Number of classes=" + nClasses);

    } catch (Exception e) {
      System.out.println("DBG: Exception in readSet");
      e.printStackTrace();
    }
  }
예제 #6
0
  /**
   * Main Function
   *
   * @param args the Command line arguments. Only one is processed: the name of the file containing
   *     the
   *     <p>parameters
   */
  public static void main(String[] args) throws IOException {

    double[][] X;

    double[][] Y;

    int nInpt, nOutpl, ndata, i, j;

    Rbfn net;

    try {

      // Help required

      if (args.length > 0) {

        if (args[0].equals("--help")
            || args[0].equals("-help")
            || args[0].equals("-h")
            || args[0].equals("-?")) {

          doHelp();

          return;
        }
      }

      System.out.println("- Executing doRbfnDec " + args.length);

      // Reading parameters

      String paramFile = (args.length > 0) ? args[0] : "parameters.txt";

      setParameters(paramFile);

      System.out.println("    - Parameters file: " + paramFile);

      // Random generator setup

      if (reallySeed) {
        Randomize.setSeed((long) seed);
      }

      // Reading Training dataset

      ProcDataset Dtrn = new ProcDataset(trnFile, true);

      // Training
      System.out.println("Modeling Dataset");
      Dtrn.processModelDataset();
      nInpt = Dtrn.getninputs();
      nOutpl = 1; // PD.getnvariables()-nInpt;
      ndata = Dtrn.getndata();
      Y = new double[ndata][1];
      X = Dtrn.getX();
      double[] auxY;
      auxY = Dtrn.getY();
      for (i = 0; i < ndata; i++) Y[i][0] = auxY[i];
      // Building and training the net
      net = new Rbfn(nNeuronsIni, X, ndata, nInpt, nOutpl);
      net.decremental(X, Y, ndata, percent, alfa);
      double[] obtained = new double[ndata];
      net.testModeling(X, ndata, obtained);
      Dtrn.generateResultsModeling(outTrnFile, auxY, obtained);
      // TEST
      ProcDataset Dtst = new ProcDataset(tstFile, false);
      Dtst.processModelDataset();
      nInpt = Dtst.getninputs();
      nOutpl = 1; // PD.getnvariables()-nInpt;
      ndata = Dtst.getndata();
      X = Dtst.getX();
      auxY = Dtst.getY();
      Y = new double[ndata][1];
      for (i = 0; i < ndata; i++) Y[i][0] = auxY[i];
      obtained = new double[ndata];
      net.testModeling(X, ndata, obtained);
      Dtst.generateResultsModeling(outTstFile, auxY, obtained);
      RBFUtils.createOutputFile(trnFile, outRbfFile);
      net.printRbfn(outRbfFile);

      if (Dtrn.datasetType() == 2) System.out.println("This is not a clustering algorithm");

      System.out.println(
          "- End of doRbfnDec. See results in output files named according to "
              + paramFile
              + " parameters file.");

    } catch (Exception e) {

      throw new InternalError(e.toString());
    }
  }
예제 #7
0
  /** Process the training and test files provided in the parameters file to the constructor. */
  public void process() {
    // declarations
    double[] outputs;
    double[] outputs2;
    Instance neighbor;
    double dist, mean;
    int actual;
    Randomize rnd = new Randomize();
    Instance ex;
    gCenter kmeans = null;
    int iterations = 0;
    double E;
    double prevE;
    int totalMissing = 0;
    boolean allMissing = true;

    rnd.setSeed(semilla);
    // PROCESS
    try {

      // Load in memory a dataset that contains a classification problem
      IS.readSet(input_train_name, true);
      int in = 0;
      int out = 0;

      ndatos = IS.getNumInstances();
      nvariables = Attributes.getNumAttributes();
      nentradas = Attributes.getInputNumAttributes();
      nsalidas = Attributes.getOutputNumAttributes();

      X = new String[ndatos][nvariables]; // matrix with transformed data
      kmeans = new gCenter(K, ndatos, nvariables);

      timesSeen = new FreqList[nvariables];
      mostCommon = new String[nvariables];

      // first, we choose k 'means' randomly from all
      // instances
      totalMissing = 0;
      for (int i = 0; i < ndatos; i++) {
        Instance inst = IS.getInstance(i);
        if (inst.existsAnyMissingValue()) totalMissing++;
      }
      if (totalMissing == ndatos) allMissing = true;
      else allMissing = false;
      for (int numMeans = 0; numMeans < K; numMeans++) {
        do {
          actual = (int) (ndatos * rnd.Rand());
          ex = IS.getInstance(actual);
        } while (ex.existsAnyMissingValue() && !allMissing);

        kmeans.copyCenter(ex, numMeans);
      }

      // now, iterate adjusting clusters' centers and
      // instances to them
      prevE = 0;
      iterations = 0;
      do {
        for (int i = 0; i < ndatos; i++) {
          Instance inst = IS.getInstance(i);

          kmeans.setClusterOf(inst, i);
        }
        // set new centers
        kmeans.recalculateCenters(IS);
        // compute RMSE
        E = 0;
        for (int i = 0; i < ndatos; i++) {
          Instance inst = IS.getInstance(i);

          E += kmeans.distance(inst, kmeans.getClusterOf(i));
        }
        iterations++;
        // System.out.println(iterations+"\t"+E);
        if (Math.abs(prevE - E) == 0) iterations = maxIter;
        else prevE = E;
      } while (E > minError && iterations < maxIter);
      for (int i = 0; i < ndatos; i++) {
        Instance inst = IS.getInstance(i);

        in = 0;
        out = 0;

        for (int j = 0; j < nvariables; j++) {
          Attribute a = Attributes.getAttribute(j);

          direccion = a.getDirectionAttribute();
          tipo = a.getType();

          if (direccion == Attribute.INPUT) {
            if (tipo != Attribute.NOMINAL && !inst.getInputMissingValues(in)) {
              X[i][j] = new String(String.valueOf(inst.getInputRealValues(in)));
            } else {
              if (!inst.getInputMissingValues(in)) X[i][j] = inst.getInputNominalValues(in);
              else {
                actual = kmeans.getClusterOf(i);
                X[i][j] = new String(kmeans.valueAt(actual, j));
              }
            }
            in++;
          } else {
            if (direccion == Attribute.OUTPUT) {
              if (tipo != Attribute.NOMINAL && !inst.getOutputMissingValues(out)) {
                X[i][j] = new String(String.valueOf(inst.getOutputRealValues(out)));
              } else {
                if (!inst.getOutputMissingValues(out)) X[i][j] = inst.getOutputNominalValues(out);
                else {
                  actual = kmeans.getClusterOf(i);
                  X[i][j] = new String(kmeans.valueAt(actual, j));
                }
              }
              out++;
            }
          }
        }
      }
    } catch (Exception e) {
      System.out.println("Dataset exception = " + e);
      e.printStackTrace();
      System.exit(-1);
    }
    write_results(output_train_name);
    /** ************************************************************************************ */
    // does a test file associated exist?
    if (input_train_name.compareTo(input_test_name) != 0) {
      try {

        // Load in memory a dataset that contains a classification problem
        IStest.readSet(input_test_name, false);
        int in = 0;
        int out = 0;

        ndatos = IStest.getNumInstances();
        nvariables = Attributes.getNumAttributes();
        nentradas = Attributes.getInputNumAttributes();
        nsalidas = Attributes.getOutputNumAttributes();

        for (int i = 0; i < ndatos; i++) {
          Instance inst = IStest.getInstance(i);

          in = 0;
          out = 0;

          for (int j = 0; j < nvariables; j++) {
            Attribute a = Attributes.getAttribute(j);

            direccion = a.getDirectionAttribute();
            tipo = a.getType();

            if (direccion == Attribute.INPUT) {
              if (tipo != Attribute.NOMINAL && !inst.getInputMissingValues(in)) {
                X[i][j] = new String(String.valueOf(inst.getInputRealValues(in)));
              } else {
                if (!inst.getInputMissingValues(in)) X[i][j] = inst.getInputNominalValues(in);
                else {
                  actual = kmeans.getClusterOf(i);
                  X[i][j] = new String(kmeans.valueAt(actual, j));
                }
              }
              in++;
            } else {
              if (direccion == Attribute.OUTPUT) {
                if (tipo != Attribute.NOMINAL && !inst.getOutputMissingValues(out)) {
                  X[i][j] = new String(String.valueOf(inst.getOutputRealValues(out)));
                } else {
                  if (!inst.getOutputMissingValues(out)) X[i][j] = inst.getOutputNominalValues(out);
                  else {
                    actual = kmeans.getClusterOf(i);
                    X[i][j] = new String(kmeans.valueAt(actual, j));
                  }
                }
                out++;
              }
            }
          }
        }
      } catch (Exception e) {
        System.out.println("Dataset exception = " + e);
        e.printStackTrace();
        System.exit(-1);
      }
      write_results(output_test_name);
    }
  }