예제 #1
0
  /**
   * Runs the K-Means clustering experiment described by a KEEL configuration.
   *
   * <p>The training dataset named in {@code pc} is loaded and its input matrix is used, together
   * with {@code pc.parNClusters} and the externally defined {@code rand}, to build a {@link
   * KMeans} object. The test dataset is then loaded, each test pattern is assigned the index
   * returned by {@code KMeans.nearestCentroid}, and the assignments are passed to {@code
   * pc.results} before {@code KMeans.print()} is called.
   *
   * <p>Error handling is best-effort: an exception raised while assigning patterns is printed to
   * {@code System.out} and processing continues (test patterns that were not reached keep the
   * default index {@code 0}); failures while reading a dataset, or a test set whose number of
   * inputs differs from the training set, are reported on {@code System.err} and end the method
   * without rethrowing.
   *
   * @param tty unused boolean parameter, kept for compatibility
   * @param pc ProcessConfig object to obtain the train and test datasets and the method's
   *     parameters.
   */
  private void clustering_kmeans(boolean tty, ProcessConfig pc) {

    try {
      // Training set
      ProcessDataset pd = new ProcessDataset();
      String linea = (String) pc.parInputData.get(ProcessConfig.IndexTrain);

      if (pc.parNewFormat) {
        pd.processClusterDataset(linea, true);
      } else {
        pd.procesa_clustering_old(linea);
      }

      int ndatos = pd.getNdata(); // Number of examples
      int nentradas = pd.getNinputs(); // Number of inputs
      pd.showDatasetStatistics();

      System.out.println("Number of examples=" + ndatos);
      System.out.println("Number of inputs=" + nentradas);

      double[][] X = pd.getX(); // Input data

      int nClusters = pc.parNClusters;
      KMeans KM = new KMeans(X, nClusters, rand);

      // The cluster index of each training pattern is computed but not used.
      // NOTE(review): the call is kept in case nearestCentroid has side effects; if it is a pure
      // query this whole loop can be deleted -- confirm against KMeans.
      try {
        for (int i = 0; i < X.length; i++) {
          KM.nearestCentroid(X[i]);
        }
      } catch (Exception e) {
        System.out.println(e.toString());
      }

      // Clusters in the test set
      ProcessDataset pdt = new ProcessDataset();
      linea = (String) pc.parInputData.get(ProcessConfig.IndexTestKMeans);

      if (pc.parNewFormat) {
        pdt.processClusterDataset(linea, false);
      } else {
        pdt.procesa_clustering_old(linea);
      }

      int npentradas = pdt.getNinputs();
      pdt.showDatasetStatistics();

      // Train and test sets must describe patterns with the same number of inputs.
      if (npentradas != nentradas) {
        throw new IOException("Error in test file");
      }

      double[][] Xp = pdt.getX();
      int[] Co = new int[Xp.length];

      // Test set is classified
      try {
        for (int i = 0; i < Xp.length; i++) {
          Co[i] = KM.nearestCentroid(Xp[i]);
        }
      } catch (Exception e) {
        // Best-effort: report and still write whatever was assigned so far.
        System.out.println(e.toString());
      }

      // Output format for clustering algorithms
      pc.results(Xp, Co);
      KM.print();

    } catch (FileNotFoundException e) {
      System.err.println(e + " Training data not found");
    } catch (IOException e) {
      System.err.println(e + " Read error");
    }
  }
예제 #2
0
  /**
   * Learns a fuzzy rule-based regression model with a genetic algorithm and reports its results.
   *
   * <pre>
   * This private static method extracts the dataset and the method's parameters
   * from the KEEL environment, partitions the input and output spaces, learns
   * the FRBS regression model --which is a {@link PittsburghModel} instance--
   * using a genetic algorithm --which is an instance of the GeneticAlgorithm
   * class-- and prints out the results with the validation dataset.
   *
   * If the parameter Steady is not set then the genetic algorithm used is the
   * {@link GeneticAlgorithmGenerational}. If that parameter is set then the
   * genetic algorithm used is the {@link GeneticAlgorithmSteady}.
   *
   * If the computed number of rules is not below MAXFUZZYRULES, no model is
   * learnt and an all-zero output is reported for both the training and the
   * test dataset.
   * </pre>
   *
   * <p>Read failures and the invalidFitness / invalidCrossover / invalidMutation / invalidOptim
   * exceptions are printed to {@code System.err}; none of them is rethrown.
   *
   * @param tty unused boolean parameter, kept for compatibility
   * @param pc ProcessConfig object to obtain the train and test datasets and the method's
   *     parameters.
   */
  private static void fuzzyPittsburghModelling(boolean tty, ProcessConfig pc) {

    try {

      ProcessDataset pd = new ProcessDataset();
      String readALine = (String) pc.parInputData.get(ProcessConfig.IndexTrain);

      // NOTE(review): the legacy-format path calls oldClassificationProcess although this is a
      // modelling task -- confirm this is the intended reader.
      if (pc.parNewFormat) {
        pd.processModelDataset(readALine, true);
      } else {
        pd.oldClassificationProcess(readALine);
      }

      int nInputs = pd.getNinputs(); // Number of inputs

      double[][] X = pd.getX(); // Input data
      double[] Y = pd.getY(); // Output data
      pd.showDatasetStatistics();

      double[] inputMaximum = pd.getImaximum(); // Maximum and Minimum for input data
      double[] inputMinimum = pd.getIminimum();

      double outputMaximum = pd.getOmaximum(); // Maximum and Minimum for output data
      double outputMinimum = pd.getOminimum();

      // Every input and the output use the same number of linguistic terms.
      int nLabels = pc.parPartitionLabelNum;

      // Partitions definition
      // Check the number of rules: it is the product of the input partition sizes. The loop stops
      // early once the limit is exceeded, leaving the remaining partitions unset; they are not
      // used in that case because no model is built.
      int nrules = 1;
      FuzzyPartition[] inputPartitions = new FuzzyPartition[nInputs];
      for (int i = 0; i < nInputs; i++) {
        inputPartitions[i] = new FuzzyPartition(inputMinimum[i], inputMaximum[i], nLabels);
        nrules *= nLabels;
        if (nrules > MAXFUZZYRULES) {
          break;
        }
      }
      FuzzyPartition outputPartitions =
          new FuzzyPartition(outputMinimum, outputMaximum, nLabels);
      System.out.println("Number of rules = " + nrules);

      // NOTE(review): the loop above tolerates nrules == MAXFUZZYRULES but the test below does
      // not, so a rule base of exactly MAXFUZZYRULES rules falls back to the constant output --
      // confirm which boundary is intended.
      if (nrules < MAXFUZZYRULES) {
        int lPopulation = pc.parPopSize;
        int localnPopulations = pc.parIslandNumber;

        int defuzzificationType = RuleBase.DEFUZCDM;

        // Rule base
        FuzzyModel sistema =
            new FuzzyModel(
                inputPartitions,
                outputPartitions,
                RuleBase.product,
                RuleBase.sum,
                defuzzificationType);

        // Genetic Algorithm Optimization
        PittsburghModel p = new PittsburghModel(sistema, pc.parFitnessType, rand);

        p.setExamples(X, Y);

        int nIterations = pc.parIterNumber;

        int crossoverID = OperatorIdent.GENERICROSSOVER;
        int mutationID = OperatorIdent.GENERICMUTATION;

        int lTournament = pc.parTourSize;
        double mutationProb = pc.parMutProb;
        double lmutationAmpl = pc.parMutAmpl;
        double migrationProb = pc.parMigProb;
        double localOptProb = pc.parLoProb;
        int localOptIterations = pc.parLoIterNumber;

        GeneticAlgorithm AG;
        if (pc.parSteady) {
          AG =
              new GeneticAlgorithmSteady(
                  p,
                  lPopulation,
                  localnPopulations,
                  lTournament,
                  mutationProb,
                  lmutationAmpl,
                  migrationProb,
                  localOptProb,
                  localOptIterations,
                  OperatorIdent.AMEBA,
                  rand,
                  crossoverID,
                  mutationID);
        } else {
          // The generational variant takes no tournament size.
          AG =
              new GeneticAlgorithmGenerational(
                  p,
                  lPopulation,
                  localnPopulations,
                  mutationProb,
                  lmutationAmpl,
                  migrationProb,
                  localOptProb,
                  localOptIterations,
                  OperatorIdent.AMEBA,
                  rand,
                  crossoverID,
                  mutationID);
        }

        p = (PittsburghModel) AG.evolve(nIterations);

        // Result is printed
        p.debug();
        pc.trainingResults(Y, p.getYo());
        System.out.println("RMS Train = " + p.fitness());

        ProcessDataset pdt = loadModellingTestSet(pc, nInputs, "IOERR Test file");
        double[][] Xp = pdt.getX();
        double[] Yp = pdt.getY();

        // Re-target the evolved model at the test examples before evaluating it.
        p.setExamples(Xp, Yp);
        System.out.println("RMS test = " + p.fitness());
        pc.results(Yp, p.getYo());

      } else {

        // Too many rules: report an all-zero prediction for the training set...
        pc.trainingResults(Y, new double[Y.length]);

        ProcessDataset pdt = loadModellingTestSet(pc, nInputs, "IOERR test file");
        double[] Yp = pdt.getY();

        // ...and for the test set (a freshly allocated double[] is all zeros).
        double[] Yo = new double[Yp.length];
        System.out.println("Generating constant output (0)");

        pc.results(Yp, Yo);
      }

    } catch (FileNotFoundException e) {
      System.err.println(e + " Input file not found");
    } catch (IOException e) {
      System.err.println(e + " Read Error");
    } catch (invalidFitness e) {
      System.err.println(e);
    } catch (invalidCrossover e) {
      System.err.println(e);
    } catch (invalidMutation e) {
      System.err.println(e);
    } catch (invalidOptim e) {
      System.err.println(e);
    }
  }

  /**
   * Loads the test dataset named in {@code pc}, prints its statistics and checks that it has the
   * same number of inputs as the training dataset.
   *
   * @param pc configuration holding the test file name and the dataset format flag
   * @param nInputs number of inputs of the training dataset
   * @param mismatchMessage message of the exception thrown when the numbers of inputs differ
   * @return the loaded test dataset
   * @throws IOException if the dataset cannot be read or its number of inputs is not {@code
   *     nInputs}
   */
  private static ProcessDataset loadModellingTestSet(
      ProcessConfig pc, int nInputs, String mismatchMessage) throws IOException {
    ProcessDataset pdt = new ProcessDataset();
    String testFile = (String) pc.parInputData.get(ProcessConfig.IndexTest);

    if (pc.parNewFormat) {
      pdt.processModelDataset(testFile, false);
    } else {
      pdt.oldClassificationProcess(testFile);
    }

    int nTestInputs = pdt.getNinputs();
    pdt.showDatasetStatistics();

    if (nTestInputs != nInputs) {
      throw new IOException(mismatchMessage);
    }
    return pdt;
  }