Example #1
 /**
  * Chi square distribution
  *
  * @param x Chi^2 value
  * @param n Degrees of freedom
  * @return P-value associated
  */
 private static double ChiSq(double x, int n) {
   if (n == 1 && x > 1000) {
     return 0;
   }
   if (x > 1000 || n > 1000) {
     double q = ChiSq((x - n) * (x - n) / (2 * n), 1) / 2;
     if (x > n) {
       return q;
     }
     return 1 - q;
   }
   double p = Math.exp(-0.5 * x);
   if ((n % 2) == 1) {
     p = p * Math.sqrt(2 * x / Math.PI);
   }
   double k = n;
   while (k >= 2) {
     p = p * x / k;
     k = k - 2;
   }
   double t = p;
   double a = n;
   while (t > 0.0000000001 * p) {
     a = a + 2;
     t = t * x / a;
     p = p + t;
   }
   return 1 - p;
 }
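A quick cross-check of the series above: for two degrees of freedom the upper-tail probability has the closed form exp(-x/2). A minimal sanity-check sketch, assuming it is placed inside the same class that declares the private ChiSq method:

 // Sanity-check sketch; assumes it lives in the same class as the private ChiSq above.
 public static void main(String[] args) {
   double x = 4.0;
   double closedForm = Math.exp(-0.5 * x); // upper-tail probability for 2 degrees of freedom
   double series = ChiSq(x, 2);
   System.out.println("ChiSq(4.0, 2) = " + series + ", exp(-x/2) = " + closedForm);
 }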
Example #2
  /**
   * Calculates the upper bound for the Chi-Squared value of a rule.
   *
   * @param suppAnte the support for the antecedent of a rule.
   * @param suppCons the support for the consequent of a rule.
   * @return the Chi-Squared upper bound.
   */
  private double calcChiSquaredUpperBound(double suppAnte, double suppCons) {
    double term;

    // Test support for antecedent and consequent and choose minimum
    if (suppAnte < suppCons) term = Math.pow(suppAnte - ((suppAnte * suppCons) / numRecords), 2.0);
    else term = Math.pow(suppCons - ((suppAnte * suppCons) / numRecords), 2.0);

    // Determine e
    double eValue = calcWCSeValue(suppAnte, suppCons);

    // Return upper bound
    return (term * eValue * numRecords);
  }
Example #3
  /**
   * This private method computes the distance between an example in the dataset and a cluster
   * centroid. The distance is measured as the square root of the sum of the squares of the
   * differences between the example and the centroid across all dimensions.
   *
   * @param a The example in the dataset
   * @param b The cluster centroid
   * @return The distance between a and b as a double precision float value.
   */
  private static double distance(double a[], double b[]) {

    // Euclid distance between two patterns
    double d = 0;
    for (int i = 0; i < a.length; i++) d += (a[i] - b[i]) * (a[i] - b[i]);
    return Math.sqrt(d);
  }
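A tiny usage sketch (distance is private static, so this assumes the call is made from inside the declaring class); the 3-4-5 triangle gives an easy check:

  // Usage sketch; assumes it lives in the same class as the private distance method above.
  public static void main(String[] args) {
    double[] example = {0.0, 0.0};
    double[] centroid = {3.0, 4.0};
    System.out.println(distance(example, centroid)); // sqrt(3^2 + 4^2) = 5.0
  }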
Example #4
 /**
  * Computes the output of an RBF
  *
  * @param _input Input vector
  * @return The output of the RBF
  */
 public double evaluationRbf(double[] _input) {
   double aux;
   aux = RBFUtils.euclidean(_input, centre);
   aux *= aux;
   aux /= (2.0 * radius * radius);
   return (Math.exp(-aux));
 }
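The method evaluates a Gaussian kernel, exp(-||input - centre||^2 / (2 * radius^2)); centre and radius are fields of the surrounding class and RBFUtils.euclidean is a KEEL helper. A self-contained sketch of the same formula, with all names hypothetical:

 // Self-contained sketch of the Gaussian RBF activation computed above.
 public class RbfSketch {
   static double gaussianRbf(double[] input, double[] centre, double radius) {
     double sq = 0.0;
     for (int i = 0; i < input.length; i++) {
       double d = input[i] - centre[i];
       sq += d * d; // squared Euclidean distance to the centre
     }
     return Math.exp(-sq / (2.0 * radius * radius));
   }

   public static void main(String[] args) {
     // An input equal to the centre produces the maximum activation, 1.0.
     System.out.println(gaussianRbf(new double[] {1.0, 2.0}, new double[] {1.0, 2.0}, 0.5));
   }
 }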
Example #5
  /**
    * It converts a double vector to a char vector using Gray code
   *
    * @param ds the vector which is going to be changed
   * @param length the size of the vector
   * @return the changed vector
   */
  private char[] StringRep(double[] ds, int length) {

    int i;
    double n;
    int pos;
    double INCREMENTO;
    char[] Cad_sal;
    Cad_sal = new char[Genes * BITS_GEN + 1];
    if (flag == 1) {
      tmpstring = new char[Genes * BITS_GEN];
      flag = 0;
    }

    pos = 0;
    for (i = 0; i < length; i++) {
      INCREMENTO = (Gene[i].max() - Gene[i].min()) / (Math.pow(2.0, (double) BITS_GEN) - 1.0);

      n = (((ds[i] - Gene[i].min()) / INCREMENTO) + 0.5);
      tmpstring = F.Itoc((int) n, BITS_GEN);

      F.Gray(tmpstring, Cad_sal, BITS_GEN, pos);
      pos += BITS_GEN;
    }
    return Cad_sal;
  }
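F.Itoc and F.Gray are KEEL utility routines; the standard binary-to-Gray mapping they presumably implement is g = b XOR (b >> 1). A minimal standalone sketch of that mapping:

  // Standalone sketch of the standard binary-to-Gray conversion (g = b ^ (b >> 1)).
  public class GraySketch {
    static int toGray(int b) {
      return b ^ (b >>> 1);
    }

    public static void main(String[] args) {
      for (int b = 0; b < 8; b++) {
        System.out.println(b + " -> " + Integer.toBinaryString(toGray(b)));
      }
    }
  }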
Example #6
  /**
   * Computes the euclidean distance between a neuron and a vector
   *
   * @param v A vector
   * @return A double with the euclidean distance
   */
  public double euclideaDist(double[] v) {
    int i;
    double aux = 0;

    for (i = 0; i < nInput; i++) aux += (v[i] - centre[i]) * (v[i] - centre[i]);
    return (Math.sqrt(aux));
  }
Example #7
 /**
  * Fisher distribution
  *
  * @param f Fisher value
  * @param n1 N1 value
  * @param n2 N2 value
  * @return P-value associated
  */
 private static double FishF(double f, int n1, int n2) {
   double x = n2 / (n1 * f + n2);
   if ((n1 % 2) == 0) {
     return StatCom(1 - x, n2, n1 + n2 - 4, n2 - 2) * Math.pow(x, n2 / 2.0);
   }
   if ((n2 % 2) == 0) {
     return 1 - StatCom(x, n1, n1 + n2 - 4, n1 - 2) * Math.pow(1 - x, n1 / 2.0);
   }
   double th = Math.atan(Math.sqrt(n1 * f / (1.0 * n2)));
   double a = th / (Math.PI / 2.0);
   double sth = Math.sin(th);
   double cth = Math.cos(th);
   if (n2 > 1) {
     a = a + sth * cth * StatCom(cth * cth, 2, n2 - 3, -1) / (Math.PI / 2.0);
   }
   if (n1 == 1) {
     return 1 - a;
   }
   double c =
       4 * StatCom(sth * sth, n2 + 1, n1 + n2 - 4, n2 - 2) * sth * Math.pow(cth, n2) / Math.PI;
   if (n2 == 1) {
     return 1 - a + c / 2.0;
   }
   int k = 2;
   while (k <= (n2 - 1) / 2.0) {
     c = c * k / (k - .5);
     k = k + 1;
   }
   return 1 - a + c;
 }
Example #8
  /**
   * Calculates the Chi squared values and returns their sum.
   *
   * @return the sum of the Chi Squared values.
   */
  private double calcChiSquaredValue() {
    double sumChiSquaredValues = 0.0;

    for (int index = 0; index < obsValues.length; index++) {
      double chiValue = Math.pow((obsValues[index] - expValues[index]), 2.0) / expValues[index];
      sumChiSquaredValues = sumChiSquaredValues + chiValue;
    }

    // Return
    return (sumChiSquaredValues);
  }
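obsValues and expValues are fields of the surrounding class. A self-contained sketch of the same Pearson statistic, the sum of (O - E)^2 / E, on toy counts:

  // Self-contained sketch of the Pearson chi-squared statistic computed above.
  public class ChiSquaredSketch {
    static double chiSquared(double[] observed, double[] expected) {
      double sum = 0.0;
      for (int i = 0; i < observed.length; i++) {
        double diff = observed[i] - expected[i];
        sum += diff * diff / expected[i];
      }
      return sum;
    }

    public static void main(String[] args) {
      // Observed counts 30/70 against expected 50/50 give 400/50 + 400/50 = 16.
      System.out.println(chiSquared(new double[] {30, 70}, new double[] {50, 50})); // 16.0
    }
  }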
Example #9
  private double computeAdjustedResidual(int data[][], int classData[], Rule regla, int clase) {

    double suma = 0;
    int i;

    for (i = 0; i < regla.getRule().length; i++) {
      suma +=
          computeStandarizedResidual(data, classData, regla.getiCondition(i), clase)
              / Math.sqrt(
                  computeMaximumLikelohoodEstimate(data, classData, regla.getiCondition(i), clase));
    }

    return suma;
  }
Example #10
  /**
   * It calculates the matching degree between the antecedent of the rule and a given example
   *
   * @param indiv Individual The individual representing a fuzzy rule
   * @param ejemplo double [] A given example
   * @return double The matching degree between the example and the antecedent of the rule
   */
  double Matching_degree(Individual indiv, double[] ejemplo) {
    int i, sig;
    double result, suma, numerador, denominador, sigma, ancho_intervalo;

    suma = 0.0;
    for (i = 0; i < entradas; i++) {
      ancho_intervalo = train.getMax(i) - train.getMin(i);

      sigma = -1.0;
      sig = indiv.antecedente[i].sigma;
      switch (sig) {
        case 1:
          sigma = 0.3;
          break;
        case 2:
          sigma = 0.4;
          break;
        case 3:
          sigma = 0.5;
          break;
        case 4:
          sigma = 0.6;
          break;
      }

      sigma *= ancho_intervalo;

      numerador = Math.pow((ejemplo[i] - indiv.antecedente[i].m), 2.0);
      denominador = Math.pow(sigma, 2.0);
      suma += (numerador / denominador);
    }

    suma *= -1.0;
    result = Math.exp(suma);

    return (result);
  }
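The matching degree is a Gaussian-style compatibility, exp(-sum_i (x_i - m_i)^2 / sigma_i^2), where each sigma_i is a fraction (0.3 to 0.6) of the attribute range; Individual, train and entradas belong to the surrounding class. A self-contained sketch with hypothetical names:

  // Self-contained sketch of the Gaussian matching degree used above.
  public class MatchingSketch {
    static double matchingDegree(double[] example, double[] centers, double[] sigmas) {
      double sum = 0.0;
      for (int i = 0; i < example.length; i++) {
        double num = Math.pow(example[i] - centers[i], 2.0);
        double den = Math.pow(sigmas[i], 2.0);
        sum += num / den;
      }
      return Math.exp(-sum);
    }

    public static void main(String[] args) {
      // An example that coincides with the fuzzy-set centers matches with degree 1.0.
      double[] x = {0.5, 0.2};
      System.out.println(matchingDegree(x, new double[] {0.5, 0.2}, new double[] {0.3, 0.3}));
    }
  }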
Example #11
  private void interpola(
      double ra[],
      double rb[],
      int na[],
      int nb[],
      boolean ma[],
      boolean mb[],
      double resS[],
      double resR[],
      int resN[],
      boolean resM[]) {

    int i;
    double diff;
    double gap;
    int suerte;

    for (i = 0; i < ra.length; i++) {
      if (ma[i] == true && mb[i] == true) {
        resM[i] = true;
        resS[i] = 0;
      } else {
        resM[i] = false;
        if (entradas[i].getType() == Attribute.REAL) {
          diff = rb[i] - ra[i];
          gap = Randomize.Rand();
          resR[i] = ra[i] + gap * diff;
          resS[i] =
              (ra[i] + entradas[i].getMinAttribute())
                  / (entradas[i].getMaxAttribute() - entradas[i].getMinAttribute());
        } else if (entradas[i].getType() == Attribute.INTEGER) {
          diff = rb[i] - ra[i];
          gap = Randomize.Rand();
          resR[i] = Math.round(ra[i] + gap * diff);
          resS[i] =
              (ra[i] + entradas[i].getMinAttribute())
                  / (entradas[i].getMaxAttribute() - entradas[i].getMinAttribute());
        } else {
          suerte = Randomize.Randint(0, 2);
          if (suerte == 0) {
            resN[i] = na[i];
          } else {
            resN[i] = nb[i];
          }
          resS[i] = (double) resN[i] / (double) (entradas[i].getNominalValuesList().size() - 1);
        }
      }
    }
  }
Example #12
  /**
   * It evaluates the performance of the fuzzy system. The Mean Square Error (MSE) over the
   * training data is used.
   *
   * @return double The inverse of the training MSE, used as a fitness value to maximize
   */
  public double Evaluate_fuzzy_system() {
    int i;
    double result, suma, fuerza;

    suma = 0.0;
    for (i = 0; i < train.getnData(); i++) {
      fuerza = Output_fuzzy_system(train.getExample(i));
      suma += Math.pow(train.getOutputAsReal(i) - fuerza, 2.0);
    }

    result = suma / train.getnData();

    /* We want to have a maximization problem so, we invert the error */
    if (result != 0.0) {
      result = 1.0 / result;
    }

    return (result);
  }
Example #13
  /**
   * It evaluates the performance of the best evolved fuzzy system on test data. The Mean Square
   * Error (MSE) is used
   *
   * @return double The MSE error in test data
   */
  public double Evaluate_best_fuzzy_system_in_test() {
    int i;
    double result, suma, fuerza;

    SistemaDifuso.clear();
    for (i = 0; i < Nr; i++) {
      Individual indi = new Individual(BestSistemaDifuso.get(i));
      SistemaDifuso.add(indi);
    }

    suma = 0.0;
    for (i = 0; i < test.getnData(); i++) {
      fuerza = Output_fuzzy_system(test.getExample(i));
      suma += Math.pow(test.getOutputAsReal(i) - fuerza, 2.0);
    }

    result = suma / test.getnData();

    return (result);
  }
Example #14
  /**
   * It applies the PCBLX crossover operator
   *
   * @param d factor that multiplies the absolute difference between the parents
   * @param P1 the first parent to cross
   * @param P2 the second parent to cross
   * @param Hijo1 the first offspring obtained
   * @param Hijo2 the second offspring obtained
   * @param gens the size of the vectors
   */
  private void xPC_BLX(
      double d, double[] P1, double[] P2, double[] Hijo1, double[] Hijo2, int gens) {

    double I, A1, C1;
    int i;

    for (i = 0; i < gens; i++) {

      I = d * Math.abs(P1[i] - P2[i]);
      A1 = P1[i] - I;
      if (A1 < Gene[i].min()) A1 = Gene[i].min();
      C1 = P1[i] + I;
      if (C1 > Gene[i].max()) C1 = Gene[i].max();
      Hijo1[i] = A1 + Randomize.Rand() * (C1 - A1);

      A1 = P2[i] - I;
      if (A1 < Gene[i].min()) A1 = Gene[i].min();
      C1 = P2[i] + I;
      if (C1 > Gene[i].max()) C1 = Gene[i].max();
      Hijo2[i] = A1 + Randomize.Rand() * (C1 - A1);
    }
  }
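For each gene the operator samples the offspring uniformly from [P - I, P + I] clipped to the gene bounds, with I = d * |P1 - P2|; Gene and Randomize are part of the surrounding KEEL code. A self-contained sketch of the per-gene step using java.util.Random:

  import java.util.Random;

  // Self-contained sketch of the per-gene PCBLX interval sampling used above.
  public class PcBlxSketch {
    static double pcblxGene(double p1, double p2, double d, double min, double max, Random rnd) {
      double interval = d * Math.abs(p1 - p2);
      double low = Math.max(p1 - interval, min); // clip to the gene's lower bound
      double high = Math.min(p1 + interval, max); // clip to the gene's upper bound
      return low + rnd.nextDouble() * (high - low);
    }

    public static void main(String[] args) {
      Random rnd = new Random(42);
      // Offspring gene sampled around the first parent's value 0.4, within [0, 1].
      System.out.println(pcblxGene(0.4, 0.9, 1.0, 0.0, 1.0, rnd));
    }
  }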
Example #15
  /**
   * This method runs the multiple comparison tests
   *
   * @param results Array with the results of the methods
   * @param algorithmName Array with the names of the methods employed
   * @return A string with the contents of the test in LaTeX format
   */
  private static String runMultiple(double[][] results, String algorithmName[]) {

    int i, j, k;
    int posicion;
    double mean[][];
    MultiplePair orden[][];
    MultiplePair rank[][];
    boolean encontrado;
    int ig;
    double sum;
    boolean visto[];
    Vector<Integer> porVisitar;
    double Rj[];
    double friedman;
    double sumatoria = 0;
    double termino1, termino2, termino3;
    double iman;
    boolean vistos[];
    int pos, tmp, counter;
    String cad;
    double maxVal;
    double Pi[];
    double ALPHAiHolm[];
    double ALPHAiShaffer[];
    String ordenAlgoritmos[];
    double ordenRankings[];
    int order[];
    double adjustedP[][];
    double SE;
    boolean parar;
    Vector<Integer> indices = new Vector<Integer>();
    Vector<Vector<Relation>> exhaustiveI = new Vector<Vector<Relation>>();
    boolean[][] cuadro;
    double minPi, tmpPi, maxAPi, tmpAPi;
    Relation[] parejitas;
    Vector<Integer> T;
    int Tarray[];

    DecimalFormat nf4 = (DecimalFormat) DecimalFormat.getInstance();
    nf4.setMaximumFractionDigits(4);
    nf4.setMinimumFractionDigits(0);

    DecimalFormatSymbols dfs = nf4.getDecimalFormatSymbols();
    dfs.setDecimalSeparator('.');
    nf4.setDecimalFormatSymbols(dfs);

    DecimalFormat nf6 = (DecimalFormat) DecimalFormat.getInstance();
    nf6.setMaximumFractionDigits(6);
    nf6.setMinimumFractionDigits(0);

    nf6.setDecimalFormatSymbols(dfs);

    String out = "";

    int nDatasets = Configuration.getNDatasets();

    Iman = Configuration.isIman();
    Nemenyi = Configuration.isNemenyi();
    Bonferroni = Configuration.isBonferroni();
    Holm = Configuration.isHolm();
    Hoch = Configuration.isHochberg();
    Hommel = Configuration.isHommel();
    Scha = Configuration.isShaffer();
    Berg = Configuration.isBergman();

    mean = new double[nDatasets][algorithmName.length];

    // Maximize performance
    if (Configuration.getObjective() == 1) {

      /*Compute the average performance per algorithm for each data set*/
      for (i = 0; i < nDatasets; i++) {
        for (j = 0; j < algorithmName.length; j++) {
          mean[i][j] = results[j][i];
        }
      }

    }
    // Minimize performance
    else {

      double maxValue = Double.MIN_VALUE;

      /*Compute the average performance per algorithm for each data set*/
      for (i = 0; i < nDatasets; i++) {
        for (j = 0; j < algorithmName.length; j++) {

          if (results[j][i] > maxValue) {
            maxValue = results[j][i];
          }

          mean[i][j] = (-1.0 * results[j][i]);
        }
      }

      for (i = 0; i < nDatasets; i++) {
        for (j = 0; j < algorithmName.length; j++) {
          mean[i][j] += maxValue;
        }
      }
    }

    /*We use the pareja structure to compute and order rankings*/
    orden = new MultiplePair[nDatasets][algorithmName.length];
    for (i = 0; i < nDatasets; i++) {
      for (j = 0; j < algorithmName.length; j++) {
        orden[i][j] = new MultiplePair(j, mean[i][j]);
      }
      Arrays.sort(orden[i]);
    }

    /*building of the rankings table per algorithms and data sets*/
    rank = new MultiplePair[nDatasets][algorithmName.length];
    posicion = 0;
    for (i = 0; i < nDatasets; i++) {
      for (j = 0; j < algorithmName.length; j++) {
        encontrado = false;
        for (k = 0; k < algorithmName.length && !encontrado; k++) {
          if (orden[i][k].indice == j) {
            encontrado = true;
            posicion = k + 1;
          }
        }
        rank[i][j] = new MultiplePair(posicion, orden[i][posicion - 1].valor);
      }
    }

    /*In the case of having the same performance, the rankings are equal*/
    for (i = 0; i < nDatasets; i++) {
      visto = new boolean[algorithmName.length];
      porVisitar = new Vector<Integer>();

      Arrays.fill(visto, false);
      for (j = 0; j < algorithmName.length; j++) {
        porVisitar.removeAllElements();
        sum = rank[i][j].indice;
        visto[j] = true;
        ig = 1;
        for (k = j + 1; k < algorithmName.length; k++) {
          if (rank[i][j].valor == rank[i][k].valor && !visto[k]) {
            sum += rank[i][k].indice;
            ig++;
            porVisitar.add(new Integer(k));
            visto[k] = true;
          }
        }
        sum /= (double) ig;
        rank[i][j].indice = sum;
        for (k = 0; k < porVisitar.size(); k++) {
          rank[i][((Integer) porVisitar.elementAt(k)).intValue()].indice = sum;
        }
      }
    }

    /*compute the average ranking for each algorithm*/
    Rj = new double[algorithmName.length];
    for (i = 0; i < algorithmName.length; i++) {
      Rj[i] = 0;
      for (j = 0; j < nDatasets; j++) {
        Rj[i] += rank[j][i].indice / ((double) nDatasets);
      }
    }

    /*Print the average ranking per algorithm*/
    out += "\n\nAverage ranks obtained by applying the Friedman procedure\n\n";

    out +=
        "\\begin{table}[!htp]\n"
            + "\\centering\n"
            + "\\begin{tabular}{|c|c|}\\hline\n"
            + "Algorithm&Ranking\\\\\\hline\n";
    for (i = 0; i < algorithmName.length; i++) {
      out += (String) algorithmName[i] + " & " + nf4.format(Rj[i]) + "\\\\\n";
    }
    out += "\\hline\n\\end{tabular}\n\\caption{Average Rankings of the algorithms}\n\\end{table}";

    /*Compute the Friedman statistic*/
    termino1 =
        (12 * (double) nDatasets)
            / ((double) algorithmName.length * ((double) algorithmName.length + 1));
    termino2 =
        (double) algorithmName.length
            * ((double) algorithmName.length + 1)
            * ((double) algorithmName.length + 1)
            / (4.0);
    for (i = 0; i < algorithmName.length; i++) {
      sumatoria += Rj[i] * Rj[i];
    }
    friedman = (sumatoria - termino2) * termino1;
    out +=
        "\n\nFriedman statistic considering reduction performance (distributed according to chi-square with "
            + (algorithmName.length - 1)
            + " degrees of freedom: "
            + nf6.format(friedman)
            + ".\n\n";

    double pFriedman;
    pFriedman = ChiSq(friedman, (algorithmName.length - 1));

    System.out.print("P-value computed by Friedman Test: " + pFriedman + ".\\newline\n\n");

    /*Compute the Iman-Davenport statistic*/
    if (Iman) {
      iman = ((nDatasets - 1) * friedman) / (nDatasets * (algorithmName.length - 1) - friedman);
      out +=
          "Iman and Davenport statistic considering reduction performance (distributed according to F-distribution with "
              + (algorithmName.length - 1)
              + " and "
              + (algorithmName.length - 1) * (nDatasets - 1)
              + " degrees of freedom: "
              + nf6.format(iman)
              + ".\n\n";

      double pIman;

      pIman = FishF(iman, (algorithmName.length - 1), (algorithmName.length - 1) * (nDatasets - 1));
      System.out.print("P-value computed by Iman and Daveport Test: " + pIman + ".\\newline\n\n");
    }

    termino3 =
        Math.sqrt(
            (double) algorithmName.length
                * ((double) algorithmName.length + 1)
                / (6.0 * (double) nDatasets));

    out += "\n\n\\pagebreak\n\n";

    /** ********** NxN COMPARISON ************* */
    out += "\\section{Post hoc comparisons}";
    out +=
        "\n\nResults achieved on post hoc comparisons for $\\alpha = 0.05$, $\\alpha = 0.10$ and adjusted p-values.\n\n";
    /*Compute the unadjusted p_i value for each comparison alpha=0.05*/
    Pi = new double[(int) combinatoria(2, algorithmName.length)];
    ALPHAiHolm = new double[(int) combinatoria(2, algorithmName.length)];
    ALPHAiShaffer = new double[(int) combinatoria(2, algorithmName.length)];
    ordenAlgoritmos = new String[(int) combinatoria(2, algorithmName.length)];
    ordenRankings = new double[(int) combinatoria(2, algorithmName.length)];
    order = new int[(int) combinatoria(2, algorithmName.length)];
    parejitas = new Relation[(int) combinatoria(2, algorithmName.length)];
    T = new Vector<Integer>();
    T = trueHShaffer(algorithmName.length);
    Tarray = new int[T.size()];
    for (i = 0; i < T.size(); i++) {
      Tarray[i] = ((Integer) T.elementAt(i)).intValue();
    }
    Arrays.sort(Tarray);

    SE = termino3;
    vistos = new boolean[(int) combinatoria(2, algorithmName.length)];
    for (i = 0, k = 0; i < algorithmName.length; i++) {
      for (j = i + 1; j < algorithmName.length; j++, k++) {
        ordenRankings[k] = Math.abs(Rj[i] - Rj[j]);
        ordenAlgoritmos[k] = (String) algorithmName[i] + " vs. " + (String) algorithmName[j];
        parejitas[k] = new Relation(i, j);
      }
    }

    Arrays.fill(vistos, false);
    for (i = 0; i < ordenRankings.length; i++) {
      for (j = 0; vistos[j] == true; j++) ;
      pos = j;
      maxVal = ordenRankings[j];
      for (j = j + 1; j < ordenRankings.length; j++) {
        if (vistos[j] == false && ordenRankings[j] > maxVal) {
          pos = j;
          maxVal = ordenRankings[j];
        }
      }
      vistos[pos] = true;
      order[i] = pos;
    }

    /*Computing the logically related hypotheses tests (Shaffer and Bergmann-Hommel)*/

    pos = 0;
    tmp = Tarray.length - 1;
    for (i = 0; i < order.length; i++) {
      Pi[i] = 2 * CDF_Normal.normp((-1) * Math.abs((ordenRankings[order[i]]) / SE));
      ALPHAiHolm[i] = 0.05 / ((double) order.length - (double) i);
      ALPHAiShaffer[i] = 0.05 / ((double) order.length - (double) Math.max(pos, i));
      if (i == pos && Pi[i] <= ALPHAiShaffer[i]) {
        tmp--;
        pos = (int) combinatoria(2, algorithmName.length) - Tarray[tmp];
      }
    }

    out += "\\subsection{P-values for $\\alpha=0.05$}\n\n";

    int count = 4;

    if (Holm) {
      count++;
    }
    if (Scha) {
      count++;
    }
    out +=
        "\\begin{table}[!htp]\n\\centering\\scriptsize\n"
            + "\\begin{tabular}{"
            + printC(count)
            + "}\n"
            + "$i$&algorithms&$z=(R_0 - R_i)/SE$&$p$";
    if (Holm) {
      out += "&Holm";
    }
    if (Scha) {
      out += "&Shaffer";
    }

    out += "\\\\\n\\hline";

    for (i = 0; i < order.length; i++) {
      out +=
          (order.length - i)
              + "&"
              + ordenAlgoritmos[order[i]]
              + "&"
              + nf6.format(Math.abs((ordenRankings[order[i]]) / SE))
              + "&"
              + nf6.format(Pi[i]);
      if (Holm) {
        out += "&" + nf6.format(ALPHAiHolm[i]);
      }
      if (Scha) {
        out += "&" + nf6.format(ALPHAiShaffer[i]);
      }
      out += "\\\\\n";
    }

    out +=
        "\\hline\n"
            + "\\end{tabular}\n\\caption{P-values Table for $\\alpha=0.05$}\n"
            + "\\end{table}";

    /*Compute the rejected hypotheses for each test*/

    if (Nemenyi) {
      out +=
          "Nemenyi's procedure rejects those hypotheses that have a p-value $\\le"
              + nf6.format(0.05 / (double) (order.length))
              + "$.\n\n";
    }

    if (Holm) {
      parar = false;
      for (i = 0; i < order.length && !parar; i++) {
        if (Pi[i] > ALPHAiHolm[i]) {
          out +=
              "Holm's procedure rejects those hypotheses that have a p-value $\\le"
                  + nf6.format(ALPHAiHolm[i])
                  + "$.\n\n";
          parar = true;
        }
      }
    }

    if (Scha) {
      parar = false;
      for (i = 0; i < order.length && !parar; i++) {
        if (Pi[i] <= ALPHAiShaffer[i]) {
          out +=
              "Shaffer's procedure rejects those hypotheses that have a p-value $\\le"
                  + nf6.format(ALPHAiShaffer[i])
                  + "$.\n\n";
          parar = true;
        }
      }
    }

    /*For Bergmann-Hommel's procedure, 9 or more algorithms would involve very intensive computation*/
    if (algorithmName.length <= MAX_ALGORITHMS) {
      for (i = 0; i < algorithmName.length; i++) {
        indices.add(new Integer(i));
      }
      exhaustiveI = obtainExhaustive(indices);
      cuadro = new boolean[algorithmName.length][algorithmName.length];
      for (i = 0; i < algorithmName.length; i++) {
        Arrays.fill(cuadro[i], false);
      }

      for (i = 0; i < exhaustiveI.size(); i++) {
        minPi =
            2
                * CDF_Normal.normp(
                    (-1)
                        * Math.abs(
                            Rj[
                                    ((Relation)
                                            ((Vector<Relation>) exhaustiveI.elementAt(i))
                                                .elementAt(0))
                                        .i]
                                - Rj[
                                    ((Relation)
                                            ((Vector<Relation>) exhaustiveI.elementAt(i))
                                                .elementAt(0))
                                        .j])
                        / SE);

        for (j = 1; j < ((Vector<Relation>) exhaustiveI.elementAt(i)).size(); j++) {
          tmpPi =
              2
                  * CDF_Normal.normp(
                      (-1)
                          * Math.abs(
                              Rj[
                                      ((Relation)
                                              ((Vector<Relation>) exhaustiveI.elementAt(i))
                                                  .elementAt(j))
                                          .i]
                                  - Rj[
                                      ((Relation)
                                              ((Vector<Relation>) exhaustiveI.elementAt(i))
                                                  .elementAt(j))
                                          .j])
                          / SE);
          if (tmpPi < minPi) {
            minPi = tmpPi;
          }
        }
        if (minPi > (0.05 / ((double) ((Vector<Relation>) exhaustiveI.elementAt(i)).size()))) {
          for (j = 0; j < ((Vector<Relation>) exhaustiveI.elementAt(i)).size(); j++) {
            cuadro[((Relation) ((Vector<Relation>) exhaustiveI.elementAt(i)).elementAt(j)).i][
                    ((Relation) ((Vector<Relation>) exhaustiveI.elementAt(i)).elementAt(j)).j] =
                true;
          }
        }
      }

      if (Berg) {
        cad = "";
        cad += "Bergmann's procedure rejects these hypotheses:\n\n";
        cad += "\\begin{itemize}\n\n";

        counter = 0;
        for (i = 0; i < cuadro.length; i++) {
          for (j = i + 1; j < cuadro.length; j++) {
            if (cuadro[i][j] == false) {
              cad += "\\item " + algorithmName[i] + " vs. " + algorithmName[j] + "\n\n";
              counter++;
            }
          }
        }
        cad += "\\end{itemize}\n\n";

        if (counter > 0) {
          out += cad;
        } else {
          out += "Bergmann's procedure does not reject any hypotheses.\n\n";
        }
      }
    }

    out += "\\pagebreak\n\n";
    out += "\\subsection{P-values for $\\alpha=0.10$}\n\n";

    /*Compute the unadjusted p_i value for each comparison alpha=0.10*/
    Pi = new double[(int) combinatoria(2, algorithmName.length)];
    ALPHAiHolm = new double[(int) combinatoria(2, algorithmName.length)];
    ALPHAiShaffer = new double[(int) combinatoria(2, algorithmName.length)];
    ordenAlgoritmos = new String[(int) combinatoria(2, algorithmName.length)];
    ordenRankings = new double[(int) combinatoria(2, algorithmName.length)];
    order = new int[(int) combinatoria(2, algorithmName.length)];

    SE = termino3;
    vistos = new boolean[(int) combinatoria(2, algorithmName.length)];
    for (i = 0, k = 0; i < algorithmName.length; i++) {
      for (j = i + 1; j < algorithmName.length; j++, k++) {
        ordenRankings[k] = Math.abs(Rj[i] - Rj[j]);
        ordenAlgoritmos[k] = (String) algorithmName[i] + " vs. " + (String) algorithmName[j];
      }
    }

    Arrays.fill(vistos, false);
    for (i = 0; i < ordenRankings.length; i++) {
      for (j = 0; vistos[j] == true; j++) ;
      pos = j;
      maxVal = ordenRankings[j];

      for (j = j + 1; j < ordenRankings.length; j++) {
        if (vistos[j] == false && ordenRankings[j] > maxVal) {
          pos = j;
          maxVal = ordenRankings[j];
        }
      }
      vistos[pos] = true;
      order[i] = pos;
    }

    /*Computing the logically related hypotheses tests (Shaffer and Bergmann-Hommel)*/
    pos = 0;
    tmp = Tarray.length - 1;
    for (i = 0; i < order.length; i++) {
      Pi[i] = 2 * CDF_Normal.normp((-1) * Math.abs((ordenRankings[order[i]]) / SE));
      ALPHAiHolm[i] = 0.1 / ((double) order.length - (double) i);
      ALPHAiShaffer[i] = 0.1 / ((double) order.length - (double) Math.max(pos, i));
      if (i == pos && Pi[i] <= ALPHAiShaffer[i]) {
        tmp--;
        pos = (int) combinatoria(2, algorithmName.length) - Tarray[tmp];
      }
    }

    count = 4;

    if (Holm) {
      count++;
    }
    if (Scha) {
      count++;
    }
    out +=
        "\\begin{table}[!htp]\n\\centering\\scriptsize\n"
            + "\\begin{tabular}{"
            + printC(count)
            + "}\n"
            + "$i$&algorithms&$z=(R_0 - R_i)/SE$&$p$";
    if (Holm) {
      out += "&Holm";
    }
    if (Scha) {
      out += "&Shaffer";
    }
    out += "\\\\\n\\hline";

    for (i = 0; i < order.length; i++) {
      out +=
          (order.length - i)
              + "&"
              + ordenAlgoritmos[order[i]]
              + "&"
              + nf6.format(Math.abs((ordenRankings[order[i]]) / SE))
              + "&"
              + nf6.format(Pi[i]);
      if (Holm) {
        out += "&" + nf6.format(ALPHAiHolm[i]);
      }
      if (Scha) {
        out += "&" + nf6.format(ALPHAiShaffer[i]);
      }
      out += "\\\\\n";
    }

    out +=
        "\\hline\n"
            + "\\end{tabular}\n\\caption{P-values Table for $\\alpha=0.10$}\n"
            + "\\end{table}";

    /*Compute the rejected hypotheses for each test*/

    if (Nemenyi) {
      out +=
          "Nemenyi's procedure rejects those hypotheses that have a p-value $\\le"
              + nf6.format(0.10 / (double) (order.length))
              + "$.\n\n";
    }

    if (Holm) {
      parar = false;

      for (i = 0; i < order.length && !parar; i++) {
        if (Pi[i] > ALPHAiHolm[i]) {
          out +=
              "Holm's procedure rejects those hypotheses that have a p-value $\\le"
                  + nf6.format(ALPHAiHolm[i])
                  + "$.\n\n";
          parar = true;
        }
      }
    }

    if (Scha) {
      parar = false;
      for (i = 0; i < order.length && !parar; i++) {
        if (Pi[i] <= ALPHAiShaffer[i]) {
          out +=
              "Shaffer's procedure rejects those hypotheses that have a p-value $\\le"
                  + nf6.format(ALPHAiShaffer[i])
                  + "$.\n\n";
          parar = true;
        }
      }
    }

    /*For Bergmann-Hommel's procedure, 9 or more algorithms would involve very intensive computation*/
    if (algorithmName.length <= MAX_ALGORITHMS) {

      indices.removeAllElements();
      for (i = 0; i < algorithmName.length; i++) {
        indices.add(new Integer(i));
      }

      exhaustiveI = obtainExhaustive(indices);
      cuadro = new boolean[algorithmName.length][algorithmName.length];

      for (i = 0; i < algorithmName.length; i++) {
        Arrays.fill(cuadro[i], false);
      }

      for (i = 0; i < exhaustiveI.size(); i++) {
        minPi =
            2
                * CDF_Normal.normp(
                    (-1)
                        * Math.abs(
                            Rj[
                                    ((Relation)
                                            ((Vector<Relation>) exhaustiveI.elementAt(i))
                                                .elementAt(0))
                                        .i]
                                - Rj[
                                    ((Relation)
                                            ((Vector<Relation>) exhaustiveI.elementAt(i))
                                                .elementAt(0))
                                        .j])
                        / SE);
        for (j = 1; j < ((Vector<Relation>) exhaustiveI.elementAt(i)).size(); j++) {
          tmpPi =
              2
                  * CDF_Normal.normp(
                      (-1)
                          * Math.abs(
                              Rj[
                                      ((Relation)
                                              ((Vector<Relation>) exhaustiveI.elementAt(i))
                                                  .elementAt(j))
                                          .i]
                                  - Rj[
                                      ((Relation)
                                              ((Vector<Relation>) exhaustiveI.elementAt(i))
                                                  .elementAt(j))
                                          .j])
                          / SE);
          if (tmpPi < minPi) {
            minPi = tmpPi;
          }
        }

        if (minPi > 0.1 / ((double) ((Vector<Relation>) exhaustiveI.elementAt(i)).size())) {
          for (j = 0; j < ((Vector<Relation>) exhaustiveI.elementAt(i)).size(); j++) {
            cuadro[((Relation) ((Vector<Relation>) exhaustiveI.elementAt(i)).elementAt(j)).i][
                    ((Relation) ((Vector<Relation>) exhaustiveI.elementAt(i)).elementAt(j)).j] =
                true;
          }
        }
      }

      if (Berg) {

        cad = "";
        cad += "Bergmann's procedure rejects these hypotheses:\n\n";
        cad += "\\begin{itemize}\n\n";

        counter = 0;
        for (i = 0; i < cuadro.length; i++) {
          for (j = i + 1; j < cuadro.length; j++) {
            if (cuadro[i][j] == false) {
              cad += "\\item " + algorithmName[i] + " vs. " + algorithmName[j] + "\n\n";
              counter++;
            }
          }
        }
        cad += "\\end{itemize}\n\n";

        if (counter > 0) {
          out += cad;
        } else {
          out += "Bergmann's procedure does not reject any hypotheses.\n\n";
        }
      }
    }

    out += "\\pagebreak\n\n";

    /** ********** ADJUSTED P-VALUES NxN COMPARISON ************* */
    out += "\\subsection{Adjusted p-values}\n\n";

    adjustedP = new double[Pi.length][4];
    pos = 0;
    tmp = Tarray.length - 1;

    for (i = 0; i < adjustedP.length; i++) {
      adjustedP[i][0] = Pi[i] * (double) (adjustedP.length);
      adjustedP[i][1] = Pi[i] * (double) (adjustedP.length - i);
      adjustedP[i][2] = Pi[i] * ((double) adjustedP.length - (double) Math.max(pos, i));

      if (i == pos) {
        tmp--;
        pos = (int) combinatoria(2, algorithmName.length) - Tarray[tmp];
      }

      if (algorithmName.length <= MAX_ALGORITHMS) {
        maxAPi = Double.MIN_VALUE;
        minPi = Double.MAX_VALUE;
        for (j = 0; j < exhaustiveI.size(); j++) {
          if (exhaustiveI.elementAt(j).toString().contains(parejitas[order[i]].toString())) {
            minPi =
                2
                    * CDF_Normal.normp(
                        (-1)
                            * Math.abs(
                                Rj[
                                        ((Relation)
                                                ((Vector<Relation>) exhaustiveI.elementAt(j))
                                                    .elementAt(0))
                                            .i]
                                    - Rj[
                                        ((Relation)
                                                ((Vector<Relation>) exhaustiveI.elementAt(j))
                                                    .elementAt(0))
                                            .j])
                            / SE);
            for (k = 1; k < ((Vector<Relation>) exhaustiveI.elementAt(j)).size(); k++) {
              tmpPi =
                  2
                      * CDF_Normal.normp(
                          (-1)
                              * Math.abs(
                                  Rj[
                                          ((Relation)
                                                  ((Vector<Relation>) exhaustiveI.elementAt(j))
                                                      .elementAt(k))
                                              .i]
                                      - Rj[
                                          ((Relation)
                                                  ((Vector<Relation>) exhaustiveI.elementAt(j))
                                                      .elementAt(k))
                                              .j])
                              / SE);
              if (tmpPi < minPi) {
                minPi = tmpPi;
              }
            }
            tmpAPi = minPi * (double) (((Vector<Relation>) exhaustiveI.elementAt(j)).size());
            if (tmpAPi > maxAPi) {
              maxAPi = tmpAPi;
            }
          }
        }

        adjustedP[i][3] = maxAPi;
      }
    }

    for (i = 1; i < adjustedP.length; i++) {
      if (adjustedP[i][1] < adjustedP[i - 1][1]) adjustedP[i][1] = adjustedP[i - 1][1];
      if (adjustedP[i][2] < adjustedP[i - 1][2]) adjustedP[i][2] = adjustedP[i - 1][2];
      if (adjustedP[i][3] < adjustedP[i - 1][3]) adjustedP[i][3] = adjustedP[i - 1][3];
    }

    count = 3;

    if (Nemenyi) {
      count++;
    }
    if (Holm) {
      count++;
    }
    if (Scha) {
      count++;
    }
    if (Berg) {
      count++;
    }

    out +=
        "\\begin{table}[!htp]\n\\centering\\scriptsize\n"
            + "\\begin{tabular}{"
            + printC(count)
            + "}\n"
            + "i&hypothesis&unadjusted $p$";

    if (Nemenyi) {
      out += "&$p_{Neme}$";
    }
    if (Holm) {
      out += "&$p_{Holm}$";
    }
    if (Scha) {
      out += "&$p_{Shaf}$";
    }
    if (Berg) {
      out += "&$p_{Berg}$";
    }

    out += "\\\\\n\\hline";

    for (i = 0; i < Pi.length; i++) {
      out +=
          (i + 1)
              + "&"
              + algorithmName[parejitas[order[i]].i]
              + " vs ."
              + algorithmName[parejitas[order[i]].j]
              + "&"
              + nf6.format(Pi[i]);
      if (Nemenyi) {
        out += "&" + nf6.format(adjustedP[i][0]);
      }
      if (Holm) {
        out += "&" + nf6.format(adjustedP[i][1]);
      }
      if (Scha) {
        out += "&" + nf6.format(adjustedP[i][2]);
      }
      if (Berg) {
        out += "&" + nf6.format(adjustedP[i][3]);
      }

      out += "\\\\\n";
    }

    out += "\\hline\n" + "\\end{tabular}\n\\caption{Adjusted $p$-values}\n" + "\\end{table}\n\n";
    out += "\\end{landscape}\n\\end{document}";

    return out;
  } // end-method
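The Friedman statistic assembled above is (12 * N / (k * (k + 1))) * (sum_j R_j^2 - k * (k + 1)^2 / 4), where N is the number of data sets, k the number of algorithms and R_j the average rank of algorithm j; the result is then passed to ChiSq with k - 1 degrees of freedom. A self-contained sketch with toy average ranks:

  // Self-contained sketch of the Friedman statistic assembled in runMultiple above.
  public class FriedmanSketch {
    static double friedman(double[] avgRanks, int nDatasets) {
      int k = avgRanks.length;
      double term1 = (12.0 * nDatasets) / (k * (k + 1.0));
      double term2 = k * (k + 1.0) * (k + 1.0) / 4.0;
      double sumSq = 0.0;
      for (double r : avgRanks) {
        sumSq += r * r;
      }
      return term1 * (sumSq - term2);
    }

    public static void main(String[] args) {
      // Three algorithms ranked over 10 data sets with average ranks 1.2, 2.0 and 2.8.
      System.out.println(friedman(new double[] {1.2, 2.0, 2.8}, 10));
    }
  }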
Example #16
  /** Process the training and test files provided in the parameters file to the constructor. */
  public void process() {
    // declarations
    double[] outputs;
    double[] outputs2;
    Instance neighbor;
    double dist, mean;
    int actual;
    Randomize rnd = new Randomize();
    Instance ex;
    gCenter kmeans = null;
    int iterations = 0;
    double E;
    double prevE;
    int totalMissing = 0;
    boolean allMissing = true;

    rnd.setSeed(semilla);
    // PROCESS
    try {

      // Load in memory a dataset that contains a classification problem
      IS.readSet(input_train_name, true);
      int in = 0;
      int out = 0;

      ndatos = IS.getNumInstances();
      nvariables = Attributes.getNumAttributes();
      nentradas = Attributes.getInputNumAttributes();
      nsalidas = Attributes.getOutputNumAttributes();

      X = new String[ndatos][nvariables]; // matrix with transformed data
      kmeans = new gCenter(K, ndatos, nvariables);

      timesSeen = new FreqList[nvariables];
      mostCommon = new String[nvariables];

      // first, we choose k 'means' randomly from all
      // instances
      totalMissing = 0;
      for (int i = 0; i < ndatos; i++) {
        Instance inst = IS.getInstance(i);
        if (inst.existsAnyMissingValue()) totalMissing++;
      }
      if (totalMissing == ndatos) allMissing = true;
      else allMissing = false;
      for (int numMeans = 0; numMeans < K; numMeans++) {
        do {
          actual = (int) (ndatos * rnd.Rand());
          ex = IS.getInstance(actual);
        } while (ex.existsAnyMissingValue() && !allMissing);

        kmeans.copyCenter(ex, numMeans);
      }

      // now, iterate adjusting clusters' centers and
      // instances to them
      prevE = 0;
      iterations = 0;
      do {
        for (int i = 0; i < ndatos; i++) {
          Instance inst = IS.getInstance(i);

          kmeans.setClusterOf(inst, i);
        }
        // set new centers
        kmeans.recalculateCenters(IS);
        // compute RMSE
        E = 0;
        for (int i = 0; i < ndatos; i++) {
          Instance inst = IS.getInstance(i);

          E += kmeans.distance(inst, kmeans.getClusterOf(i));
        }
        iterations++;
        // System.out.println(iterations+"\t"+E);
        if (Math.abs(prevE - E) == 0) iterations = maxIter;
        else prevE = E;
      } while (E > minError && iterations < maxIter);
      for (int i = 0; i < ndatos; i++) {
        Instance inst = IS.getInstance(i);

        in = 0;
        out = 0;

        for (int j = 0; j < nvariables; j++) {
          Attribute a = Attributes.getAttribute(j);

          direccion = a.getDirectionAttribute();
          tipo = a.getType();

          if (direccion == Attribute.INPUT) {
            if (tipo != Attribute.NOMINAL && !inst.getInputMissingValues(in)) {
              X[i][j] = new String(String.valueOf(inst.getInputRealValues(in)));
            } else {
              if (!inst.getInputMissingValues(in)) X[i][j] = inst.getInputNominalValues(in);
              else {
                actual = kmeans.getClusterOf(i);
                X[i][j] = new String(kmeans.valueAt(actual, j));
              }
            }
            in++;
          } else {
            if (direccion == Attribute.OUTPUT) {
              if (tipo != Attribute.NOMINAL && !inst.getOutputMissingValues(out)) {
                X[i][j] = new String(String.valueOf(inst.getOutputRealValues(out)));
              } else {
                if (!inst.getOutputMissingValues(out)) X[i][j] = inst.getOutputNominalValues(out);
                else {
                  actual = kmeans.getClusterOf(i);
                  X[i][j] = new String(kmeans.valueAt(actual, j));
                }
              }
              out++;
            }
          }
        }
      }
    } catch (Exception e) {
      System.out.println("Dataset exception = " + e);
      e.printStackTrace();
      System.exit(-1);
    }
    write_results(output_train_name);
    /** ************************************************************************************ */
    // does a test file associated exist?
    if (input_train_name.compareTo(input_test_name) != 0) {
      try {

        // Load in memory a dataset that contains a classification problem
        IStest.readSet(input_test_name, false);
        int in = 0;
        int out = 0;

        ndatos = IStest.getNumInstances();
        nvariables = Attributes.getNumAttributes();
        nentradas = Attributes.getInputNumAttributes();
        nsalidas = Attributes.getOutputNumAttributes();

        for (int i = 0; i < ndatos; i++) {
          Instance inst = IStest.getInstance(i);

          in = 0;
          out = 0;

          for (int j = 0; j < nvariables; j++) {
            Attribute a = Attributes.getAttribute(j);

            direccion = a.getDirectionAttribute();
            tipo = a.getType();

            if (direccion == Attribute.INPUT) {
              if (tipo != Attribute.NOMINAL && !inst.getInputMissingValues(in)) {
                X[i][j] = new String(String.valueOf(inst.getInputRealValues(in)));
              } else {
                if (!inst.getInputMissingValues(in)) X[i][j] = inst.getInputNominalValues(in);
                else {
                  actual = kmeans.getClusterOf(i);
                  X[i][j] = new String(kmeans.valueAt(actual, j));
                }
              }
              in++;
            } else {
              if (direccion == Attribute.OUTPUT) {
                if (tipo != Attribute.NOMINAL && !inst.getOutputMissingValues(out)) {
                  X[i][j] = new String(String.valueOf(inst.getOutputRealValues(out)));
                } else {
                  if (!inst.getOutputMissingValues(out)) X[i][j] = inst.getOutputNominalValues(out);
                  else {
                    actual = kmeans.getClusterOf(i);
                    X[i][j] = new String(kmeans.valueAt(actual, j));
                  }
                }
                out++;
              }
            }
          }
        }
      } catch (Exception e) {
        System.out.println("Dataset exception = " + e);
        e.printStackTrace();
        System.exit(-1);
      }
      write_results(output_test_name);
    }
  }
Example #17
  /**
   * Computes and stores the information gain of each attribute (variable) of the dataset
   *
   * @param Examples Set of instances of the dataset
   * @param nFile Name of the file
   */
  public void GainInit(TableDat Examples, String nFile) {

    int i, j, h, v;
    boolean encontrado;
    float info_gk, suma, suma1, suma2, p_clase, logaritmo;
    int num_clase[] = new int[n_clases];
    int n_vars = this.getNVars();
    int MaxVal = this.getMaxVal();
    float p[][] = new float[n_vars][MaxVal];
    float p_cond[][][] = new float[n_clases][n_vars][MaxVal];
    GI = new float[n_vars];
    intervalosGI = new float[n_vars][MaxVal];

    String contents;
    contents = "\n--------------------------------------------\n";
    contents += "|       Computation of the info gain       |\n";
    contents += "--------------------------------------------\n";
    contents += "Points for computation of the info gain:\n";

    // Loads the values for "intervalosGI"
    float marca, p_corte;
    for (int v1 = 0; v1 < n_vars; v1++) {
      if (this.getContinuous(v1) == true) {
        contents += "\tVariable " + var[v1].getName() + ": ";
        marca = (this.getMax(v1) - this.getMin(v1)) / ((float) (this.getNLabelVar(v1) - 1));
        p_corte = this.getMin(v1) + marca / 2;
        for (int et = 0; et < this.getNLabelVar(v1); et++) {
          intervalosGI[v1][et] = p_corte;
          contents += intervalosGI[v1][et] + "  ";
          p_corte += marca;
        }
        contents += "\n";
      }
    }

    // Structure initialization
    for (i = 0; i < n_clases; i++) num_clase[i] = 0;
    for (i = 0; i < n_vars; i++)
      for (j = 0; j < MaxVal; j++) {
        p[i][j] = 0; // Simple probability matrix
        for (h = 0; h < n_clases; h++) p_cond[h][i][j] = 0; // Conditional probability matrix
      }

    // Computation of the simple and conditional probability matrices
    for (i = 0; i < Examples.getNEx(); i++) {
      num_clase[Examples.getClass(i)]++; // distribution by classes
      for (j = 0; j < n_vars; j++) { // distribution by values
        if (!this.getContinuous(j)) { // Discrete variable
          if (!Examples.getLost(this, i, j)) {
            // if the value is not a lost one
            p[j][(int) Examples.getDat(i, j)]++;
            p_cond[(int) Examples.getClass(i)][j][(int) Examples.getDat(i, j)]++;
          }
        } else { // Continuous variable
          encontrado = false;
          h = 0;
          while (!encontrado && h < this.getNLabelVar(j)) {
            if (Examples.getDat(i, j) <= intervalosGI[j][h]) encontrado = true;
            else h++;
          }
          if (encontrado == true) {
            p[j][h]++;
            p_cond[(int) Examples.getClass(i)][j][h]++;
          } else {
            if (!Examples.getLost(this, i, j)) {
              // Lost value
              System.out.println(
                  "Error computing the information gain, Variable " + j + " Example " + i);
              return;
            }
          }
        }
      }
    }
    for (h = 0; h < n_clases; h++)
      for (i = 0; i < n_vars; i++) {
        if (!this.getContinuous(i)) // Discrete variable
        for (j = (int) this.getMin(i); j <= (int) this.getMax(i); j++)
            p_cond[h][i][j] = p_cond[h][i][j] / Examples.getNEx();
        else // Continuous variable
        for (j = 0; j < this.getNLabelVar(i); j++)
            p_cond[h][i][j] = p_cond[h][i][j] / Examples.getNEx();
      }
    for (i = 0; i < n_vars; i++) {
      if (!this.getContinuous(i)) // Discrete variable
      for (j = (int) this.getMin(i); j <= (int) this.getMax(i); j++)
          p[i][j] = p[i][j] / Examples.getNEx();
      else // Continuous variable
      for (j = 0; j < this.getNLabelVar(i); j++) p[i][j] = p[i][j] / Examples.getNEx();
    }

    // Info Gk computation
    suma = 0;
    for (i = 0; i < n_clases; i++) {
      p_clase = ((float) num_clase[i]) / Examples.getNEx();
      if (p_clase > 0) {
        logaritmo = (float) (Math.log((double) p_clase) / Math.log(2));
        suma += p_clase * logaritmo;
      }
    }
    info_gk = (-1) * suma;

    // Information gain computation for each attribute
    for (v = 0; v < n_vars; v++) {
      suma = info_gk;
      suma1 = 0;
      if (!this.getContinuous(v)) { // Discrete Variable
        for (i = (int) this.getMin(v); i <= (int) this.getMax(v); i++) {
          suma2 = 0;
          for (j = 0; j < n_clases; j++)
            if (p_cond[j][v][i] > 0) {
              logaritmo = (float) (Math.log(p_cond[j][v][i]) / Math.log(2));
              suma2 += p_cond[j][v][i] * logaritmo;
            }
          suma1 += p[v][i] * (-1) * suma2;
        }
      } else { // Continuous Variable
        for (i = 0; i < this.getNLabelVar(v); i++) {
          suma2 = 0;
          for (j = 0; j < n_clases; j++)
            if (p_cond[j][v][i] > 0) {
              logaritmo = (float) (Math.log(p_cond[j][v][i]) / Math.log(2));
              suma2 += p_cond[j][v][i] * logaritmo;
            }
          suma1 += p[v][i] * (-1) * suma2;
        }
      }
      GI[v] = suma + (-1) * suma1;
    }

    contents += "Information Gain of the variables:\n";
    for (v = 0; v < n_vars; v++) {
      if (this.getContinuous(v) == true)
        contents += "\tVariable " + var[v].getName() + ": " + GI[v] + "\n";
    }

    if (!nFile.isEmpty()) Files.addToFile(nFile, contents);
  }
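The first term, info_gk, is the Shannon entropy of the class distribution, H(C) = -sum_c p_c * log2(p_c); the gain of each variable then subtracts the value-weighted entropy terms built from the probability matrices. A self-contained, textbook-style sketch on a toy discrete variable (counts are hypothetical):

  // Self-contained, textbook-style sketch of information gain for one discrete variable.
  public class InfoGainSketch {
    static double entropy(double[] probs) {
      double h = 0.0;
      for (double p : probs) {
        if (p > 0) {
          h -= p * (Math.log(p) / Math.log(2));
        }
      }
      return h;
    }

    public static void main(String[] args) {
      double hClass = entropy(new double[] {0.5, 0.5}); // H(C) = 1.0
      // Value 0 covers half the data and is pure; value 1 covers the rest with a 50/50 class mix.
      double conditional =
          0.5 * entropy(new double[] {1.0}) + 0.5 * entropy(new double[] {0.5, 0.5});
      System.out.println("Information gain: " + (hClass - conditional)); // 0.5
    }
  }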
Example #18
  /** It launches the algorithm */
  public void execute() {

    int i, j, k, l;
    int t;
    int ele;
    double prob[];
    double aux;
    double NUmax = 1.5; // used for linear ranking
    double NUmin = 0.5; // used for linear ranking
    double pos1, pos2;
    int sel1, sel2;
    int data[][];
    int infoAttr[];
    int classData[];
    Vector<Rule> contenedor = new Vector<Rule>();
    Vector<Rule> conjR = new Vector<Rule>();
    Rule tmpRule;
    Condition tmpCondition[] = new Condition[1];
    RuleSet population[];
    RuleSet hijo1, hijo2;

    if (somethingWrong) { // We do not execute the program
      System.err.println("An error was found, the data-set has numerical values.");
      System.err.println("Aborting the program");
      // We should not use the statement: System.exit(-1);
    } else {
      Randomize.setSeed(seed);

      nClasses = train.getnClasses();

      /*Build the nominal data information*/
      infoAttr = new int[train.getnInputs()];
      for (i = 0; i < infoAttr.length; i++) {
        infoAttr[i] = train.numberValues(i);
      }

      data = new int[train.getnData()][train.getnInputs()];
      for (i = 0; i < data.length; i++) {
        for (j = 0; j < data[i].length; j++) {
          if (train.isMissing(i, j)) data[i][j] = -1;
          else data[i][j] = train.valueExample(i, j);
        }
      }

      classData = new int[train.getnData()];
      for (i = 0; i < classData.length; i++) {
        classData[i] = train.getOutputAsInteger(i);
      }

      /*Find the first-order rules that turn out to be interesting*/

      for (i = 0; i < nClasses; i++) {
        for (j = 0; j < infoAttr.length; j++) {
          for (k = 0; k < infoAttr[j]; k++) {
            tmpCondition[0] = new Condition(j, k);
            tmpRule = new Rule(tmpCondition);
            if (Math.abs(computeAdjustedResidual(data, classData, tmpRule, i)) > 1.96) {
              if (!contenedor.contains(tmpRule)) {
                contenedor.add(tmpRule);
                conjR.add(tmpRule);
              }
            }
          }
        }
      }

      // Construct the Baker selection roulette
      prob = new double[popSize];
      for (j = 0; j < popSize; j++) {
        aux = (double) (NUmax - NUmin) * ((double) j / (popSize - 1));
        prob[j] = (double) (1.0 / (popSize)) * (NUmax - aux);
      }
      for (j = 1; j < popSize; j++) prob[j] = prob[j] + prob[j - 1];

      /*Steady-State Genetic Algorithm*/
      ele = 2;
      population = new RuleSet[popSize];
      while (conjR.size() >= 2) {
        t = 0;

        System.out.println("Producing rules of level " + ele);

        for (i = 0; i < population.length; i++) {
          population[i] = new RuleSet(conjR);
          population[i].computeFitness(data, classData, infoAttr, contenedor, nClasses);
        }

        Arrays.sort(population);

        while (t < numGenerations && !population[0].equals(population[popSize - 1])) {
          System.out.println("Generation " + t);
          t++;

          /*Baker's selection*/
          pos1 = Randomize.Rand();
          pos2 = Randomize.Rand();
          for (l = 0; l < popSize && prob[l] < pos1; l++) ;
          sel1 = l;
          for (l = 0; l < popSize && prob[l] < pos2; l++) ;
          sel2 = l;

          hijo1 = new RuleSet(population[sel1]);
          hijo2 = new RuleSet(population[sel2]);

          if (Randomize.Rand() < pCross) {
            RuleSet.crossover1(hijo1, hijo2);
          } else {
            RuleSet.crossover2(hijo1, hijo2);
          }

          RuleSet.mutation(hijo1, conjR, pMut, data, classData, infoAttr, contenedor, nClasses);
          RuleSet.mutation(hijo2, conjR, pMut, data, classData, infoAttr, contenedor, nClasses);

          hijo1.computeFitness(data, classData, infoAttr, contenedor, nClasses);
          hijo2.computeFitness(data, classData, infoAttr, contenedor, nClasses);

          population[popSize - 2] = new RuleSet(hijo1);
          population[popSize - 1] = new RuleSet(hijo2);

          Arrays.sort(population);
        }

        /*Decode function*/
        ele++;
        conjR.removeAllElements();
        System.out.println(
            "Fitness of the best chromosome in rule level " + ele + ": " + population[0].fitness);
        for (i = 0; i < population[0].getRuleSet().length; i++) {
          if (Math.abs(computeAdjustedResidual(data, classData, population[0].getRule(i), i))
              > 1.96) {
            if (validarRegla(population[0].getRule(i))
                && !contenedor.contains(population[0].getRule(i))) {
              contenedor.add(population[0].getRule(i));
              conjR.add(population[0].getRule(i));
            }
          }
        }
      }

      // Finally we should fill the training and test output files
      doOutput(this.val, this.outputTr, data, classData, infoAttr, contenedor, nClasses);
      doOutput(this.test, this.outputTst, data, classData, infoAttr, contenedor, nClasses);

      /*Print the rule obtained*/
      for (i = contenedor.size() - 1; i >= 0; i--) {
        if (reglaPositiva(
            this.train, data, classData, infoAttr, nClasses, contenedor.elementAt(i))) {
          Fichero.AnadirtoFichero(outputRule, contenedor.elementAt(i).toString(train));
          Fichero.AnadirtoFichero(
              outputRule,
              " -> "
                  + consecuente(
                      this.train, data, classData, infoAttr, nClasses, contenedor.elementAt(i))
                  + "\n");
        }
      }
      System.out.println("Algorithm Finished");
    }
  }
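The roulette built before the steady-state loop gives the individual at rank j (0 is the best after sorting) the probability (1 / N) * (NUmax - (NUmax - NUmin) * j / (N - 1)) and then accumulates the values; selection draws a uniform number and walks the cumulative array. A self-contained sketch of that selection step (population size and seed are hypothetical):

  import java.util.Random;

  // Self-contained sketch of the Baker linear-ranking roulette used in execute above.
  public class BakerSelectionSketch {
    public static void main(String[] args) {
      int popSize = 10;
      double nuMax = 1.5, nuMin = 0.5;
      double[] prob = new double[popSize];

      // Linear ranking: the best individual (rank 0) receives the largest share.
      for (int j = 0; j < popSize; j++) {
        double aux = (nuMax - nuMin) * ((double) j / (popSize - 1));
        prob[j] = (1.0 / popSize) * (nuMax - aux);
      }
      // Accumulate the probabilities to build the roulette.
      for (int j = 1; j < popSize; j++) {
        prob[j] += prob[j - 1];
      }

      // Spin once: pick the first index whose cumulative probability reaches the draw.
      Random rnd = new Random(0);
      double draw = rnd.nextDouble();
      int selected = 0;
      while (selected < popSize - 1 && prob[selected] < draw) {
        selected++;
      }
      System.out.println("Selected rank: " + selected);
    }
  }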
Example #19
  /**
   * SMOTE preprocessing procedure
   *
   * @param datosTrain input training data
   * @param realTrain real-valued training data
   * @param nominalTrain nominal attribute values
   * @param nulosTrain missing-value flags for the training data
   * @param clasesTrain training classes
   * @param datosArt synthetic instances
   */
  public void SMOTE(
      double datosTrain[][],
      double realTrain[][],
      int nominalTrain[][],
      boolean nulosTrain[][],
      int clasesTrain[],
      double datosArt[][],
      double realArt[][],
      int nominalArt[][],
      boolean nulosArt[][],
      int clasesArt[],
      int kSMOTE,
      int ASMO,
      double smoting,
      boolean balance,
      int nPos,
      int posID,
      int nNeg,
      int negID,
      boolean distanceEu) {

    int i, j, l, m;
    int tmp, pos;
    int positives[];
    int neighbors[][];
    double genS[][];
    double genR[][];
    int genN[][];
    boolean genM[][];
    int clasesGen[];
    int nn;

    /* Localize the positive instances */
    positives = new int[nPos];
    for (i = 0, j = 0; i < clasesTrain.length; i++) {
      if (clasesTrain[i] == posID) {
        positives[j] = i;
        j++;
      }
    }

    /* Randomize the instance presentation */
    for (i = 0; i < positives.length; i++) {
      tmp = positives[i];
      pos = Randomize.Randint(0, positives.length - 1);
      positives[i] = positives[pos];
      positives[pos] = tmp;
    }

    /* Obtain k-nearest neighbors of each positive instance */
    neighbors = new int[positives.length][kSMOTE];
    for (i = 0; i < positives.length; i++) {
      switch (ASMO) {
        case 0:
          KNN.evaluacionKNN2(
              kSMOTE,
              datosTrain,
              realTrain,
              nominalTrain,
              nulosTrain,
              clasesTrain,
              datosTrain[positives[i]],
              realTrain[positives[i]],
              nominalTrain[positives[i]],
              nulosTrain[positives[i]],
              Math.max(posID, negID) + 1,
              distanceEu,
              neighbors[i]);
          break;
        case 1:
          evaluacionKNNClass(
              kSMOTE,
              datosTrain,
              realTrain,
              nominalTrain,
              nulosTrain,
              clasesTrain,
              datosTrain[positives[i]],
              realTrain[positives[i]],
              nominalTrain[positives[i]],
              nulosTrain[positives[i]],
              Math.max(posID, negID) + 1,
              distanceEu,
              neighbors[i],
              posID);
          break;
        case 2:
          evaluacionKNNClass(
              kSMOTE,
              datosTrain,
              realTrain,
              nominalTrain,
              nulosTrain,
              clasesTrain,
              datosTrain[positives[i]],
              realTrain[positives[i]],
              nominalTrain[positives[i]],
              nulosTrain[positives[i]],
              Math.max(posID, negID) + 1,
              distanceEu,
              neighbors[i],
              negID);
          break;
      }
    }

    /* Interpolation of the minority instances */
    if (balance) {
      genS = new double[nNeg - nPos][datosTrain[0].length];
      genR = new double[nNeg - nPos][datosTrain[0].length];
      genN = new int[nNeg - nPos][datosTrain[0].length];
      genM = new boolean[nNeg - nPos][datosTrain[0].length];
      clasesGen = new int[nNeg - nPos];
    } else {
      genS = new double[(int) (nPos * smoting)][datosTrain[0].length];
      genR = new double[(int) (nPos * smoting)][datosTrain[0].length];
      genN = new int[(int) (nPos * smoting)][datosTrain[0].length];
      genM = new boolean[(int) (nPos * smoting)][datosTrain[0].length];
      clasesGen = new int[(int) (nPos * smoting)];
    }
    for (i = 0; i < genS.length; i++) {
      clasesGen[i] = posID;
      nn = Randomize.Randint(0, kSMOTE - 1);
      interpola(
          realTrain[positives[i % positives.length]],
          realTrain[neighbors[i % positives.length][nn]],
          nominalTrain[positives[i % positives.length]],
          nominalTrain[neighbors[i % positives.length][nn]],
          nulosTrain[positives[i % positives.length]],
          nulosTrain[neighbors[i % positives.length][nn]],
          genS[i],
          genR[i],
          genN[i],
          genM[i]);
    }

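    /* Copy the original training set into the output arrays */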
    for (j = 0; j < datosTrain.length; j++) {
      for (l = 0; l < datosTrain[0].length; l++) {
        datosArt[j][l] = datosTrain[j][l];
        realArt[j][l] = realTrain[j][l];
        nominalArt[j][l] = nominalTrain[j][l];
        nulosArt[j][l] = nulosTrain[j][l];
      }
      clasesArt[j] = clasesTrain[j];
    }
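    /* Append the synthetic instances after the original ones */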
    for (m = 0; j < datosArt.length; j++, m++) {
      for (l = 0; l < datosTrain[0].length; l++) {
        datosArt[j][l] = genS[m][l];
        realArt[j][l] = genR[m][l];
        nominalArt[j][l] = genN[m][l];
        nulosArt[j][l] = genM[m][l];
      }
      clasesArt[j] = clasesGen[m];
    }
  }
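A minimal calling sketch for the SMOTE procedure above, assuming class balancing is requested; the field names, k = 5 and ASMO = 0 are illustrative assumptions, and the sizes of the artificial arrays mirror the allocation logic inside the method (all original instances plus nNeg - nPos synthetic positives when balance is true):

  /* Hypothetical driver: datosTrain, realTrain, nominalTrain, nulosTrain,
     clasesTrain, posID and negID are assumed to be fields of this class. */
  void applySmote() {
    int nPos = 0, nNeg = 0;
    for (int c : clasesTrain) {
      if (c == posID) nPos++;
      else nNeg++;
    }

    // With balance = true, SMOTE appends (nNeg - nPos) synthetic positive instances.
    int total = datosTrain.length + (nNeg - nPos);
    double datosArt[][] = new double[total][datosTrain[0].length];
    double realArt[][] = new double[total][datosTrain[0].length];
    int nominalArt[][] = new int[total][datosTrain[0].length];
    boolean nulosArt[][] = new boolean[total][datosTrain[0].length];
    int clasesArt[] = new int[total];

    SMOTE(datosTrain, realTrain, nominalTrain, nulosTrain, clasesTrain,
        datosArt, realArt, nominalArt, nulosArt, clasesArt,
        5, 0, 1.0, true, nPos, posID, nNeg, negID, true);
  }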
Beispiel #20
0
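  /**
   * Computes the standardized residual of a condition with respect to a class: the difference
   * between the observed co-occurrence count and its expected value, divided by the square root
   * of the expected value.
   *
   * @param data the data set
   * @param classData the class of each instance
   * @param cond the condition being evaluated
   * @param clase the class index
   * @return the standardized residual
   */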
  private double computeStandarizedResidual(
      int data[][], int classData[], Condition cond, int clase) {
    double tmp = computeEAipAjq(data, classData, cond, clase);

    return (computeCountAipAjq(data, classData, cond, clase) - tmp) / Math.sqrt(tmp);
  }
Beispiel #21
0
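  /**
   * Executes the LVQ algorithm: it shuffles the training set, selects the initial neurons (at
   * least one per class), iteratively attracts or repels the nearest neuron for each training
   * instance while decaying the learning rate, and finally classifies the reference and test
   * sets, writing the predictions and the resulting network to the output files.
   */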
  public void ejecutar() {

    int i, j, l, m;
    double alfai;
    int nClases;

    int claseObt;

    boolean marcas[];
    boolean notFound;

    int init;
    int clasSel[];

    int baraje[];

    int pos, tmp;
    String instanciasIN[];
    String instanciasOUT[];

    long tiempo = System.currentTimeMillis();

    /* Getting the number of different classes */

    nClases = 0;

    for (i = 0; i < clasesTrain.length; i++) if (clasesTrain[i] > nClases) nClases = clasesTrain[i];

    nClases++;

    /* Shuffle the train set */

    baraje = new int[datosTrain.length];

    Randomize.setSeed(semilla);

    for (i = 0; i < datosTrain.length; i++) baraje[i] = i;

    for (i = 0; i < datosTrain.length; i++) {

      pos = Randomize.Randint(i, datosTrain.length - 1);

      tmp = baraje[i];

      baraje[i] = baraje[pos];

      baraje[pos] = tmp;
    }

    /*
     * Initialization of the flagged instances vector for later
     * elimination
     */

    marcas = new boolean[datosTrain.length];

    for (i = 0; i < datosTrain.length; i++) marcas[i] = false;

    if (datosTrain.length > 0) {

      // marcas[baraje[0]] = true; //the first instance is included always

      nSel = n_p;
      if (nSel < nClases) nSel = nClases;

    } else {

      System.err.println("Input dataset is empty");

      nSel = 0;
    }
    clasSel = new int[nClases];
    System.out.print("Selecting initial neurons... ");
    // at least, there must be 1 neuron of each class at the beginning
    init = nClases;
    for (i = 0; i < nClases && i < datosTrain.length; i++) {
      pos = Randomize.Randint(0, datosTrain.length - 1);
      tmp = 0;
      while ((clasesTrain[pos] != i || marcas[pos]) && tmp < datosTrain.length) {
        pos = (pos + 1) % datosTrain.length;
        tmp++;
      }
      if (tmp < datosTrain.length) marcas[pos] = true;
      else init--;
      // clasSel[i] = i;
    }
    for (i = init; i < Math.min(nSel, datosTrain.length); i++) {
      tmp = 0;
      pos = Randomize.Randint(0, datosTrain.length - 1);
      while (marcas[pos]) {
        pos = (pos + 1) % datosTrain.length;
        tmp++;
      }
      // if(i<nClases){
      // notFound = true;
      // do{
      // for(j=i-1;j>=0 && notFound;j--){
      // if(clasSel[j] == clasesTrain[pos])
      // notFound = false;
      // }
      // if(!notFound)
      // pos = Randomize.Randint (0, datosTrain.length-1);
      // }while(!notFound);
      // }
      // clasSel[i] = clasesTrain[pos];
      marcas[pos] = true;
      init++;
    }
    nSel = init;
    System.out.println("Initial neurons selected: " + nSel);

    /* Building of the S set from the flags */

    conjS = new double[nSel][datosTrain[0].length];

    clasesS = new int[nSel];

    for (m = 0, l = 0; m < datosTrain.length; m++) {

      if (marcas[m]) { // the instance must be copied to the solution

        for (j = 0; j < datosTrain[0].length; j++) {

          conjS[l][j] = datosTrain[m][j];
        }

        clasesS[l] = clasesTrain[m];

        l++;
      }
    }

    alfai = alpha;
    boolean change = true;
    /* Body of the LVQ algorithm. */

    // Train the network
    for (int it = 0; it < T && change; it++) {
      change = false;
      alpha = alfai;
      for (i = 1; i < datosTrain.length; i++) {
        // search for the neuron nearest to the training instance
        pos = NN(nSel, conjS, datosTrain[baraje[i]]);
        // does the nearest neuron correctly label the class of the
        // training instance?

        if (clasesS[pos] != clasesTrain[baraje[i]]) { // NO - repel
          // the neuron
          for (j = 0; j < conjS[pos].length; j++) {
            conjS[pos][j] = conjS[pos][j] - alpha * (datosTrain[baraje[i]][j] - conjS[pos][j]);
          }
          change = true;
        } else { // YES - migrate the neuron towards the input vector
          for (j = 0; j < conjS[pos].length; j++) {
            conjS[pos][j] = conjS[pos][j] + alpha * (datosTrain[baraje[i]][j] - conjS[pos][j]);
          }
        }
        alpha = nu * alpha;
      }
      // Shuffle again the training partition
      baraje = new int[datosTrain.length];

      for (i = 0; i < datosTrain.length; i++) baraje[i] = i;

      for (i = 0; i < datosTrain.length; i++) {

        pos = Randomize.Randint(i, datosTrain.length - 1);

        tmp = baraje[i];

        baraje[i] = baraje[pos];

        baraje[pos] = tmp;
      }
    }
    System.out.println(
        "LVQ " + relation + " " + (double) (System.currentTimeMillis() - tiempo) / 1000.0 + "s");
    // Classify the train data set
    instanciasIN = new String[datosReferencia.length];
    instanciasOUT = new String[datosReferencia.length];
    for (i = 0; i < datosReferencia.length; i++) {
      /* Classify the instance selected in this iteration */
      Attribute a = Attributes.getOutputAttribute(0);

      int tipo = a.getType();
      claseObt = KNN.evaluacionKNN2(1, conjS, clasesS, datosReferencia[i], nClases);
      if (tipo != Attribute.NOMINAL) {
        instanciasIN[i] = String.valueOf(clasesReferencia[i]);
        instanciasOUT[i] = String.valueOf(claseObt);
      } else {
        instanciasIN[i] = a.getNominalValue(clasesReferencia[i]);
        instanciasOUT[i] = a.getNominalValue(claseObt);
      }
    }

    escribeSalida(
        ficheroSalida[0], instanciasIN, instanciasOUT, entradas, salida, nEntradas, relation);

    // Classify the test data set
    normalizarTest();
    instanciasIN = new String[datosTest.length];
    instanciasOUT = new String[datosTest.length];
    for (i = 0; i < datosTest.length; i++) {
      /* Classify the instance selected in this iteration */
      Attribute a = Attributes.getOutputAttribute(0);

      int tipo = a.getType();

      claseObt = KNN.evaluacionKNN2(1, conjS, clasesS, datosTest[i], nClases);
      if (tipo != Attribute.NOMINAL) {
        instanciasIN[i] = String.valueOf(clasesTest[i]);
        instanciasOUT[i] = String.valueOf(claseObt);
      } else {
        instanciasIN[i] = a.getNominalValue(clasesTest[i]);
        instanciasOUT[i] = a.getNominalValue(claseObt);
      }
    }

    escribeSalida(
        ficheroSalida[1], instanciasIN, instanciasOUT, entradas, salida, nEntradas, relation);

    // Print the network to a file
    printNetworkToFile(ficheroSalida[2], referencia.getHeader());
  }
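The core of the training loop above is the standard LVQ1 update: the nearest neuron is attracted towards the training instance when it already predicts the correct class and repelled otherwise, and the learning rate alpha is decayed by the factor nu after every instance. A minimal sketch of one such update step, with the method name and signature chosen here purely for illustration:

  /* Illustrative sketch of a single LVQ1 update; not part of the original class. */
  private static double lvq1Update(
      double neuron[], double instance[], int neuronClass, int instanceClass,
      double alpha, double nu) {
    for (int j = 0; j < neuron.length; j++) {
      double delta = alpha * (instance[j] - neuron[j]);
      // Attract the neuron on a correct classification, repel it otherwise.
      neuron[j] += (neuronClass == instanceClass) ? delta : -delta;
    }
    return nu * alpha; // decayed learning rate for the next training instance
  }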
Beispiel #22
0
  /**
   * Obtains all possible exhaustive comparisons from a vector of indexes.
   *
   * @param indices A vector of indexes.
   * @return A vector of vectors containing all the possible relations between the indexes
   */
  @SuppressWarnings("unchecked")
  public static Vector<Vector<Relation>> obtainExhaustive(Vector<Integer> indices) {

    Vector<Vector<Relation>> result = new Vector<Vector<Relation>>();
    int i, j, k;
    String binario;
    boolean[] number = new boolean[indices.size()];
    Vector<Integer> ind1, ind2;
    Vector<Relation> set = new Vector<Relation>();
    Vector<Vector<Relation>> res1, res2;
    Vector<Relation> temp;
    Vector<Relation> temp2;
    Vector<Relation> temp3;

    ind1 = new Vector<Integer>();
    ind2 = new Vector<Integer>();
    temp = new Vector<Relation>();
    temp2 = new Vector<Relation>();
    temp3 = new Vector<Relation>();

    for (i = 0; i < indices.size(); i++) {
      for (j = i + 1; j < indices.size(); j++) {
        set.addElement(
            new Relation(
                ((Integer) indices.elementAt(i)).intValue(),
                ((Integer) indices.elementAt(j)).intValue()));
      }
    }
    if (set.size() > 0) result.addElement(set);

    for (i = 1; i < (int) (Math.pow(2, indices.size() - 1)); i++) {
      Arrays.fill(number, false);
      ind1.removeAllElements();
      ind2.removeAllElements();
      temp.removeAllElements();
      temp2.removeAllElements();
      temp3.removeAllElements();
      binario = Integer.toString(i, 2);
      for (k = 0; k < number.length - binario.length(); k++) {
        number[k] = false;
      }
      for (j = 0; j < binario.length(); j++, k++) {
        if (binario.charAt(j) == '1') number[k] = true;
      }
      for (j = 0; j < number.length; j++) {
        if (number[j] == true) {
          ind1.addElement(new Integer(((Integer) indices.elementAt(j)).intValue()));
        } else {
          ind2.addElement(new Integer(((Integer) indices.elementAt(j)).intValue()));
        }
      }
      res1 = obtainExhaustive(ind1);
      res2 = obtainExhaustive(ind2);
      for (j = 0; j < res1.size(); j++) {
        result.addElement(new Vector<Relation>((Vector<Relation>) res1.elementAt(j)));
      }
      for (j = 0; j < res2.size(); j++) {
        result.addElement(new Vector<Relation>((Vector<Relation>) res2.elementAt(j)));
      }
      for (j = 0; j < res1.size(); j++) {
        temp = (Vector<Relation>) ((Vector<Relation>) res1.elementAt(j)).clone();
        for (k = 0; k < res2.size(); k++) {
          temp2 = (Vector<Relation>) temp.clone();
          temp3 = (Vector<Relation>) ((Vector<Relation>) res2.elementAt(k)).clone();
          if (((Relation) temp2.elementAt(0)).i < ((Relation) temp3.elementAt(0)).i) {
            temp2.addAll((Vector<Relation>) temp3);
            result.addElement(new Vector<Relation>(temp2));
          } else {
            temp3.addAll((Vector<Relation>) temp2);
            result.addElement(new Vector<Relation>(temp3));
          }
        }
      }
    }
    for (i = 0; i < result.size(); i++) {
      if (((Vector<Relation>) result.elementAt(i)).toString().equalsIgnoreCase("[]")) {
        result.removeElementAt(i);
        i--;
      }
    }
    for (i = 0; i < result.size(); i++) {
      for (j = i + 1; j < result.size(); j++) {
        if (((Vector<Relation>) result.elementAt(i))
            .toString()
            .equalsIgnoreCase(((Vector<Relation>) result.elementAt(j)).toString())) {
          result.removeElementAt(j);
          j--;
        }
      }
    }
    return result;
  } // end-method
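As a usage sketch, invoking the method above on three indexes first yields the set of all pairwise relations and then, through the recursive binary splits of the index vector, every way of grouping those relations; the snippet below only illustrates how it might be called and printed, with the index values 0, 1 and 2 chosen arbitrarily:

  /* Hypothetical invocation inside the same class, so Relation is resolvable. */
  public static void main(String[] args) {
    Vector<Integer> indices = new Vector<Integer>();
    indices.add(0);
    indices.add(1);
    indices.add(2);
    Vector<Vector<Relation>> comparisons = obtainExhaustive(indices);
    for (Vector<Relation> comparison : comparisons) {
      System.out.println(comparison); // each element is one set of pairwise relations
    }
  }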