Exemple #1
0
  /** Process the training and test files provided in the parameters file to the constructor. */
  public void process() {
    double[] outputs;
    double[] outputs2;
    Instance neighbor;
    double dist, mean;
    int actual;
    int[] N = new int[nneigh];
    double[] Ndist = new double[nneigh];
    boolean allNull;
    svm_problem SVMp = null;
    svm_parameter SVMparam = new svm_parameter();
    svm_model svr = null;
    svm_node SVMn[];
    double[] outputsCandidate = null;
    boolean same = true;
    Vector instancesSelected = new Vector();
    Vector instancesSelected2 = new Vector();

    // SVM PARAMETERS
    SVMparam.C = C;
    SVMparam.cache_size = 10; // 10MB of cache
    SVMparam.degree = degree;
    SVMparam.eps = eps;
    SVMparam.gamma = gamma;
    SVMparam.nr_weight = 0;
    SVMparam.nu = nu;
    SVMparam.p = p;
    SVMparam.shrinking = shrinking;
    SVMparam.probability = 0;
    if (kernelType.compareTo("LINEAR") == 0) {
      SVMparam.kernel_type = svm_parameter.LINEAR;
    } else if (kernelType.compareTo("POLY") == 0) {
      SVMparam.kernel_type = svm_parameter.POLY;
    } else if (kernelType.compareTo("RBF") == 0) {
      SVMparam.kernel_type = svm_parameter.RBF;
    } else if (kernelType.compareTo("SIGMOID") == 0) {
      SVMparam.kernel_type = svm_parameter.SIGMOID;
    }

    SVMparam.svm_type = svm_parameter.EPSILON_SVR;

    try {

      // Load in memory a dataset that contains a classification problem
      IS.readSet(input_train_name, true);
      int in = 0;
      int out = 0;

      ndatos = IS.getNumInstances();
      nvariables = Attributes.getNumAttributes();
      nentradas = Attributes.getInputNumAttributes();
      nsalidas = Attributes.getOutputNumAttributes();

      X = new String[ndatos][2]; // matrix with transformed data

      mostCommon = new String[nvariables];
      SVMp = new svm_problem();
      SVMp.l = ndatos;
      SVMp.y = new double[SVMp.l];
      SVMp.x = new svm_node[SVMp.l][nentradas + 1];
      for (int l = 0; l < SVMp.l; l++) {
        for (int n = 0; n < Attributes.getInputNumAttributes() + 1; n++) {
          SVMp.x[l][n] = new svm_node();
        }
      }

      for (int i = 0; i < ndatos; i++) {
        Instance inst = IS.getInstance(i);

        SVMp.y[i] = inst.getAllOutputValues()[0];
        for (int n = 0; n < Attributes.getInputNumAttributes(); n++) {
          SVMp.x[i][n].index = n;
          SVMp.x[i][n].value = inst.getAllInputValues()[n];
          SVMp.y[i] = inst.getAllOutputValues()[0];
        }
        // end of instance
        SVMp.x[i][nentradas].index = -1;
      }
      if (svm.svm_check_parameter(SVMp, SVMparam) != null) {
        System.out.println("SVM parameter error in training:");
        System.out.println(svm.svm_check_parameter(SVMp, SVMparam));
        System.exit(-1);
      }
      // train the SVM
      if (ndatos > 0) {
        svr = svm.svm_train(SVMp, SVMparam);
      }
      for (int i = 0; i < ndatos; i++) {
        Instance inst = IS.getInstance(i);
        X[i][0] = new String(String.valueOf(inst.getAllOutputValues()[0]));
        //			the values used for regression
        SVMn = new svm_node[Attributes.getInputNumAttributes() + 1];
        for (int n = 0; n < Attributes.getInputNumAttributes(); n++) {
          SVMn[n] = new svm_node();
          SVMn[n].index = n;
          SVMn[n].value = inst.getAllInputValues()[n];
        }
        SVMn[nentradas] = new svm_node();
        SVMn[nentradas].index = -1;
        // pedict the class
        X[i][1] = new String(String.valueOf((svm.svm_predict(svr, SVMn))));
      }
    } catch (Exception e) {
      System.out.println("Dataset exception = " + e);
      e.printStackTrace();
      System.exit(-1);
    }
    write_results(output_train_name);
    /** ************************************************************************************ */
    try {

      // Load in memory a dataset that contains a classification
      // problem
      IS.readSet(input_test_name, false);
      int in = 0;
      int out = 0;

      ndatos = IS.getNumInstances();
      nvariables = Attributes.getNumAttributes();
      nentradas = Attributes.getInputNumAttributes();
      nsalidas = Attributes.getOutputNumAttributes();

      X = new String[ndatos][2]; // matrix with transformed data
      // data

      mostCommon = new String[nvariables];

      for (int i = 0; i < ndatos; i++) {
        Instance inst = IS.getInstance(i);
        X[i][0] = new String(String.valueOf(inst.getAllOutputValues()[0]));

        SVMn = new svm_node[Attributes.getInputNumAttributes() + 1];
        for (int n = 0; n < Attributes.getInputNumAttributes(); n++) {
          SVMn[n] = new svm_node();
          SVMn[n].index = n;
          SVMn[n].value = inst.getAllInputValues()[n];
        }
        SVMn[nentradas] = new svm_node();
        SVMn[nentradas].index = -1;
        // pedict the class
        X[i][1] = new String(String.valueOf(svm.svm_predict(svr, SVMn)));
      }
    } catch (Exception e) {
      System.out.println("Dataset exception = " + e);
      e.printStackTrace();
      System.exit(-1);
    }
    System.out.println("escribiendo test");
    write_results(output_test_name);
  }
  /** Process the training and test files provided in the parameters file to the constructor. */
  public void process() {
    double[] outputs;
    double[] outputs2;
    try {
      FileWriter file_write = new FileWriter(output_train_name);

      try {

        // Load in memory a dataset that contains a classification problem
        IS.readSet(input_train_name, true);
        int in = 0;
        int out = 0;
        int in2 = 0;
        int out2 = 0;
        int lastMissing = -1;
        boolean fin = false;
        boolean stepNext = false;

        ndatos = IS.getNumInstances();
        nvariables = Attributes.getNumAttributes();
        nentradas = Attributes.getInputNumAttributes();
        nsalidas = Attributes.getOutputNumAttributes();

        String[] row = null;
        X = new Vector[ndatos]; // matrix with transformed data
        for (int i = 0; i < ndatos; i++) X[i] = new Vector();

        timesSeen = new FreqList[nvariables];
        mostCommon = new String[nvariables];

        file_write.write(IS.getHeader());

        // now, print the normalized data
        file_write.write("@data\n");

        // now, search for missed data, and replace them with
        // the most common value

        for (int i = 0; i < ndatos; i++) {
          Instance inst = IS.getInstance(i);
          in = 0;
          out = 0;
          row = new String[nvariables];

          for (int j = 0; j < nvariables; j++) {
            Attribute a = Attributes.getAttribute(j);

            direccion = a.getDirectionAttribute();
            tipo = a.getType();

            if (direccion == Attribute.INPUT) {
              if (tipo != Attribute.NOMINAL && !inst.existsAnyMissingValue()) {
                row[j] = new String(String.valueOf(inst.getInputRealValues(in)));
              } else {
                if (!inst.existsAnyMissingValue()) row[j] = inst.getInputNominalValues(in);
                else {
                  // missing data
                  outputs = inst.getAllOutputValues();
                  in2 = 0;
                  out2 = 0;
                  for (int attr = 0; attr < nvariables; attr++) {
                    Attribute b = Attributes.getAttribute(attr);
                    direccion = b.getDirectionAttribute();
                    tipo = b.getType();
                    if (direccion == Attribute.INPUT) {
                      if (tipo != Attribute.NOMINAL && !inst.getInputMissingValues(in2)) {
                        row[attr] = new String(String.valueOf(inst.getInputRealValues(in2)));
                      } else {
                        if (!inst.getInputMissingValues(in2))
                          row[attr] = inst.getInputNominalValues(in2);
                      }
                      in2++;
                    } else {
                      if (direccion == Attribute.OUTPUT) {
                        if (tipo != Attribute.NOMINAL && !inst.getOutputMissingValues(out2)) {
                          row[attr] = new String(String.valueOf(inst.getOutputRealValues(out2)));
                        } else {
                          if (!inst.getOutputMissingValues(out2))
                            row[attr] = inst.getOutputNominalValues(out2);
                        }
                        out2++;
                      }
                    }
                  }
                  // make frecuencies  for each attribute
                  for (int attr = 0; attr < nvariables; attr++) {
                    Attribute b = Attributes.getAttribute(attr);

                    direccion = b.getDirectionAttribute();
                    tipo = b.getType();
                    if (direccion == Attribute.INPUT && inst.getInputMissingValues(attr)) {
                      lastMissing = attr;
                      timesSeen[attr] = new FreqList();
                      for (int m = 0; m < ndatos; m++) {
                        Instance inst2 = IS.getInstance(m);
                        outputs2 = inst2.getAllOutputValues();
                        boolean sameClass = true;
                        // are they same concept instances??
                        for (int k = 0; k < nsalidas && sameClass; k++)
                          if (outputs[k] != outputs2[k]) sameClass = false;
                        if (sameClass) {
                          if (tipo != Attribute.NOMINAL && !inst2.getInputMissingValues(attr)) {
                            timesSeen[attr].AddElement(
                                new String(String.valueOf(inst2.getInputRealValues(attr))));

                          } else {
                            if (!inst2.getInputMissingValues(attr)) {
                              timesSeen[attr].AddElement(inst2.getInputNominalValues(attr));
                            }
                          }
                        }
                      }
                    }
                  }
                  for (int attr = 0; attr < nvariables; attr++) {
                    if (direccion == Attribute.INPUT && inst.getInputMissingValues(attr)) {
                      timesSeen[attr].reset();
                    }
                  }
                  fin = false;
                  stepNext = false;
                  while (!fin) {
                    in2 = 0;
                    for (int attr = 0; attr < nvariables && !fin; attr++) {
                      Attribute b = Attributes.getAttribute(attr);

                      direccion = b.getDirectionAttribute();
                      tipo = b.getType();
                      if (direccion == Attribute.INPUT && inst.getInputMissingValues(in2)) {
                        if (stepNext) {
                          timesSeen[attr].iterate();
                          stepNext = false;
                        }
                        if (timesSeen[attr].outOfBounds()) {
                          stepNext = true;
                          if (attr == lastMissing) fin = true;
                          timesSeen[attr].reset();
                        }
                        if (!fin)
                          row[attr] =
                              ((ValueFreq) timesSeen[attr].getCurrent())
                                  .getValue(); // replace missing data
                      }
                      in2++;
                    }
                    if (!fin) {
                      stepNext = true;
                      file_write.write(row[0]);
                      for (int y = 1; y < nvariables; y++) {
                        file_write.write("," + row[y]);
                      }
                      file_write.write("\n");
                      // X[i].addElement(row);
                      // row = (String[])row.clone();
                    }
                  }
                }
              }
              in++;
            } else {
              if (direccion == Attribute.OUTPUT) {
                if (tipo != Attribute.NOMINAL && !inst.getOutputMissingValues(out)) {
                  row[j] = new String(String.valueOf(inst.getOutputRealValues(out)));
                } else {
                  if (!inst.getOutputMissingValues(out)) row[j] = inst.getOutputNominalValues(out);
                  else row[j] = new String("?");
                }
                out++;
              }
            }
          }
          if (!inst.existsAnyMissingValue()) {
            file_write.write(row[0]);
            for (int y = 1; y < nvariables; y++) {
              file_write.write("," + row[y]);
            }
            file_write.write("\n");
          }
        }
      } catch (Exception e) {
        System.out.println("Dataset exception = " + e);
        e.printStackTrace();
        System.exit(-1);
      }
      file_write.close();
    } catch (IOException e) {
      System.out.println("IO exception = " + e);
      e.printStackTrace();
      System.exit(-1);
    }

    /** ************************************************************************************ */
    // does a test file associated exist?
    if (input_train_name.compareTo(input_test_name) != 0) {
      try {
        FileWriter file_write = new FileWriter(output_test_name);

        try {

          // Load in memory a dataset that contains a classification problem
          IS.readSet(input_test_name, false);
          int in = 0;
          int out = 0;
          int in2 = 0;
          int out2 = 0;
          int lastMissing = -1;
          boolean fin = false;
          boolean stepNext = false;

          ndatos = IS.getNumInstances();
          nvariables = Attributes.getNumAttributes();
          nentradas = Attributes.getInputNumAttributes();
          nsalidas = Attributes.getOutputNumAttributes();

          String[] row = null;
          X = new Vector[ndatos]; // matrix with transformed data
          for (int i = 0; i < ndatos; i++) X[i] = new Vector();

          timesSeen = new FreqList[nvariables];
          mostCommon = new String[nvariables];

          file_write.write(IS.getHeader());

          // now, print the normalized data
          file_write.write("@data\n");

          // now, search for missed data, and replace them with
          // the most common value

          for (int i = 0; i < ndatos; i++) {
            Instance inst = IS.getInstance(i);
            in = 0;
            out = 0;
            row = new String[nvariables];

            for (int j = 0; j < nvariables; j++) {
              Attribute a = Attributes.getAttribute(j);

              direccion = a.getDirectionAttribute();
              tipo = a.getType();

              if (direccion == Attribute.INPUT) {
                if (tipo != Attribute.NOMINAL && !inst.existsAnyMissingValue()) {
                  row[j] = new String(String.valueOf(inst.getInputRealValues(in)));
                } else {
                  if (!inst.existsAnyMissingValue()) row[j] = inst.getInputNominalValues(in);
                  else {
                    // missing data
                    outputs = inst.getAllOutputValues();
                    in2 = 0;
                    out2 = 0;
                    for (int attr = 0; attr < nvariables; attr++) {
                      Attribute b = Attributes.getAttribute(attr);
                      direccion = b.getDirectionAttribute();
                      tipo = b.getType();
                      if (direccion == Attribute.INPUT) {
                        if (tipo != Attribute.NOMINAL && !inst.getInputMissingValues(in2)) {
                          row[attr] = new String(String.valueOf(inst.getInputRealValues(in2)));
                        } else {
                          if (!inst.getInputMissingValues(in2))
                            row[attr] = inst.getInputNominalValues(in2);
                        }
                        in2++;
                      } else {
                        if (direccion == Attribute.OUTPUT) {
                          if (tipo != Attribute.NOMINAL && !inst.getOutputMissingValues(out2)) {
                            row[attr] = new String(String.valueOf(inst.getOutputRealValues(out2)));
                          } else {
                            if (!inst.getOutputMissingValues(out2))
                              row[attr] = inst.getOutputNominalValues(out2);
                          }
                          out2++;
                        }
                      }
                    }
                    // make frecuencies  for each attribute
                    for (int attr = 0; attr < nvariables; attr++) {
                      Attribute b = Attributes.getAttribute(attr);

                      direccion = b.getDirectionAttribute();
                      tipo = b.getType();
                      if (direccion == Attribute.INPUT && inst.getInputMissingValues(attr)) {
                        lastMissing = attr;
                        timesSeen[attr] = new FreqList();
                        for (int m = 0; m < ndatos; m++) {
                          Instance inst2 = IS.getInstance(m);
                          outputs2 = inst2.getAllOutputValues();
                          boolean sameClass = true;
                          // are they same concept instances??
                          for (int k = 0; k < nsalidas && sameClass; k++)
                            if (outputs[k] != outputs2[k]) sameClass = false;
                          if (sameClass) {
                            if (tipo != Attribute.NOMINAL && !inst2.getInputMissingValues(attr)) {
                              timesSeen[attr].AddElement(
                                  new String(String.valueOf(inst2.getInputRealValues(attr))));

                            } else {
                              if (!inst2.getInputMissingValues(attr)) {
                                timesSeen[attr].AddElement(inst2.getInputNominalValues(attr));
                              }
                            }
                          }
                        }
                      }
                    }
                    for (int attr = 0; attr < nvariables; attr++) {
                      if (direccion == Attribute.INPUT && inst.getInputMissingValues(attr)) {
                        timesSeen[attr].reset();
                      }
                    }
                    fin = false;
                    stepNext = false;
                    while (!fin) {
                      in2 = 0;
                      for (int attr = 0; attr < nvariables && !fin; attr++) {
                        Attribute b = Attributes.getAttribute(attr);

                        direccion = b.getDirectionAttribute();
                        tipo = b.getType();
                        if (direccion == Attribute.INPUT && inst.getInputMissingValues(in2)) {
                          if (stepNext) {
                            timesSeen[attr].iterate();
                            stepNext = false;
                          }
                          if (timesSeen[attr].outOfBounds()) {
                            stepNext = true;
                            if (attr == lastMissing) fin = true;
                            timesSeen[attr].reset();
                          }
                          if (!fin)
                            row[attr] =
                                ((ValueFreq) timesSeen[attr].getCurrent())
                                    .getValue(); // replace missing data
                        }
                        in2++;
                      }
                      if (!fin) {
                        stepNext = true;
                        file_write.write(row[0]);
                        for (int y = 1; y < nvariables; y++) {
                          file_write.write("," + row[y]);
                        }
                        file_write.write("\n");
                        // X[i].addElement(row);
                        // row = (String[])row.clone();
                      }
                    }
                  }
                }
                in++;
              } else {
                if (direccion == Attribute.OUTPUT) {
                  if (tipo != Attribute.NOMINAL && !inst.getOutputMissingValues(out)) {
                    row[j] = new String(String.valueOf(inst.getOutputRealValues(out)));
                  } else {
                    if (!inst.getOutputMissingValues(out))
                      row[j] = inst.getOutputNominalValues(out);
                    else row[j] = new String("?");
                  }
                  out++;
                }
              }
            }
            if (!inst.existsAnyMissingValue()) {
              file_write.write(row[0]);
              for (int y = 1; y < nvariables; y++) {
                file_write.write("," + row[y]);
              }
              file_write.write("\n");
            }
          }
        } catch (Exception e) {
          System.out.println("Dataset exception = " + e);
          e.printStackTrace();
          System.exit(-1);
        }
        file_write.close();
      } catch (IOException e) {
        System.out.println("IO exception = " + e);
        e.printStackTrace();
        System.exit(-1);
      }
    }
  }