예제 #1
0
  /**
   * Function to read the .dat file that contains the information of the dataset.
   *
   * @param name The reader object where the itemsets are readed.
   * @param train The flag if the file is for training
   */
  public MyDataset(String name, boolean train) {
    try {
      // create the set of instances
      IS = new InstanceSet();
      // Read the itemsets.
      IS.readSet(name, train);
    } catch (DatasetException e) {
      System.out.println("Error loading dataset instances");
      e.printStackTrace();
      System.exit(-1);
    } catch (HeaderFormatException e) {
      System.out.println("Error loading dataset instances");
      e.printStackTrace();
      System.exit(-1);
    }

    // Store Dataset file attributes
    readHeader();

    itemsets = new Vector(IS.getNumInstances());

    // read all the itemsets
    getItemsetFull();

    totalCond = this.numAllConditions();
  }
예제 #2
0
  /**
   * It reads the whole input data-set and it stores each example and its associated output value in
   * local arrays to ease their use.
   *
   * @param datasetFile String name of the file containing the dataset
   * @param train boolean It must have the value "true" if we are reading the training data-set
   * @throws IOException If there ocurs any problem with the reading of the data-set
   */
  public void readRegressionSet(String datasetFile, boolean train) throws IOException {
    try {
      // Load in memory a dataset that contains a regression problem
      IS.readSet(datasetFile, train);
      nData = IS.getNumInstances();
      nInputs = Attributes.getInputNumAttributes();
      nVars = nInputs + Attributes.getOutputNumAttributes();

      // outputIntegerheck that there is only one output variable
      if (Attributes.getOutputNumAttributes() > 1) {
        System.out.println("This algorithm can not process MIMO datasets");
        System.out.println("All outputs but the first one will be removed");
        System.exit(1);
      }
      boolean noOutputs = false;
      if (Attributes.getOutputNumAttributes() < 1) {
        System.out.println("This algorithm can not process datasets without outputs");
        System.out.println("Zero-valued output generated");
        noOutputs = true;
        System.exit(1);
      }

      // Initialice and fill our own tables
      X = new double[nData][nInputs];
      missing = new boolean[nData][nInputs];
      outputInteger = new int[nData];

      // Maximum and minimum of inputs
      emax = new double[nInputs];
      emin = new double[nInputs];
      for (int i = 0; i < nInputs; i++) {
        emax[i] = Attributes.getAttribute(i).getMaxAttribute();
        emin[i] = Attributes.getAttribute(i).getMinAttribute();
      }
      // All values are casted into double/integer
      nClasses = 0;
      for (int i = 0; i < nData; i++) {
        Instance inst = IS.getInstance(i);
        for (int j = 0; j < nInputs; j++) {
          X[i][j] = IS.getInputNumericValue(i, j); // inst.getInputRealValues(j);
          missing[i][j] = inst.getInputMissingValues(j);
          if (missing[i][j]) {
            X[i][j] = emin[j] - 1;
          }
        }

        if (noOutputs) {
          outputReal[i] = outputInteger[i] = 0;
        } else {
          outputReal[i] = IS.getOutputNumericValue(i, 0);
          outputInteger[i] = (int) outputReal[i];
        }
      }
    } catch (Exception e) {
      System.out.println("DBG: Exception in readSet");
      e.printStackTrace();
    }
    computeStatistics();
  }
예제 #3
0
파일: ABB.java 프로젝트: Navieclipse/KEEL
  /**
   * Main method for ABB, that explores the search space by pruning nodes and checking their
   * inconsistency ratio.
   */
  private void runABB() {
    boolean[] root = startSolution();
    System.arraycopy(root, 0, features, 0, root.length);

    abb(root);

    /* checks if a subset satisfies the condition (more than 0 selected features) */
    if (features == null) {
      System.err.println("ERROR: It couldn't be possible to find any solution.");
      System.exit(0);
    }
  }
예제 #4
0
파일: Main.java 프로젝트: RubelAhmed57/KEEL
 static void checkDataset() {
   Attribute[] outputs = Attributes.getOutputAttributes();
   if (outputs.length != 1) {
     LogManager.printErr("Only datasets with one output are supported");
     System.exit(1);
   }
   if (outputs[0].getType() != Attribute.NOMINAL) {
     LogManager.printErr("Output attribute should be nominal");
     System.exit(1);
   }
   Parameters.numClasses = outputs[0].getNumNominalValues();
   Parameters.numAttributes = Attributes.getInputAttributes().length;
 }
  // Read the patron file, and parse data into strings
  private void config_read(String fileParam) {
    File inputFile = new File(fileParam);

    if (inputFile == null || !inputFile.exists()) {
      System.out.println("parameter " + fileParam + " file doesn't exists!");
      System.exit(-1);
    }
    // begin the configuration read from file
    try {
      FileReader file_reader = new FileReader(inputFile);
      BufferedReader buf_reader = new BufferedReader(file_reader);
      // FileWriter file_write = new FileWriter(outputFile);

      String line;

      do {
        line = buf_reader.readLine();
      } while (line.length() == 0); // avoid empty lines for processing -> produce exec failure
      String out[] = line.split("algorithm = ");
      // alg_name = new String(out[1]); //catch the algorithm name
      // input & output filenames
      do {
        line = buf_reader.readLine();
      } while (line.length() == 0);
      out = line.split("inputData = ");
      out = out[1].split("\\s\"");
      input_train_name = new String(out[0].substring(1, out[0].length() - 1));
      input_test_name = new String(out[1].substring(0, out[1].length() - 1));
      if (input_test_name.charAt(input_test_name.length() - 1) == '"')
        input_test_name = input_test_name.substring(0, input_test_name.length() - 1);

      do {
        line = buf_reader.readLine();
      } while (line.length() == 0);
      out = line.split("outputData = ");
      out = out[1].split("\\s\"");
      output_train_name = new String(out[0].substring(1, out[0].length() - 1));
      output_test_name = new String(out[1].substring(0, out[1].length() - 1));
      if (output_test_name.charAt(output_test_name.length() - 1) == '"')
        output_test_name = output_test_name.substring(0, output_test_name.length() - 1);

      file_reader.close();

    } catch (IOException e) {
      System.out.println("IO exception = " + e);
      e.printStackTrace();
      System.exit(-1);
    }
  }
예제 #6
0
파일: LVQ.java 프로젝트: triguero/Keel3.0
  private void normalizarTest() {

    int i, j, cont = 0, k;
    Instance temp;
    boolean hecho;
    double caja[];
    StringTokenizer tokens;
    boolean nulls[];

    /* Check if dataset corresponding with a classification problem */

    if (Attributes.getOutputNumAttributes() < 1) {
      System.err.println(
          "This dataset haven´t outputs, so it not corresponding to a classification problem.");
      System.exit(-1);
    } else if (Attributes.getOutputNumAttributes() > 1) {
      System.err.println("This dataset have more of one output.");
      System.exit(-1);
    }

    if (Attributes.getOutputAttribute(0).getType() == Attribute.REAL) {
      System.err.println(
          "This dataset have an input attribute with floating values, so it not corresponding to a classification problem.");
      System.exit(-1);
    }

    datosTest = new double[test.getNumInstances()][Attributes.getInputNumAttributes()];
    clasesTest = new int[test.getNumInstances()];
    caja = new double[1];

    for (i = 0; i < test.getNumInstances(); i++) {
      temp = test.getInstance(i);
      nulls = temp.getInputMissingValues();
      datosTest[i] = test.getInstance(i).getAllInputValues();
      for (j = 0; j < nulls.length; j++) if (nulls[j]) datosTest[i][j] = 0.0;
      caja = test.getInstance(i).getAllOutputValues();
      clasesTest[i] = (int) caja[0];
      for (k = 0; k < datosTest[i].length; k++) {
        if (Attributes.getInputAttribute(k).getType() == Attribute.NOMINAL) {
          datosTest[i][k] /= Attributes.getInputAttribute(k).getNominalValuesList().size() - 1;
        } else {
          datosTest[i][k] -= Attributes.getInputAttribute(k).getMinAttribute();
          datosTest[i][k] /=
              Attributes.getInputAttribute(k).getMaxAttribute()
                  - Attributes.getInputAttribute(k).getMinAttribute();
        }
      }
    }
  }
예제 #7
0
파일: LVQ.java 프로젝트: triguero/Keel3.0
  public LVQ(String ficheroScript) {
    super(ficheroScript);
    try {
      referencia = new InstanceSet();
      referencia.readSet(ficheroReferencia, false);

      /*Normalize the data*/
      normalizarReferencia();
    } catch (Exception e) {
      System.err.println(e);
      System.exit(1);
    }
  }
예제 #8
0
파일: ABB.java 프로젝트: Navieclipse/KEEL
    /**
     * Constructor of the Class Parametros
     *
     * @param nombreFileParametros is the pathname of input parameter file
     */
    Parametros(String nombreFileParametros) {

      try {
        int i;
        String fichero, linea, tok;
        StringTokenizer lineasFile, tokens;

        /* read the parameter file using Files class */
        fichero = Files.readFile(nombreFileParametros);
        fichero += "\n";

        /* remove all \r characters. it is neccesary for a correst use in Windows and UNIX  */
        fichero = fichero.replace('\r', ' ');

        /* extracts the differents tokens of the file */
        lineasFile = new StringTokenizer(fichero, "\n");

        i = 0;
        while (lineasFile.hasMoreTokens()) {

          linea = lineasFile.nextToken();
          i++;
          tokens = new StringTokenizer(linea, " ,\t");
          if (tokens.hasMoreTokens()) {

            tok = tokens.nextToken();
            if (tok.equalsIgnoreCase("algorithm")) nameAlgorithm = getParamString(tokens);
            else if (tok.equalsIgnoreCase("inputdata")) getInputFiles(tokens);
            else if (tok.equalsIgnoreCase("outputdata")) getOutputFiles(tokens);
            else if (tok.equalsIgnoreCase("seed")) seed = getParamLong(tokens);
            else throw new java.io.IOException("Syntax error on line " + i + ": [" + tok + "]\n");
          }
        }

      } catch (java.io.FileNotFoundException e) {
        System.err.println(e + "Parameter file");
      } catch (java.io.IOException e) {
        System.err.println(e + "Aborting program");
        System.exit(-1);
      }

      /** show the read parameter in the standard output */
      String contents = "-- Parameters echo --- \n";
      contents += "Algorithm name: " + nameAlgorithm + "\n";
      contents += "Input Train File: " + trainFileNameInput + "\n";
      contents += "Input Test File: " + testFileNameInput + "\n";
      contents += "Output Train File: " + trainFileNameOutput + "\n";
      contents += "Output Test File: " + testFileNameOutput + "\n";
      System.out.println(contents);
    }
예제 #9
0
파일: ABB.java 프로젝트: Navieclipse/KEEL
  /**
   * Removes one feature at a time, starting from the furthest on the right
   *
   * @param featuresVector solution to generate its neighbor
   * @param which number of the feature to remove starting from the right
   * @return next neighbor of the given solution with one less feature
   */
  private static boolean[] removeOne(boolean featuresVector[], int which) {
    boolean[] fv = new boolean[featuresVector.length];
    System.arraycopy(featuresVector, 0, fv, 0, fv.length);
    boolean stop = false;
    int count = 0;
    for (int i = fv.length - 1; i >= 0 && !stop; i--) {
      if (fv[i]) {
        count++;
        if (count == which) {
          fv[i] = false;
          stop = true;
        }
      }
    }

    return fv;
  }
예제 #10
0
파일: Main.java 프로젝트: micyee/granada
  /** @param args the command line arguments */
  public static void main(String[] args) {
    ParserParameters.doParse(args[0]);
    LogManager.initLogManager();

    InstanceSet is = new InstanceSet();
    try {
      is.readSet(Parameters.trainInputFile, true);
    } catch (Exception e) {
      LogManager.printErr(e.toString());
      System.exit(1);
    }
    checkDataset();

    Discretizer dis;
    String name = Parameters.algorithmName;
    dis = new FayyadDiscretizer();
    dis.buildCutPoints(is);
    dis.applyDiscretization(Parameters.trainInputFile, Parameters.trainOutputFile);
    dis.applyDiscretization(Parameters.testInputFile, Parameters.testOutputFile);
    LogManager.closeLog();
  }
예제 #11
0
파일: ABB.java 프로젝트: Navieclipse/KEEL
  /** Recursive method for ABB */
  private void abb(boolean feat[]) {
    boolean[] child;
    double measure;

    threshold = data.measureIEP(feat);

    for (int i = 0; i < cardinalidadCto(feat); i++) {
      child = removeOne(feat, i);
      measure = data.measureIEP(child);

      if (legitimate(child) && measure < threshold) {
        if (measure < data.measureIEP(features)) {
          // we keep the best found in 'features'
          System.arraycopy(child, 0, features, 0, child.length);
        }
        abb(child);
      } else { // we prune this node
        pruned.add(child);
      }
    }
  }
예제 #12
0
  // Write data matrix X to disk, in KEEL format
  private void write_results(String output) {
    // File OutputFile = new File(output_train_name.substring(1, output_train_name.length()-1));
    try {
      FileWriter file_write = new FileWriter(output);

      file_write.write(IS.getHeader());

      // now, print the normalized data
      file_write.write("@data\n");
      for (int i = 0; i < ndatos; i++) {
        file_write.write(X[i][0]);
        for (int j = 1; j < nvariables; j++) {
          file_write.write("," + X[i][j]);
        }
        file_write.write("\n");
      }
      file_write.close();
    } catch (IOException e) {
      System.out.println("IO exception = " + e);
      System.exit(-1);
    }
  }
예제 #13
0
파일: LVQ.java 프로젝트: triguero/Keel3.0
  public void ejecutar() {

    int i, j, l, m;
    double alfai;
    int nClases;

    int claseObt;

    boolean marcas[];
    boolean notFound;

    int init;
    int clasSel[];

    int baraje[];

    int pos, tmp;
    String instanciasIN[];
    String instanciasOUT[];

    long tiempo = System.currentTimeMillis();

    /* Getting the number of differents classes */

    nClases = 0;

    for (i = 0; i < clasesTrain.length; i++) if (clasesTrain[i] > nClases) nClases = clasesTrain[i];

    nClases++;

    /* Shuffle the train set */

    baraje = new int[datosTrain.length];

    Randomize.setSeed(semilla);

    for (i = 0; i < datosTrain.length; i++) baraje[i] = i;

    for (i = 0; i < datosTrain.length; i++) {

      pos = Randomize.Randint(i, datosTrain.length - 1);

      tmp = baraje[i];

      baraje[i] = baraje[pos];

      baraje[pos] = tmp;
    }

    /*
     * Inicialization of the flagged instaces vector for a posterior
     * elimination
     */

    marcas = new boolean[datosTrain.length];

    for (i = 0; i < datosTrain.length; i++) marcas[i] = false;

    if (datosTrain.length > 0) {

      // marcas[baraje[0]] = true; //the first instance is included always

      nSel = n_p;
      if (nSel < nClases) nSel = nClases;

    } else {

      System.err.println("Input dataset is empty");

      nSel = 0;
    }
    clasSel = new int[nClases];
    System.out.print("Selecting initial neurons... ");
    // at least, there must be 1 neuron of each class at the beginning
    init = nClases;
    for (i = 0; i < nClases && i < datosTrain.length; i++) {
      pos = Randomize.Randint(0, datosTrain.length - 1);
      tmp = 0;
      while ((clasesTrain[pos] != i || marcas[pos]) && tmp < datosTrain.length) {
        pos = (pos + 1) % datosTrain.length;
        tmp++;
      }
      if (tmp < datosTrain.length) marcas[pos] = true;
      else init--;
      // clasSel[i] = i;
    }
    for (i = init; i < Math.min(nSel, datosTrain.length); i++) {
      tmp = 0;
      pos = Randomize.Randint(0, datosTrain.length - 1);
      while (marcas[pos]) {
        pos = (pos + 1) % datosTrain.length;
        tmp++;
      }
      // if(i<nClases){
      // notFound = true;
      // do{
      // for(j=i-1;j>=0 && notFound;j--){
      // if(clasSel[j] == clasesTrain[pos])
      // notFound = false;
      // }
      // if(!notFound)
      // pos = Randomize.Randint (0, datosTrain.length-1);
      // }while(!notFound);
      // }
      // clasSel[i] = clasesTrain[pos];
      marcas[pos] = true;
      init++;
    }
    nSel = init;
    System.out.println("Initial neurons selected: " + nSel);

    /* Building of the S set from the flags */

    conjS = new double[nSel][datosTrain[0].length];

    clasesS = new int[nSel];

    for (m = 0, l = 0; m < datosTrain.length; m++) {

      if (marcas[m]) { // the instance must be copied to the solution

        for (j = 0; j < datosTrain[0].length; j++) {

          conjS[l][j] = datosTrain[m][j];
        }

        clasesS[l] = clasesTrain[m];

        l++;
      }
    }

    alfai = alpha;
    boolean change = true;
    /* Body of the LVQ algorithm. */

    // Train the network
    for (int it = 0; it < T && change; it++) {
      change = false;
      alpha = alfai;
      for (i = 1; i < datosTrain.length; i++) {
        // search for the nearest neuron to training instance
        pos = NN(nSel, conjS, datosTrain[baraje[i]]);
        // nearest neuron labels correctly the class of training
        // instance?

        if (clasesS[pos] != clasesTrain[baraje[i]]) { // NO - repel
          // the neuron
          for (j = 0; j < conjS[pos].length; j++) {
            conjS[pos][j] = conjS[pos][j] - alpha * (datosTrain[baraje[i]][j] - conjS[pos][j]);
          }
          change = true;
        } else { // YES - migrate the neuron towards the input vector
          for (j = 0; j < conjS[pos].length; j++) {
            conjS[pos][j] = conjS[pos][j] + alpha * (datosTrain[baraje[i]][j] - conjS[pos][j]);
          }
        }
        alpha = nu * alpha;
      }
      // Shuffle again the training partition
      baraje = new int[datosTrain.length];

      for (i = 0; i < datosTrain.length; i++) baraje[i] = i;

      for (i = 0; i < datosTrain.length; i++) {

        pos = Randomize.Randint(i, datosTrain.length - 1);

        tmp = baraje[i];

        baraje[i] = baraje[pos];

        baraje[pos] = tmp;
      }
    }
    System.out.println(
        "LVQ " + relation + " " + (double) (System.currentTimeMillis() - tiempo) / 1000.0 + "s");
    // Classify the train data set
    instanciasIN = new String[datosReferencia.length];
    instanciasOUT = new String[datosReferencia.length];
    for (i = 0; i < datosReferencia.length; i++) {
      /* Classify the instance selected in this iteration */
      Attribute a = Attributes.getOutputAttribute(0);

      int tipo = a.getType();
      claseObt = KNN.evaluacionKNN2(1, conjS, clasesS, datosReferencia[i], nClases);
      if (tipo != Attribute.NOMINAL) {
        instanciasIN[i] = new String(String.valueOf(clasesReferencia[i]));
        instanciasOUT[i] = new String(String.valueOf(claseObt));
      } else {
        instanciasIN[i] = new String(a.getNominalValue(clasesReferencia[i]));
        instanciasOUT[i] = new String(a.getNominalValue(claseObt));
      }
    }

    escribeSalida(
        ficheroSalida[0], instanciasIN, instanciasOUT, entradas, salida, nEntradas, relation);

    // Classify the test data set
    normalizarTest();
    instanciasIN = new String[datosTest.length];
    instanciasOUT = new String[datosTest.length];
    for (i = 0; i < datosTest.length; i++) {
      /* Classify the instance selected in this iteration */
      Attribute a = Attributes.getOutputAttribute(0);

      int tipo = a.getType();

      claseObt = KNN.evaluacionKNN2(1, conjS, clasesS, datosTest[i], nClases);
      if (tipo != Attribute.NOMINAL) {
        instanciasIN[i] = new String(String.valueOf(clasesTest[i]));
        instanciasOUT[i] = new String(String.valueOf(claseObt));
      } else {
        instanciasIN[i] = new String(a.getNominalValue(clasesTest[i]));
        instanciasOUT[i] = new String(a.getNominalValue(claseObt));
      }
    }

    escribeSalida(
        ficheroSalida[1], instanciasIN, instanciasOUT, entradas, salida, nEntradas, relation);

    // Print the network to a file
    printNetworkToFile(ficheroSalida[2], referencia.getHeader());
  }
예제 #14
0
  /** Process the training and test files provided in the parameters file to the constructor. */
  public void process() {
    double[] outputs;
    double[] outputs2;
    Instance neighbor;
    double dist, mean;
    int actual;
    int[] N = new int[nneigh];
    double[] Ndist = new double[nneigh];
    boolean allNull;
    svm_problem SVMp = null;
    svm_parameter SVMparam = new svm_parameter();
    svm_model svr = null;
    svm_node SVMn[];
    double[] outputsCandidate = null;
    boolean same = true;
    Vector instancesSelected = new Vector();
    Vector instancesSelected2 = new Vector();

    // SVM PARAMETERS
    SVMparam.C = C;
    SVMparam.cache_size = 10; // 10MB of cache
    SVMparam.degree = degree;
    SVMparam.eps = eps;
    SVMparam.gamma = gamma;
    SVMparam.nr_weight = 0;
    SVMparam.nu = nu;
    SVMparam.p = p;
    SVMparam.shrinking = shrinking;
    SVMparam.probability = 0;
    if (kernelType.compareTo("LINEAR") == 0) {
      SVMparam.kernel_type = svm_parameter.LINEAR;
    } else if (kernelType.compareTo("POLY") == 0) {
      SVMparam.kernel_type = svm_parameter.POLY;
    } else if (kernelType.compareTo("RBF") == 0) {
      SVMparam.kernel_type = svm_parameter.RBF;
    } else if (kernelType.compareTo("SIGMOID") == 0) {
      SVMparam.kernel_type = svm_parameter.SIGMOID;
    }

    SVMparam.svm_type = svm_parameter.EPSILON_SVR;

    try {

      // Load in memory a dataset that contains a classification problem
      IS.readSet(input_train_name, true);
      int in = 0;
      int out = 0;

      ndatos = IS.getNumInstances();
      nvariables = Attributes.getNumAttributes();
      nentradas = Attributes.getInputNumAttributes();
      nsalidas = Attributes.getOutputNumAttributes();

      X = new String[ndatos][2]; // matrix with transformed data

      mostCommon = new String[nvariables];
      SVMp = new svm_problem();
      SVMp.l = ndatos;
      SVMp.y = new double[SVMp.l];
      SVMp.x = new svm_node[SVMp.l][nentradas + 1];
      for (int l = 0; l < SVMp.l; l++) {
        for (int n = 0; n < Attributes.getInputNumAttributes() + 1; n++) {
          SVMp.x[l][n] = new svm_node();
        }
      }

      for (int i = 0; i < ndatos; i++) {
        Instance inst = IS.getInstance(i);

        SVMp.y[i] = inst.getAllOutputValues()[0];
        for (int n = 0; n < Attributes.getInputNumAttributes(); n++) {
          SVMp.x[i][n].index = n;
          SVMp.x[i][n].value = inst.getAllInputValues()[n];
          SVMp.y[i] = inst.getAllOutputValues()[0];
        }
        // end of instance
        SVMp.x[i][nentradas].index = -1;
      }
      if (svm.svm_check_parameter(SVMp, SVMparam) != null) {
        System.out.println("SVM parameter error in training:");
        System.out.println(svm.svm_check_parameter(SVMp, SVMparam));
        System.exit(-1);
      }
      // train the SVM
      if (ndatos > 0) {
        svr = svm.svm_train(SVMp, SVMparam);
      }
      for (int i = 0; i < ndatos; i++) {
        Instance inst = IS.getInstance(i);
        X[i][0] = new String(String.valueOf(inst.getAllOutputValues()[0]));
        //			the values used for regression
        SVMn = new svm_node[Attributes.getInputNumAttributes() + 1];
        for (int n = 0; n < Attributes.getInputNumAttributes(); n++) {
          SVMn[n] = new svm_node();
          SVMn[n].index = n;
          SVMn[n].value = inst.getAllInputValues()[n];
        }
        SVMn[nentradas] = new svm_node();
        SVMn[nentradas].index = -1;
        // pedict the class
        X[i][1] = new String(String.valueOf((svm.svm_predict(svr, SVMn))));
      }
    } catch (Exception e) {
      System.out.println("Dataset exception = " + e);
      e.printStackTrace();
      System.exit(-1);
    }
    write_results(output_train_name);
    /** ************************************************************************************ */
    try {

      // Load in memory a dataset that contains a classification
      // problem
      IS.readSet(input_test_name, false);
      int in = 0;
      int out = 0;

      ndatos = IS.getNumInstances();
      nvariables = Attributes.getNumAttributes();
      nentradas = Attributes.getInputNumAttributes();
      nsalidas = Attributes.getOutputNumAttributes();

      X = new String[ndatos][2]; // matrix with transformed data
      // data

      mostCommon = new String[nvariables];

      for (int i = 0; i < ndatos; i++) {
        Instance inst = IS.getInstance(i);
        X[i][0] = new String(String.valueOf(inst.getAllOutputValues()[0]));

        SVMn = new svm_node[Attributes.getInputNumAttributes() + 1];
        for (int n = 0; n < Attributes.getInputNumAttributes(); n++) {
          SVMn[n] = new svm_node();
          SVMn[n].index = n;
          SVMn[n].value = inst.getAllInputValues()[n];
        }
        SVMn[nentradas] = new svm_node();
        SVMn[nentradas].index = -1;
        // pedict the class
        X[i][1] = new String(String.valueOf(svm.svm_predict(svr, SVMn)));
      }
    } catch (Exception e) {
      System.out.println("Dataset exception = " + e);
      e.printStackTrace();
      System.exit(-1);
    }
    System.out.println("escribiendo test");
    write_results(output_test_name);
  }
예제 #15
0
  /**
   * The main method of the class that includes the operations of the algorithm. It includes all the
   * operations that the algorithm has and finishes when it writes the output information into
   * files.
   */
  public void run() {

    int S[];
    int i, j, l, m;
    int nPos = 0, nNeg = 0;
    int posID;
    int nClases;
    int pos;
    int baraje[];
    int tmp;
    double conjS[][];
    int clasesS[];
    int tamS = 0;
    int claseObt;
    int cont;
    int busq;
    boolean marcas[];
    int nSel;
    double conjS2[][];
    int clasesS2[];
    double minDist, dist;

    long tiempo = System.currentTimeMillis();

    /*CNN PART*/

    /*Count of number of positive and negative examples*/
    for (i = 0; i < clasesTrain.length; i++) {
      if (clasesTrain[i] == 0) nPos++;
      else nNeg++;
    }
    if (nPos > nNeg) {
      tmp = nPos;
      nPos = nNeg;
      nNeg = tmp;
      posID = 1;
    } else {
      posID = 0;
    }

    /*Inicialization of the candidates set*/
    S = new int[datosTrain.length];
    for (i = 0; i < S.length; i++) S[i] = Integer.MAX_VALUE;

    /*Inserting an element of mayority class*/
    Randomize.setSeed(semilla);
    pos = Randomize.Randint(0, clasesTrain.length - 1);
    while (clasesTrain[pos] == posID) pos = (pos + 1) % clasesTrain.length;
    S[tamS] = pos;
    tamS++;

    /*Insert all subset of minority class*/
    for (i = 0; i < clasesTrain.length; i++) {
      if (clasesTrain[i] == posID) {
        S[tamS] = i;
        tamS++;
      }
    }

    /*Algorithm body. We resort randomly the instances of T and compare with the rest of S.
    If an instance doesn´t classified correctly, it is inserted in S*/
    baraje = new int[datosTrain.length];
    for (i = 0; i < datosTrain.length; i++) baraje[i] = i;
    for (i = 0; i < datosTrain.length; i++) {
      pos = Randomize.Randint(i, clasesTrain.length - 1);
      tmp = baraje[i];
      baraje[i] = baraje[pos];
      baraje[pos] = tmp;
    }

    for (i = 0; i < datosTrain.length; i++) {
      if (clasesTrain[i] != posID) { // only for mayority class instances
        /*Construction of the S set from the previous vector S*/
        conjS = new double[tamS][datosTrain[0].length];
        clasesS = new int[tamS];
        for (j = 0; j < tamS; j++) {
          for (l = 0; l < datosTrain[0].length; l++) conjS[j][l] = datosTrain[S[j]][l];
          clasesS[j] = clasesTrain[S[j]];
        }

        /*Do KNN to the instance*/
        claseObt = KNN.evaluacionKNN(k, conjS, clasesS, datosTrain[baraje[i]], 2);
        if (claseObt != clasesTrain[baraje[i]]) { // fail in the class, it is included in S
          Arrays.sort(S);
          busq = Arrays.binarySearch(S, baraje[i]);
          if (busq < 0) {
            S[tamS] = baraje[i];
            tamS++;
          }
        }
      }
    }

    /*Construction of the S set from the previous vector S*/
    conjS = new double[tamS][datosTrain[0].length];
    clasesS = new int[tamS];
    for (j = 0; j < tamS; j++) {
      for (l = 0; l < datosTrain[0].length; l++) conjS[j][l] = datosTrain[S[j]][l];
      clasesS[j] = clasesTrain[S[j]];
    }

    /*TOMEK LINKS PART*/

    /*Inicialization of the instance flagged vector of the S set*/
    marcas = new boolean[conjS.length];
    for (i = 0; i < conjS.length; i++) {
      marcas[i] = true;
    }
    nSel = conjS.length;

    for (i = 0; i < conjS.length; i++) {
      minDist = Double.POSITIVE_INFINITY;
      pos = 0;
      for (j = 0; j < conjS.length; j++) {
        if (i != j) {
          dist = KNN.distancia(conjS[i], conjS[j]);
          if (dist < minDist) {
            minDist = dist;
            pos = j;
          }
        }
      }
      if (clasesS[i] != clasesS[pos]) {
        if (clasesS[i] != posID) {
          if (marcas[i] == true) {
            marcas[i] = false;
            nSel--;
          }
        } else {
          if (marcas[pos] == true) {
            marcas[pos] = false;
            nSel--;
          }
        }
      }
    }

    /*Construction of the S set from the flags*/
    conjS2 = new double[nSel][conjS[0].length];
    clasesS2 = new int[nSel];
    for (m = 0, l = 0; m < conjS.length; m++) {
      if (marcas[m]) { // the instance will evaluate
        for (j = 0; j < conjS[0].length; j++) {
          conjS2[l][j] = conjS[m][j];
        }
        clasesS2[l] = clasesS[m];
        l++;
      }
    }

    System.out.println(
        "CNN_TomekLinks "
            + relation
            + " "
            + (double) (System.currentTimeMillis() - tiempo) / 1000.0
            + "s");

    OutputIS.escribeSalida(
        ficheroSalida[0], conjS2, clasesS2, entradas, salida, nEntradas, relation);
    OutputIS.escribeSalida(ficheroSalida[1], test, entradas, salida, nEntradas, relation);
  }
예제 #16
0
  /** It runs the Qstatistic */
  public void runAlgorithm() {

    int i, j, l, h;
    double conjS[][];
    double conjR[][];
    int conjN[][];
    boolean conjM[][];
    int clasesS[];
    int nSel = 0;
    Chromosome poblacion[];
    int ev = 0;
    Chromosome C[];
    int baraje[];
    int pos, tmp;
    Chromosome newPob[];
    int d;
    int tamC;
    Chromosome pobTemp[];
    int nPos = 0, nNeg = 0, posID, negID;
    double datosArt[][];
    double realArt[][];
    int nominalArt[][];
    boolean nulosArt[][];
    int clasesArt[];
    int tamS;

    long tiempo = System.currentTimeMillis();

    // Randomize.setSeed (semilla);
    posID = clasesTrain[0];
    negID = -1;
    for (i = 0; i < clasesTrain.length; i++) {
      if (clasesTrain[i] != posID) {
        negID = clasesTrain[i];
        break;
      }
    }
    /* Count of number of positive and negative examples */
    for (i = 0; i < clasesTrain.length; i++) {
      if (clasesTrain[i] == posID) nPos++;
      else nNeg++;
    }
    if (nPos > nNeg) {
      tmp = nPos;
      nPos = nNeg;
      nNeg = tmp;
      tmp = posID;
      posID = negID;
      negID = tmp;
    } else {
      /*
       * tmp = posID; posID = negID; negID = tmp;
       */
    }

    if (hybrid.equalsIgnoreCase("smote + eus")) {
      if (balance) {
        tamS = 2 * nNeg;
      } else {
        tamS = nNeg + nPos + (int) (nPos * smoting);
      }
      datosArt = new double[tamS][datosTrain[0].length];
      realArt = new double[tamS][datosTrain[0].length];
      nominalArt = new int[tamS][datosTrain[0].length];
      nulosArt = new boolean[tamS][datosTrain[0].length];
      clasesArt = new int[tamS];

      SMOTE(
          datosTrain,
          realTrain,
          nominalTrain,
          nulosTrain,
          clasesTrain,
          datosArt,
          realArt,
          nominalArt,
          nulosArt,
          clasesArt,
          kSMOTE,
          ASMO,
          smoting,
          balance,
          nPos,
          posID,
          nNeg,
          negID,
          distanceEu);
    } else {
      datosArt = new double[datosTrain.length][datosTrain[0].length];
      realArt = new double[datosTrain.length][datosTrain[0].length];
      nominalArt = new int[datosTrain.length][datosTrain[0].length];
      nulosArt = new boolean[datosTrain.length][datosTrain[0].length];
      clasesArt = new int[clasesTrain.length];
      for (i = 0; i < datosTrain.length; i++) {
        for (j = 0; j < datosTrain[i].length; j++) {
          datosArt[i][j] = datosTrain[i][j];
          realArt[i][j] = realTrain[i][j];
          nominalArt[i][j] = nominalTrain[i][j];
          nulosArt[i][j] = nulosTrain[i][j];
        }
        clasesArt[i] = clasesTrain[i];
      }
    }

    /* Count of number of positive and negative examples */
    nPos = nNeg = 0;
    for (i = 0; i < clasesArt.length; i++) {
      if (clasesArt[i] == posID) nPos++;
      else nNeg++;
    }

    if (majSelection) d = nNeg / 4;
    else d = datosArt.length / 4;

    /* Random initialization of the population */
    poblacion = new Chromosome[popSize];
    baraje = new int[popSize];
    for (i = 0; i < popSize; i++)
      if (majSelection) poblacion[i] = new Chromosome(nNeg);
      else poblacion[i] = new Chromosome(datosArt.length);

    /* Initial evaluation of the population */
    for (i = 0; i < popSize; i++)
      poblacion[i].evalua(
          datosTrain,
          realTrain,
          nominalTrain,
          nulosTrain,
          clasesTrain,
          datosArt,
          realArt,
          nominalArt,
          nulosArt,
          clasesArt,
          wrapper,
          k,
          evMeas,
          majSelection,
          pFactor,
          P,
          posID,
          nPos,
          distanceEu,
          entradas,
          anteriores,
          salidasAnteriores);

    /* Until stop condition */
    while (ev < nEval) {
      C = new Chromosome[popSize];

      /* Selection(r) of C(t) from P(t) */
      for (i = 0; i < popSize; i++) baraje[i] = i;
      for (i = 0; i < popSize; i++) {
        pos = Randomize.Randint(i, popSize - 1);
        tmp = baraje[i];
        baraje[i] = baraje[pos];
        baraje[pos] = tmp;
      }
      for (i = 0; i < popSize; i++)
        if (majSelection) C[i] = new Chromosome(nNeg, poblacion[baraje[i]]);
        else C[i] = new Chromosome(datosArt.length, poblacion[baraje[i]]);

      /* Structure recombination in C(t) constructing C'(t) */
      tamC = recombinar(C, d, nNeg, nPos, majSelection);
      newPob = new Chromosome[tamC];
      for (i = 0, l = 0; i < C.length; i++) {
        if (C[i].esValido()) { // the cromosome must be copied to the
          // new poblation C'(t)
          if (majSelection) newPob[l] = new Chromosome(nNeg, C[i]);
          else newPob[l] = new Chromosome(datosArt.length, C[i]);
          l++;
        }
      }

      /* Structure evaluation in C'(t) */
      for (i = 0; i < newPob.length; i++) {
        newPob[i].evalua(
            datosTrain,
            realTrain,
            nominalTrain,
            nulosTrain,
            clasesTrain,
            datosArt,
            realArt,
            nominalArt,
            nulosArt,
            clasesArt,
            wrapper,
            k,
            evMeas,
            majSelection,
            pFactor,
            P,
            posID,
            nPos,
            distanceEu,
            entradas,
            anteriores,
            salidasAnteriores);
        ev++;
      }

      /* Selection(s) of P(t) from C'(t) and P(t-1) */
      Arrays.sort(poblacion);
      Arrays.sort(newPob);
      /*
       * If the best of C' is worse than the worst of P(t-1), then there
       * will no changes
       */
      if (tamC == 0 || newPob[0].getCalidad() < poblacion[popSize - 1].getCalidad()) {
        d--;
      } else {
        pobTemp = new Chromosome[popSize];
        for (i = 0, j = 0, l = 0; i < popSize && l < tamC; i++) {
          if (poblacion[j].getCalidad() > newPob[l].getCalidad()) {
            if (majSelection) pobTemp[i] = new Chromosome(nNeg, poblacion[j]);
            else pobTemp[i] = new Chromosome(datosArt.length, poblacion[j]);
            j++;
          } else {
            if (majSelection) pobTemp[i] = new Chromosome(nNeg, newPob[l]);
            else pobTemp[i] = new Chromosome(datosArt.length, newPob[l]);
            l++;
          }
        }
        if (l == tamC) { // there are cromosomes for copying
          for (; i < popSize; i++) {
            if (majSelection) pobTemp[i] = new Chromosome(nNeg, poblacion[j]);
            else pobTemp[i] = new Chromosome(datosArt.length, poblacion[j]);
            j++;
          }
        }
        poblacion = pobTemp;
      }

      /* Last step of the algorithm */
      if (d <= 0) {
        for (i = 1; i < popSize; i++) {
          poblacion[i].divergeCHC(r, poblacion[0], prob0to1Div);
        }
        for (i = 0; i < popSize; i++)
          if (!(poblacion[i].estaEvaluado())) {
            poblacion[i].evalua(
                datosTrain,
                realTrain,
                nominalTrain,
                nulosTrain,
                clasesTrain,
                datosArt,
                realArt,
                nominalArt,
                nulosArt,
                clasesArt,
                wrapper,
                k,
                evMeas,
                majSelection,
                pFactor,
                P,
                posID,
                nPos,
                distanceEu,
                entradas,
                anteriores,
                salidasAnteriores);
            ev++;
          }

        /* Reinicialization of d value */
        if (majSelection) d = (int) (r * (1.0 - r) * (double) nNeg);
        else d = (int) (r * (1.0 - r) * (double) datosArt.length);
      }
    }

    Arrays.sort(poblacion);

    if (majSelection) {
      nSel = poblacion[0].genesActivos() + nPos;

      /* Construction of S set from the best cromosome */
      conjS = new double[nSel][datosArt[0].length];
      conjR = new double[nSel][datosArt[0].length];
      conjN = new int[nSel][datosArt[0].length];
      conjM = new boolean[nSel][datosArt[0].length];
      clasesS = new int[nSel];
      h = 0;
      for (i = 0, l = 0; i < nNeg; i++, h++) {
        for (; clasesArt[h] == posID && h < clasesArt.length; h++) ;
        if (poblacion[0].getGen(i)) { // the instance must be copied to
          // the solution
          for (j = 0; j < datosArt[h].length; j++) {
            conjS[l][j] = datosArt[h][j];
            conjR[l][j] = realArt[h][j];
            conjN[l][j] = nominalArt[h][j];
            conjM[l][j] = nulosArt[h][j];
          }
          clasesS[l] = clasesArt[h];
          l++;
        }
      }
      for (i = 0; i < datosArt.length; i++) {
        if (clasesArt[i] == posID) {
          for (j = 0; j < datosArt[i].length; j++) {
            conjS[l][j] = datosArt[i][j];
            conjR[l][j] = realArt[i][j];
            conjN[l][j] = nominalArt[i][j];
            conjM[l][j] = nulosArt[i][j];
          }
          clasesS[l] = clasesArt[i];
          l++;
        }
      }
    } else {
      nSel = poblacion[0].genesActivos();

      /* Construction of S set from the best cromosome */
      conjS = new double[nSel][datosArt[0].length];
      conjR = new double[nSel][datosArt[0].length];
      conjN = new int[nSel][datosArt[0].length];
      conjM = new boolean[nSel][datosArt[0].length];
      clasesS = new int[nSel];
      for (i = 0, l = 0; i < datosArt.length; i++) {
        if (poblacion[0].getGen(i)) { // the instance must be copied to
          // the solution
          for (j = 0; j < datosArt[i].length; j++) {
            conjS[l][j] = datosArt[i][j];
            conjR[l][j] = realArt[i][j];
            conjN[l][j] = nominalArt[i][j];
            conjM[l][j] = nulosArt[i][j];
          }
          clasesS[l] = clasesArt[i];
          l++;
        }
      }
    }

    if (hybrid.equalsIgnoreCase("eus + smote")) {
      nPos = nNeg = 0;
      for (i = 0; i < clasesS.length; i++) {
        if (clasesS[i] == posID) nPos++;
        else nNeg++;
      }
      if (nPos < nNeg) {
        if (balance) {
          tamS = 2 * nNeg;
        } else {
          tamS = nNeg + nPos + (int) (nPos * smoting);
        }
        datosArt = new double[tamS][datosTrain[0].length];
        realArt = new double[tamS][datosTrain[0].length];
        nominalArt = new int[tamS][datosTrain[0].length];
        nulosArt = new boolean[tamS][datosTrain[0].length];
        clasesArt = new int[tamS];

        SMOTE(
            conjS,
            conjR,
            conjN,
            conjM,
            clasesS,
            datosArt,
            realArt,
            nominalArt,
            nulosArt,
            clasesArt,
            kSMOTE,
            ASMO,
            smoting,
            balance,
            nPos,
            posID,
            nNeg,
            negID,
            distanceEu);

        nSel = datosArt.length;

        /* Construction of S set from the best cromosome */
        conjS = new double[nSel][datosArt[0].length];
        conjR = new double[nSel][datosArt[0].length];
        conjN = new int[nSel][datosArt[0].length];
        conjM = new boolean[nSel][datosArt[0].length];
        clasesS = new int[nSel];
        for (i = 0; i < datosArt.length; i++) {
          for (j = 0; j < datosArt[i].length; j++) {
            conjS[i][j] = datosArt[i][j];
            conjR[i][j] = realArt[i][j];
            conjN[i][j] = nominalArt[i][j];
            conjM[i][j] = nulosArt[i][j];
          }
          clasesS[i] = clasesArt[i];
        }
      }
    }

    /*
     * for (i = 0; i < poblacion.length; i++){ for (j = 0; j <
     * poblacion[0].cuerpo.length; j++){
     * System.out.print((poblacion[i].cuerpo[j] ? 1 : 0)); }
     * System.out.println(" Calidad: " + poblacion[i].calidad); }
     */
    best = poblacion[0].cuerpo.clone();
    bestOutputs = poblacion[0].prediction.clone();
    System.out.println(
        "QstatEUSCHC "
            + relation
            + " "
            + (double) (System.currentTimeMillis() - tiempo) / 1000.0
            + "s");

    OutputIS.escribeSalida(
        ficheroSalida[0], conjR, conjN, conjM, clasesS, entradas, salida, nEntradas, relation);
    // OutputIS.escribeSalida(ficheroSalida[1], test, entradas, salida,
    // nEntradas, relation);
  }
예제 #17
0
파일: ENNRS.java 프로젝트: TheMurderer/keel
  public void ejecutar() {

    int i, j, l, m, o;

    int nClases;

    int claseObt;

    boolean marcas[];

    double conjS[][];

    int clasesS[];

    int eleS[], eleT[];

    int bestAc, aciertos;

    int temp[];

    int pos, tmp;

    long tiempo = System.currentTimeMillis();

    /*Getting the number of different classes*/

    nClases = 0;

    for (i = 0; i < clasesTrain.length; i++) if (clasesTrain[i] > nClases) nClases = clasesTrain[i];

    nClases++;

    /*Inicialization of the flagged instance vector of the S set*/

    marcas = new boolean[datosTrain.length];

    for (i = 0; i < datosTrain.length; i++) marcas[i] = false;

    /*Allocate memory for the random selection*/

    m = (int) ((porcentaje * datosTrain.length) / 100.0);

    eleS = new int[m];

    eleT = new int[datosTrain.length - m];

    temp = new int[datosTrain.length];

    for (i = 0; i < datosTrain.length; i++) temp[i] = i;

    /** Random distribution of elements in each set */
    Randomize.setSeed(semilla);

    for (i = 0; i < eleS.length; i++) {

      pos = Randomize.Randint(i, datosTrain.length - 1);

      tmp = temp[i];

      temp[i] = temp[pos];

      temp[pos] = tmp;

      eleS[i] = temp[i];
    }

    for (i = 0; i < eleT.length; i++) {

      pos = Randomize.Randint(m + i, datosTrain.length - 1);

      tmp = temp[m + i];

      temp[m + i] = temp[pos];

      temp[pos] = tmp;

      eleT[i] = temp[m + i];
    }

    for (i = 0; i < eleS.length; i++) marcas[eleS[i]] = true;

    /*Building of the S set from the flags*/

    conjS = new double[m][datosTrain[0].length];

    clasesS = new int[m];

    for (o = 0, l = 0; o < datosTrain.length; o++) {

      if (marcas[o]) { // the instance will be evaluated

        for (j = 0; j < datosTrain[0].length; j++) {

          conjS[l][j] = datosTrain[o][j];
        }

        clasesS[l] = clasesTrain[o];

        l++;
      }
    }

    /*Evaluation of the S set*/

    bestAc = 0;

    for (i = 0; i < datosTrain.length; i++) {

      claseObt = KNN.evaluacionKNN2(k, conjS, clasesS, datosTrain[i], nClases);

      if (claseObt == clasesTrain[i]) // correct clasification
      bestAc++;
    }

    /*Body of the ENNRS algorithm. Change the S set in each iteration for instances
    of the T set until get a complete sustitution*/

    for (i = 0; i < n; i++) {

      /*Preparation the set to interchange*/

      for (j = 0; j < eleS.length; j++) {

        pos = Randomize.Randint(j, eleT.length - 1);

        tmp = eleT[j];

        eleT[j] = eleT[pos];

        eleT[pos] = tmp;
      }

      /*Interchange of instances*/

      for (j = 0; j < eleS.length; j++) {

        tmp = eleS[j];

        eleS[j] = eleT[j];

        eleT[j] = tmp;

        marcas[eleS[j]] = true;

        marcas[eleT[j]] = false;
      }

      /*Building of the S set from the flags*/

      for (o = 0, l = 0; o < datosTrain.length; o++) {

        if (marcas[o]) { // the instance will evaluate

          for (j = 0; j < datosTrain[0].length; j++) {

            conjS[l][j] = datosTrain[o][j];
          }

          clasesS[l] = clasesTrain[o];

          l++;
        }
      }

      /*Evaluation of the S set*/

      aciertos = 0;

      for (j = 0; j < datosTrain.length; j++) {

        claseObt = KNN.evaluacionKNN2(k, conjS, clasesS, datosTrain[j], nClases);

        if (claseObt == clasesTrain[j]) // correct clasification
        aciertos++;
      }

      if (aciertos > bestAc) { // keep S

        bestAc = aciertos;

      } else { // undo changes

        for (j = 0; j < eleS.length; j++) {

          tmp = eleS[j];

          eleS[j] = eleT[j];

          eleT[j] = tmp;

          marcas[eleS[j]] = true;

          marcas[eleT[j]] = false;
        }
      }
    }

    /*Building of the S set from the flags*/
    /*Building of the S set from the flags*/

    for (o = 0, l = 0; o < datosTrain.length; o++) {

      if (marcas[o]) { // the instance will evaluate

        for (j = 0; j < datosTrain[0].length; j++) {

          conjS[l][j] = datosTrain[o][j];
        }

        clasesS[l] = clasesTrain[o];

        l++;
      }
    }

    System.out.println(
        "ENNRS " + relation + " " + (double) (System.currentTimeMillis() - tiempo) / 1000.0 + "s");

    // COn conjS me vale.
    int trainRealClass[][];
    int trainPrediction[][];

    trainRealClass = new int[datosTrain.length][1];
    trainPrediction = new int[datosTrain.length][1];

    // Working on training
    for (i = 0; i < datosTrain.length; i++) {
      trainRealClass[i][0] = clasesTrain[i];
      trainPrediction[i][0] = KNN.evaluate(datosTrain[i], conjS, nClases, clasesS, this.k);
    }

    KNN.writeOutput(ficheroSalida[0], trainRealClass, trainPrediction, entradas, salida, relation);

    // Working on test
    int realClass[][] = new int[datosTest.length][1];
    int prediction[][] = new int[datosTest.length][1];

    // Check  time

    for (i = 0; i < realClass.length; i++) {
      realClass[i][0] = clasesTest[i];
      prediction[i][0] = KNN.evaluate(datosTest[i], conjS, nClases, clasesS, this.k);
    }

    KNN.writeOutput(ficheroSalida[1], realClass, prediction, entradas, salida, relation);
  }
  /** Process the training and test files provided in the parameters file to the constructor. */
  public void process() {
    double[] outputs;
    double[] outputs2;
    try {
      FileWriter file_write = new FileWriter(output_train_name);

      try {

        // Load in memory a dataset that contains a classification problem
        IS.readSet(input_train_name, true);
        int in = 0;
        int out = 0;
        int in2 = 0;
        int out2 = 0;
        int lastMissing = -1;
        boolean fin = false;
        boolean stepNext = false;

        ndatos = IS.getNumInstances();
        nvariables = Attributes.getNumAttributes();
        nentradas = Attributes.getInputNumAttributes();
        nsalidas = Attributes.getOutputNumAttributes();

        String[] row = null;
        X = new Vector[ndatos]; // matrix with transformed data
        for (int i = 0; i < ndatos; i++) X[i] = new Vector();

        timesSeen = new FreqList[nvariables];
        mostCommon = new String[nvariables];

        file_write.write(IS.getHeader());

        // now, print the normalized data
        file_write.write("@data\n");

        // now, search for missed data, and replace them with
        // the most common value

        for (int i = 0; i < ndatos; i++) {
          Instance inst = IS.getInstance(i);
          in = 0;
          out = 0;
          row = new String[nvariables];

          for (int j = 0; j < nvariables; j++) {
            Attribute a = Attributes.getAttribute(j);

            direccion = a.getDirectionAttribute();
            tipo = a.getType();

            if (direccion == Attribute.INPUT) {
              if (tipo != Attribute.NOMINAL && !inst.existsAnyMissingValue()) {
                row[j] = new String(String.valueOf(inst.getInputRealValues(in)));
              } else {
                if (!inst.existsAnyMissingValue()) row[j] = inst.getInputNominalValues(in);
                else {
                  // missing data
                  outputs = inst.getAllOutputValues();
                  in2 = 0;
                  out2 = 0;
                  for (int attr = 0; attr < nvariables; attr++) {
                    Attribute b = Attributes.getAttribute(attr);
                    direccion = b.getDirectionAttribute();
                    tipo = b.getType();
                    if (direccion == Attribute.INPUT) {
                      if (tipo != Attribute.NOMINAL && !inst.getInputMissingValues(in2)) {
                        row[attr] = new String(String.valueOf(inst.getInputRealValues(in2)));
                      } else {
                        if (!inst.getInputMissingValues(in2))
                          row[attr] = inst.getInputNominalValues(in2);
                      }
                      in2++;
                    } else {
                      if (direccion == Attribute.OUTPUT) {
                        if (tipo != Attribute.NOMINAL && !inst.getOutputMissingValues(out2)) {
                          row[attr] = new String(String.valueOf(inst.getOutputRealValues(out2)));
                        } else {
                          if (!inst.getOutputMissingValues(out2))
                            row[attr] = inst.getOutputNominalValues(out2);
                        }
                        out2++;
                      }
                    }
                  }
                  // make frecuencies  for each attribute
                  for (int attr = 0; attr < nvariables; attr++) {
                    Attribute b = Attributes.getAttribute(attr);

                    direccion = b.getDirectionAttribute();
                    tipo = b.getType();
                    if (direccion == Attribute.INPUT && inst.getInputMissingValues(attr)) {
                      lastMissing = attr;
                      timesSeen[attr] = new FreqList();
                      for (int m = 0; m < ndatos; m++) {
                        Instance inst2 = IS.getInstance(m);
                        outputs2 = inst2.getAllOutputValues();
                        boolean sameClass = true;
                        // are they same concept instances??
                        for (int k = 0; k < nsalidas && sameClass; k++)
                          if (outputs[k] != outputs2[k]) sameClass = false;
                        if (sameClass) {
                          if (tipo != Attribute.NOMINAL && !inst2.getInputMissingValues(attr)) {
                            timesSeen[attr].AddElement(
                                new String(String.valueOf(inst2.getInputRealValues(attr))));

                          } else {
                            if (!inst2.getInputMissingValues(attr)) {
                              timesSeen[attr].AddElement(inst2.getInputNominalValues(attr));
                            }
                          }
                        }
                      }
                    }
                  }
                  for (int attr = 0; attr < nvariables; attr++) {
                    if (direccion == Attribute.INPUT && inst.getInputMissingValues(attr)) {
                      timesSeen[attr].reset();
                    }
                  }
                  fin = false;
                  stepNext = false;
                  while (!fin) {
                    in2 = 0;
                    for (int attr = 0; attr < nvariables && !fin; attr++) {
                      Attribute b = Attributes.getAttribute(attr);

                      direccion = b.getDirectionAttribute();
                      tipo = b.getType();
                      if (direccion == Attribute.INPUT && inst.getInputMissingValues(in2)) {
                        if (stepNext) {
                          timesSeen[attr].iterate();
                          stepNext = false;
                        }
                        if (timesSeen[attr].outOfBounds()) {
                          stepNext = true;
                          if (attr == lastMissing) fin = true;
                          timesSeen[attr].reset();
                        }
                        if (!fin)
                          row[attr] =
                              ((ValueFreq) timesSeen[attr].getCurrent())
                                  .getValue(); // replace missing data
                      }
                      in2++;
                    }
                    if (!fin) {
                      stepNext = true;
                      file_write.write(row[0]);
                      for (int y = 1; y < nvariables; y++) {
                        file_write.write("," + row[y]);
                      }
                      file_write.write("\n");
                      // X[i].addElement(row);
                      // row = (String[])row.clone();
                    }
                  }
                }
              }
              in++;
            } else {
              if (direccion == Attribute.OUTPUT) {
                if (tipo != Attribute.NOMINAL && !inst.getOutputMissingValues(out)) {
                  row[j] = new String(String.valueOf(inst.getOutputRealValues(out)));
                } else {
                  if (!inst.getOutputMissingValues(out)) row[j] = inst.getOutputNominalValues(out);
                  else row[j] = new String("?");
                }
                out++;
              }
            }
          }
          if (!inst.existsAnyMissingValue()) {
            file_write.write(row[0]);
            for (int y = 1; y < nvariables; y++) {
              file_write.write("," + row[y]);
            }
            file_write.write("\n");
          }
        }
      } catch (Exception e) {
        System.out.println("Dataset exception = " + e);
        e.printStackTrace();
        System.exit(-1);
      }
      file_write.close();
    } catch (IOException e) {
      System.out.println("IO exception = " + e);
      e.printStackTrace();
      System.exit(-1);
    }

    /** ************************************************************************************ */
    // does a test file associated exist?
    if (input_train_name.compareTo(input_test_name) != 0) {
      try {
        FileWriter file_write = new FileWriter(output_test_name);

        try {

          // Load in memory a dataset that contains a classification problem
          IS.readSet(input_test_name, false);
          int in = 0;
          int out = 0;
          int in2 = 0;
          int out2 = 0;
          int lastMissing = -1;
          boolean fin = false;
          boolean stepNext = false;

          ndatos = IS.getNumInstances();
          nvariables = Attributes.getNumAttributes();
          nentradas = Attributes.getInputNumAttributes();
          nsalidas = Attributes.getOutputNumAttributes();

          String[] row = null;
          X = new Vector[ndatos]; // matrix with transformed data
          for (int i = 0; i < ndatos; i++) X[i] = new Vector();

          timesSeen = new FreqList[nvariables];
          mostCommon = new String[nvariables];

          file_write.write(IS.getHeader());

          // now, print the normalized data
          file_write.write("@data\n");

          // now, search for missed data, and replace them with
          // the most common value

          for (int i = 0; i < ndatos; i++) {
            Instance inst = IS.getInstance(i);
            in = 0;
            out = 0;
            row = new String[nvariables];

            for (int j = 0; j < nvariables; j++) {
              Attribute a = Attributes.getAttribute(j);

              direccion = a.getDirectionAttribute();
              tipo = a.getType();

              if (direccion == Attribute.INPUT) {
                if (tipo != Attribute.NOMINAL && !inst.existsAnyMissingValue()) {
                  row[j] = new String(String.valueOf(inst.getInputRealValues(in)));
                } else {
                  if (!inst.existsAnyMissingValue()) row[j] = inst.getInputNominalValues(in);
                  else {
                    // missing data
                    outputs = inst.getAllOutputValues();
                    in2 = 0;
                    out2 = 0;
                    for (int attr = 0; attr < nvariables; attr++) {
                      Attribute b = Attributes.getAttribute(attr);
                      direccion = b.getDirectionAttribute();
                      tipo = b.getType();
                      if (direccion == Attribute.INPUT) {
                        if (tipo != Attribute.NOMINAL && !inst.getInputMissingValues(in2)) {
                          row[attr] = new String(String.valueOf(inst.getInputRealValues(in2)));
                        } else {
                          if (!inst.getInputMissingValues(in2))
                            row[attr] = inst.getInputNominalValues(in2);
                        }
                        in2++;
                      } else {
                        if (direccion == Attribute.OUTPUT) {
                          if (tipo != Attribute.NOMINAL && !inst.getOutputMissingValues(out2)) {
                            row[attr] = new String(String.valueOf(inst.getOutputRealValues(out2)));
                          } else {
                            if (!inst.getOutputMissingValues(out2))
                              row[attr] = inst.getOutputNominalValues(out2);
                          }
                          out2++;
                        }
                      }
                    }
                    // make frecuencies  for each attribute
                    for (int attr = 0; attr < nvariables; attr++) {
                      Attribute b = Attributes.getAttribute(attr);

                      direccion = b.getDirectionAttribute();
                      tipo = b.getType();
                      if (direccion == Attribute.INPUT && inst.getInputMissingValues(attr)) {
                        lastMissing = attr;
                        timesSeen[attr] = new FreqList();
                        for (int m = 0; m < ndatos; m++) {
                          Instance inst2 = IS.getInstance(m);
                          outputs2 = inst2.getAllOutputValues();
                          boolean sameClass = true;
                          // are they same concept instances??
                          for (int k = 0; k < nsalidas && sameClass; k++)
                            if (outputs[k] != outputs2[k]) sameClass = false;
                          if (sameClass) {
                            if (tipo != Attribute.NOMINAL && !inst2.getInputMissingValues(attr)) {
                              timesSeen[attr].AddElement(
                                  new String(String.valueOf(inst2.getInputRealValues(attr))));

                            } else {
                              if (!inst2.getInputMissingValues(attr)) {
                                timesSeen[attr].AddElement(inst2.getInputNominalValues(attr));
                              }
                            }
                          }
                        }
                      }
                    }
                    for (int attr = 0; attr < nvariables; attr++) {
                      if (direccion == Attribute.INPUT && inst.getInputMissingValues(attr)) {
                        timesSeen[attr].reset();
                      }
                    }
                    fin = false;
                    stepNext = false;
                    while (!fin) {
                      in2 = 0;
                      for (int attr = 0; attr < nvariables && !fin; attr++) {
                        Attribute b = Attributes.getAttribute(attr);

                        direccion = b.getDirectionAttribute();
                        tipo = b.getType();
                        if (direccion == Attribute.INPUT && inst.getInputMissingValues(in2)) {
                          if (stepNext) {
                            timesSeen[attr].iterate();
                            stepNext = false;
                          }
                          if (timesSeen[attr].outOfBounds()) {
                            stepNext = true;
                            if (attr == lastMissing) fin = true;
                            timesSeen[attr].reset();
                          }
                          if (!fin)
                            row[attr] =
                                ((ValueFreq) timesSeen[attr].getCurrent())
                                    .getValue(); // replace missing data
                        }
                        in2++;
                      }
                      if (!fin) {
                        stepNext = true;
                        file_write.write(row[0]);
                        for (int y = 1; y < nvariables; y++) {
                          file_write.write("," + row[y]);
                        }
                        file_write.write("\n");
                        // X[i].addElement(row);
                        // row = (String[])row.clone();
                      }
                    }
                  }
                }
                in++;
              } else {
                if (direccion == Attribute.OUTPUT) {
                  if (tipo != Attribute.NOMINAL && !inst.getOutputMissingValues(out)) {
                    row[j] = new String(String.valueOf(inst.getOutputRealValues(out)));
                  } else {
                    if (!inst.getOutputMissingValues(out))
                      row[j] = inst.getOutputNominalValues(out);
                    else row[j] = new String("?");
                  }
                  out++;
                }
              }
            }
            if (!inst.existsAnyMissingValue()) {
              file_write.write(row[0]);
              for (int y = 1; y < nvariables; y++) {
                file_write.write("," + row[y]);
              }
              file_write.write("\n");
            }
          }
        } catch (Exception e) {
          System.out.println("Dataset exception = " + e);
          e.printStackTrace();
          System.exit(-1);
        }
        file_write.close();
      } catch (IOException e) {
        System.out.println("IO exception = " + e);
        e.printStackTrace();
        System.exit(-1);
      }
    }
  }
예제 #19
0
  /** Process the training and test files provided in the parameters file to the constructor. */
  public void process() {
    // declarations
    double[] outputs;
    double[] outputs2;
    Instance neighbor;
    double dist, mean;
    int actual;
    Randomize rnd = new Randomize();
    Instance ex;
    gCenter kmeans = null;
    int iterations = 0;
    double E;
    double prevE;
    int totalMissing = 0;
    boolean allMissing = true;

    rnd.setSeed(semilla);
    // PROCESS
    try {

      // Load in memory a dataset that contains a classification problem
      IS.readSet(input_train_name, true);
      int in = 0;
      int out = 0;

      ndatos = IS.getNumInstances();
      nvariables = Attributes.getNumAttributes();
      nentradas = Attributes.getInputNumAttributes();
      nsalidas = Attributes.getOutputNumAttributes();

      X = new String[ndatos][nvariables]; // matrix with transformed data
      kmeans = new gCenter(K, ndatos, nvariables);

      timesSeen = new FreqList[nvariables];
      mostCommon = new String[nvariables];

      // first, we choose k 'means' randomly from all
      // instances
      totalMissing = 0;
      for (int i = 0; i < ndatos; i++) {
        Instance inst = IS.getInstance(i);
        if (inst.existsAnyMissingValue()) totalMissing++;
      }
      if (totalMissing == ndatos) allMissing = true;
      else allMissing = false;
      for (int numMeans = 0; numMeans < K; numMeans++) {
        do {
          actual = (int) (ndatos * rnd.Rand());
          ex = IS.getInstance(actual);
        } while (ex.existsAnyMissingValue() && !allMissing);

        kmeans.copyCenter(ex, numMeans);
      }

      // now, iterate adjusting clusters' centers and
      // instances to them
      prevE = 0;
      iterations = 0;
      do {
        for (int i = 0; i < ndatos; i++) {
          Instance inst = IS.getInstance(i);

          kmeans.setClusterOf(inst, i);
        }
        // set new centers
        kmeans.recalculateCenters(IS);
        // compute RMSE
        E = 0;
        for (int i = 0; i < ndatos; i++) {
          Instance inst = IS.getInstance(i);

          E += kmeans.distance(inst, kmeans.getClusterOf(i));
        }
        iterations++;
        // System.out.println(iterations+"\t"+E);
        if (Math.abs(prevE - E) == 0) iterations = maxIter;
        else prevE = E;
      } while (E > minError && iterations < maxIter);
      for (int i = 0; i < ndatos; i++) {
        Instance inst = IS.getInstance(i);

        in = 0;
        out = 0;

        for (int j = 0; j < nvariables; j++) {
          Attribute a = Attributes.getAttribute(j);

          direccion = a.getDirectionAttribute();
          tipo = a.getType();

          if (direccion == Attribute.INPUT) {
            if (tipo != Attribute.NOMINAL && !inst.getInputMissingValues(in)) {
              X[i][j] = new String(String.valueOf(inst.getInputRealValues(in)));
            } else {
              if (!inst.getInputMissingValues(in)) X[i][j] = inst.getInputNominalValues(in);
              else {
                actual = kmeans.getClusterOf(i);
                X[i][j] = new String(kmeans.valueAt(actual, j));
              }
            }
            in++;
          } else {
            if (direccion == Attribute.OUTPUT) {
              if (tipo != Attribute.NOMINAL && !inst.getOutputMissingValues(out)) {
                X[i][j] = new String(String.valueOf(inst.getOutputRealValues(out)));
              } else {
                if (!inst.getOutputMissingValues(out)) X[i][j] = inst.getOutputNominalValues(out);
                else {
                  actual = kmeans.getClusterOf(i);
                  X[i][j] = new String(kmeans.valueAt(actual, j));
                }
              }
              out++;
            }
          }
        }
      }
    } catch (Exception e) {
      System.out.println("Dataset exception = " + e);
      e.printStackTrace();
      System.exit(-1);
    }
    write_results(output_train_name);
    /** ************************************************************************************ */
    // does a test file associated exist?
    if (input_train_name.compareTo(input_test_name) != 0) {
      try {

        // Load in memory a dataset that contains a classification problem
        IStest.readSet(input_test_name, false);
        int in = 0;
        int out = 0;

        ndatos = IStest.getNumInstances();
        nvariables = Attributes.getNumAttributes();
        nentradas = Attributes.getInputNumAttributes();
        nsalidas = Attributes.getOutputNumAttributes();

        for (int i = 0; i < ndatos; i++) {
          Instance inst = IStest.getInstance(i);

          in = 0;
          out = 0;

          for (int j = 0; j < nvariables; j++) {
            Attribute a = Attributes.getAttribute(j);

            direccion = a.getDirectionAttribute();
            tipo = a.getType();

            if (direccion == Attribute.INPUT) {
              if (tipo != Attribute.NOMINAL && !inst.getInputMissingValues(in)) {
                X[i][j] = new String(String.valueOf(inst.getInputRealValues(in)));
              } else {
                if (!inst.getInputMissingValues(in)) X[i][j] = inst.getInputNominalValues(in);
                else {
                  actual = kmeans.getClusterOf(i);
                  X[i][j] = new String(kmeans.valueAt(actual, j));
                }
              }
              in++;
            } else {
              if (direccion == Attribute.OUTPUT) {
                if (tipo != Attribute.NOMINAL && !inst.getOutputMissingValues(out)) {
                  X[i][j] = new String(String.valueOf(inst.getOutputRealValues(out)));
                } else {
                  if (!inst.getOutputMissingValues(out)) X[i][j] = inst.getOutputNominalValues(out);
                  else {
                    actual = kmeans.getClusterOf(i);
                    X[i][j] = new String(kmeans.valueAt(actual, j));
                  }
                }
                out++;
              }
            }
          }
        }
      } catch (Exception e) {
        System.out.println("Dataset exception = " + e);
        e.printStackTrace();
        System.exit(-1);
      }
      write_results(output_test_name);
    }
  }
예제 #20
0
  /** Process the training and test files provided in the parameters file to the constructor. */
  public void process() {
    try {

      // Load in memory a dataset that contains a classification problem
      IS.readSet(input_train_name, true);
      int in = 0;
      int out = 0;

      ndatos = IS.getNumInstances();
      nvariables = Attributes.getNumAttributes();
      nentradas = Attributes.getInputNumAttributes();
      nsalidas = Attributes.getOutputNumAttributes();

      X = new String[ndatos][nvariables]; // matrix with transformed data
      boolean[] isMissed =
          new boolean[ndatos]; // vector which points out instances with missed data

      try {
        FileWriter file_write = new FileWriter(output_train_name);

        file_write.write(IS.getHeader());

        // now, print the normalized data
        file_write.write("@data\n");
        // file_write.close();
        PrintWriter pw = new PrintWriter(file_write);
        for (int i = 0; i < ndatos; i++) {
          Instance inst = IS.getInstance(i);
          if (!inst.existsAnyMissingValue()) {
            inst.printAsOriginal(pw);
            // file_write.write(inst.toString()); // DOES NOT WRITE BACK NON-DEF DIRECTION
            // ATTRIBUTES!!!!
            file_write.write("\n");
          }
        }
        pw.close();
        file_write.close();
      } catch (IOException e) {
        System.out.println("IO exception = " + e);
        System.exit(-1);
      }

    } catch (Exception e) {
      System.out.println("Dataset exception = " + e);
      e.printStackTrace();
      System.exit(-1);
    }
    // does a test file associated exist?
    if (input_train_name.compareTo(input_test_name) != 0) {
      try {

        // Load in memory a dataset that contains a classification problem
        IS.readSet(input_test_name, false);
        int in = 0;
        int out = 0;

        ndatos = IS.getNumInstances();
        nvariables = Attributes.getNumAttributes();
        nentradas = Attributes.getInputNumAttributes();
        nsalidas = Attributes.getOutputNumAttributes();

        X = new String[ndatos][nvariables]; // matrix with transformed data
        boolean[] isMissed =
            new boolean[ndatos]; // vector which points out instances with missed data

        try {
          FileWriter file_write = new FileWriter(output_test_name);

          file_write.write(IS.getHeader());

          // now, print the normalized data
          file_write.write("@data\n");
          PrintWriter pw = new PrintWriter(file_write);
          for (int i = 0; i < ndatos; i++) {
            Instance inst = IS.getInstance(i);
            if (!inst.existsAnyMissingValue()) {
              inst.printAsOriginal(pw);
              file_write.write("\n");
            }
          }
          pw.close();
          file_write.close();
        } catch (IOException e) {
          System.out.println("IO exception = " + e);
          System.exit(-1);
        }

      } catch (Exception e) {
        System.out.println("Dataset exception = " + e);
        e.printStackTrace();
        System.exit(-1);
      }
    }

    // write_results(); / since there ins't any data transformation, is not needed
  }