Beispiel #1
0
  /**
   * Computes the distance between two instances (without previous normalization)
   *
   * @param i First instance
   * @param j Second instance
   * @return The Euclidean distance between i and j
   */
  private double distance(Instance i, Instance j) {
    double dist = 0;
    int in = 0;
    int out = 0;

    for (int l = 0; l < nvariables; l++) {
      Attribute a = Attributes.getAttribute(l);

      direccion = a.getDirectionAttribute();
      tipo = a.getType();

      if (direccion == Attribute.INPUT) {
        if (tipo != Attribute.NOMINAL && !i.getInputMissingValues(in)) {
          // real value, apply euclidean distance
          dist +=
              (i.getInputRealValues(in) - j.getInputRealValues(in))
                  * (i.getInputRealValues(in) - j.getInputRealValues(in));
        } else {
          if (!i.getInputMissingValues(in)
              && i.getInputNominalValues(in) != j.getInputNominalValues(in)) dist += 1;
        }
        in++;
      } else {
        if (direccion == Attribute.OUTPUT) {
          if (tipo != Attribute.NOMINAL && !i.getOutputMissingValues(out)) {
            dist +=
                (i.getOutputRealValues(out) - j.getOutputRealValues(out))
                    * (i.getOutputRealValues(out) - j.getOutputRealValues(out));
          } else {
            if (!i.getOutputMissingValues(out)
                && i.getOutputNominalValues(out) != j.getOutputNominalValues(out)) dist += 1;
          }
          out++;
        }
      }
    }
    return dist;
  }
Beispiel #2
0
  /** Process the training and test files provided in the parameters file to the constructor. */
  public void process() {
    // declarations
    double[] outputs;
    double[] outputs2;
    Instance neighbor;
    double dist, mean;
    int actual;
    Randomize rnd = new Randomize();
    Instance ex;
    gCenter kmeans = null;
    int iterations = 0;
    double E;
    double prevE;
    int totalMissing = 0;
    boolean allMissing = true;

    rnd.setSeed(semilla);
    // PROCESS
    try {

      // Load in memory a dataset that contains a classification problem
      IS.readSet(input_train_name, true);
      int in = 0;
      int out = 0;

      ndatos = IS.getNumInstances();
      nvariables = Attributes.getNumAttributes();
      nentradas = Attributes.getInputNumAttributes();
      nsalidas = Attributes.getOutputNumAttributes();

      X = new String[ndatos][nvariables]; // matrix with transformed data
      kmeans = new gCenter(K, ndatos, nvariables);

      timesSeen = new FreqList[nvariables];
      mostCommon = new String[nvariables];

      // first, we choose k 'means' randomly from all
      // instances
      totalMissing = 0;
      for (int i = 0; i < ndatos; i++) {
        Instance inst = IS.getInstance(i);
        if (inst.existsAnyMissingValue()) totalMissing++;
      }
      if (totalMissing == ndatos) allMissing = true;
      else allMissing = false;
      for (int numMeans = 0; numMeans < K; numMeans++) {
        do {
          actual = (int) (ndatos * rnd.Rand());
          ex = IS.getInstance(actual);
        } while (ex.existsAnyMissingValue() && !allMissing);

        kmeans.copyCenter(ex, numMeans);
      }

      // now, iterate adjusting clusters' centers and
      // instances to them
      prevE = 0;
      iterations = 0;
      do {
        for (int i = 0; i < ndatos; i++) {
          Instance inst = IS.getInstance(i);

          kmeans.setClusterOf(inst, i);
        }
        // set new centers
        kmeans.recalculateCenters(IS);
        // compute RMSE
        E = 0;
        for (int i = 0; i < ndatos; i++) {
          Instance inst = IS.getInstance(i);

          E += kmeans.distance(inst, kmeans.getClusterOf(i));
        }
        iterations++;
        // System.out.println(iterations+"\t"+E);
        if (Math.abs(prevE - E) == 0) iterations = maxIter;
        else prevE = E;
      } while (E > minError && iterations < maxIter);
      for (int i = 0; i < ndatos; i++) {
        Instance inst = IS.getInstance(i);

        in = 0;
        out = 0;

        for (int j = 0; j < nvariables; j++) {
          Attribute a = Attributes.getAttribute(j);

          direccion = a.getDirectionAttribute();
          tipo = a.getType();

          if (direccion == Attribute.INPUT) {
            if (tipo != Attribute.NOMINAL && !inst.getInputMissingValues(in)) {
              X[i][j] = new String(String.valueOf(inst.getInputRealValues(in)));
            } else {
              if (!inst.getInputMissingValues(in)) X[i][j] = inst.getInputNominalValues(in);
              else {
                actual = kmeans.getClusterOf(i);
                X[i][j] = new String(kmeans.valueAt(actual, j));
              }
            }
            in++;
          } else {
            if (direccion == Attribute.OUTPUT) {
              if (tipo != Attribute.NOMINAL && !inst.getOutputMissingValues(out)) {
                X[i][j] = new String(String.valueOf(inst.getOutputRealValues(out)));
              } else {
                if (!inst.getOutputMissingValues(out)) X[i][j] = inst.getOutputNominalValues(out);
                else {
                  actual = kmeans.getClusterOf(i);
                  X[i][j] = new String(kmeans.valueAt(actual, j));
                }
              }
              out++;
            }
          }
        }
      }
    } catch (Exception e) {
      System.out.println("Dataset exception = " + e);
      e.printStackTrace();
      System.exit(-1);
    }
    write_results(output_train_name);
    /** ************************************************************************************ */
    // does a test file associated exist?
    if (input_train_name.compareTo(input_test_name) != 0) {
      try {

        // Load in memory a dataset that contains a classification problem
        IStest.readSet(input_test_name, false);
        int in = 0;
        int out = 0;

        ndatos = IStest.getNumInstances();
        nvariables = Attributes.getNumAttributes();
        nentradas = Attributes.getInputNumAttributes();
        nsalidas = Attributes.getOutputNumAttributes();

        for (int i = 0; i < ndatos; i++) {
          Instance inst = IStest.getInstance(i);

          in = 0;
          out = 0;

          for (int j = 0; j < nvariables; j++) {
            Attribute a = Attributes.getAttribute(j);

            direccion = a.getDirectionAttribute();
            tipo = a.getType();

            if (direccion == Attribute.INPUT) {
              if (tipo != Attribute.NOMINAL && !inst.getInputMissingValues(in)) {
                X[i][j] = new String(String.valueOf(inst.getInputRealValues(in)));
              } else {
                if (!inst.getInputMissingValues(in)) X[i][j] = inst.getInputNominalValues(in);
                else {
                  actual = kmeans.getClusterOf(i);
                  X[i][j] = new String(kmeans.valueAt(actual, j));
                }
              }
              in++;
            } else {
              if (direccion == Attribute.OUTPUT) {
                if (tipo != Attribute.NOMINAL && !inst.getOutputMissingValues(out)) {
                  X[i][j] = new String(String.valueOf(inst.getOutputRealValues(out)));
                } else {
                  if (!inst.getOutputMissingValues(out)) X[i][j] = inst.getOutputNominalValues(out);
                  else {
                    actual = kmeans.getClusterOf(i);
                    X[i][j] = new String(kmeans.valueAt(actual, j));
                  }
                }
                out++;
              }
            }
          }
        }
      } catch (Exception e) {
        System.out.println("Dataset exception = " + e);
        e.printStackTrace();
        System.exit(-1);
      }
      write_results(output_test_name);
    }
  }