Пример #1
0
 /**
  * Adds up the {@code Integer} elements stored in a vector.
  *
  * @param v vector whose elements are all {@code Integer} instances
  * @return the sum of the elements' int values; 0 when the vector is empty
  */
 int sumValues(Vector v) {
   int total = 0;
   for (Object element : v) {
     total += ((Integer) element).intValue();
   }
   return total;
 }
Пример #2
0
  /**
   * Recursively derives cut points for one attribute over the positions
   * {@code values[begin..end]} (both bounds inclusive).
   *
   * <p>The method stops (returns an empty vector) when the range contains at most one class,
   * when there is no candidate cut position, or when the best candidate's partition entropy is
   * not strictly lower than the entropy of the whole range. Otherwise it splits at the
   * candidate position with the lowest partition entropy, recurses on both halves, and returns
   * the left half's cut points, then the new cut point, then the right half's cut points.
   *
   * <p>NOTE(review): the result is only in ascending order if {@code values} is ordered by the
   * attribute's real value — presumably it is; confirm with the caller.
   *
   * @param attribute index of the attribute (first index into {@code realValues})
   * @param values indices used to look up {@code realValues[attribute][...]}
   * @param begin first position of the range, inclusive
   * @param end last position of the range, inclusive
   * @return a new vector of {@code Double} cut points; empty when no split is made
   */
  protected Vector discretizeAttribute(int attribute, int[] values, int begin, int end) {
    Vector cd = classDistribution(attribute, values, begin, end);
    // Zero classes (empty range) or a single class: there is nothing to separate.
    if (cd.size() <= 1) {
      return new Vector();
    }
    int numValues = sumValues(cd);
    double entAll = computeEntropy(cd, numValues);

    Vector candidateCutPoints = getCandidateCutPoints(attribute, values, begin, end);
    if (candidateCutPoints.size() == 0) {
      return new Vector();
    }

    // Pick the candidate position with the lowest partition entropy (first one wins ties).
    int posMin = ((Integer) candidateCutPoints.elementAt(0)).intValue();
    double entMin = computePartitionEntropy(attribute, values, begin, posMin, end);
    for (int i = 1, size = candidateCutPoints.size(); i < size; i++) {
      int pos = ((Integer) candidateCutPoints.elementAt(i)).intValue();
      double ent = computePartitionEntropy(attribute, values, begin, pos, end);
      if (ent < entMin) {
        entMin = ent;
        posMin = pos;
      }
    }

    // Split only when it strictly lowers the entropy of the range.
    if (entMin < entAll) {
      Vector res1 = discretizeAttribute(attribute, values, begin, posMin - 1);
      // The cut point is the midpoint between the real values on either side of the split.
      double cutPoint =
          (realValues[attribute][values[posMin - 1]] + realValues[attribute][values[posMin]]) / 2.0;
      res1.addElement(Double.valueOf(cutPoint));
      Vector res2 = discretizeAttribute(attribute, values, posMin, end);
      res1.addAll(res2);
      return res1;
    }
    return new Vector();
  }
Пример #3
0
  /**
   * Computes the base-2 entropy of a distribution given as counts.
   *
   * <p>Each element of {@code v} is divided by {@code numValues} to obtain a probability
   * {@code p}, and the result is {@code -sum(p * log2(p))}. Zero counts are skipped, following
   * the convention {@code 0 * log(0) = 0}; without that, a single zero count would turn the
   * whole result into NaN.
   *
   * @param v vector of {@code Integer} counts
   * @param numValues divisor applied to every count ({@code discretizeAttribute} passes the sum
   *     of the counts)
   * @return the entropy in bits; zero for an empty vector
   */
  double computeEntropy(Vector v, int numValues) {
    final double log2 = Math.log(2);
    double ent = 0;

    for (int i = 0, size = v.size(); i < size; i++) {
      int count = ((Integer) v.elementAt(i)).intValue();
      if (count == 0) {
        // Math.log(0) is -Infinity and 0 * -Infinity is NaN; a zero count contributes nothing.
        continue;
      }
      double prob = count / (double) numValues;
      ent += prob * Math.log(prob) / log2;
    }
    return -ent;
  }
Пример #4
0
  /**
   * Collects the positions in {@code values[begin..end]} at which the attribute's real value
   * differs from the real value at the preceding position.
   *
   * @param attribute index of the attribute (first index into {@code realValues})
   * @param values indices used to look up {@code realValues[attribute][...]}
   * @param begin first position of the range, inclusive
   * @param end last position of the range, inclusive
   * @return a new vector of {@code Integer} positions {@code i}, with {@code begin < i <= end},
   *     where the real value at {@code i} differs from the one at {@code i - 1}; empty when
   *     all values in the range are equal
   */
  Vector getCandidateCutPoints(int attribute, int[] values, int begin, int end) {
    Vector cutPoints = new Vector();
    double valueAnt = realValues[attribute][values[begin]];

    // Position begin has no predecessor inside the range, so it can never be a cut; scanning
    // starts at begin + 1. This also keeps a NaN at begin from being reported as a cut.
    for (int i = begin + 1; i <= end; i++) {
      double val = realValues[attribute][values[i]];
      if (val != valueAnt) {
        cutPoints.addElement(Integer.valueOf(i));
      }
      valueAnt = val;
    }
    return cutPoints;
  }
Пример #5
0
  /**
   * Counts, per class, how many positions in {@code values[begin..end]} belong to it and
   * returns the non-zero counts.
   *
   * @param attribute not used by this method
   * @param values indices into {@code classOfInstances}
   * @param begin first position of the range, inclusive
   * @param end last position of the range, inclusive
   * @return a new vector of {@code Integer} counts, one per class that occurs at least once in
   *     the range, in ascending class-index order; classes with no occurrences are omitted, so
   *     the vector's size is the number of distinct classes present
   */
  Vector classDistribution(int attribute, int[] values, int begin, int end) {
    // A freshly allocated int[] is already zero-filled, so no explicit reset is needed.
    int[] classCount = new int[Parameters.numClasses];

    for (int i = begin; i <= end; i++) {
      classCount[classOfInstances[values[i]]]++;
    }

    Vector res = new Vector();
    for (int i = 0; i < classCount.length; i++) {
      if (classCount[i] > 0) {
        res.addElement(Integer.valueOf(classCount[i]));
      }
    }

    return res;
  }