예제 #1
0
  /**
   * Recursively searches for cut points of one attribute over the slice
   * {@code values[begin..end]}.
   *
   * <p>The slice is split at the candidate position whose partition entropy
   * (as reported by {@code computePartitionEntropy}) is lowest; on ties the
   * earliest candidate wins. The split is accepted only if that entropy is
   * strictly lower than the entropy of the class distribution of the whole
   * slice. When accepted, the method recurses on {@code begin..posMin - 1}
   * and on {@code posMin..end}.
   *
   * <p>NOTE(review): the recursion bounds suggest {@code begin} and
   * {@code end} are inclusive and that {@code values} holds indices into
   * {@code realValues[attribute]} ordered by attribute value - confirm
   * against the callers.
   *
   * @param attribute index of the attribute being discretized
   * @param values    indices used to look up {@code realValues[attribute]}
   * @param begin     first position of the slice
   * @param end       last position of the slice
   * @return a vector of {@code Double} cut points: those of the left part,
   *         then the cut chosen here, then those of the right part; empty
   *         when the slice has a single class, has no candidate cut point,
   *         or no candidate lowers the entropy
   */
  protected Vector discretizeAttribute(int attribute, int[] values, int begin, int end) {
    Vector cd = classDistribution(attribute, values, begin, end);
    if (cd.size() == 1) {
      // Only one class in the slice: nothing to separate.
      return new Vector();
    }
    int numValues = sumValues(cd);
    double entAll = computeEntropy(cd, numValues);

    Vector candidateCutPoints = getCandidateCutPoints(attribute, values, begin, end);
    if (candidateCutPoints.size() == 0) {
      return new Vector();
    }

    // Pick the candidate with the lowest partition entropy (first one wins ties).
    int posMin = ((Integer) candidateCutPoints.elementAt(0)).intValue();
    double entMin = computePartitionEntropy(attribute, values, begin, posMin, end);
    for (int i = 1, size = candidateCutPoints.size(); i < size; i++) {
      int pos = ((Integer) candidateCutPoints.elementAt(i)).intValue();
      double ent = computePartitionEntropy(attribute, values, begin, pos, end);
      if (ent < entMin) {
        entMin = ent;
        posMin = pos;
      }
    }

    if (entMin < entAll) {
      Vector res1 = discretizeAttribute(attribute, values, begin, posMin - 1);
      // The cut is placed halfway between the two values adjacent to the split.
      double cutPoint =
          (realValues[attribute][values[posMin - 1]] + realValues[attribute][values[posMin]]) / 2.0;
      // Double.valueOf replaces the deprecated Double(double) constructor.
      res1.addElement(Double.valueOf(cutPoint));
      Vector res2 = discretizeAttribute(attribute, values, posMin, end);
      res1.addAll(res2);
      return res1;
    }
    return new Vector();
  }
예제 #2
0
 /**
  * Adds up all elements of the given vector.
  *
  * @param v vector whose elements are all {@code Integer} instances
  * @return the total of the elements, or 0 for an empty vector
  */
 int sumValues(Vector v) {
   int total = 0;
   for (Object element : v) {
     total += ((Integer) element).intValue();
   }
   return total;
 }
예제 #3
0
  /**
   * Computes the base-2 entropy of a vector of counts.
   *
   * <p>Each count is divided by {@code numValues} to obtain a proportion
   * {@code p}, and the result is the sum of {@code -p * log2(p)} over all
   * entries. Entries with a count of zero contribute nothing (the usual
   * {@code 0 * log 0 = 0} convention); evaluating them directly would give
   * {@code 0 * -Infinity = NaN} and make the whole result NaN.
   *
   * @param v         vector of {@code Integer} counts
   * @param numValues divisor used to turn each count into a proportion
   * @return the entropy in bits; 0 when {@code v} is empty or has a single
   *         non-zero entry equal to {@code numValues}
   */
  double computeEntropy(Vector v, int numValues) {
    final double log2 = Math.log(2); // loop-invariant, computed once
    double ent = 0;

    for (int i = 0, size = v.size(); i < size; i++) {
      int count = ((Integer) v.elementAt(i)).intValue();
      if (count == 0) {
        // Math.log(0) is -Infinity and 0 * -Infinity is NaN; skip the term.
        continue;
      }
      double prob = count / (double) numValues;
      ent -= prob * Math.log(prob) / log2;
    }
    return ent;
  }