/**
 * Recursively splits the index range [begin, end] of {@code values} for one attribute
 * and collects the resulting cut points.
 *
 * <p>Every candidate position reported by {@code getCandidateCutPoints} is scored with
 * {@code computePartitionEntropy}; the lowest-scoring position is kept (the earliest one
 * wins ties). If that score is strictly below the entropy of the class distribution of
 * the whole range, the range is split at that position and both halves are processed
 * recursively.
 *
 * @param attribute index of the attribute, used to look up {@code realValues[attribute]}
 * @param values    indices into {@code realValues[attribute]}; presumably ordered by
 *                  attribute value (confirm against the caller)
 * @param begin     first position of the range in {@code values}
 * @param end       last position of the range in {@code values}
 * @return a Vector of {@code Double} cut points: those of the left part, then the
 *         midpoint chosen for this range, then those of the right part; an empty Vector
 *         when the class distribution has exactly one entry, when there are no
 *         candidates, or when no candidate scores below the entropy of the whole range
 */
protected Vector discretizeAttribute(int attribute, int[] values, int begin, int end) {
    Vector distribution = classDistribution(attribute, values, begin, end);
    if (distribution.size() == 1) {
        // The class distribution has a single entry: nothing to separate.
        return new Vector();
    }

    int total = sumValues(distribution);
    double rangeEntropy = computeEntropy(distribution, total);

    Vector candidates = getCandidateCutPoints(attribute, values, begin, end);
    if (candidates.size() == 0) {
        return new Vector();
    }

    // Scan all candidates, remembering the first one with the lowest partition entropy.
    int bestPos = 0;
    double bestEntropy = 0.0;
    int candidateCount = candidates.size();
    for (int k = 0; k < candidateCount; k++) {
        int candidatePos = ((Integer) candidates.elementAt(k)).intValue();
        double candidateEntropy =
                computePartitionEntropy(attribute, values, begin, candidatePos, end);
        if (k == 0 || candidateEntropy < bestEntropy) {
            bestEntropy = candidateEntropy;
            bestPos = candidatePos;
        }
    }

    // Written as a negated '<' so that a NaN entropy still ends the recursion.
    if (!(bestEntropy < rangeEntropy)) {
        return new Vector();
    }

    Vector cuts = discretizeAttribute(attribute, values, begin, bestPos - 1);

    // The cut point lies halfway between the two real values adjacent to the split.
    double below = realValues[attribute][values[bestPos - 1]];
    double above = realValues[attribute][values[bestPos]];
    cuts.addElement(new Double((below + above) / 2.0));

    Vector rightCuts = discretizeAttribute(attribute, values, bestPos, end);
    cuts.addAll(rightCuts);
    return cuts;
}
/**
 * Adds up all entries of a Vector of {@code Integer} objects.
 *
 * @param v Vector whose elements are all {@code Integer} instances
 * @return the sum of the elements, or 0 for an empty Vector
 */
int sumValues(Vector v) {
    int total = 0;
    int count = v.size();
    int idx = 0;
    while (idx < count) {
        Integer boxed = (Integer) v.get(idx);
        total += boxed.intValue();
        idx++;
    }
    return total;
}
/**
 * Computes the base-2 entropy of a distribution given as counts.
 *
 * <p>Each count is divided by {@code numValues} to obtain a probability p, and the
 * result is the negated sum of p * log2(p). Zero counts are skipped: by the usual
 * convention 0 * log(0) contributes nothing, whereas evaluating it literally yields
 * {@code 0 * -Infinity = NaN} and would turn the whole result into NaN.
 *
 * @param v         Vector of {@code Integer} counts
 * @param numValues total used as the denominator for every count (the callers visible
 *                  here pass the sum of {@code v})
 * @return the entropy in bits; zero for an empty Vector or a single non-zero count
 */
double computeEntropy(Vector v, int numValues) {
    final double log2 = Math.log(2);
    double ent = 0;
    for (int i = 0, size = v.size(); i < size; i++) {
        int count = ((Integer) v.elementAt(i)).intValue();
        if (count == 0) {
            // An empty class adds nothing to the entropy; skipping it avoids NaN.
            continue;
        }
        double prob = count;
        prob /= (double) numValues;
        ent += prob * Math.log(prob) / log2;
    }
    return -ent;
}