/**
 * Computes the two-class entropy of the examples selected by {@code specifiedAttributes}.
 *
 * <p>Class proportions are taken relative to {@code records.count(specifiedAttributes)} — the
 * same subset size {@code informationGain} uses — so that the positive and negative shares
 * are fractions of the subset actually being measured rather than of the whole data set.
 *
 * @param specifiedAttributes attribute/value constraints passed through to {@code records}
 *     to select the subset of examples
 * @return {@code -nlog2(p) - nlog2(q)} for the positive share {@code p} and negative share
 *     {@code q}; {@code 0} when the subset is empty or contains only one class
 */
private double entropy(Map<String, String> specifiedAttributes) {
    double totalExamples = records.count(specifiedAttributes);
    double positiveExamples = records.countPositive(specifiedAttributes);
    double negativeExamples = records.countNegative(specifiedAttributes);

    // An empty or single-class subset carries no uncertainty. Returning early also avoids
    // 0/0 (NaN) and handing 0 to nlog2, mirroring the guard in the three-argument overload.
    if (positiveExamples == 0 || negativeExamples == 0 || totalExamples == 0) {
        return 0;
    }

    return -nlog2(positiveExamples / totalExamples) - nlog2(negativeExamples / totalExamples);
}
private double entropy( String attribute, String decision, Map<String, String> specifiedAttributes) { double totalExamples = records.count(attribute, decision, specifiedAttributes); double positiveExamples = records.countPositive(attribute, decision, specifiedAttributes); double negativeExamples = records.countNegative(attribute, decision, specifiedAttributes); // logger.info("positiveExamples is --> {}.", positiveExamples); // logger.info("negativeExamples is --> {}.", negativeExamples); // logger.info("totalExamples is --> {}.", totalExamples); if (positiveExamples == 0 || negativeExamples == 0 || totalExamples == 0) return 0; return -nlog2(positiveExamples / totalExamples) - nlog2(negativeExamples / totalExamples); }
private double informationGain(String attribute, Map<String, String> specifiedAttributes) { double sum = entropy(specifiedAttributes); double examplesCount = records.count(specifiedAttributes); if (examplesCount == 0) return sum; Map<String, Set<String>> decisions = records.extractDecisions(); for (String decision : decisions.get(attribute)) { double entropyPart = entropy(attribute, decision, specifiedAttributes); // logger.info("entropyPart is --> {}.", entropyPart); double decisionCount = records.countDecisions(attribute, decision); sum += -(decisionCount / examplesCount) * entropyPart; } return sum; }