/**
 * Prepares the root node of the shared {@code DecisionTree.INSTANCE} from the given records.
 *
 * <p>Every record is handed to the root's sample distribution, then the root is asked to
 * compute its entropy, and finally its set of potential classifier attributes is seeded
 * with a fresh {@link HashSet} copy of {@code Data.INSTANCE.getDeterminingAttributes()}.
 *
 * @param data the records to feed into the root's sample distribution
 * @return the root node obtained from {@code DecisionTree.INSTANCE}
 */
public Node initRoot(List<Record> data) {
    final Node rootNode = DecisionTree.INSTANCE.getRoot();
    final SampleDistribute distribution = rootNode.getSampleDistribute();

    // Accumulate every record into the root's distribution before entropy is computed.
    data.forEach(record -> distribution.acceptOneRecord(record));
    rootNode.calculteEntropy();

    // The root gets its own copy of the determining attributes.
    rootNode.setPotentialClassifierAttrs(
            new HashSet<>(Data.INSTANCE.getDeterminingAttributes()));
    return rootNode;
}
/**
 * Computes the weighted sum of the children's entropies.
 *
 * <p>Each child contributes {@code (child.getTotal() / parent.getTotal()) * child.getEntropy()},
 * with the division done in {@code double} arithmetic. An empty collection yields {@code 0.0}.
 *
 * @param parent   the node whose total is used as the denominator of every weight
 * @param children the nodes whose entropies are combined
 * @return the sum of the children's entropies, each weighted by its share of the parent's total
 */
private double calcEntropy(Node parent, Collection<Node> children) {
    final int parentTotal = parent.getTotal();
    double weightedSum = 0.0;
    for (Node child : children) {
        double weight = (double) child.getTotal() / (double) parentTotal;
        weightedSum = weightedSum + weight * child.getEntropy();
    }
    return weightedSum;
}
/**
 * Selects, among the node's potential classifier attributes, the one whose generated
 * children have the lowest weighted entropy (as computed by {@code calcEntropy}), and
 * records it on the node via {@code setBestClassifierAttribute}.
 *
 * <p>Each candidate is evaluated through {@code generateChildren}, which also stores the
 * candidate's children on the node. Once the winner is known, its children are attached
 * again so that the node's children match the recorded best attribute rather than the
 * attribute that happened to be evaluated last.
 *
 * @param node the node to split
 * @return the children generated for the selected attribute; {@code null} if the node has
 *         no potential classifier attributes, or if no candidate produced an entropy
 *         strictly below {@link Double#MAX_VALUE} (in which case the best attribute is
 *         set to the empty string)
 */
private Collection<DecisionTree.Node> getChildrenForBestClassifierAttribute(Node node) {
    // Iterate over a private copy of the candidate attributes, not the node's own set.
    Set<String> candidates = new HashSet<>(node.getPotentialClassifierAttrs());
    if (candidates.isEmpty()) {
        return null;
    }

    Collection<DecisionTree.Node> bestChildren = null;
    String bestAttribute = "";
    double lowestEntropy = Double.MAX_VALUE;
    for (String attribute : candidates) {
        // Side effect: generateChildren overwrites node's children with this candidate's.
        Collection<DecisionTree.Node> children = generateChildren(node, attribute);
        double entropy = calcEntropy(node, children);
        if (entropy < lowestEntropy) {
            lowestEntropy = entropy;
            bestAttribute = attribute;
            bestChildren = children;
        }
    }

    node.setBestClassifierAttribute(bestAttribute);
    if (bestChildren != null) {
        // Undo the last candidate's overwrite: keep the children of the chosen attribute.
        node.setChildren(new ArrayList<>(bestChildren));
    }
    return bestChildren;
}
/**
 * Asks the node to generate its children for the given attribute and stores a copy of
 * them on the node.
 *
 * <p>The node receives a new {@link ArrayList} holding the generated children, while the
 * caller gets back the collection returned by {@code node.generateChildren(attribute)}.
 *
 * @param node      the node to generate children for; its children are replaced
 * @param attribute the attribute passed to {@code node.generateChildren}
 * @return the collection produced by {@code node.generateChildren(attribute)}
 */
public Collection<Node> generateChildren(Node node, String attribute) {
    final Collection<Node> generated = node.generateChildren(attribute);
    // Typed as ArrayList so setChildren receives the same argument type as before.
    final ArrayList<Node> childrenCopy = new ArrayList<>(generated);
    node.setChildren(childrenCopy);
    return generated;
}