// this procedure selects a leaf and a word and then splits the leaf and // creates two new leaves private void improve() { Node bestLeaf = null; // getRandomLeaf(); String bestWord = null; // Main.dict.getRandomWord(); double bestIG = 0; Node currentLeaf; String currentWord; ArrayList<String> words; ArrayList<Message> msgs; for (int i = 0; i < leaves.size(); i++) { currentLeaf = leaves.get(i); msgs = currentLeaf.getMessages(); for (int j = 0; j < msgs.size(); j++) { words = msgs.get(j).getWords(); for (int k = 0; k < words.size(); k++) { currentWord = words.get(k); double currentIG = IG(currentWord, currentLeaf); if (currentIG >= bestIG) { bestIG = currentIG; bestWord = currentWord; bestLeaf = currentLeaf; } } } } // System.out.println("Chose: "+ bestWord+" with IG: "+ bestIG); split(bestLeaf, bestWord); if (tenFirstWords.size() < 10) { tenFirstWords.add(bestWord); } // improvement leaves.remove(bestLeaf); leaves.add(bestLeaf.getLeft()); leaves.add(bestLeaf.getRight()); }
/**
 * Counts the messages held by a node that carry a given classification.
 *
 * @param L the node whose messages are inspected
 * @param i the classification value to match against
 *          {@code getClassification()}
 * @return the number of matching messages, widened to {@code double}
 */
private double N(Node L, int i) {
    int matches = 0;
    for (Message message : L.getMessages()) {
        if (message.getClassification() == i) {
            matches++;
        }
    }
    return matches;
}
private void split(Node leaf, String word) { leaf.setIsInnerNode(word); Node newLeafLeft; // contains word Node newLeafRight; // don't contain word ArrayList<Message> messagesWithWord = new ArrayList<Message>(); ArrayList<Message> messagesWithOutWord = new ArrayList<Message>(); ArrayList<Message> messages = leaf.getMessages(); Message msg; for (int i = 0; i < messages.size(); i++) { msg = messages.get(i); if (msg.contains(word)) { messagesWithWord.add(msg); } else { messagesWithOutWord.add(msg); } } newLeafLeft = new Node(messagesWithWord); newLeafRight = new Node(messagesWithOutWord); leaf.setLeft(newLeafLeft); leaf.setRight(newLeafRight); }
/**
 * Returns the total number of messages held by a node.
 *
 * @param L the node whose messages are counted
 * @return the size of {@code L.getMessages()}, widened to {@code double}
 */
private double N(Node L) {
    int messageCount = L.getMessages().size();
    return messageCount;
}