private Distribution<Integer> getSegmentedWordLengthDistribution(Treebank tb) { // CharacterLevelTagExtender ext = new CharacterLevelTagExtender(); ClassicCounter<Integer> c = new ClassicCounter<Integer>(); for (Iterator iterator = tb.iterator(); iterator.hasNext(); ) { Tree gold = (Tree) iterator.next(); StringBuilder goldChars = new StringBuilder(); ArrayList goldYield = gold.yield(); for (Iterator wordIter = goldYield.iterator(); wordIter.hasNext(); ) { Word word = (Word) wordIter.next(); goldChars.append(word); } List<HasWord> ourWords = segment(goldChars.toString()); for (int i = 0; i < ourWords.size(); i++) { c.incrementCount(Integer.valueOf(ourWords.get(i).word().length())); } } return Distribution.getDistribution(c); }