/**
 * Converts a tree into the {@link Constituent}s used for PARSEVAL evaluation.
 *
 * <p>The tree is first run through {@code collinizer}; the constituents are then read off the
 * transformed tree. Properties of this PARSEVAL variant:
 *
 * <ul>
 *   <li>Spans are measured in characters (via {@code leftCharEdge}/{@code rightCharEdge}), so
 *       the result can be used to evaluate combined segmentation and parsing.
 *   <li>Leaves and preterminals never produce a constituent, and neither does any non-root node
 *       for which {@code parent(root)} returns {@code null}.
 *   <li>{@code labelConstituents} selects labeled ({@link LabeledConstituent}) or unlabeled
 *       ({@link SimpleConstituent}) bracketings.
 * </ul>
 *
 * <p>(Original author's caveat: this has not been rigorously checked against the PARSEVAL
 * definition -- Roger.)
 *
 * @param t the tree to evaluate
 * @param collinizer transformer applied to {@code t} before constituents are extracted
 * @param labelConstituents {@code true} for labeled constituents, {@code false} for unlabeled
 * @return the extracted constituents; empty if the transformer returns {@code null}
 */
public static Collection<Constituent> parsevalObjectify(
    Tree t, TreeTransformer collinizer, boolean labelConstituents) {
  Collection<Constituent> constituents = new ArrayList<Constituent>();

  Tree transformed = collinizer.transformTree(t);
  if (transformed == null) {
    return constituents;
  }

  for (Tree subtree : transformed) {
    // Lexical material (words and their tags) is not bracketed.
    if (subtree.isLeaf()) {
      continue;
    }
    if (subtree.isPreTerminal()) {
      continue;
    }
    // Skip any non-root node that reports no parent within the transformed tree.
    if (subtree != transformed && subtree.parent(transformed) == null) {
      continue;
    }

    int start = transformed.leftCharEdge(subtree);
    int finish = transformed.rightCharEdge(subtree);
    Constituent bracket =
        labelConstituents
            ? new LabeledConstituent(start, finish, subtree.label())
            : new SimpleConstituent(start, finish);
    constituents.add(bracket);
  }
  return constituents;
}
protected static void updateTreeLabels( Tree root, Tree tree, MutableInteger offset, MutableInteger leafIndex) { if (tree.isLeaf()) { leafIndex.value++; return; } String labelValue = tree.label().value().toUpperCase(); int begin = root.leftCharEdge(tree); int end = root.rightCharEdge(tree); // System.out.println(labelValue+"("+begin+","+end+")"); int length = end - begin; // apply offset to begin extent begin += offset.value; // calculate offset delta based on label if (double_quote_lable_pattern.matcher(labelValue).matches() && length > 1) { offset.value--; log.debug("Quotes label pattern fired: " + offset); } else if (bracket_label_pattern.matcher(labelValue).matches()) { offset.value -= 4; log.debug("Bracket label pattern fired: " + offset); } else if (tree.isPreTerminal()) { Tree leaf = tree.firstChild(); String text = leaf.label().value(); Matcher matcher = escaped_char_pattern.matcher(text); while (matcher.find()) { offset.value--; } } for (Tree child : tree.children()) updateTreeLabels(root, child, offset, leafIndex); // apply offset to end extent end += offset.value; // set begin and end offsets on node MapLabel label = new MapLabel(tree.label()); label.put(BEGIN_KEY, begin); label.put(END_KEY, end); label.put(MapLabel.INDEX_KEY, leafIndex.value); tree.setLabel(label); }