コード例 #1
0
 /**
  * Builds the collection of bracketings used for PARSEVAL scoring from a single tree.
  *
  * <p>The tree is first passed through {@code collinizer}; if that yields {@code null}, an empty
  * collection is returned. Otherwise every node of the transformed tree is visited and one
  * {@link Constituent} is recorded for each node that is neither a leaf nor a preterminal. Nodes
  * other than the transformed root for which {@code parent(root)} returns {@code null} are skipped
  * as well.
  *
  * <p>Properties of the result:
  *
  * <ul>
  *   <li>Span boundaries are character edges ({@code leftCharEdge} / {@code rightCharEdge}) rather
  *       than token positions, so the output can be used when evaluating combined segmentation
  *       and parsing.
  *   <li>When {@code labelConstituents} is set, each span is a {@link LabeledConstituent} carrying
  *       the node's label; otherwise it is an unlabeled {@link SimpleConstituent}.
  * </ul>
  *
  * <p>Caveat carried over from the original author (Roger): this has not yet been rigorously
  * checked against the PARSEVAL definition.
  *
  * @param t the tree to extract bracketings from
  * @param collinizer transformer applied to {@code t} before any spans are collected
  * @param labelConstituents {@code true} for labeled bracketings, {@code false} for unlabeled ones
  * @return the collected constituents in node-iteration order; never {@code null}
  */
 public static Collection<Constituent> parsevalObjectify(
     Tree t, TreeTransformer collinizer, boolean labelConstituents) {
   Collection<Constituent> constituents = new ArrayList<Constituent>();
   Tree transformed = collinizer.transformTree(t);
   if (transformed != null) {
     for (Tree subtree : transformed) {
       // Only phrasal nodes produce a bracketing.
       if (subtree.isLeaf() || subtree.isPreTerminal()) {
         continue;
       }
       // Skip any non-root node that has no parent within the transformed tree.
       if (subtree != transformed && subtree.parent(transformed) == null) {
         continue;
       }
       int start = transformed.leftCharEdge(subtree);
       int stop = transformed.rightCharEdge(subtree);
       Constituent bracket;
       if (labelConstituents) {
         bracket = new LabeledConstituent(start, stop, subtree.label());
       } else {
         bracket = new SimpleConstituent(start, stop);
       }
       constituents.add(bracket);
     }
   }
   return constituents;
 }
  /**
   * Recursively replaces the label of every non-leaf node under {@code tree} with a
   * {@link MapLabel} that records offset-corrected character extents and a leaf index.
   *
   * <p>For a leaf, the method only increments {@code leafIndex} and returns. For any other node:
   *
   * <ol>
   *   <li>The raw extents are read from {@code root.leftCharEdge(tree)} and
   *       {@code root.rightCharEdge(tree)}.
   *   <li>The begin extent is shifted by the value of {@code offset} as it stands on entry.
   *   <li>{@code offset} is then decremented: by 1 if the upper-cased label matches
   *       {@code double_quote_lable_pattern} and the raw span is longer than one character; by 4
   *       if it matches {@code bracket_label_pattern}; otherwise, for a preterminal, by 1 for each
   *       match of {@code escaped_char_pattern} in the text of its first child.
   *   <li>All children are processed recursively, sharing the same counters.
   *   <li>The end extent is shifted by the value of {@code offset} after the children have run.
   * </ol>
   *
   * <p>NOTE(review): the adjustments presumably map character positions in escaped/normalized
   * tree text back to the original text -- confirm against the pattern definitions and callers.
   *
   * @param root the tree against which character edges are computed
   * @param tree the node currently being processed (a node of {@code root})
   * @param offset running correction applied to character extents; mutated by this call
   * @param leafIndex running count of leaves visited so far; mutated by this call
   */
  protected static void updateTreeLabels(
      Tree root, Tree tree, MutableInteger offset, MutableInteger leafIndex) {
    if (tree.isLeaf()) {
      leafIndex.value++;
      return;
    }
    // Upper-case with a fixed locale: the default-locale overload would turn 'i' into a dotted
    // capital under e.g. a Turkish locale and could make the label patterns below miss.
    String labelValue = tree.label().value().toUpperCase(java.util.Locale.ROOT);
    int begin = root.leftCharEdge(tree);
    int end = root.rightCharEdge(tree);
    // Raw span length, measured before any offset correction is applied.
    int length = end - begin;

    // apply offset to begin extent (uses the offset accumulated before this node)
    begin += offset.value;

    // calculate offset delta based on label
    if (double_quote_lable_pattern.matcher(labelValue).matches() && length > 1) {
      offset.value--;
      log.debug("Quotes label pattern fired: " + offset);
    } else if (bracket_label_pattern.matcher(labelValue).matches()) {
      offset.value -= 4;
      log.debug("Bracket label pattern fired: " + offset);
    } else if (tree.isPreTerminal()) {
      // One unit of correction per escaped-character match in the token text.
      Tree leaf = tree.firstChild();
      String text = leaf.label().value();
      Matcher matcher = escaped_char_pattern.matcher(text);
      while (matcher.find()) {
        offset.value--;
      }
    }

    // Children must run before the end extent is fixed: they may change the shared offset.
    for (Tree child : tree.children()) {
      updateTreeLabels(root, child, offset, leafIndex);
    }

    // apply offset to end extent (includes every correction made inside this subtree)
    end += offset.value;

    // set begin and end offsets on node; the index is the leaf count once this subtree is done
    MapLabel label = new MapLabel(tree.label());
    label.put(BEGIN_KEY, begin);
    label.put(END_KEY, end);
    label.put(MapLabel.INDEX_KEY, leafIndex.value);
    tree.setLabel(label);
  }