예제 #1
0
 public static Tree copyHelper(Tree t, Map<Tree, Tree> newToOld, Map<Tree, Tree> oldToNew) {
   Tree[] kids = t.children();
   Tree[] newKids = new Tree[kids.length];
   for (int i = 0, n = kids.length; i < n; i++) {
     newKids[i] = copyHelper(kids[i], newToOld, oldToNew);
   }
   TreeFactory tf = t.treeFactory();
   if (kids.length == 0) {
     Tree newLeaf = tf.newLeaf(t.label());
     newToOld.put(newLeaf, t);
     oldToNew.put(newLeaf, t);
     return newLeaf;
   }
   Tree newNode = tf.newTreeNode(t.label(), Arrays.asList(newKids));
   newToOld.put(newNode, t);
   oldToNew.put(t, newNode);
   return newNode;
 }
  /**
   * Normalize a whole tree -- one can assume that this is the root. This implementation deletes
   * empty elements (ones with nonterminal tag label '-NONE-') from the tree.
   */
  @Override
  public Tree normalizeWholeTree(Tree tree, TreeFactory tf) {
    TreeTransformer transformer1 =
        new TreeTransformer() {
          @Override
          public Tree transformTree(Tree t) {
            if (doSGappedStuff) {
              String lab = t.label().value();
              if (lab.equals("S") && includesEmptyNPSubj(t)) {
                LabelFactory lf = t.label().labelFactory();
                // Note: this changes the tree label, rather than
                // creating a new tree node.  Beware!
                t.setLabel(lf.newLabel(t.label().value() + "-G"));
              }
            }
            return t;
          }
        };
    Filter<Tree> subtreeFilter =
        new Filter<Tree>() {

          private static final long serialVersionUID = -7250433816896327901L;

          @Override
          public boolean accept(Tree t) {
            Tree[] kids = t.children();
            Label l = t.label();
            // The special Switchboard non-terminals clause.
            // Note that it deletes IP which other Treebanks might use!
            if ("RS".equals(t.label().value())
                || "RM".equals(t.label().value())
                || "IP".equals(t.label().value())
                || "CODE".equals(t.label().value())) {
              return false;
            }
            if ((l != null)
                && l.value() != null
                && (l.value().equals("-NONE-"))
                && !t.isLeaf()
                && kids.length == 1
                && kids[0].isLeaf()) {
              // Delete empty/trace nodes (ones marked '-NONE-')
              return false;
            }
            return true;
          }
        };
    Filter<Tree> nodeFilter =
        new Filter<Tree>() {

          private static final long serialVersionUID = 9000955019205336311L;

          @Override
          public boolean accept(Tree t) {
            if (t.isLeaf() || t.isPreTerminal()) {
              return true;
            }
            // The special switchboard non-terminals clause. Try keeping EDITED for now....
            // if ("EDITED".equals(t.label().value())) {
            //   return false;
            // }
            if (t.numChildren() != 1) {
              return true;
            }
            if (t.label() != null
                && t.label().value() != null
                && t.label().value().equals(t.children()[0].label().value())) {
              return false;
            }
            return true;
          }
        };
    TreeTransformer transformer2 =
        new TreeTransformer() {
          @Override
          public Tree transformTree(Tree t) {
            if (temporalAnnotation == TEMPORAL_ANY_TMP_PERCOLATED) {
              String lab = t.label().value();
              if (TmpPattern.matcher(lab).matches()) {
                Tree oldT = t;
                Tree ht;
                do {
                  ht = headFinder.determineHead(oldT);
                  // special fix for possessives! -- make noun before head
                  if (ht.label().value().equals("POS")) {
                    int j = oldT.objectIndexOf(ht);
                    if (j > 0) {
                      ht = oldT.getChild(j - 1);
                    }
                  }
                  LabelFactory lf = ht.label().labelFactory();
                  // Note: this changes the tree label, rather than
                  // creating a new tree node.  Beware!
                  ht.setLabel(lf.newLabel(ht.label().value() + "-TMP"));
                  oldT = ht;
                } while (!ht.isPreTerminal());
                if (lab.startsWith("PP")) {
                  ht = headFinder.determineHead(t);
                  // look to right
                  int j = t.objectIndexOf(ht);
                  int sz = t.children().length;
                  if (j + 1 < sz) {
                    ht = t.getChild(j + 1);
                  }
                  if (ht.label().value().startsWith("NP")) {
                    while (!ht.isLeaf()) {
                      LabelFactory lf = ht.label().labelFactory();
                      // Note: this changes the tree label, rather than
                      // creating a new tree node.  Beware!
                      ht.setLabel(lf.newLabel(ht.label().value() + "-TMP"));
                      ht = headFinder.determineHead(ht);
                    }
                  }
                }
              }
            } else if (temporalAnnotation == TEMPORAL_ALL_TERMINALS) {
              String lab = t.label().value();
              if (NPTmpPattern.matcher(lab).matches()) {
                Tree ht;
                ht = headFinder.determineHead(t);
                if (ht.isPreTerminal()) {
                  // change all tags to -TMP
                  LabelFactory lf = ht.label().labelFactory();
                  Tree[] kids = t.children();
                  for (Tree kid : kids) {
                    if (kid.isPreTerminal()) {
                      // Note: this changes the tree label, rather
                      // than creating a new tree node.  Beware!
                      kid.setLabel(lf.newLabel(kid.value() + "-TMP"));
                    }
                  }
                } else {
                  Tree oldT = t;
                  do {
                    ht = headFinder.determineHead(oldT);
                    oldT = ht;
                  } while (!ht.isPreTerminal());
                  LabelFactory lf = ht.label().labelFactory();
                  // Note: this changes the tree label, rather than
                  // creating a new tree node.  Beware!
                  ht.setLabel(lf.newLabel(ht.label().value() + "-TMP"));
                }
              }
            } else if (temporalAnnotation == TEMPORAL_ALL_NP) {
              String lab = t.label().value();
              if (NPTmpPattern.matcher(lab).matches()) {
                Tree oldT = t;
                Tree ht;
                do {
                  ht = headFinder.determineHead(oldT);
                  // special fix for possessives! -- make noun before head
                  if (ht.label().value().equals("POS")) {
                    int j = oldT.objectIndexOf(ht);
                    if (j > 0) {
                      ht = oldT.getChild(j - 1);
                    }
                  }
                  if (ht.isPreTerminal() || ht.value().startsWith("NP")) {
                    LabelFactory lf = ht.labelFactory();
                    // Note: this changes the tree label, rather than
                    // creating a new tree node.  Beware!
                    ht.setLabel(lf.newLabel(ht.label().value() + "-TMP"));
                    oldT = ht;
                  }
                } while (ht.value().startsWith("NP"));
              }
            } else if (temporalAnnotation == TEMPORAL_ALL_NP_AND_PP
                || temporalAnnotation == TEMPORAL_NP_AND_PP_WITH_NP_HEAD
                || temporalAnnotation == TEMPORAL_ALL_NP_EVEN_UNDER_PP) {
              // also allow chain to start with PP
              String lab = t.value();
              if (NPTmpPattern.matcher(lab).matches() || PPTmpPattern.matcher(lab).matches()) {
                Tree oldT = t;
                do {
                  Tree ht = headFinder.determineHead(oldT);
                  // special fix for possessives! -- make noun before head
                  if (ht.value().equals("POS")) {
                    int j = oldT.objectIndexOf(ht);
                    if (j > 0) {
                      ht = oldT.getChild(j - 1);
                    }
                  } else if ((temporalAnnotation == TEMPORAL_NP_AND_PP_WITH_NP_HEAD
                          || temporalAnnotation == TEMPORAL_ALL_NP_EVEN_UNDER_PP)
                      && (ht.value().equals("IN") || ht.value().equals("TO"))) {
                    // change the head to be NP if possible
                    Tree[] kidlets = oldT.children();
                    for (int k = kidlets.length - 1; k > 0; k--) {
                      if (kidlets[k].value().startsWith("NP")) {
                        ht = kidlets[k];
                      }
                    }
                  }
                  LabelFactory lf = ht.labelFactory();
                  // Note: this next bit changes the tree label, rather
                  // than creating a new tree node.  Beware!
                  if (ht.isPreTerminal() || ht.value().startsWith("NP")) {
                    ht.setLabel(lf.newLabel(ht.value() + "-TMP"));
                  }
                  if (temporalAnnotation == TEMPORAL_ALL_NP_EVEN_UNDER_PP
                      && oldT.value().startsWith("PP")) {
                    oldT.setLabel(lf.newLabel(tlp.basicCategory(oldT.value())));
                  }
                  oldT = ht;
                } while (oldT.value().startsWith("NP") || oldT.value().startsWith("PP"));
              }
            } else if (temporalAnnotation == TEMPORAL_ALL_NP_PP_ADVP) {
              // also allow chain to start with PP or ADVP
              String lab = t.value();
              if (NPTmpPattern.matcher(lab).matches()
                  || PPTmpPattern.matcher(lab).matches()
                  || ADVPTmpPattern.matcher(lab).matches()) {
                Tree oldT = t;
                do {
                  Tree ht = headFinder.determineHead(oldT);
                  // special fix for possessives! -- make noun before head
                  if (ht.value().equals("POS")) {
                    int j = oldT.objectIndexOf(ht);
                    if (j > 0) {
                      ht = oldT.getChild(j - 1);
                    }
                  }
                  // Note: this next bit changes the tree label, rather
                  // than creating a new tree node.  Beware!
                  if (ht.isPreTerminal() || ht.value().startsWith("NP")) {
                    LabelFactory lf = ht.labelFactory();
                    ht.setLabel(lf.newLabel(ht.value() + "-TMP"));
                  }
                  oldT = ht;
                } while (oldT.value().startsWith("NP"));
              }
            } else if (temporalAnnotation == TEMPORAL_9) {
              // also allow chain to start with PP or ADVP
              String lab = t.value();
              if (NPTmpPattern.matcher(lab).matches()
                  || PPTmpPattern.matcher(lab).matches()
                  || ADVPTmpPattern.matcher(lab).matches()) {
                // System.err.println("TMP: Annotating " + t);
                addTMP9(t);
              }
            } else if (temporalAnnotation == TEMPORAL_ACL03PCFG) {
              String lab = t.label().value();
              if (lab != null && NPTmpPattern.matcher(lab).matches()) {
                Tree oldT = t;
                Tree ht;
                do {
                  ht = headFinder.determineHead(oldT);
                  // special fix for possessives! -- make noun before head
                  if (ht.label().value().equals("POS")) {
                    int j = oldT.objectIndexOf(ht);
                    if (j > 0) {
                      ht = oldT.getChild(j - 1);
                    }
                  }
                  oldT = ht;
                } while (!ht.isPreTerminal());
                if (!onlyTagAnnotateNstar || ht.label().value().startsWith("N")) {
                  LabelFactory lf = ht.label().labelFactory();
                  // Note: this changes the tree label, rather than
                  // creating a new tree node.  Beware!
                  ht.setLabel(lf.newLabel(ht.label().value() + "-TMP"));
                }
              }
            }
            if (doAdverbialNP) {
              String lab = t.value();
              if (NPAdvPattern.matcher(lab).matches()) {
                Tree oldT = t;
                Tree ht;
                do {
                  ht = headFinder.determineHead(oldT);
                  // special fix for possessives! -- make noun before head
                  if (ht.label().value().equals("POS")) {
                    int j = oldT.objectIndexOf(ht);
                    if (j > 0) {
                      ht = oldT.getChild(j - 1);
                    }
                  }
                  if (ht.isPreTerminal() || ht.value().startsWith("NP")) {
                    LabelFactory lf = ht.labelFactory();
                    // Note: this changes the tree label, rather than
                    // creating a new tree node.  Beware!
                    ht.setLabel(lf.newLabel(ht.label().value() + "-ADV"));
                    oldT = ht;
                  }
                } while (ht.value().startsWith("NP"));
              }
            }
            return t;
          }
        };
    // if there wasn't an empty nonterminal at the top, but an S, wrap it.
    if (tree.label().value().equals("S")) {
      tree = tf.newTreeNode("ROOT", Collections.singletonList(tree));
    }
    // repair for the phrasal VB in Switchboard (PTB version 3) that should be a VP
    for (Tree subtree : tree) {
      if (subtree.isPhrasal() && "VB".equals(subtree.label().value())) {
        subtree.setValue("VP");
      }
    }
    tree = tree.transform(transformer1);
    if (tree == null) {
      return null;
    }
    tree = tree.prune(subtreeFilter, tf);
    if (tree == null) {
      return null;
    }
    tree = tree.spliceOut(nodeFilter, tf);
    if (tree == null) {
      return null;
    }
    return tree.transform(transformer2, tf);
  }