Example #1
0
 /**
  * Replaces the children of {@code t} with exactly three nodes: a new "NP" node over
  * {@code left}, the conjunction node {@code conj}, and a new "NP" node over {@code right}.
  * The tree {@code t} is modified in place.
  *
  * @param t the node whose children are replaced
  * @param left the children for the new left-hand "NP" node
  * @param conj the node placed between the two new "NP" nodes
  * @param right the children for the new right-hand "NP" node
  */
 private static void transformCC(Tree t, List<Tree> left, Tree conj, List<Tree> right) {
   TreeFactory treeFactory = t.treeFactory();
   LabelFactory labelFactory = t.label().labelFactory();

   List<Tree> rebuilt = new ArrayList<Tree>();
   rebuilt.add(treeFactory.newTreeNode(labelFactory.newLabel("NP"), left));
   rebuilt.add(conj);
   rebuilt.add(treeFactory.newTreeNode(labelFactory.newLabel("NP"), right));

   t.setChildren(rebuilt);
 }
    /**
     * Builds a copy of {@code tree} whose node categories (and tags, where the label carries one)
     * have been reduced with {@code basicCategory} followed by {@code stripGF}. Leaves are
     * re-created with their original label. Scores are carried over to every new node.
     *
     * @param tree the tree to transform
     * @return the newly built tree
     */
    public Tree transformTree(Tree tree) {
      Label sourceLabel = tree.label();

      if (tree.isLeaf()) {
        Tree leafCopy = tf.newLeaf(sourceLabel);
        leafCopy.setScore(tree.score());
        return leafCopy;
      }

      // Reduce the category first, then strip the grammatical function from it.
      String category = sourceLabel.value();
      category = treebankLanguagePack().basicCategory(category);
      category = treebankLanguagePack().stripGF(category);

      // Transform the children recursively, keeping their order.
      Tree[] kids = tree.children();
      List<Tree> transformedKids = new ArrayList<Tree>(kids.length);
      for (Tree kid : kids) {
        transformedKids.add(transformTree(kid));
      }

      CategoryWordTag relabeled = new CategoryWordTag(sourceLabel);
      relabeled.setCategory(category);
      if (sourceLabel instanceof HasTag) {
        // The tag gets the same two-step reduction as the category.
        String reducedTag = ((HasTag) sourceLabel).tag();
        reducedTag = treebankLanguagePack().basicCategory(reducedTag);
        reducedTag = treebankLanguagePack().stripGF(reducedTag);
        relabeled.setTag(reducedTag);
      }

      Tree result = tf.newTreeNode(relabeled, transformedKids);
      result.setScore(tree.score());
      return result;
    }
 /**
  * Builds a copy of {@code tree} whose node categories (and tags, where the label carries one)
  * have been reduced with {@code basicCategory}. Leaves are re-created with their original
  * label. Scores are carried over to every new node.
  *
  * @param tree the tree to transform
  * @return the newly built tree
  */
 public Tree transformTree(Tree tree) {
   Label sourceLabel = tree.label();

   if (tree.isLeaf()) {
     Tree leafCopy = tf.newLeaf(sourceLabel);
     leafCopy.setScore(tree.score());
     return leafCopy;
   }

   String category = treebankLanguagePack().basicCategory(sourceLabel.value());

   // For plain subcategory stripping a transformed child is not expected to be null, so every
   // child is kept unconditionally and an empty child list is not treated specially.
   Tree[] kids = tree.children();
   List<Tree> transformedKids = new ArrayList<Tree>(kids.length);
   for (Tree kid : kids) {
     transformedKids.add(transformTree(kid));
   }

   CategoryWordTag relabeled = new CategoryWordTag(sourceLabel);
   relabeled.setCategory(category);
   if (sourceLabel instanceof HasTag) {
     String reducedTag = treebankLanguagePack().basicCategory(((HasTag) sourceLabel).tag());
     relabeled.setTag(reducedTag);
   }

   Tree result = tf.newTreeNode(relabeled, transformedKids);
   result.setScore(tree.score());
   return result;
 }
  /**
   * Rebuilds {@code tree} so that every preterminal is expanded into one preterminal per
   * {@code char} of its word. Each new preterminal is labeled with the original tag plus a
   * position suffix, and the original tag node becomes their parent.
   *
   * <p>Suffixes: a one-character word always gets "_S". Otherwise, with {@code useTwoCharTags}
   * the first character gets "_S" and all others "_M"; without it the first gets "_B", the last
   * "_E" and the rest "_M".
   *
   * @param tree the tree to transform
   * @return the newly built tree
   */
  public Tree transformTree(Tree tree) {
    TreeFactory factory = tree.treeFactory();
    String nodeLabel = tree.label().value();

    if (!tree.isPreTerminal()) {
      // Not a preterminal: rebuild this node over its recursively transformed children.
      List<Tree> rebuiltKids = new ArrayList<>();
      for (Tree kid : tree.children()) {
        rebuiltKids.add(transformTree(kid));
      }
      return factory.newTreeNode(nodeLabel, rebuiltKids);
    }

    String word = tree.firstChild().label().value();
    int wordLength = word.length();
    int lastPos = wordLength - 1;

    List<Tree> charPreterminals = new ArrayList<>();
    for (int pos = 0; pos < wordLength; pos++) {
      Tree charLeaf = factory.newLeaf(String.valueOf(word.charAt(pos)));

      String suffix;
      if (wordLength == 1) {
        suffix = "_S";
      } else if (useTwoCharTags) {
        suffix = (pos == 0) ? "_S" : "_M";
      } else if (pos == 0) {
        suffix = "_B";
      } else if (pos == lastPos) {
        suffix = "_E";
      } else {
        suffix = "_M";
      }

      charPreterminals.add(
          factory.newTreeNode(nodeLabel + suffix, Collections.<Tree>singletonList(charLeaf)));
    }
    return factory.newTreeNode(nodeLabel, charPreterminals);
  }
Example #5
0
 /**
  * Recursively copies the tree rooted at {@code t} and records, for every node, the
  * correspondence between the original node and its copy.
  *
  * <p>Each copy is created through {@code t.treeFactory()} from the original node's
  * {@code label()}; children are copied before their parent.
  *
  * @param t the root of the (sub)tree to copy
  * @param newToOld filled with one entry per node, mapping the copy to the original
  * @param oldToNew filled with one entry per node, mapping the original to the copy
  * @return the copy of {@code t}
  */
 public static Tree copyHelper(Tree t, Map<Tree, Tree> newToOld, Map<Tree, Tree> oldToNew) {
   Tree[] kids = t.children();
   TreeFactory tf = t.treeFactory();

   if (kids.length == 0) {
     Tree newLeaf = tf.newLeaf(t.label());
     newToOld.put(newLeaf, t);
     // Keyed by the ORIGINAL leaf, exactly like the internal-node case below. The previous
     // code stored (newLeaf -> t) here, so original leaves could never be looked up.
     oldToNew.put(t, newLeaf);
     return newLeaf;
   }

   Tree[] newKids = new Tree[kids.length];
   for (int i = 0, n = kids.length; i < n; i++) {
     newKids[i] = copyHelper(kids[i], newToOld, oldToNew);
   }

   Tree newNode = tf.newTreeNode(t.label(), Arrays.asList(newKids));
   newToOld.put(newNode, t);
   oldToNew.put(t, newNode);
   return newNode;
 }
Example #6
0
  /**
   * Groups the first two children of {@code t} under a new "XS" node, which then becomes the
   * first child of {@code t}. The tree is modified in place.
   *
   * @param t the node to restructure; the code reads its children at index 0 and 1
   */
  private static void transformQP(Tree t) {
    List<Tree> originalKids = t.getChildrenAsList();
    TreeFactory treeFactory = t.treeFactory();
    LabelFactory labelFactory = t.label().labelFactory();

    // Create the XS node without children, then hang the first two children of t under it.
    Tree xs = treeFactory.newTreeNode(labelFactory.newLabel("XS"), null);
    xs.addChild(originalKids.get(0));
    xs.addChild(originalKids.get(1));

    // Drop those two children from t: the child at index 0 is removed twice in a row.
    t.removeChild(0);
    t.removeChild(0);

    // The XS node takes their place at the front.
    t.addChild(0, xs);
  }
  /**
   * Normalize a whole tree -- one can assume that this is the root. This implementation deletes
   * empty elements (ones with nonterminal tag label '-NONE-') from the tree.
   *
   * <p>The steps are applied in this order:
   *
   * <ol>
   *   <li>a tree whose root label value is "S" is wrapped in a new "ROOT" node;
   *   <li>every phrasal node whose label value is "VB" has its value set to "VP";
   *   <li>if {@code doSGappedStuff} is set, each "S" node for which {@code includesEmptyNPSubj}
   *       holds gets "-G" appended to its label;
   *   <li>the tree is pruned with a filter that rejects nodes labeled "RS", "RM", "IP" or "CODE"
   *       and "-NONE-" nodes whose single child is a leaf;
   *   <li>{@code spliceOut} is called with a filter that rejects non-preterminal nodes having
   *       exactly one child whose label value equals their own;
   *   <li>"-TMP" and "-ADV" suffixes are appended to labels as selected by {@code
   *       temporalAnnotation} and {@code doAdverbialNP}.
   * </ol>
   *
   * <p>Several of these steps change the labels of existing nodes in place rather than creating
   * new nodes.
   *
   * @param tree the tree to normalize
   * @param tf the factory used to build the "ROOT" node and handed to the prune, spliceOut and
   *     final transform calls
   * @return the result of the final transform, or {@code null} if the first transform, the prune
   *     or the spliceOut step returned {@code null}
   */
  @Override
  public Tree normalizeWholeTree(Tree tree, TreeFactory tf) {
    // Step "gapped S": appends "-G" to the label of S nodes accepted by includesEmptyNPSubj.
    TreeTransformer transformer1 =
        new TreeTransformer() {
          @Override
          public Tree transformTree(Tree t) {
            if (doSGappedStuff) {
              String lab = t.label().value();
              if (lab.equals("S") && includesEmptyNPSubj(t)) {
                LabelFactory lf = t.label().labelFactory();
                // Note: this changes the tree label, rather than
                // creating a new tree node.  Beware!
                t.setLabel(lf.newLabel(t.label().value() + "-G"));
              }
            }
            return t;
          }
        };
    // Filter handed to prune(): returns false for the subtrees that are to be deleted.
    Filter<Tree> subtreeFilter =
        new Filter<Tree>() {

          private static final long serialVersionUID = -7250433816896327901L;

          @Override
          public boolean accept(Tree t) {
            Tree[] kids = t.children();
            Label l = t.label();
            // The special Switchboard non-terminals clause.
            // Note that it deletes IP which other Treebanks might use!
            // NOTE(review): t.label() is dereferenced here before the null check on l below.
            if ("RS".equals(t.label().value())
                || "RM".equals(t.label().value())
                || "IP".equals(t.label().value())
                || "CODE".equals(t.label().value())) {
              return false;
            }
            if ((l != null)
                && l.value() != null
                && (l.value().equals("-NONE-"))
                && !t.isLeaf()
                && kids.length == 1
                && kids[0].isLeaf()) {
              // Delete empty/trace nodes (ones marked '-NONE-')
              return false;
            }
            return true;
          }
        };
    // Filter handed to spliceOut(): returns false for a non-preterminal node with exactly one
    // child whose label value equals its own (e.g. X over X); everything else is accepted.
    Filter<Tree> nodeFilter =
        new Filter<Tree>() {

          private static final long serialVersionUID = 9000955019205336311L;

          @Override
          public boolean accept(Tree t) {
            if (t.isLeaf() || t.isPreTerminal()) {
              return true;
            }
            // The special switchboard non-terminals clause. Try keeping EDITED for now....
            // if ("EDITED".equals(t.label().value())) {
            //   return false;
            // }
            if (t.numChildren() != 1) {
              return true;
            }
            if (t.label() != null
                && t.label().value() != null
                && t.label().value().equals(t.children()[0].label().value())) {
              return false;
            }
            return true;
          }
        };
    // Final step: appends "-TMP" / "-ADV" suffixes to labels. Exactly one temporal branch is
    // chosen by temporalAnnotation; the adverbial-NP part afterwards is independent of it.
    TreeTransformer transformer2 =
        new TreeTransformer() {
          @Override
          public Tree transformTree(Tree t) {
            if (temporalAnnotation == TEMPORAL_ANY_TMP_PERCOLATED) {
              // Mark every head on the chain from t down to and including the preterminal.
              String lab = t.label().value();
              if (TmpPattern.matcher(lab).matches()) {
                Tree oldT = t;
                Tree ht;
                do {
                  ht = headFinder.determineHead(oldT);
                  // special fix for possessives! -- make noun before head
                  if (ht.label().value().equals("POS")) {
                    int j = oldT.objectIndexOf(ht);
                    if (j > 0) {
                      ht = oldT.getChild(j - 1);
                    }
                  }
                  LabelFactory lf = ht.label().labelFactory();
                  // Note: this changes the tree label, rather than
                  // creating a new tree node.  Beware!
                  ht.setLabel(lf.newLabel(ht.label().value() + "-TMP"));
                  oldT = ht;
                } while (!ht.isPreTerminal());
                if (lab.startsWith("PP")) {
                  // For a PP, additionally mark the head chain of the NP (if any) that directly
                  // follows the PP's head child.
                  ht = headFinder.determineHead(t);
                  // look to right
                  int j = t.objectIndexOf(ht);
                  int sz = t.children().length;
                  if (j + 1 < sz) {
                    ht = t.getChild(j + 1);
                  }
                  if (ht.label().value().startsWith("NP")) {
                    while (!ht.isLeaf()) {
                      LabelFactory lf = ht.label().labelFactory();
                      // Note: this changes the tree label, rather than
                      // creating a new tree node.  Beware!
                      ht.setLabel(lf.newLabel(ht.label().value() + "-TMP"));
                      ht = headFinder.determineHead(ht);
                    }
                  }
                }
              }
            } else if (temporalAnnotation == TEMPORAL_ALL_TERMINALS) {
              // If the head of t is a preterminal, mark all preterminal children of t;
              // otherwise follow the head chain and mark only the preterminal reached.
              String lab = t.label().value();
              if (NPTmpPattern.matcher(lab).matches()) {
                Tree ht;
                ht = headFinder.determineHead(t);
                if (ht.isPreTerminal()) {
                  // change all tags to -TMP
                  LabelFactory lf = ht.label().labelFactory();
                  Tree[] kids = t.children();
                  for (Tree kid : kids) {
                    if (kid.isPreTerminal()) {
                      // Note: this changes the tree label, rather
                      // than creating a new tree node.  Beware!
                      kid.setLabel(lf.newLabel(kid.value() + "-TMP"));
                    }
                  }
                } else {
                  Tree oldT = t;
                  do {
                    ht = headFinder.determineHead(oldT);
                    oldT = ht;
                  } while (!ht.isPreTerminal());
                  LabelFactory lf = ht.label().labelFactory();
                  // Note: this changes the tree label, rather than
                  // creating a new tree node.  Beware!
                  ht.setLabel(lf.newLabel(ht.label().value() + "-TMP"));
                }
              }
            } else if (temporalAnnotation == TEMPORAL_ALL_NP) {
              // Follow heads while they are NPs; mark heads that are preterminals or NPs.
              String lab = t.label().value();
              if (NPTmpPattern.matcher(lab).matches()) {
                Tree oldT = t;
                Tree ht;
                do {
                  ht = headFinder.determineHead(oldT);
                  // special fix for possessives! -- make noun before head
                  if (ht.label().value().equals("POS")) {
                    int j = oldT.objectIndexOf(ht);
                    if (j > 0) {
                      ht = oldT.getChild(j - 1);
                    }
                  }
                  if (ht.isPreTerminal() || ht.value().startsWith("NP")) {
                    LabelFactory lf = ht.labelFactory();
                    // Note: this changes the tree label, rather than
                    // creating a new tree node.  Beware!
                    ht.setLabel(lf.newLabel(ht.label().value() + "-TMP"));
                    oldT = ht;
                  }
                } while (ht.value().startsWith("NP"));
              }
            } else if (temporalAnnotation == TEMPORAL_ALL_NP_AND_PP
                || temporalAnnotation == TEMPORAL_NP_AND_PP_WITH_NP_HEAD
                || temporalAnnotation == TEMPORAL_ALL_NP_EVEN_UNDER_PP) {
              // also allow chain to start with PP
              String lab = t.value();
              if (NPTmpPattern.matcher(lab).matches() || PPTmpPattern.matcher(lab).matches()) {
                Tree oldT = t;
                do {
                  Tree ht = headFinder.determineHead(oldT);
                  // special fix for possessives! -- make noun before head
                  if (ht.value().equals("POS")) {
                    int j = oldT.objectIndexOf(ht);
                    if (j > 0) {
                      ht = oldT.getChild(j - 1);
                    }
                  } else if ((temporalAnnotation == TEMPORAL_NP_AND_PP_WITH_NP_HEAD
                          || temporalAnnotation == TEMPORAL_ALL_NP_EVEN_UNDER_PP)
                      && (ht.value().equals("IN") || ht.value().equals("TO"))) {
                    // change the head to be NP if possible
                    // The scan runs right to left and skips index 0, so the leftmost NP child
                    // at index >= 1 ends up as the head.
                    Tree[] kidlets = oldT.children();
                    for (int k = kidlets.length - 1; k > 0; k--) {
                      if (kidlets[k].value().startsWith("NP")) {
                        ht = kidlets[k];
                      }
                    }
                  }
                  LabelFactory lf = ht.labelFactory();
                  // Note: this next bit changes the tree label, rather
                  // than creating a new tree node.  Beware!
                  if (ht.isPreTerminal() || ht.value().startsWith("NP")) {
                    ht.setLabel(lf.newLabel(ht.value() + "-TMP"));
                  }
                  // In the EVEN_UNDER_PP mode a PP on the chain is relabeled with its basic
                  // category as computed by tlp.basicCategory.
                  if (temporalAnnotation == TEMPORAL_ALL_NP_EVEN_UNDER_PP
                      && oldT.value().startsWith("PP")) {
                    oldT.setLabel(lf.newLabel(tlp.basicCategory(oldT.value())));
                  }
                  oldT = ht;
                } while (oldT.value().startsWith("NP") || oldT.value().startsWith("PP"));
              }
            } else if (temporalAnnotation == TEMPORAL_ALL_NP_PP_ADVP) {
              // also allow chain to start with PP or ADVP
              String lab = t.value();
              if (NPTmpPattern.matcher(lab).matches()
                  || PPTmpPattern.matcher(lab).matches()
                  || ADVPTmpPattern.matcher(lab).matches()) {
                Tree oldT = t;
                do {
                  Tree ht = headFinder.determineHead(oldT);
                  // special fix for possessives! -- make noun before head
                  if (ht.value().equals("POS")) {
                    int j = oldT.objectIndexOf(ht);
                    if (j > 0) {
                      ht = oldT.getChild(j - 1);
                    }
                  }
                  // Note: this next bit changes the tree label, rather
                  // than creating a new tree node.  Beware!
                  if (ht.isPreTerminal() || ht.value().startsWith("NP")) {
                    LabelFactory lf = ht.labelFactory();
                    ht.setLabel(lf.newLabel(ht.value() + "-TMP"));
                  }
                  oldT = ht;
                } while (oldT.value().startsWith("NP"));
              }
            } else if (temporalAnnotation == TEMPORAL_9) {
              // also allow chain to start with PP or ADVP
              String lab = t.value();
              if (NPTmpPattern.matcher(lab).matches()
                  || PPTmpPattern.matcher(lab).matches()
                  || ADVPTmpPattern.matcher(lab).matches()) {
                // System.err.println("TMP: Annotating " + t);
                addTMP9(t);
              }
            } else if (temporalAnnotation == TEMPORAL_ACL03PCFG) {
              // Follow the head chain to the preterminal and mark only that preterminal; with
              // onlyTagAnnotateNstar set, only when its label value starts with "N".
              String lab = t.label().value();
              if (lab != null && NPTmpPattern.matcher(lab).matches()) {
                Tree oldT = t;
                Tree ht;
                do {
                  ht = headFinder.determineHead(oldT);
                  // special fix for possessives! -- make noun before head
                  if (ht.label().value().equals("POS")) {
                    int j = oldT.objectIndexOf(ht);
                    if (j > 0) {
                      ht = oldT.getChild(j - 1);
                    }
                  }
                  oldT = ht;
                } while (!ht.isPreTerminal());
                if (!onlyTagAnnotateNstar || ht.label().value().startsWith("N")) {
                  LabelFactory lf = ht.label().labelFactory();
                  // Note: this changes the tree label, rather than
                  // creating a new tree node.  Beware!
                  ht.setLabel(lf.newLabel(ht.label().value() + "-TMP"));
                }
              }
            }
            // Adverbial NPs: same walk as the TEMPORAL_ALL_NP branch, but the suffix is "-ADV".
            if (doAdverbialNP) {
              String lab = t.value();
              if (NPAdvPattern.matcher(lab).matches()) {
                Tree oldT = t;
                Tree ht;
                do {
                  ht = headFinder.determineHead(oldT);
                  // special fix for possessives! -- make noun before head
                  if (ht.label().value().equals("POS")) {
                    int j = oldT.objectIndexOf(ht);
                    if (j > 0) {
                      ht = oldT.getChild(j - 1);
                    }
                  }
                  if (ht.isPreTerminal() || ht.value().startsWith("NP")) {
                    LabelFactory lf = ht.labelFactory();
                    // Note: this changes the tree label, rather than
                    // creating a new tree node.  Beware!
                    ht.setLabel(lf.newLabel(ht.label().value() + "-ADV"));
                    oldT = ht;
                  }
                } while (ht.value().startsWith("NP"));
              }
            }
            return t;
          }
        };
    // if there wasn't an empty nonterminal at the top, but an S, wrap it.
    if (tree.label().value().equals("S")) {
      tree = tf.newTreeNode("ROOT", Collections.singletonList(tree));
    }
    // repair for the phrasal VB in Switchboard (PTB version 3) that should be a VP
    for (Tree subtree : tree) {
      if (subtree.isPhrasal() && "VB".equals(subtree.label().value())) {
        subtree.setValue("VP");
      }
    }
    // Run the pipeline; each intermediate stage may return null, which ends processing early.
    tree = tree.transform(transformer1);
    if (tree == null) {
      return null;
    }
    tree = tree.prune(subtreeFilter, tf);
    if (tree == null) {
      return null;
    }
    tree = tree.spliceOut(nodeFilter, tf);
    if (tree == null) {
      return null;
    }
    return tree.transform(transformer2, tf);
  }