public Tree transformTree(Tree tree) {
   Label lab = tree.label();
   if (tree.isLeaf()) {
     Tree leaf = tf.newLeaf(lab);
     leaf.setScore(tree.score());
     return leaf;
   }
   String s = lab.value();
   s = treebankLanguagePack().basicCategory(s);
   int numKids = tree.numChildren();
   List<Tree> children = new ArrayList<Tree>(numKids);
   for (int cNum = 0; cNum < numKids; cNum++) {
     Tree child = tree.getChild(cNum);
     Tree newChild = transformTree(child);
     // cdm 2007: for just subcategory stripping, null shouldn't happen
     // if (newChild != null) {
     children.add(newChild);
     // }
   }
   // if (children.isEmpty()) {
   //   return null;
   // }
   CategoryWordTag newLabel = new CategoryWordTag(lab);
   newLabel.setCategory(s);
   if (lab instanceof HasTag) {
     String tag = ((HasTag) lab).tag();
     tag = treebankLanguagePack().basicCategory(tag);
     newLabel.setTag(tag);
   }
   Tree node = tf.newTreeNode(newLabel, children);
   node.setScore(tree.score());
   return node;
 }
    public Tree transformTree(Tree tree) {
      Label lab = tree.label();
      if (tree.isLeaf()) {
        Tree leaf = tf.newLeaf(lab);
        leaf.setScore(tree.score());
        return leaf;
      }
      String s = lab.value();
      s = treebankLanguagePack().basicCategory(s);
      s = treebankLanguagePack().stripGF(s);
      int numKids = tree.numChildren();
      List<Tree> children = new ArrayList<Tree>(numKids);
      for (int cNum = 0; cNum < numKids; cNum++) {
        Tree child = tree.getChild(cNum);
        Tree newChild = transformTree(child);
        children.add(newChild);
      }
      CategoryWordTag newLabel = new CategoryWordTag(lab);
      newLabel.setCategory(s);
      if (lab instanceof HasTag) {
        String tag = ((HasTag) lab).tag();
        tag = treebankLanguagePack().basicCategory(tag);
        tag = treebankLanguagePack().stripGF(tag);

        newLabel.setTag(tag);
      }
      Tree node = tf.newTreeNode(newLabel, children);
      node.setScore(tree.score());
      return node;
    }
Ejemplo n.º 3
0
 /**
  * Construct a fall through tree in case we can't parse this sentence
  *
  * @param words
  * @return a tree with X for all the internal nodes
  */
 public static Tree xTree(List<? extends HasWord> words) {
   TreeFactory lstf = new LabeledScoredTreeFactory();
   List<Tree> lst2 = new ArrayList<Tree>();
   for (HasWord obj : words) {
     String s = obj.word();
     Tree t = lstf.newLeaf(s);
     Tree t2 = lstf.newTreeNode("X", Collections.singletonList(t));
     lst2.add(t2);
   }
   return lstf.newTreeNode("X", lst2);
 }
Ejemplo n.º 4
0
  /** Build a parse tree node corresponding to an elliptic node in the parse XML. */
  private Tree buildEllipticNode(Node root) {
    Element eRoot = (Element) root;
    String constituentStr = eRoot.getNodeName();

    List<Tree> kids = new ArrayList<>();
    Tree leafNode = treeFactory.newLeaf(SpanishTreeNormalizer.EMPTY_LEAF_VALUE);
    if (leafNode.label() instanceof HasWord)
      ((HasWord) leafNode.label()).setWord(SpanishTreeNormalizer.EMPTY_LEAF_VALUE);

    kids.add(leafNode);
    Tree t = treeFactory.newTreeNode(constituentStr, kids);

    return t;
  }
Ejemplo n.º 5
0
  public static void main(String[] args) {
    if (args.length < minArgs) {
      System.out.println(usage());
      System.exit(-1);
    }

    Properties options = StringUtils.argsToProperties(args, argDefs());
    Language language = PropertiesUtils.get(options, "l", Language.English, Language.class);
    TreebankLangParserParams tlpp = language.params;
    DiskTreebank tb = null;
    String encoding = options.getProperty("l", "UTF-8");
    boolean removeBracket = PropertiesUtils.getBool(options, "b", false);

    tlpp.setInputEncoding(encoding);
    tlpp.setOutputEncoding(encoding);
    tb = tlpp.diskTreebank();

    String[] files = options.getProperty("", "").split("\\s+");
    if (files.length != 0) {
      for (String filename : files) {
        tb.loadPath(filename);
      }
    } else {
      log.info(usage());
      System.exit(-1);
    }

    PrintWriter pwo = tlpp.pw();
    String startSymbol = tlpp.treebankLanguagePack().startSymbol();
    TreeFactory tf = new LabeledScoredTreeFactory();
    int nTrees = 0;
    for (Tree t : tb) {
      if (removeBracket) {
        if (t.value().equals(startSymbol)) {
          t = t.firstChild();
        }

      } else if (!t.value().equals(startSymbol)) { // Add a bracket if it isn't already there
        t = tf.newTreeNode(startSymbol, Collections.singletonList(t));
      }
      pwo.println(t.toString());
      nTrees++;
    }
    pwo.close();
    System.err.printf("Processed %d trees.%n", nTrees);
  }
 public static Tree untransformTree(Tree tree) {
   TreeFactory tf = tree.treeFactory();
   if (tree.isPrePreTerminal()) {
     if (tree.firstChild().label().value().matches(".*_.")) {
       StringBuilder word = new StringBuilder();
       for (int i = 0; i < tree.children().length; i++) {
         Tree child = tree.children()[i];
         word.append(child.firstChild().label().value());
       }
       Tree newChild = tf.newLeaf(word.toString());
       tree.setChildren(Collections.singletonList(newChild));
     }
   } else {
     for (int i = 0; i < tree.children().length; i++) {
       Tree child = tree.children()[i];
       untransformTree(child);
     }
   }
   return tree;
 }
  public Tree transformTree(Tree tree) {
    TreeFactory tf = tree.treeFactory();
    String tag = tree.label().value();
    if (tree.isPreTerminal()) {
      String word = tree.firstChild().label().value();

      List<Tree> newPreterms = new ArrayList<>();
      for (int i = 0, size = word.length(); i < size; i++) {
        String singleCharLabel = new String(new char[] {word.charAt(i)});
        Tree newLeaf = tf.newLeaf(singleCharLabel);
        String suffix;
        if (useTwoCharTags) {
          if (word.length() == 1 || i == 0) {
            suffix = "_S";
          } else {
            suffix = "_M";
          }
        } else {
          if (word.length() == 1) {
            suffix = "_S";
          } else if (i == 0) {
            suffix = "_B";
          } else if (i == word.length() - 1) {
            suffix = "_E";
          } else {
            suffix = "_M";
          }
        }
        newPreterms.add(tf.newTreeNode(tag + suffix, Collections.<Tree>singletonList(newLeaf)));
      }
      return tf.newTreeNode(tag, newPreterms);
    } else {
      List<Tree> newChildren = new ArrayList<>();
      for (int i = 0; i < tree.children().length; i++) {
        Tree child = tree.children()[i];
        newChildren.add(transformTree(child));
      }
      return tf.newTreeNode(tag, newChildren);
    }
  }
Ejemplo n.º 8
0
  /**
   * Build a parse tree node corresponding to a constituent.
   *
   * @param root Node describing the constituent
   * @param children Collected child nodes, already parsed
   */
  private Tree buildConstituentNode(Node root, List<Tree> children) {
    Element eRoot = (Element) root;
    String label = eRoot.getNodeName().trim();

    if (detailedAnnotations) {
      if (eRoot.getAttribute(ATTR_COORDINATING).equals("yes")) {
        label += "-coord";
      } else if (eRoot.hasAttribute(ATTR_CLAUSE_TYPE)) {
        label += '-' + eRoot.getAttribute(ATTR_CLAUSE_TYPE);
      }
    }

    return treeFactory.newTreeNode(treeNormalizer.normalizeNonterminal(label), children);
  }
Ejemplo n.º 9
0
  /** Build a parse tree node corresponding to the word in the given XML node. */
  private Tree buildWordNode(Node root) {
    Element eRoot = (Element) root;

    String posStr = getPOS(eRoot);
    posStr = treeNormalizer.normalizeNonterminal(posStr);

    String lemma = eRoot.getAttribute(ATTR_LEMMA);
    String word = getWord(eRoot);

    String leafStr = treeNormalizer.normalizeTerminal(word);
    Tree leafNode = treeFactory.newLeaf(leafStr);
    if (leafNode.label() instanceof HasWord) ((HasWord) leafNode.label()).setWord(leafStr);
    if (leafNode.label() instanceof HasLemma && lemma != null)
      ((HasLemma) leafNode.label()).setLemma(lemma);

    List<Tree> kids = new ArrayList<>();
    kids.add(leafNode);

    Tree t = treeFactory.newTreeNode(posStr, kids);
    if (t.label() instanceof HasTag) ((HasTag) t.label()).setTag(posStr);

    return t;
  }
Ejemplo n.º 10
0
  @Override
  public Tree normalizeWholeTree(Tree tree, TreeFactory tf) {
    tree = tree.prune(hebrewEmptyFilter, tf).spliceOut(aOverAFilter, tf);

    // Add start symbol so that the root has only one sub-state. Escape any enclosing brackets.
    // If the "tree" consists entirely of enclosing brackets e.g. ((())) then this method
    // will return null. In this case, readers e.g. PennTreeReader will try to read the next tree.
    while (tree != null
        && (tree.value() == null || tree.value().equals(""))
        && tree.numChildren() <= 1) tree = tree.firstChild();

    if (tree != null && !tree.value().equals(tlp.startSymbol()))
      tree = tf.newTreeNode(tlp.startSymbol(), Collections.singletonList(tree));

    return tree;
  }
Ejemplo n.º 11
0
  public static Tree createStanfordTree(Annotation root, TreeFactory tFact) {
    JCas aJCas;
    try {
      aJCas = root.getCAS().getJCas();
    } catch (CASException e) {
      throw new IllegalStateException("Unable to get JCas from JCas wrapper");
    }

    // define the new (root) node
    Tree rootNode;

    // before we can create a node, we must check if we have any children (we have to know
    // whether to create a node or a leaf - not very dynamic)
    if (root instanceof Constituent && !isLeaf((Constituent) root)) {
      Constituent node = (Constituent) root;
      List<Tree> childNodes = new ArrayList<Tree>();

      // get childNodes from child annotations
      FSArray children = node.getChildren();
      for (int i = 0; i < children.size(); i++) {
        childNodes.add(createStanfordTree(node.getChildren(i), tFact));
      }

      // now create the node with its children
      rootNode = tFact.newTreeNode(node.getConstituentType(), childNodes);

    } else {
      // Handle leaf annotations
      // Leafs are always Token-annotations
      // We also have to insert a Preterminal node with the value of the
      // POS-Annotation on the token
      // because the POS is not directly stored within the treee
      Token wordAnnotation = (Token) root;

      // create leaf-node for the tree
      Tree wordNode = tFact.newLeaf(wordAnnotation.getCoveredText());

      // create information about preceding and trailing whitespaces in the leaf node
      StringBuilder preWhitespaces = new StringBuilder();
      StringBuilder trailWhitespaces = new StringBuilder();

      List<Token> precedingTokenList = selectPreceding(aJCas, Token.class, wordAnnotation, 1);
      List<Token> followingTokenList = selectFollowing(aJCas, Token.class, wordAnnotation, 1);

      if (precedingTokenList.size() > 0) {
        Token precedingToken = precedingTokenList.get(0);
        int precedingWhitespaces = wordAnnotation.getBegin() - precedingToken.getEnd();
        for (int i = 0; i < precedingWhitespaces; i++) {
          preWhitespaces.append(" ");
        }
      }
      if (followingTokenList.size() > 0) {
        Token followingToken = followingTokenList.get(0);
        int trailingWhitespaces = followingToken.getBegin() - wordAnnotation.getEnd();
        for (int i = 0; i < trailingWhitespaces; i++) {
          trailWhitespaces.append(" ");
        }
      }

      // write whitespace information as CoreAnnotation.BeforeAnnotation and
      // CoreAnnotation.AfterAnnotation to the node add annotation to list and write back to
      // node label
      ((CoreLabel) wordNode.label())
          .set(CoreAnnotations.BeforeAnnotation.class, preWhitespaces.toString());
      ((CoreLabel) wordNode.label())
          .set(CoreAnnotations.AfterAnnotation.class, trailWhitespaces.toString());

      // get POS-annotation
      // get the token that is covered by the POS
      List<POS> coveredPos = JCasUtil.selectCovered(aJCas, POS.class, wordAnnotation);
      // the POS should only cover one token
      assert coveredPos.size() == 1;
      POS pos = coveredPos.get(0);

      // create POS-Node in the tree and attach word-node to it
      rootNode = tFact.newTreeNode(pos.getPosValue(), Arrays.asList((new Tree[] {wordNode})));
    }

    return rootNode;
  }