/**
   * Remove traces and non-terminal decorations (e.g., "-SUBJ" in "NP-SUBJ") from a Penn
   * Treebank-style tree.
   *
   * @param inputTree
   */
  public void normalizeTree(Tree inputTree) {
    inputTree.label().setFromString("ROOT");

    List<Pair<TregexPattern, TsurgeonPattern>> ops =
        new ArrayList<Pair<TregexPattern, TsurgeonPattern>>();
    List<TsurgeonPattern> ps = new ArrayList<TsurgeonPattern>();
    String tregexOpStr;
    TregexPattern matchPattern;
    TsurgeonPattern p;
    TregexMatcher matcher;

    tregexOpStr = "/\\-NONE\\-/=emptynode";
    matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
    matcher = matchPattern.matcher(inputTree);
    ps.add(Tsurgeon.parseOperation("prune emptynode"));
    matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
    p = Tsurgeon.collectOperations(ps);
    ops.add(new Pair<TregexPattern, TsurgeonPattern>(matchPattern, p));
    Tsurgeon.processPatternsOnTree(ops, inputTree);

    Label nonterminalLabel;

    tregexOpStr = "/.+\\-.+/=nonterminal < __";
    matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
    matcher = matchPattern.matcher(inputTree);
    while (matcher.find()) {
      nonterminalLabel = matcher.getNode("nonterminal");
      if (nonterminalLabel == null) continue;
      nonterminalLabel.setFromString(tlp.basicCategory(nonterminalLabel.value()));
    }
  }
Пример #2
0
  public static boolean filter(PhraseInfo phrase) {
    if (phrase == null || phrase.getTree() == null) return false;
    Tree phraseTree = phrase.getTree();

    // __ < ( NP < (PRP !< it|them) ) | < ( NP < ( NP < (PRP !< it|them) ) )
    String invalidPronounPattern =
        " ( NP < (PRP=prp !< " + Utils.wordsConjuction(Rules.VALID_PRONOUNS) + ") ) ";
    String filterPattern =
        "__ < " + invalidPronounPattern + " | < ( NP <" + invalidPronounPattern + ")";

    TregexPattern tregexPattern = TregexPattern.compile(filterPattern);
    TregexMatcher matcher = tregexPattern.matcher(phraseTree);

    if (matcher.matches()) {
      Proof proof = new Proof(ProofType.FAIL_PERSONAL_PRONOUN);
      Tree evdTree = matcher.getNode("prp");
      proof.setEvidenceTree(evdTree);
      phrase.addProof(proof);
      return false;
    }

    return true;
  }
    public String apply(TregexMatcher m) {
      String punc = m.getNode(key).value();
      String punctClass = PunctEquivalenceClasser.getPunctClass(punc);

      return punctClass.equals("") ? "" : annotationMark + punctClass;
    }