/**
   * Remove traces and non-terminal decorations (e.g., "-SUBJ" in "NP-SUBJ") from a Penn
   * Treebank-style tree.
   *
   * <p>The tree is modified in place, in three steps:
   *
   * <ol>
   *   <li>the root label is overwritten with {@code "ROOT"};
   *   <li>every node whose label matches {@code -NONE-} is pruned with a Tsurgeon
   *       {@code prune} operation;
   *   <li>every node that has at least one child and whose label contains an inner hyphen is
   *       relabeled with {@code tlp.basicCategory(...)} of its current label.
   * </ol>
   *
   * @param inputTree the tree to normalize; must not be {@code null}. It is mutated in place.
   */
  public void normalizeTree(Tree inputTree) {
    inputTree.label().setFromString("ROOT");

    // Step 1: prune empty (-NONE-) nodes.
    TregexPattern emptyNodePattern = TregexPatternFactory.getPattern("/\\-NONE\\-/=emptynode");
    List<TsurgeonPattern> pruneOperations = new ArrayList<TsurgeonPattern>();
    pruneOperations.add(Tsurgeon.parseOperation("prune emptynode"));

    List<Pair<TregexPattern, TsurgeonPattern>> ops =
        new ArrayList<Pair<TregexPattern, TsurgeonPattern>>();
    ops.add(
        new Pair<TregexPattern, TsurgeonPattern>(
            emptyNodePattern, Tsurgeon.collectOperations(pruneOperations)));
    // NOTE(review): the tree returned by processPatternsOnTree is discarded, as in the original;
    // this relies on the surgery being applied to inputTree itself -- confirm for the case
    // where the root would be pruned.
    Tsurgeon.processPatternsOnTree(ops, inputTree);

    // Step 2: strip decorations from labels of nodes that have children. Only the label text
    // changes, so the tree structure the matcher is walking is left intact.
    TregexPattern decoratedPattern =
        TregexPatternFactory.getPattern("/.+\\-.+/=nonterminal < __");
    TregexMatcher matcher = decoratedPattern.matcher(inputTree);
    while (matcher.find()) {
      Label nonterminalLabel = matcher.getNode("nonterminal");
      if (nonterminalLabel == null) {
        continue;
      }
      nonterminalLabel.setFromString(tlp.basicCategory(nonterminalLabel.value()));
    }
  }
예제 #2
0
 /**
  * Repeatedly matches a pattern against a tree and, for every match found, applies the surgical
  * operations contained in a {@link TsurgeonPattern}.
  *
  * <p>After each successful application a fresh matcher is created over the resulting tree, so
  * matching always restarts on the latest version of the tree. Processing stops when the
  * pattern no longer matches, or as soon as an operation yields {@code null}.
  *
  * @param matchPattern A {@link TregexPattern} to be matched against a {@link Tree}.
  * @param p A {@link TsurgeonPattern} to apply.
  * @param t the {@link Tree} to match against and perform surgery on.
  * @return the tree produced by the last applied operation ({@code t} itself if nothing
  *     matched), or {@code null} if an operation returned {@code null}.
  */
 public static Tree processPattern(TregexPattern matchPattern, TsurgeonPattern p, Tree t) {
   Tree current = t;
   for (TregexMatcher m = matchPattern.matcher(current);
       m.find();
       m = matchPattern.matcher(current)) {
     current = p.evaluate(current, m);
     if (current == null) {
       // Nothing left to match against; hand the null straight back to the caller.
       return null;
     }
   }
   return current;
 }
예제 #3
0
  /**
   * Checks a phrase against a Tregex pattern that looks for a {@code PRP} node, directly under
   * an {@code NP} (or under an {@code NP} nested in an {@code NP}), that does not dominate one of
   * the terms produced from {@code Rules.VALID_PRONOUNS}.
   *
   * <p>When the pattern matches, a {@code FAIL_PERSONAL_PRONOUN} proof carrying the node bound
   * to {@code prp} is attached to the phrase.
   *
   * @param phrase the phrase to check; may be {@code null}.
   * @return {@code false} if {@code phrase} or its tree is {@code null}, or if the pattern
   *     matches; {@code true} otherwise.
   */
  public static boolean filter(PhraseInfo phrase) {
    if (phrase == null) {
      return false;
    }
    if (phrase.getTree() == null) {
      return false;
    }
    Tree tree = phrase.getTree();

    // Shape of the assembled pattern (with VALID_PRONOUNS presumably rendered as it|them):
    // __ < ( NP < (PRP !< it|them) ) | < ( NP < ( NP < (PRP !< it|them) ) )
    String pronounClause =
        " ( NP < (PRP=prp !< " + Utils.wordsConjuction(Rules.VALID_PRONOUNS) + ") ) ";
    String fullPattern = "__ < " + pronounClause + " | < ( NP <" + pronounClause + ")";

    TregexMatcher treeMatcher = TregexPattern.compile(fullPattern).matcher(tree);

    // NOTE(review): matches() is used rather than find(); presumably only a match anchored at
    // the root of the phrase tree is wanted -- confirm.
    if (!treeMatcher.matches()) {
      return true;
    }

    Proof failure = new Proof(ProofType.FAIL_PERSONAL_PRONOUN);
    failure.setEvidenceTree(treeMatcher.getNode("prp"));
    phrase.addProof(failure);
    return false;
  }
예제 #4
0
 // todo: add an option to only print each tree once, regardless.  Most useful in conjunction
 // with -w
 /**
  * Processes one tree: bumps the tree counter, optionally echoes the tree, then runs the pattern
  * {@code p} over it and reports matches according to the configured print flags.
  *
  * <p>When {@code printNonMatchingTrees} is set, the tree is only tested for a single match: a
  * match increments {@code numMatches}, a miss prints the tree, and nothing else is reported.
  * Otherwise every match is counted and reported; with {@code oneMatchPerRootNode} set, a match
  * whose matched node is the same object as the previously reported one is skipped.
  *
  * @param t the tree to search.
  */
 public void visitTree(Tree t) {
   treeNumber++;
   if (printTree) {
     pw.print(treeNumber + ":");
     pw.println("Next tree read:");
     tp.printTree(t, pw);
   }

   TregexMatcher match = p.matcher(t);

   if (printNonMatchingTrees) {
     if (!match.find()) {
       tp.printTree(t, pw);
     } else {
       numMatches++;
     }
     return;
   }

   Tree previousRoot = null;
   while (match.find()) {
     Tree matchedNode = match.getMatch();

     if (oneMatchPerRootNode) {
       // Identity comparison: skip further matches rooted at the very same node.
       if (previousRoot == matchedNode) {
         continue;
       }
       previousRoot = matchedNode;
     }

     numMatches++;

     if (printFilename && treebank instanceof DiskTreebank) {
       pw.print("# ");
       pw.println(((DiskTreebank) treebank).getCurrentFilename());
     }

     if (printSubtreeCode) {
       pw.print(treeNumber);
       pw.print(':');
       pw.println(matchedNode.nodeNumber(t));
     }

     if (!printMatches) {
       continue;
     }

     if (reportTreeNumbers) {
       pw.print(treeNumber);
       pw.print(": ");
     }
     if (printTree) {
       pw.println("Found a full match:");
     }

     if (printWholeTree) {
       tp.printTree(t, pw);
     } else if (handles == null) {
       // No named handles requested: print the matched subtree itself.
       tp.printTree(matchedNode, pw);
     } else {
       if (printTree) {
         pw.println("Here's the node you were interested in:");
       }
       for (String handle : handles) {
         Tree labeledNode = match.getNode(handle);
         if (labeledNode != null) {
           tp.printTree(labeledNode, pw);
         } else {
           System.err.println(
               "Error!!  There is no matched node \""
                   + handle
                   + "\"!  Did you specify such a label in the pattern?");
         }
       }
     }
     // No extra blank line here: TreePrint already emits one.
   }
 }