/**
 * Removes traces and non-terminal decorations (e.g., "-SUBJ" in "NP-SUBJ") from a Penn
 * Treebank-style tree.
 *
 * <p>Three things happen, in this order:
 *
 * <ol>
 *   <li>the label of {@code inputTree} is overwritten with {@code "ROOT"};
 *   <li>every node matching {@code /\-NONE\-/} is handed to the Tsurgeon {@code prune} operation;
 *   <li>every node that has at least one child and whose label contains a hyphen with at least
 *       one character on each side is relabeled with {@code tlp.basicCategory(...)} of its
 *       current value.
 * </ol>
 *
 * <p>The value returned by {@code Tsurgeon.processPatternsOnTree} is discarded, so callers only
 * observe changes that were applied to {@code inputTree} itself.
 *
 * @param inputTree the tree to normalize; it is modified by this call and must not be
 *     {@code null}
 */
public void normalizeTree(Tree inputTree) {
  inputTree.label().setFromString("ROOT");

  // Step 1: prune empty (-NONE-) nodes. The pattern is looked up once; no matcher is needed
  // here because Tsurgeon performs the matching itself.
  TregexPattern emptyNodePattern = TregexPatternFactory.getPattern("/\\-NONE\\-/=emptynode");

  List<TsurgeonPattern> pruneSteps = new ArrayList<TsurgeonPattern>();
  pruneSteps.add(Tsurgeon.parseOperation("prune emptynode"));
  TsurgeonPattern pruneEmptyNodes = Tsurgeon.collectOperations(pruneSteps);

  List<Pair<TregexPattern, TsurgeonPattern>> ops =
      new ArrayList<Pair<TregexPattern, TsurgeonPattern>>();
  ops.add(new Pair<TregexPattern, TsurgeonPattern>(emptyNodePattern, pruneEmptyNodes));
  Tsurgeon.processPatternsOnTree(ops, inputTree);

  // Step 2: strip functional decorations from the labels of nodes that have children.
  TregexPattern decoratedPattern =
      TregexPatternFactory.getPattern("/.+\\-.+/=nonterminal < __");
  TregexMatcher matcher = decoratedPattern.matcher(inputTree);
  while (matcher.find()) {
    Label nonterminalLabel = matcher.getNode("nonterminal");
    if (nonterminalLabel == null) {
      continue;
    }
    nonterminalLabel.setFromString(tlp.basicCategory(nonterminalLabel.value()));
  }
}
/**
 * Repeatedly matches a pattern against a tree and, for each match, applies the surgical
 * operations contained in a {@link TsurgeonPattern}.
 *
 * <p>After every successful operation a fresh matcher is created on the resulting tree, so
 * matching always restarts from the current state of the tree. The loop ends when the pattern no
 * longer matches, or as soon as an operation yields {@code null}.
 *
 * @param matchPattern A {@link TregexPattern} to be matched against a {@link Tree}.
 * @param p A {@link TsurgeonPattern} to apply.
 * @param t the {@link Tree} to match against and perform surgery on.
 * @return the tree produced by the last operation applied ({@code t} itself if nothing matched);
 *     {@code null} if an operation evaluated to {@code null}.
 */
public static Tree processPattern(TregexPattern matchPattern, TsurgeonPattern p, Tree t) {
  Tree current = t;
  for (TregexMatcher matcher = matchPattern.matcher(current);
      matcher.find();
      matcher = matchPattern.matcher(current)) {
    current = p.evaluate(current, matcher);
    if (current == null) {
      // Nothing left to match against.
      return null;
    }
  }
  return current;
}
public static boolean filter(PhraseInfo phrase) { if (phrase == null || phrase.getTree() == null) return false; Tree phraseTree = phrase.getTree(); // __ < ( NP < (PRP !< it|them) ) | < ( NP < ( NP < (PRP !< it|them) ) ) String invalidPronounPattern = " ( NP < (PRP=prp !< " + Utils.wordsConjuction(Rules.VALID_PRONOUNS) + ") ) "; String filterPattern = "__ < " + invalidPronounPattern + " | < ( NP <" + invalidPronounPattern + ")"; TregexPattern tregexPattern = TregexPattern.compile(filterPattern); TregexMatcher matcher = tregexPattern.matcher(phraseTree); if (matcher.matches()) { Proof proof = new Proof(ProofType.FAIL_PERSONAL_PRONOUN); Tree evdTree = matcher.getNode("prp"); proof.setEvidenceTree(evdTree); phrase.addProof(proof); return false; } return true; }
// todo: add an option to only print each tree once, regardless. Most useful in conjunction // with -w public void visitTree(Tree t) { treeNumber++; if (printTree) { pw.print(treeNumber + ":"); pw.println("Next tree read:"); tp.printTree(t, pw); } TregexMatcher match = p.matcher(t); if (printNonMatchingTrees) { if (match.find()) numMatches++; else tp.printTree(t, pw); return; } Tree lastMatchingRootNode = null; while (match.find()) { if (oneMatchPerRootNode) { if (lastMatchingRootNode == match.getMatch()) continue; else lastMatchingRootNode = match.getMatch(); } numMatches++; if (printFilename && treebank instanceof DiskTreebank) { DiskTreebank dtb = (DiskTreebank) treebank; pw.print("# "); pw.println(dtb.getCurrentFilename()); } if (printSubtreeCode) { pw.print(treeNumber); pw.print(':'); pw.println(match.getMatch().nodeNumber(t)); } if (printMatches) { if (reportTreeNumbers) { pw.print(treeNumber); pw.print(": "); } if (printTree) { pw.println("Found a full match:"); } if (printWholeTree) { tp.printTree(t, pw); } else if (handles != null) { if (printTree) { pw.println("Here's the node you were interested in:"); } for (String handle : handles) { Tree labeledNode = match.getNode(handle); if (labeledNode == null) { System.err.println( "Error!! There is no matched node \"" + handle + "\"! Did you specify such a label in the pattern?"); } else { tp.printTree(labeledNode, pw); } } } else { tp.printTree(match.getMatch(), pw); } // pw.println(); // TreePrint already puts a blank line in } // end if (printMatches) } // end while match.find() } // end visitTree