/**
 * Applies each (pattern, surgery) pair in {@code ops}, in list order, to the tree {@code t}.
 * For a given pair the surgery is re-applied for as long as the pattern keeps matching; a
 * fresh matcher is built after every application because the tree may have been replaced.
 *
 * <p>Side effect: the static flag {@code matchedOnTree} is reset to {@code false} on entry and
 * set to {@code true} as soon as any pattern matches.
 *
 * @param ops the ordered list of pattern/operation pairs to run
 * @param t the tree to match against and operate on
 * @return the resulting tree, or {@code null} as soon as an operation evaluates to {@code null}
 * @throws RuntimeException wrapping any {@link NullPointerException} raised while matching or
 *     evaluating a pair
 */
public static Tree processPatternsOnTree(List<Pair<TregexPattern, TsurgeonPattern>> ops, Tree t) {
  matchedOnTree = false;
  for (Pair<TregexPattern, TsurgeonPattern> op : ops) {
    try {
      TregexPattern pattern = op.first();
      if (DEBUG) {
        System.err.println("Running pattern " + pattern);
      }
      TsurgeonPattern surgery = op.second();
      for (TregexMatcher matcher = pattern.matcher(t);
          matcher.find();
          matcher = pattern.matcher(t)) {
        matchedOnTree = true;
        t = surgery.evaluate(t, matcher);
        if (t == null) {
          // Nothing left to operate on; remaining pairs are skipped.
          return null;
        }
      }
    } catch (NullPointerException npe) {
      throw new RuntimeException(
          "Tsurgeon.processPatternsOnTree failed to match label for pattern: "
              + op.first()
              + ", "
              + op.second(),
          npe);
    }
  }
  return t;
}
/**
 * Remove traces and non-terminal decorations (e.g., "-SUBJ" in "NP-SUBJ") from a Penn
 * Treebank-style tree.
 *
 * <p>The root label is first reset to "ROOT". Nodes whose label matches {@code -NONE-} are then
 * pruned, and finally every non-leaf node whose label contains a hyphen is relabelled with
 * {@code tlp.basicCategory(...)} of its current value.
 *
 * @param inputTree the tree to normalize; it is dereferenced immediately, so it and its root
 *     label must be non-null
 */
public void normalizeTree(Tree inputTree) {
  inputTree.label().setFromString("ROOT");

  // Phase 1: prune empty elements.
  TregexPattern emptyNodePattern = TregexPatternFactory.getPattern("/\\-NONE\\-/=emptynode");
  List<TsurgeonPattern> pruneOperations = new ArrayList<TsurgeonPattern>();
  pruneOperations.add(Tsurgeon.parseOperation("prune emptynode"));
  List<Pair<TregexPattern, TsurgeonPattern>> ops =
      new ArrayList<Pair<TregexPattern, TsurgeonPattern>>();
  ops.add(
      new Pair<TregexPattern, TsurgeonPattern>(
          emptyNodePattern, Tsurgeon.collectOperations(pruneOperations)));
  // NOTE(review): the returned tree is discarded, so this relies on the pruning being visible
  // through inputTree itself — confirm for the case where the whole tree is pruned away.
  Tsurgeon.processPatternsOnTree(ops, inputTree);

  // Phase 2: strip decorations from hyphenated labels on nodes that have at least one child.
  TregexPattern decoratedPattern =
      TregexPatternFactory.getPattern("/.+\\-.+/=nonterminal < __");
  TregexMatcher matcher = decoratedPattern.matcher(inputTree);
  while (matcher.find()) {
    // NOTE(review): the matched node itself is used as the Label here, whereas the root above
    // goes through label(); verify that setFromString on the node is supported by the
    // Stanford version in use.
    Label nonterminalLabel = matcher.getNode("nonterminal");
    if (nonterminalLabel == null) {
      continue;
    }
    nonterminalLabel.setFromString(tlp.basicCategory(nonterminalLabel.value()));
  }
}
/**
 * Repeatedly matches a pattern against a tree and, for every match found, applies the surgical
 * operations contained in a {@link TsurgeonPattern}. A new matcher is created after each
 * application so that matching always runs against the most recent tree.
 *
 * @param matchPattern A {@link TregexPattern} to be matched against a {@link Tree}.
 * @param p A {@link TsurgeonPattern} to apply on each match.
 * @param t the {@link Tree} to match against and perform surgery on.
 * @return the tree after all applications, or {@code null} if an application evaluated to
 *     {@code null}.
 */
public static Tree processPattern(TregexPattern matchPattern, TsurgeonPattern p, Tree t) {
  Tree current = t;
  for (TregexMatcher matcher = matchPattern.matcher(current);
      matcher.find();
      matcher = matchPattern.matcher(current)) {
    current = p.evaluate(current, matcher);
    if (current == null) {
      return null;
    }
  }
  return current;
}
public static boolean filter(PhraseInfo phrase) { if (phrase == null || phrase.getTree() == null) return false; Tree phraseTree = phrase.getTree(); // __ < ( NP < (PRP !< it|them) ) | < ( NP < ( NP < (PRP !< it|them) ) ) String invalidPronounPattern = " ( NP < (PRP=prp !< " + Utils.wordsConjuction(Rules.VALID_PRONOUNS) + ") ) "; String filterPattern = "__ < " + invalidPronounPattern + " | < ( NP <" + invalidPronounPattern + ")"; TregexPattern tregexPattern = TregexPattern.compile(filterPattern); TregexMatcher matcher = tregexPattern.matcher(phraseTree); if (matcher.matches()) { Proof proof = new Proof(ProofType.FAIL_PERSONAL_PRONOUN); Tree evdTree = matcher.getNode("prp"); proof.setEvidenceTree(evdTree); phrase.addProof(proof); return false; } return true; }
/**
 * Builds an annotation string for the node that the matcher bound to {@code key}.
 *
 * @param m the matcher from which the node named {@code key} is read
 * @return {@code annotationMark} followed by the punctuation class of the node's value, or the
 *     empty string when {@code PunctEquivalenceClasser.getPunctClass} returns an empty class
 */
public String apply(TregexMatcher m) {
  String nodeValue = m.getNode(key).value();
  String punctClass = PunctEquivalenceClasser.getPunctClass(nodeValue);
  if (punctClass.length() == 0) {
    return "";
  }
  return annotationMark + punctClass;
}