/**
 * Applies each (pattern, operation) pair in {@code ops}, in list order, to the tree.
 * For a given pair, the operation is evaluated once per match and the pattern is then
 * re-matched from scratch against the resulting tree, until the pattern no longer matches.
 *
 * <p>Side effect: the static flag {@code matchedOnTree} is reset to {@code false} on entry
 * and set to {@code true} as soon as any pattern matches.
 *
 * @param ops the (match pattern, surgery operation) pairs to run, in order
 * @param t the tree to match against and operate on
 * @return the tree produced by the last evaluated operation (or {@code t} itself if nothing
 *     matched), or {@code null} as soon as an operation evaluates to {@code null}
 * @throws RuntimeException wrapping any {@link NullPointerException} raised while matching
 *     or evaluating a pair
 */
public static Tree processPatternsOnTree(List<Pair<TregexPattern, TsurgeonPattern>> ops, Tree t) {
  matchedOnTree = false;
  for (Pair<TregexPattern, TsurgeonPattern> op : ops) {
    try {
      final TregexPattern pattern = op.first();
      if (DEBUG) {
        System.err.println("Running pattern " + pattern);
      }
      final TsurgeonPattern surgery = op.second();
      // A fresh matcher is built after every evaluation because the operation may have
      // produced a different tree than the one the previous matcher was created for.
      for (TregexMatcher matcher = pattern.matcher(t); matcher.find(); matcher = pattern.matcher(t)) {
        matchedOnTree = true;
        t = surgery.evaluate(t, matcher);
        if (t == null) {
          return null;
        }
      }
    } catch (NullPointerException npe) {
      throw new RuntimeException(
          "Tsurgeon.processPatternsOnTree failed to match label for pattern: "
              + op.first() + ", " + op.second(),
          npe);
    }
  }
  return t;
}
/**
 * Remove traces and non-terminal decorations (e.g., "-SUBJ" in "NP-SUBJ") from a Penn
 * Treebank-style tree.
 *
 * <p>The tree is modified in place, in three steps:
 * <ol>
 *   <li>the root label is set to {@code "ROOT"};</li>
 *   <li>every node whose label matches {@code /\-NONE\-/} is removed with a Tsurgeon
 *       {@code prune} operation;</li>
 *   <li>every node that has a child and whose label contains a hyphen with at least one
 *       character on each side is relabeled with {@code tlp.basicCategory(...)} of its
 *       current value.</li>
 * </ol>
 *
 * @param inputTree the tree to normalize; its root label and descendants are mutated
 */
public void normalizeTree(Tree inputTree) {
  inputTree.label().setFromString("ROOT");

  // Step 1: prune empty (trace) nodes.
  TregexPattern emptyNodePattern = TregexPatternFactory.getPattern("/\\-NONE\\-/=emptynode");
  List<TsurgeonPattern> pruneOps = new ArrayList<TsurgeonPattern>();
  pruneOps.add(Tsurgeon.parseOperation("prune emptynode"));
  List<Pair<TregexPattern, TsurgeonPattern>> ops =
      new ArrayList<Pair<TregexPattern, TsurgeonPattern>>();
  ops.add(new Pair<TregexPattern, TsurgeonPattern>(
      emptyNodePattern, Tsurgeon.collectOperations(pruneOps)));
  // NOTE(review): the returned tree is discarded, as in the original code; this relies on
  // the prune operation editing inputTree in place. This method is void, so a replaced or
  // null root could not be reported to the caller anyway - confirm that is acceptable.
  Tsurgeon.processPatternsOnTree(ops, inputTree);

  // Step 2: reduce decorated non-terminal labels to their basic category.
  TregexPattern decoratedPattern = TregexPatternFactory.getPattern("/.+\\-.+/=nonterminal < __");
  TregexMatcher matcher = decoratedPattern.matcher(inputTree);
  while (matcher.find()) {
    Label nonterminalLabel = matcher.getNode("nonterminal");
    if (nonterminalLabel == null) {
      continue;
    }
    // tlp is declared outside this block; presumably a TreebankLanguagePack - TODO confirm.
    nonterminalLabel.setFromString(tlp.basicCategory(nonterminalLabel.value()));
  }
}
/**
 * Repeatedly matches a pattern against a tree and, for every match, applies the surgical
 * operations contained in a {@link TsurgeonPattern}. After each application the pattern is
 * matched afresh against the resulting tree, and the loop ends once no match is found.
 *
 * @param matchPattern A {@link TregexPattern} to be matched against a {@link Tree}.
 * @param p A {@link TsurgeonPattern} to apply on each match.
 * @param t the {@link Tree} to match against and perform surgery on.
 * @return the tree after all applications ({@code t} unchanged if the pattern never
 *     matched), or {@code null} if an application of {@code p} evaluated to {@code null}.
 */
public static Tree processPattern(TregexPattern matchPattern, TsurgeonPattern p, Tree t) {
  // The matcher is rebuilt after every evaluation so it always runs against the current tree.
  for (TregexMatcher matcher = matchPattern.matcher(t);
      matcher.find();
      matcher = matchPattern.matcher(t)) {
    t = p.evaluate(t, matcher);
    if (t == null) {
      return null;
    }
  }
  return t;
}