/** * Returns the positional index of the right edge of a tree <i>t</i> within a given root, as * defined by the size of the yield of all material preceding <i>t</i> plus all the material * contained in <i>t</i>. */ public static int rightEdge(Tree t, Tree root) { MutableInteger i = new MutableInteger(root.yield().size()); if (rightEdge(t, root, i)) { return i.intValue(); } else { throw new RuntimeException("Tree is not a descendent of root."); // return root.yield().size() + 1; } }
static boolean rightEdge(Tree t, Tree t1, MutableInteger i) { if (t == t1) { return true; } else if (t1.isLeaf()) { int j = t1.yield().size(); // so that empties don't add size i.set(i.intValue() - j); return false; } else { Tree[] kids = t1.children(); for (int j = kids.length - 1; j >= 0; j--) { if (rightEdge(t, kids[j], i)) { return true; } } return false; } }
static Tree getPreTerminal(Tree tree, MutableInteger i, int n) { if (i.intValue() == n) { if (tree.isPreTerminal()) { return tree; } else { return getPreTerminal(tree.children()[0], i, n); } } else { if (tree.isPreTerminal()) { i.set(i.intValue() + tree.yield().size()); return null; } else { Tree[] kids = tree.children(); for (int j = 0; j < kids.length; j++) { Tree result = getPreTerminal(kids[j], i, n); if (result != null) { return result; } } return null; } } }
/** * for testing -- CURRENTLY BROKEN!!! * * @param args input dir and output filename * @throws IOException */ public static void main(String[] args) throws IOException { if (args.length != 3) { throw new RuntimeException("args: treebankPath trainNums testNums"); } ChineseTreebankParserParams ctpp = new ChineseTreebankParserParams(); ctpp.charTags = true; // TODO: these options are getting clobbered by reading in the // parser object (unless it's a text file parser?) Options op = new Options(ctpp); op.doDep = false; op.testOptions.maxLength = 90; LexicalizedParser lp; try { FileFilter trainFilt = new NumberRangesFileFilter(args[1], false); lp = LexicalizedParser.trainFromTreebank(args[0], trainFilt, op); try { String filename = "chineseCharTagPCFG.ser.gz"; System.err.println("Writing parser in serialized format to file " + filename + ' '); System.err.flush(); ObjectOutputStream out = IOUtils.writeStreamFromString(filename); out.writeObject(lp); out.close(); System.err.println("done."); } catch (IOException ioe) { ioe.printStackTrace(); } } catch (IllegalArgumentException e) { lp = LexicalizedParser.loadModel(args[1], op); } FileFilter testFilt = new NumberRangesFileFilter(args[2], false); MemoryTreebank testTreebank = ctpp.memoryTreebank(); testTreebank.loadPath(new File(args[0]), testFilt); PrintWriter pw = new PrintWriter(new OutputStreamWriter(new FileOutputStream("out.chi"), "GB18030"), true); WordCatEquivalenceClasser eqclass = new WordCatEquivalenceClasser(); WordCatEqualityChecker eqcheck = new WordCatEqualityChecker(); EquivalenceClassEval eval = new EquivalenceClassEval(eqclass, eqcheck); // System.out.println("Preterminals:" + preterminals); System.out.println("Testing..."); for (Tree gold : testTreebank) { Tree tree; try { tree = lp.parseTree(gold.yieldHasWord()); if (tree == null) { System.out.println("Failed to parse " + gold.yieldHasWord()); continue; } } catch (Exception e) { e.printStackTrace(); continue; } gold = gold.firstChild(); pw.println(Sentence.listToString(gold.preTerminalYield())); pw.println(Sentence.listToString(gold.yield())); gold.pennPrint(pw); pw.println(tree.preTerminalYield()); pw.println(tree.yield()); tree.pennPrint(pw); // Collection allBrackets = WordCatConstituent.allBrackets(tree); // Collection goldBrackets = WordCatConstituent.allBrackets(gold); // eval.eval(allBrackets, goldBrackets); eval.displayLast(); } System.out.println(); System.out.println(); eval.display(); }