/**
 * Reads parse trees either from standard input or a specified file, converting them to sentences
 * and printing those sentences on standard output.
 *
 * <pre>
 * usage: [-v|-help|-usage] [-tags] [filename]
 *         -v|-help|-usage: prints out this message
 *         -tags: indicates to spit out one S-expression per word, of the form
 *                (word (tag))
 *         filename is the file to be processed (standard input is assumed if
 *                this argument is "-" or is not present)
 * </pre>
 *
 * <p>If several filenames are given, the last one wins; a <code>"-"</code> argument is simply
 * skipped and does not cancel a filename given elsewhere on the command line.
 *
 * <p>Any failure (unreadable input file, or an exception while reading or writing trees) is
 * reported on standard error and the JVM exits with status <code>-1</code>, so diagnostics never
 * end up mixed into the sentence output. Sentences converted before a failure are still flushed
 * to standard output.
 *
 * @param args the command-line arguments, as described above
 */
public static void main(String[] args) {
  InputStream inStream = System.in;
  boolean tags = false;
  String inFile = null;
  for (int i = 0; i < args.length; i++) {
    if (args[i].equals("-help") || args[i].equals("-usage") || args[i].equals("-v")) {
      usage();
      return;
    } else if (args[i].equals("-tags")) {
      tags = true;
    } else if (!args[i].equals("-")) {
      inFile = args[i];
    }
  }
  if (inFile != null) {
    try {
      inStream = new FileInputStream(inFile);
    } catch (FileNotFoundException fnfe) {
      System.err.println(fnfe);
      System.exit(-1);
    }
  }

  boolean failed = false;
  PrintWriter pw = null;
  try {
    SexpTokenizer tok = new SexpTokenizer(inStream, Language.encoding(), bufSize);
    OutputStream os = System.out;
    Writer writer = new BufferedWriter(new OutputStreamWriter(os, Language.encoding()));
    pw = new PrintWriter(writer);
    Sexp curr = null;
    while ((curr = Sexp.read(tok)) != null) {
      pw.println(tags ? Util.collectTaggedWords(curr) : Util.collectLeaves(curr));
    }
  } catch (Exception e) {
    // Report on stderr (not stdout) so the message cannot be mistaken for a sentence.
    System.err.println(e);
    failed = true;
  } finally {
    // close() flushes first, so output produced before a failure is not lost.
    if (pw != null) {
      pw.close();
    }
    // Only a stream this method opened is closed; System.in is left alone.
    if (inFile != null) {
      try {
        inStream.close();
      } catch (java.io.IOException ioe) {
        System.err.println(ioe);
      }
    }
  }
  if (failed) {
    // Same exit status as the file-not-found path above.
    System.exit(-1);
  }
}
/**
 * Tests whether a preterminal carries a verb part of speech, i.e., one of
 * <tt>VB, VBD, VBG, VBN, VBP</tt> or <tt>VBZ</tt>. The decision is delegated to
 * <code>isVerbTag</code>, which is handed the symbol found in the first position of the
 * preterminal's list.
 *
 * <p>It is an error to call this method with a <code>Sexp</code> object for which {@link
 * #isPreterminal(Sexp)} returns <code>false</code>; no such check is performed here.
 *
 * @param preterminal the preterminal to test
 * @return <code>true</code> if <code>preterminal</code> is a verb
 */
public boolean isVerb(Sexp preterminal) {
  // The first list element holds the part-of-speech tag.
  Sexp tagElement = preterminal.list().get(0);
  return isVerbTag(tagElement.symbol());
}
/**
 * Tests whether the specified S-expression is a preterminal whose part of speech is the
 * possessive part of speech. This method is intended to be used by implementations of {@link
 * Training#addBaseNPs(Sexp)}.
 *
 * @param tree the S-expression to test
 * @return <code>true</code> if <code>tree</code> satisfies {@link #isPreterminal(Sexp)} and the
 *     symbol of its first element is the very same object as <code>possessivePos</code>
 */
public boolean isPossessivePreterminal(Sexp tree) {
  if (!isPreterminal(tree)) {
    return false;
  }
  Sexp tagElement = tree.list().get(0);
  // Reference comparison on purpose; presumably symbols are interned -- TODO confirm.
  return tagElement.symbol() == possessivePos;
}
/**
 * Tests whether the specified S-expression is a preterminal whose part of speech belongs to the
 * <code>puncToRaise</code> collection (documented as the parts of speech <code>","</code> and
 * <code>":"</code>).
 *
 * @param preterm the S-expression to test
 * @return <code>true</code> if <code>preterm</code> satisfies {@link #isPreterminal(Sexp)} and
 *     the first element of its list is contained in <code>puncToRaise</code>
 */
public boolean isPuncToRaise(Sexp preterm) {
  if (!isPreterminal(preterm)) {
    return false;
  }
  return puncToRaise.contains(preterm.list().first());
}
/**
 * Tests whether the specified S-expression is a preterminal marking the null element
 * (<code>"-NONE-"</code>) of the Penn Treebank.
 *
 * @param tree the S-expression to test
 * @return <code>true</code> if <code>tree</code> satisfies {@link #isPreterminal(Sexp)} and the
 *     symbol of its first element is the very same object as
 *     <code>nullElementPreterminal</code>
 * @see Training#relabelSubjectlessSentences(Sexp)
 */
public boolean isNullElementPreterminal(Sexp tree) {
  if (!isPreterminal(tree)) {
    return false;
  }
  Sexp tagElement = tree.list().get(0);
  // Reference comparison on purpose; presumably symbols are interned -- TODO confirm.
  return tagElement.symbol() == nullElementPreterminal;
}
/**
 * Tests whether <code>tree</code> represents a preterminal subtree (part-of-speech tag and
 * word). Specifically, this method returns <code>true</code> only when <code>tree</code> is a
 * list of length 2 and <em>both</em> of its elements are symbols.
 *
 * @param tree the S-expression to test
 * @return <code>true</code> if <code>tree</code> is a two-element list of symbols
 */
public final boolean isPreterminal(Sexp tree) {
  // Checks run in this order so that list() is never reached for a non-list and
  // get(1) is never reached for a list that is not exactly two elements long.
  if (!tree.isList()) {
    return false;
  }
  if (tree.list().length() != 2) {
    return false;
  }
  if (!tree.list().get(0).isSymbol()) {
    return false;
  }
  return tree.list().get(1).isSymbol();
}