コード例 #1
0
ファイル: ParseToSentence.java プロジェクト: thuvh/dbparser
 /**
  * Reads parse trees either from standard input or a specified file, converting them to sentences
  * and printing those sentences on standard output.
  *
  * <pre>
  * usage: [-v|-help|-usage] [-tags] [filename]
  *         -v|-help|-usage: prints out this message
  *         -tags: indicates to spit out one S-expression per word, of the form
  *                 (word (tag))
  *         filename is the file to be processed (standard input is assumed if
  *                 this argument is "-" or is not present)
  * </pre>
  *
  * <p>If more than one filename argument is supplied, the last one is used. Input is decoded and
  * output is encoded using <code>Language.encoding()</code>.
  *
  * <p>Diagnostics are written to standard error so that they never mix with the sentences
  * written to standard output. If the input file cannot be opened, or if reading or converting
  * a tree fails, the sentences produced so far are flushed and the JVM exits with status
  * <code>-1</code>.
  *
  * @param args the command-line arguments described above
  */
 public static void main(String[] args) {
   boolean tags = false;
   String inFile = null;
   for (int i = 0; i < args.length; i++) {
     String arg = args[i];
     if (arg.equals("-help") || arg.equals("-usage") || arg.equals("-v")) {
       usage();
       return;
     } else if (arg.equals("-tags")) {
       tags = true;
     } else if (!arg.equals("-")) {
       // Any other argument is a filename; "-" keeps the standard-input default.
       inFile = arg;
     }
   }

   InputStream inStream = System.in;
   if (inFile != null) {
     try {
       inStream = new FileInputStream(inFile);
     } catch (FileNotFoundException fnfe) {
       System.err.println(fnfe);
       System.exit(-1);
     }
   }

   boolean failed = false;
   PrintWriter pw = null;
   try {
     SexpTokenizer tok = new SexpTokenizer(inStream, Language.encoding(), bufSize);
     Writer writer = new BufferedWriter(new OutputStreamWriter(System.out, Language.encoding()));
     pw = new PrintWriter(writer);
     Sexp curr = null;
     while ((curr = Sexp.read(tok)) != null) {
       pw.println(tags ? Util.collectTaggedWords(curr) : Util.collectLeaves(curr));
     }
   } catch (Exception e) {
     // Report on stderr: stdout carries the sentence data and must stay clean.
     System.err.println(e);
     failed = true;
   } finally {
     // Closing the writer flushes it, so sentences produced before a failure are not lost.
     if (pw != null) {
       pw.close();
     }
     // Only close a stream this method opened; System.in is left alone.
     if (inFile != null) {
       try {
         inStream.close();
       } catch (java.io.IOException ioe) {
         System.err.println(ioe);
       }
     }
   }
   // Exit only after the finally block has run, mirroring the status used when the
   // input file cannot be opened.
   if (failed) {
     System.exit(-1);
   }
 }
コード例 #2
0
ファイル: Treebank.java プロジェクト: divoxx/porser
 /**
  * Tests whether the part-of-speech tag of <code>preterminal</code> is a verb tag.
  *
  * <p>The first element of the preterminal's list is taken as its tag and passed to
  * <code>isVerbTag</code>, which decides the result (documented for this class as the tags
  * <tt>VB, VBD, VBG, VBN, VBP</tt> and <tt>VBZ</tt>). No shape check is performed here, so it is
  * an error to pass a <code>Sexp</code> for which {@link #isPreterminal(Sexp)} returns
  * <code>false</code>.
  *
  * @param preterminal the preterminal to test
  * @return <code>true</code> if the tag of <code>preterminal</code> is a verb tag
  */
 public boolean isVerb(Sexp preterminal) {
   Sexp tagNode = preterminal.list().get(0);
   return isVerbTag(tagNode.symbol());
 }
コード例 #3
0
ファイル: Treebank.java プロジェクト: divoxx/porser
 /**
  * Tests whether <code>tree</code> is a preterminal whose tag is the possessive part of speech.
  * This method is intended to be used by implementations of {@link Training#addBaseNPs(Sexp)}.
  *
  * @param tree the S-expression to test
  * @return <code>true</code> if {@link #isPreterminal(Sexp)} accepts <code>tree</code> and its
  *     first element's symbol is the <code>possessivePos</code> object
  */
 public boolean isPossessivePreterminal(Sexp tree) {
   if (!isPreterminal(tree)) {
     return false;
   }
   Sexp tagNode = tree.list().get(0);
   // Reference comparison, exactly as before; presumably symbols are unique
   // per name -- confirm against the Symbol class.
   return tagNode.symbol() == possessivePos;
 }
コード例 #4
0
ファイル: Treebank.java プロジェクト: divoxx/porser
 /**
  * Tests whether <code>preterm</code> is a preterminal whose tag belongs to the
  * <code>puncToRaise</code> collection (documented for this class as the parts of speech
  * <code>&quot;,&quot;</code> and <code>&quot;:&quot;</code>).
  *
  * @param preterm the S-expression to test
  * @return <code>true</code> if {@link #isPreterminal(Sexp)} accepts <code>preterm</code> and
  *     <code>puncToRaise</code> contains the first element of its list
  */
 public boolean isPuncToRaise(Sexp preterm) {
   if (!isPreterminal(preterm)) {
     return false;
   }
   return puncToRaise.contains(preterm.list().first());
 }
コード例 #5
0
ファイル: Treebank.java プロジェクト: divoxx/porser
 /**
  * Tests whether <code>tree</code> is a preterminal marking a null element, i.e. one whose tag
  * is the <code>nullElementPreterminal</code> symbol (documented for this class as
  * <code>&quot;-NONE-&quot;</code> for the Penn Treebank).
  *
  * @param tree the S-expression to test
  * @return <code>true</code> if {@link #isPreterminal(Sexp)} accepts <code>tree</code> and its
  *     first element's symbol is the <code>nullElementPreterminal</code> object
  * @see Training#relabelSubjectlessSentences(Sexp)
  */
 public boolean isNullElementPreterminal(Sexp tree) {
   if (!isPreterminal(tree)) {
     return false;
   }
   Sexp tagNode = tree.list().get(0);
   // Reference comparison, exactly as before; presumably symbols are unique
   // per name -- confirm against the Symbol class.
   return tagNode.symbol() == nullElementPreterminal;
 }
コード例 #6
0
ファイル: Treebank.java プロジェクト: divoxx/porser
 /**
  * Tests whether <code>tree</code> has the shape of a preterminal subtree (part-of-speech tag and
  * word). Specifically, this method returns <code>true</code> only if <code>tree</code> is a
  * list, that list has a length of exactly 2, and both of its elements are symbols.
  *
  * @param tree the S-expression to test
  * @return <code>true</code> if <code>tree</code> is a two-element list of symbols
  */
 public final boolean isPreterminal(Sexp tree) {
   // Each check guards the next one: list() is only consulted once isList() holds,
   // and the elements are only fetched once the length is known to be 2.
   if (!tree.isList()) {
     return false;
   }
   if (tree.list().length() != 2) {
     return false;
   }
   if (!tree.list().get(0).isSymbol()) {
     return false;
   }
   return tree.list().get(1).isSymbol();
 }