/**
 * Builds an {@code INTERSECTION} expression from two operands.
 *
 * <p>A new {@code Sexp} headed by {@code INTERSECTION} is created and {@code lhs} is added to
 * it. If {@code rhs} is itself headed by {@code INTERSECTION}, every value of {@code rhs} after
 * its first one is added to the new expression, so the result is flattened rather than nested;
 * otherwise {@code rhs} is added as a single operand.
 *
 * @param lhs the left operand, added as-is
 * @param rhs the right operand; flattened when it is already an intersection
 * @return the newly built intersection expression
 */
private static Sexp intersection(Sexp lhs, Sexp rhs) {
  Sexp intersectionSexp = new Sexp(INTERSECTION).add(lhs);
  // An expression without a head (first() == null) is treated as a plain operand instead of
  // failing with a NullPointerException.
  if (rhs.first() != null && rhs.first().equals(INTERSECTION)) {
    // Drop index 0 (the INTERSECTION head) and splice the remaining values in.
    intersectionSexp.addAll(ArrayUtils.remove(rhs.getValues().toArray(), 0));
  } else {
    intersectionSexp.add(rhs);
  }
  return intersectionSexp;
}
/**
 * Applies a quantifier to the right-hand side of an expression.
 *
 * <p>If {@code sexp} is headed by {@code UNION}, its last value is removed, quantified and added
 * back to {@code sexp} (which is therefore modified in place). Otherwise the whole expression is
 * quantified.
 *
 * @param sexp the expression to quantify
 * @param quantifier the quantifier to apply
 * @return the result of the {@code add} call in the union case, otherwise the quantified
 *     expression
 */
private static Sexp quantifyRhs(Sexp sexp, Quantifier quantifier) {
  boolean headedByUnion = sexp.first() != null && sexp.first().equals(UNION);
  if (!headedByUnion) {
    return quantify(sexp, quantifier);
  }

  // Only the last alternative of the union receives the quantifier.
  Sexp lastAlternative = (Sexp) sexp.removeLast();
  return sexp.add(quantify(lastAlternative, quantifier));
}
/**
 * Combines expressions into a {@code UNION} expression.
 *
 * <ul>
 *   <li>Leading {@code null} operands are skipped: while the left operand is {@code null}, the
 *       first element of {@code rhs} takes its place.
 *   <li>If no right-hand operands remain, the left operand is returned unchanged.
 *   <li>If the left operand is already headed by {@code UNION}, the right-hand operands are
 *       added to it in place and it is returned.
 *   <li>Otherwise a new {@code UNION} expression holding all operands is created.
 * </ul>
 *
 * @param lhs the left operand, may be {@code null}
 * @param rhs the remaining operands
 * @return the union expression, or the single remaining operand
 */
private static Sexp union(Sexp lhs, Sexp... rhs) {
  Sexp head = lhs;
  Sexp[] rest = rhs;

  // Promote operands from the right until a non-null left operand is found.
  while (head == null) {
    head = rest[0];
    rest = (Sexp[]) ArrayUtils.remove(rest, 0);
  }

  if (ArrayUtils.isEmpty(rest)) {
    return head;
  }

  boolean headIsUnion = head.first() != null && head.first().equals(UNION);
  if (!headIsUnion) {
    return new Sexp(UNION).add(head).addAll(rest);
  }

  // Extend the existing union in place instead of nesting a new one.
  for (Sexp operand : rest) {
    head.add(operand);
  }
  return head;
}
/**
 * Reads parse trees either from standard input or a specified file, converting them to sentences
 * and printing those sentences on standard output. Diagnostics are written to standard error so
 * they never mix with the converted sentences.
 *
 * <pre>
 * usage: [-v|-help|-usage] [-tags] [filename]
 *   -v|-help|-usage: prints out this message
 *   -tags: indicates to spit out one S-expression per word, of the form
 *          (word (tag))
 *   filename is the file to be processed (standard input is assumed if
 *          this argument is "-" or is not present)
 * </pre>
 *
 * <p>If several file names are given, the last one is used. If the file cannot be opened the
 * process exits with status {@code -1}.
 *
 * @param args the command-line arguments described above
 */
public static void main(String[] args) {
  boolean tags = false;
  String inFile = null;
  for (int i = 0; i < args.length; i++) {
    String arg = args[i];
    if (arg.equals("-help") || arg.equals("-usage") || arg.equals("-v")) {
      usage();
      return;
    } else if (arg.equals("-tags")) {
      tags = true;
    } else if (!arg.equals("-")) {
      inFile = arg;
    }
  }

  InputStream inStream = System.in;
  if (inFile != null) {
    try {
      inStream = new FileInputStream(inFile);
    } catch (FileNotFoundException fnfe) {
      System.err.println(fnfe);
      System.exit(-1);
    }
  }

  PrintWriter pw = null;
  try {
    SexpTokenizer tok = new SexpTokenizer(inStream, Language.encoding(), bufSize);
    Writer writer = new BufferedWriter(new OutputStreamWriter(System.out, Language.encoding()));
    pw = new PrintWriter(writer);
    Sexp curr = null;
    while ((curr = Sexp.read(tok)) != null) {
      pw.println(tags ? Util.collectTaggedWords(curr) : Util.collectLeaves(curr));
    }
  } catch (Exception e) {
    // Report on stderr: stdout carries the converted sentences.
    System.err.println(e);
  } finally {
    // Closing flushes the buffer, so sentences converted before a failure are still emitted.
    if (pw != null) {
      pw.close();
    }
    // Release the file handle; standard input is left alone.
    if (inStream != System.in) {
      try {
        inStream.close();
      } catch (java.io.IOException ignored) {
        // Nothing useful can be done about a failed close at program exit.
      }
    }
  }
}
/**
 * Matches the last {@code x-y} range in a character-class body: group 1 is the text before the
 * range, groups 2 and 3 are the two endpoints, group 4 is the text after it. Compiled once
 * because {@link #parseCharacterClass(String, Sexp)} is recursive.
 */
private static final Pattern CHARACTER_RANGE = Pattern.compile("(.*)(.)-(.)(.*)");

/**
 * Parses the body of a character class and adds its members to {@code sexp}.
 *
 * <p>If {@code pattern} contains a range of the form {@code x-y}, the text before the range is
 * parsed recursively, then a {@code RANGE} expression holding the two endpoint literals is
 * added, then the text after the range is parsed recursively. A pattern without a range
 * contributes one literal per character; an empty pattern contributes nothing.
 *
 * <p>The endpoints are taken from their own capture groups rather than by splitting on
 * {@code '-'}, so ranges whose endpoint is itself a hyphen (for example {@code "+--"}) are
 * handled instead of failing.
 *
 * @param pattern the character-class body to parse
 * @param sexp the expression that receives the parsed members
 * @return {@code sexp}, for chaining
 */
private static Sexp parseCharacterClass(String pattern, Sexp sexp) {
  Matcher matcher = CHARACTER_RANGE.matcher(pattern);
  if (matcher.matches()) {
    parseCharacterClass(matcher.group(1), sexp);
    sexp.add(new Sexp(RANGE).add(literal(matcher.group(2))).add(literal(matcher.group(3))));
    parseCharacterClass(matcher.group(4), sexp);
  } else if (!pattern.isEmpty()) {
    // Walk the code points directly: String.split("") yields a leading empty token on
    // pre-Java-8 runtimes, which would add a spurious empty literal.
    for (int i = 0; i < pattern.length(); ) {
      int codePoint = pattern.codePointAt(i);
      sexp.add(literal(new String(Character.toChars(codePoint))));
      i += Character.charCount(codePoint);
    }
  }
  return sexp;
}
protected void compile() { Sexp curClause; goal = new NilSexp(); prog = new NilSexp(); prog2 = new NilSexp(); String codice = code.getText(); try { ProParser p = new ProParser(codice, intmsg); for (; ; ) { curClause = p.getClause(); // seleziono una singola clausola intmsg.append("compiled: " + curClause + "\n"); if (!(curClause instanceof eofToken)) { prog = Sexp.append(prog, Sexp.list1(curClause)); } if (p.atEOF()) { break; } } String codice2 = code.getText(); ProParser p2 = new ProParser(codice2, intmsg); for (; ; ) { curClause = p2.getClause(); // seleziono una singola clausola // intmsg.append( "compiled: " + curClause + "\n" ); if (!(curClause instanceof eofToken)) { prog2 = Sexp.append(prog2, Sexp.list1(curClause)); } if (p2.atEOF()) { break; } } showProg(prog, outp); // showProg( prog2, outp ); } catch (Exception e) { outp.append("error" + e + " \n"); } }
/**
 * Tests whether a preterminal is a verb.
 *
 * <p>The first element of <code>preterminal</code> is taken as the part-of-speech tag and
 * passed to <code>isVerbTag</code>. According to the original documentation, the verb tags are
 * <tt>VB, VBD, VBG, VBN, VBP</tt> and <tt>VBZ</tt>. It is an error to call this method with a
 * <code>Sexp</code> object for which {@link #isPreterminal(Sexp)} returns <code>false</code>.
 *
 * @param preterminal the preterminal to test
 * @return <code>true</code> if <code>preterminal</code> is a verb
 */
public boolean isVerb(Sexp preterminal) {
  Sexp tag = preterminal.list().get(0);
  return isVerbTag(tag.symbol());
}
/**
 * Tests whether the specified S-expression is a preterminal whose part-of-speech tag is the
 * possessive part of speech. This method is intended to be used by implementations of {@link
 * Training#addBaseNPs(Sexp)}.
 *
 * @param tree the S-expression to test
 * @return <code>true</code> if <code>tree</code> is a preterminal and its first element is the
 *     possessive part-of-speech symbol
 */
public boolean isPossessivePreterminal(Sexp tree) {
  if (!isPreterminal(tree)) {
    return false;
  }
  // Symbols are compared by identity here; presumably they are unique per name -- confirm.
  return tree.list().get(0).symbol() == possessivePos;
}
/**
 * Tests whether the specified S-expression is a preterminal whose part of speech belongs to the
 * set of punctuation tags to raise (per the original documentation, <code>","</code> or
 * <code>":"</code>).
 *
 * @param preterm the S-expression to test
 * @return <code>true</code> if <code>preterm</code> is a preterminal and its first element is
 *     contained in <code>puncToRaise</code>
 */
public boolean isPuncToRaise(Sexp preterm) {
  if (!isPreterminal(preterm)) {
    return false;
  }
  return puncToRaise.contains(preterm.list().first());
}
/**
 * Tests whether the specified S-expression is a null-element preterminal, i.e. a preterminal
 * whose first element is the null-element symbol (<code>"-NONE-"</code> for the Penn Treebank,
 * per the original documentation).
 *
 * @param tree the S-expression to test
 * @return <code>true</code> if <code>tree</code> is a preterminal and its first element is the
 *     null-element preterminal symbol
 * @see Training#relabelSubjectlessSentences(Sexp)
 */
public boolean isNullElementPreterminal(Sexp tree) {
  if (!isPreterminal(tree)) {
    return false;
  }
  // Symbols are compared by identity here; presumably they are unique per name -- confirm.
  return tree.list().get(0).symbol() == nullElementPreterminal;
}
/**
 * Tests whether <code>tree</code> represents a preterminal subtree (part-of-speech tag and
 * word). Specifically, this method returns <code>true</code> only if <code>tree</code> is a
 * list of length 2 and both of its elements are symbols.
 *
 * @param tree the S-expression to test
 * @return <code>true</code> if <code>tree</code> is a two-element list of symbols
 */
public final boolean isPreterminal(Sexp tree) {
  if (!tree.isList()) {
    return false;
  }
  if (tree.list().length() != 2) {
    return false;
  }
  // Both the tag (index 0) and the word (index 1) must be symbols.
  return tree.list().get(0).isSymbol() && tree.list().get(1).isSymbol();
}
/**
 * Appends every element of a clause list to a text area, one element per line.
 *
 * <p>The list is walked iteratively from head to tail, so the call-stack depth does not grow
 * with the length of the program.
 *
 * @param prog the list of clauses to display; an empty (null) list displays nothing
 * @param intmsg the text area that receives the output
 */
protected void showProg(Sexp prog, JTextArea intmsg) {
  for (Sexp rest = prog; !rest.isNull(); rest = rest.cdr()) {
    intmsg.append(rest.car() + "\n");
  }
}
/**
 * Runs the current goal if there is one.
 *
 * <p>When {@code goal} is empty, {@code compileGoal(true)} is called instead; otherwise
 * {@code prog} is evaluated.
 */
protected void run() {
  if (goal.isNull()) {
    compileGoal(true);
  } else {
    eval(prog);
  }
}