// Punish chunks whose length is other than 3. private static void discriminate(ChunkRanker ranker) { ArrayList<LexChunk> chunks = ranker.getChunks(); for (LexChunk ch : chunks) { int sz = ch.size(); double weight = sz - 3; if (weight < 0) weight = -weight; weight = 1.0 - 0.2 * weight; // twiddle the confidence of the chunk TruthValue tv = ch.getTruthValue(); SimpleTruthValue stv = (SimpleTruthValue) tv; double confidence = stv.getConfidence(); confidence *= weight; stv.setConfidence(confidence); } }
/** Main entry point */ public static void main(String[] args) { String callString = "RelationExtractor" + " [-a (perform anaphora resolution)]" + " [--expand-preps (show expanded prepositions)]" + " [-h (show this help)]" + " [-i (show output for generation)]" + " [-l (show Link Grammar parse diagram)]" + " [--lang language (default en for English)]" + " [-m (show parse metadata)]" + " [--maxParseSeconds N]" + " [-n max number of parses to display]" + " [-o (show opencog scheme output)]" + " [--or (show opencog rule-based scheme output)]" + " [--pa (show phrase-based lexical chunks)]" + " [--pb (show pattern-based lexical chunks)]" + " [--pc (show relational lexical chunks)]" + " [--penn (generate Penn treebank-style POS tags)]" + " [--prolog (show prolog output)]" + " [-q (do NOT show relations)]" + " [-r (show raw output)]" + " [-s Sentence (in quotes)]" + " [--stanford (generate stanford-compatible output)]" + " [-t (show parse tree)]" + " [-v (verbose, full graph output)]" + " [--html filename (output HTML to file)]"; HashSet<String> flags = new HashSet<String>(); flags.add("-a"); flags.add("--expand-preps"); flags.add("-h"); flags.add("-i"); flags.add("-l"); flags.add("-m"); flags.add("-o"); flags.add("--or"); flags.add("--pa"); flags.add("--pb"); flags.add("--pc"); flags.add("--penn"); flags.add("--prolog"); flags.add("-q"); flags.add("-r"); flags.add("--stanford"); flags.add("-t"); flags.add("-v"); HashSet<String> opts = new HashSet<String>(); opts.add("-n"); opts.add("-s"); opts.add("--html"); opts.add("--lang"); opts.add("--maxParseSeconds"); Map<String, String> commandMap = CommandLineArgParser.parse(args, opts, flags); // Things that can be set via command line flags; cache till needed. String sentence = null; String language = "en"; int maxParses = 1; int maxParseSeconds = 6; PrintWriter html = null; // Check for optional command line arguments. try { String opt = commandMap.get("-s"); if (opt != null) sentence = opt; opt = commandMap.get("-n"); if (opt != null) maxParses = Integer.parseInt(opt); opt = commandMap.get("--html"); if (opt != null) html = new PrintWriter(new FileWriter(opt)); opt = commandMap.get("--lang"); if (opt != null) language = opt; opt = commandMap.get("--maxParseSeconds"); if (opt != null) maxParseSeconds = Integer.parseInt(opt); } catch (Exception e) { System.err.println("Unrecognized parameter."); System.err.println(callString); e.printStackTrace(); return; } if (commandMap.get("-h") != null) { System.err.println(callString); return; } // If generating OpenCog Scheme, delimit output. if (commandMap.get("-o") != null) System.out.print("scm\n"); if (html != null) html.println("<html>"); RelationExtractor re = new RelationExtractor(); // careful: set language *before* doing other things, to avoid call to init() re.setLanguage(language); re.setAllowSkippedWords(true); re.setMaxParses(maxParses); re.setMaxParseSeconds(maxParseSeconds); System.out.println("; Version: " + re.getVersion()); // Don't run anaphora if -o is set, this will be done in a // distinct stage that wipes out the first run. if ((commandMap.get("-a") != null) && (commandMap.get("-o") == null)) { re.do_anaphora_resolution = true; re.do_tree_markup = true; } if ((commandMap.get("-t") != null) || (commandMap.get("--pa") != null) || (commandMap.get("--pb") != null) || (commandMap.get("--pc") != null)) { re.do_tree_markup = true; } if (commandMap.get("--stanford") != null) { re.do_stanford = true; } if (commandMap.get("--penn") != null) { re.do_penn_tagging = true; } if (commandMap.get("--expand-preps") != null) { re.do_expand_preps = true; } // If sentence is not passed at command line, read from standard input: BufferedReader stdin = new BufferedReader(new InputStreamReader(System.in)); DocSplitter ds = DocSplitterFactory.create(); // QuotesParens is currently broken, it fails to handle possesives. // QuotesParensSentenceDetector ds = QuotesParensSentenceDetector.create(); OpenCogScheme opencog = null; if (commandMap.get("-o") != null) { opencog = new OpenCogScheme(); if (commandMap.get("-l") != null) { opencog.setShowLinkage(true); } opencog.setShowRelex(true); if (commandMap.get("-q") != null) { opencog.setShowRelex(false); } if (commandMap.get("-a") != null) { opencog.setShowAnaphora(true); } } boolean do_logic_output = false; LogicView logicView = new LogicView(); if (commandMap.get("--or") != null) { do_logic_output = true; logicView.loadRules(); } int sentence_count = 0; boolean more_input = true; while (more_input) { // If no sentence specified on the command line // (with the "-s" flag), then read it from stdin. while (sentence == null) { try { sentence = stdin.readLine(); if ((sentence == null) || "END.".equals(sentence)) { more_input = false; sentence = null; break; } } catch (IOException e) { System.err.println("Error reading sentence from the standard input!"); } // Buffer up input text, and wait for a whole, // complete sentence before continuing. ds.addText(sentence + " "); sentence = ds.getNextSentence(); } while (sentence != null) { System.out.println("; SENTENCE: [" + sentence + "]"); Sentence sntc = re.processSentence(sentence); // Crazy error condition ... the parser is broken somehow ... if (null == sntc) { sentence = ds.getNextSentence(); break; } re.doco.addSentence(sntc); if (html != null) html.printf( "<div id='relex-%d'><table><tr><td>%d: %s</td></tr><tr>\n", sentence_count, sentence_count, escape(sentence)); sentence_count++; re.stats.bin(sntc); int np = sntc.getParses().size(); if (np > maxParses) np = maxParses; // chunk ranking stuff ChunkRanker ranker = new ChunkRanker(); double parse_weight = 1.0 / ((double) np); double votes = 1.0e-20; if (commandMap.get("--pa") != null) votes += 1.0; if (commandMap.get("--pb") != null) votes += 2.0; if (commandMap.get("--pc") != null) votes += 1.0; votes = 1.0 / votes; votes *= parse_weight; // Print output int numParses = 0; for (ParsedSentence parse : sntc.getParses()) { if (commandMap.get("-o") == null) { System.out.println(sentence); System.out.println("\n====\n"); System.out.println("Parse " + (numParses + 1) + " of " + sntc.getParses().size()); } if (commandMap.get("-i") != null) { System.out.println("\n=====\n"); System.out.println(NLGInputView.printRelations(parse)); System.out.println("\n=====\n"); } if (commandMap.get("-r") != null) { System.out.println("\n====\n"); System.out.println("Dependency graph:\n"); System.out.println(RawView.printZHeads(parse.getLeft())); System.out.println("\n======\n"); } if (commandMap.get("-t") != null) { System.out.println("\n" + parse.getPhraseString()); if (html != null) html.printf("<td colspan='2'>%s</td></tr><tr>", escape(parse.getPhraseString())); } // Don't print the link string if xml output is enabled. // XML parsers choke on it. if ((commandMap.get("-l") != null) && (commandMap.get("-o") == null)) System.out.println("\n" + parse.getLinkString()); if (commandMap.get("-m") != null) { System.out.println(parse.getMetaData().toString() + "\n"); } if (commandMap.get("-o") == null) { // Print simple parse ranking Double confidence = parse.getTruthValue().getConfidence(); String prt_cnfd = confidence.toString(); prt_cnfd = prt_cnfd.substring(0, Math.min(6, prt_cnfd.length())); System.out.println("Parse confidence: " + prt_cnfd); System.out.println( "cost vector = (UNUSED=" + parse.getNumSkippedWords() + " DIS=" + parse.getDisjunctCost() + " LEN=" + parse.getLinkCost() + ")"); } // Verbose graph. if (commandMap.get("-v") != null) // System.out.println("\n" + parse.fullParseString()); System.out.println("\n" + parse.getLeft().toString(LinkView.getFilter())); if ((commandMap.get("-q") == null) && (commandMap.get("-o") == null) && re.do_apply_algs) { System.out.println("\n======\n"); System.out.println("Dependency relations:\n"); System.out.println(SimpleView.printRelations(parse)); System.out.println("\n======\n"); if (html != null) html.printf( "<td valign='top'><pre>%s</pre></td>\n", escape(SimpleView.printRelations(parse))); } if (do_logic_output) { System.out.println("\n======\n"); System.out.println("Relex2Logic output:"); System.out.println(logicView.printRelationsNew(parse)); System.out.println("\n======\n"); } if (commandMap.get("--pa") != null) { System.out.println("Phrase tree-based lexical chunks:"); LexicalChunker chunker = new PhraseChunker(); chunker.findChunks(parse); prt_chunks(chunker.getChunks()); ranker.add(chunker.getChunks(), parse.getTruthValue(), votes); } if (commandMap.get("--pb") != null) { System.out.println("Pattern-matching lexical chunks:"); LexicalChunker chunker = new PatternChunker(); chunker.findChunks(parse); prt_chunks(chunker.getChunks()); ranker.add(chunker.getChunks(), parse.getTruthValue(), 2.0 * votes); } if (commandMap.get("--pc") != null) { System.out.println("Relation-based lexical chunks:"); LexicalChunker chunker = new RelationChunker(); chunker.findChunks(parse); prt_chunks(chunker.getChunks()); ranker.add(chunker.getChunks(), parse.getTruthValue(), votes); } if (commandMap.get("--prolog") != null) { PrologList pl = new PrologList(); System.out.println( pl.toPrologList(parse.getLeft(), PrologList.getDefaultFilter(), true)); System.out.println("\n======\n"); } if (commandMap.get("--stanford") != null) { System.out.println("Stanford-style dependency relations:\n"); System.out.println(StanfordView.printRelations(parse, re.do_penn_tagging, " ")); System.out.println("\n======\n"); } if (commandMap.get("-o") != null) { opencog.setParse(parse); System.out.println(opencog.toString()); } if (html != null) html.println("</tr></table></div>"); if (++numParses >= maxParses) break; } if (0 < ranker.getChunks().size()) { discriminate(ranker); System.out.println("\nLexical Chunks:\n" + ranker.toString()); } if (re.do_anaphora_resolution && (commandMap.get("-o") == null)) { System.out.println("\nAntecedent candidates:\n" + re.antecedents.toString()); } // Print out the stats every now and then. if (sentence_count % 5 == 0) { System.err.println("\n" + re.stats.toString()); } if (commandMap.get("-s") != null) break; sentence = ds.getNextSentence(); } if (commandMap.get("-s") != null) break; } if (html != null) { html.println("</html>"); html.close(); } // Dump the list of document sentences if (commandMap.get("-o") != null) { System.out.println(opencog.printDocument(re.doco)); } System.out.println("; Bye."); if (commandMap.get("-o") != null) { System.out.println(".\nexit"); } System.exit(0); }