public static void main(String[] args) throws Exception { if (args.length != 2) { System.err.println("usage: java TaggerDemo2 modelFile fileToTag"); return; } MaxentTagger tagger = new MaxentTagger(args[0]); TokenizerFactory<CoreLabel> ptbTokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "untokenizable=noneKeep"); BufferedReader r = new BufferedReader(new InputStreamReader(new FileInputStream(args[1]), "utf-8")); PrintWriter pw = new PrintWriter(new OutputStreamWriter(System.out, "utf-8")); DocumentPreprocessor documentPreprocessor = new DocumentPreprocessor(r); documentPreprocessor.setTokenizerFactory(ptbTokenizerFactory); for (List<HasWord> sentence : documentPreprocessor) { List<TaggedWord> tSentence = tagger.tagSentence(sentence); pw.println(Sentence.listToString(tSentence, false)); } // print the adjectives in one more sentence. This shows how to get at words and tags in a // tagged sentence. List<HasWord> sent = Sentence.toWordList( "The", "slimy", "slug", "crawled", "over", "the", "long", ",", "green", "grass", "."); List<TaggedWord> taggedSent = tagger.tagSentence(sent); for (TaggedWord tw : taggedSent) { if (tw.tag().startsWith("JJ")) { pw.println(tw.word()); } } pw.close(); }
public ParseResult parseSentence(String sentence) { String result = ""; // see if a parser socket server is available int port = new Integer(ARKref.getProperties().getProperty("parserServerPort", "5556")); String host = "127.0.0.1"; Socket client; PrintWriter pw; BufferedReader br; String line; try { client = new Socket(host, port); pw = new PrintWriter(client.getOutputStream()); br = new BufferedReader(new InputStreamReader(client.getInputStream())); pw.println(sentence); pw.flush(); // flush to complete the transmission while ((line = br.readLine()) != null) { // if(!line.matches(".*\\S.*")){ // System.out.println(); // } if (br.ready()) { line = line.replaceAll("\n", ""); line = line.replaceAll("\\s+", " "); result += line + " "; } else { lastParseScore = new Double(line); } } br.close(); pw.close(); client.close(); System.err.println("parser output:" + result); lastParse = readTreeFromString(result); boolean success = !Strings.normalizeWhitespace(result).equals("(ROOT (. .))"); return new ParseResult(success, lastParse, lastParseScore); } catch (Exception ex) { // ex.printStackTrace(); } // if socket server not available, then use a local parser object if (parser == null) { if (DEBUG) System.err.println("Could not connect to parser server. Loading parser..."); try { Options op = new Options(); String serializedInputFileOrUrl = ClassLoader.getSystemResource( ARKref.getProperties() .getProperty("parserGrammarFile", "lib/englishPCFG.ser.gz")) .toExternalForm(); parser = LexicalizedParser.loadModel(serializedInputFileOrUrl, op); // int maxLength = new Integer(ARKref.getProperties().getProperty("parserMaxLength", // "40")).intValue(); // parser.setMaxLength(maxLength); parser.setOptionFlags("-outputFormat", "oneline"); } catch (Exception e) { e.printStackTrace(); } } try { DocumentPreprocessor dp = new DocumentPreprocessor(new StringReader(sentence)); LexicalizedParserQuery query = parser.parserQuery(); if (query.parse(dp.iterator().next())) { lastParse = query.getBestParse(); lastParseScore = query.getPCFGScore(); TreePrint tp = new TreePrint("penn", "", new PennTreebankLanguagePack()); StringWriter sb = new StringWriter(); pw = new PrintWriter(sb); tp.printTree(lastParse, pw); pw.flush(); lastParse = readTreeFromString(sb.getBuffer().toString()); return new ParseResult(true, lastParse, lastParseScore); } } catch (Exception e) { } lastParse = readTreeFromString("(ROOT (. .))"); lastParseScore = -99999.0; return new ParseResult(false, lastParse, lastParseScore); }