/**
 * Builds a space-delimited form of the given character sequence.
 *
 * <p>The returned buffer always begins with a single space. When no tokenizer
 * factory is configured, the input is passed through
 * {@code Strings.normalizeWhitespace(cSeq, buffer)} and a trailing space is
 * appended. Otherwise the input is tokenized with the configured factory;
 * each token is recorded in {@code mTokenCounter} and appended to the buffer
 * followed by a single space.
 *
 * @param cSeq the characters to normalize
 * @return a new buffer holding the normalized text
 */
StringBuilder normalizeQuery(CharSequence cSeq) {
    StringBuilder normalized = new StringBuilder();
    normalized.append(' ');

    // Tokenizer path: one counted token per entry, each followed by a space.
    if (this.mTokenizerFactory != null) {
        char[] chars = Strings.toCharArray(cSeq);
        Tokenizer tokens = this.mTokenizerFactory.tokenizer(chars, 0, chars.length);
        for (String token = tokens.nextToken(); token != null; token = tokens.nextToken()) {
            this.mTokenCounter.increment(token);
            normalized.append(token).append(' ');
        }
        return normalized;
    }

    // No tokenizer configured: whitespace normalization only.
    Strings.normalizeWhitespace(cSeq, normalized);
    normalized.append(' ');
    return normalized;
}
public ParseResult parseSentence(String sentence) { String result = ""; // see if a parser socket server is available int port = new Integer(ARKref.getProperties().getProperty("parserServerPort", "5556")); String host = "127.0.0.1"; Socket client; PrintWriter pw; BufferedReader br; String line; try { client = new Socket(host, port); pw = new PrintWriter(client.getOutputStream()); br = new BufferedReader(new InputStreamReader(client.getInputStream())); pw.println(sentence); pw.flush(); // flush to complete the transmission while ((line = br.readLine()) != null) { // if(!line.matches(".*\\S.*")){ // System.out.println(); // } if (br.ready()) { line = line.replaceAll("\n", ""); line = line.replaceAll("\\s+", " "); result += line + " "; } else { lastParseScore = new Double(line); } } br.close(); pw.close(); client.close(); System.err.println("parser output:" + result); lastParse = readTreeFromString(result); boolean success = !Strings.normalizeWhitespace(result).equals("(ROOT (. .))"); return new ParseResult(success, lastParse, lastParseScore); } catch (Exception ex) { // ex.printStackTrace(); } // if socket server not available, then use a local parser object if (parser == null) { if (DEBUG) System.err.println("Could not connect to parser server. 
Loading parser..."); try { Options op = new Options(); String serializedInputFileOrUrl = ClassLoader.getSystemResource( ARKref.getProperties() .getProperty("parserGrammarFile", "lib/englishPCFG.ser.gz")) .toExternalForm(); parser = LexicalizedParser.loadModel(serializedInputFileOrUrl, op); // int maxLength = new Integer(ARKref.getProperties().getProperty("parserMaxLength", // "40")).intValue(); // parser.setMaxLength(maxLength); parser.setOptionFlags("-outputFormat", "oneline"); } catch (Exception e) { e.printStackTrace(); } } try { DocumentPreprocessor dp = new DocumentPreprocessor(new StringReader(sentence)); LexicalizedParserQuery query = parser.parserQuery(); if (query.parse(dp.iterator().next())) { lastParse = query.getBestParse(); lastParseScore = query.getPCFGScore(); TreePrint tp = new TreePrint("penn", "", new PennTreebankLanguagePack()); StringWriter sb = new StringWriter(); pw = new PrintWriter(sb); tp.printTree(lastParse, pw); pw.flush(); lastParse = readTreeFromString(sb.getBuffer().toString()); return new ParseResult(true, lastParse, lastParseScore); } } catch (Exception e) { } lastParse = readTreeFromString("(ROOT (. .))"); lastParseScore = -99999.0; return new ParseResult(false, lastParse, lastParseScore); }