Beispiel #1
0
  public static void main(String[] args) throws Exception {
    if (args.length != 2) {
      System.err.println("usage: java TaggerDemo2 modelFile fileToTag");
      return;
    }
    MaxentTagger tagger = new MaxentTagger(args[0]);
    TokenizerFactory<CoreLabel> ptbTokenizerFactory =
        PTBTokenizer.factory(new CoreLabelTokenFactory(), "untokenizable=noneKeep");
    BufferedReader r =
        new BufferedReader(new InputStreamReader(new FileInputStream(args[1]), "utf-8"));

    PrintWriter pw = new PrintWriter(new OutputStreamWriter(System.out, "utf-8"));
    DocumentPreprocessor documentPreprocessor = new DocumentPreprocessor(r);
    documentPreprocessor.setTokenizerFactory(ptbTokenizerFactory);
    for (List<HasWord> sentence : documentPreprocessor) {
      List<TaggedWord> tSentence = tagger.tagSentence(sentence);
      pw.println(Sentence.listToString(tSentence, false));
    }

    // print the adjectives in one more sentence. This shows how to get at words and tags in a
    // tagged sentence.
    List<HasWord> sent =
        Sentence.toWordList(
            "The", "slimy", "slug", "crawled", "over", "the", "long", ",", "green", "grass", ".");
    List<TaggedWord> taggedSent = tagger.tagSentence(sent);
    for (TaggedWord tw : taggedSent) {
      if (tw.tag().startsWith("JJ")) {
        pw.println(tw.word());
      }
    }

    pw.close();
  }
  public ParseResult parseSentence(String sentence) {
    String result = "";

    // see if a parser socket server is available
    int port = new Integer(ARKref.getProperties().getProperty("parserServerPort", "5556"));
    String host = "127.0.0.1";
    Socket client;
    PrintWriter pw;
    BufferedReader br;
    String line;
    try {
      client = new Socket(host, port);

      pw = new PrintWriter(client.getOutputStream());
      br = new BufferedReader(new InputStreamReader(client.getInputStream()));
      pw.println(sentence);
      pw.flush(); // flush to complete the transmission
      while ((line = br.readLine()) != null) {
        // if(!line.matches(".*\\S.*")){
        //        System.out.println();
        // }
        if (br.ready()) {
          line = line.replaceAll("\n", "");
          line = line.replaceAll("\\s+", " ");
          result += line + " ";
        } else {
          lastParseScore = new Double(line);
        }
      }

      br.close();
      pw.close();
      client.close();

      System.err.println("parser output:" + result);

      lastParse = readTreeFromString(result);
      boolean success = !Strings.normalizeWhitespace(result).equals("(ROOT (. .))");
      return new ParseResult(success, lastParse, lastParseScore);
    } catch (Exception ex) {

      // ex.printStackTrace();
    }

    // if socket server not available, then use a local parser object
    if (parser == null) {
      if (DEBUG) System.err.println("Could not connect to parser server.  Loading parser...");
      try {
        Options op = new Options();
        String serializedInputFileOrUrl =
            ClassLoader.getSystemResource(
                    ARKref.getProperties()
                        .getProperty("parserGrammarFile", "lib/englishPCFG.ser.gz"))
                .toExternalForm();
        parser = LexicalizedParser.loadModel(serializedInputFileOrUrl, op);
        //				int maxLength = new Integer(ARKref.getProperties().getProperty("parserMaxLength",
        // "40")).intValue();
        //				parser.setMaxLength(maxLength);
        parser.setOptionFlags("-outputFormat", "oneline");
      } catch (Exception e) {
        e.printStackTrace();
      }
    }

    try {
      DocumentPreprocessor dp = new DocumentPreprocessor(new StringReader(sentence));

      LexicalizedParserQuery query = parser.parserQuery();

      if (query.parse(dp.iterator().next())) {
        lastParse = query.getBestParse();
        lastParseScore = query.getPCFGScore();
        TreePrint tp = new TreePrint("penn", "", new PennTreebankLanguagePack());
        StringWriter sb = new StringWriter();
        pw = new PrintWriter(sb);
        tp.printTree(lastParse, pw);
        pw.flush();
        lastParse = readTreeFromString(sb.getBuffer().toString());

        return new ParseResult(true, lastParse, lastParseScore);
      }
    } catch (Exception e) {
    }

    lastParse = readTreeFromString("(ROOT (. .))");
    lastParseScore = -99999.0;
    return new ParseResult(false, lastParse, lastParseScore);
  }