예제 #1
0
  /**
   * Runs a CoreNLP pipeline (tokenize, ssplit, pos, lemma, ner, parse) over a text and prints the
   * result.
   *
   * <p>Arguments, all optional:
   *
   * <ol>
   *   <li>{@code args[0]} - file whose contents are annotated; a built-in two-sentence sample is
   *       used when absent.
   *   <li>{@code args[1]} - file receiving the pretty-printed output; {@code System.out} is used
   *       when absent.
   *   <li>{@code args[2]} - file receiving the XML rendering of the annotation; no XML is written
   *       when absent.
   * </ol>
   *
   * @param args command-line arguments as described above
   * @throws IOException if an output file cannot be opened or the XML output cannot be written
   */
  public static void main(String[] args) throws IOException {
    boolean outIsFile = args.length > 1;
    PrintWriter out = outIsFile ? new PrintWriter(args[1]) : new PrintWriter(System.out);
    PrintWriter xmlOut = null;
    try {
      if (args.length > 2) {
        xmlOut = new PrintWriter(args[2]);
      }
      Properties props = new Properties();
      props.put("annotators", "tokenize, ssplit, pos, lemma, ner,parse");
      StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
      Annotation annotation;
      if (args.length > 0) {
        annotation = new Annotation(IOUtils.slurpFileNoExceptions(args[0]));
      } else {
        annotation =
            new Annotation(
                "Kosgi Santosh sent an email to Stanford University. He didn't get a reply.");
      }

      pipeline.annotate(annotation);
      pipeline.prettyPrint(annotation, out);
      // The XML writer was previously opened but never used, leaving an empty file behind.
      if (xmlOut != null) {
        pipeline.xmlPrint(annotation, xmlOut);
      }
    } finally {
      // PrintWriter buffers; without an explicit flush the tail of the output can be lost.
      out.flush();
      if (outIsFile) {
        // Only close writers we own; System.out is left open for the rest of the JVM.
        out.close();
      }
      if (xmlOut != null) {
        xmlOut.close();
      }
    }
  }
  /**
   * Loads {@code filename} and returns one {@link Annotation} per sentence or tree found in it.
   *
   * <p>Every returned annotation carries a single-element {@code SentencesAnnotation}.
   *
   * <ul>
   *   <li>{@code TEXT}: the file is read as raw text and run through {@code tokenizer}; each
   *       resulting sentence is wrapped in its own annotation whose text is that sentence's text.
   *   <li>{@code TREES}: the file is read as trees; each tree is attached to a sentence built from
   *       its yield and wrapped in an annotation with empty text. {@code tokenizer} is not used.
   * </ul>
   *
   * @param tokenizer pipeline applied to the raw text; only used for {@code TEXT} input
   * @param inputFormat how the file should be interpreted
   * @param filename path of the file to read
   * @param filterUnknown only consulted for {@code TREES} input: when true, trees are read with
   *     gold labels and passed through {@code SentimentUtils.filterUnknownRoots} (presumably
   *     dropping trees whose root label is unknown - confirm in SentimentUtils); when false, all
   *     trees in a UTF-8 treebank are loaded as-is
   * @return the list of single-sentence annotations, in file order
   * @throws IllegalArgumentException if {@code inputFormat} is neither {@code TEXT} nor {@code
   *     TREES}
   */
  public static List<Annotation> getAnnotations(
      StanfordCoreNLP tokenizer, Input inputFormat, String filename, boolean filterUnknown) {
    List<Annotation> result = Generics.newArrayList();
    switch (inputFormat) {
      case TEXT:
        {
          Annotation document = new Annotation(IOUtils.slurpFileNoExceptions(filename));
          tokenizer.annotate(document);
          // Re-wrap each sentence of the document as a stand-alone annotation.
          for (CoreMap sentence : document.get(CoreAnnotations.SentencesAnnotation.class)) {
            String sentenceText = sentence.get(CoreAnnotations.TextAnnotation.class);
            Annotation wrapper = new Annotation(sentenceText);
            wrapper.set(
                CoreAnnotations.SentencesAnnotation.class, Collections.singletonList(sentence));
            result.add(wrapper);
          }
          return result;
        }
      case TREES:
        {
          List<Tree> loadedTrees;
          if (!filterUnknown) {
            // Unfiltered path: take every tree the treebank yields.
            loadedTrees = Generics.newArrayList();
            MemoryTreebank treebank = new MemoryTreebank("utf-8");
            treebank.loadPath(filename, null);
            for (Tree parsed : treebank) {
              loadedTrees.add(parsed);
            }
          } else {
            loadedTrees =
                SentimentUtils.filterUnknownRoots(
                    SentimentUtils.readTreesWithGoldLabels(filename));
          }

          for (Tree parsed : loadedTrees) {
            // The sentence text is reconstructed from the tree's leaves.
            CoreMap sentence = new Annotation(Sentence.listToString(parsed.yield()));
            sentence.set(TreeCoreAnnotations.TreeAnnotation.class, parsed);
            Annotation wrapper = new Annotation("");
            wrapper.set(
                CoreAnnotations.SentencesAnnotation.class, Collections.singletonList(sentence));
            result.add(wrapper);
          }
          return result;
        }
      default:
        throw new IllegalArgumentException("Unknown format " + inputFormat);
    }
  }