Пример #1
0
  /**
   * for printing labeled sentence in less verbose manner
   *
   * @return string for printing
   */
  public static String labeledSentenceToString(List<CoreLabel> labeledSentence, boolean printNer) {
    StringBuilder sb = new StringBuilder();
    sb.append("[ ");

    for (CoreLabel label : labeledSentence) {
      String word = label.getString(annotationForWord);
      String answer = label.getString(AnswerAnnotation.class);
      String tag = label.getString(PartOfSpeechAnnotation.class);

      sb.append(word).append('(').append(tag);
      if (!SeqClassifierFlags.DEFAULT_BACKGROUND_SYMBOL.equals(answer)) {
        sb.append(' ').append(answer);
      }

      if (printNer) {
        sb.append(" ner:").append(label.ner());
      }
      sb.append(") ");
    }
    sb.append(']');

    return sb.toString();
  }
Пример #2
0
  public void process(String inFilepath, String outFilepath, String nerOutFile) {

    try {
      StringBuilder inText = new StringBuilder();
      StringBuilder outText = new StringBuilder();
      StringBuilder nerText = new StringBuilder();

      // read some text in the inText variable from input file
      BufferedReader reader = new BufferedReader(new FileReader(inFilepath));
      String line = null;
      while ((line = reader.readLine()) != null) {
        if (line.trim().length() == 0) continue;
        inText.append(line + "\n");
      }
      reader.close();

      // create an empty Annotation just with the given text
      Annotation document = new Annotation(inText.toString());

      // run all Annotators on this text
      pipeline.annotate(document);

      // these are all the sentences in this document
      // a CoreMap is essentially a Map that uses class objects as keys and has values with custom
      // types
      List<CoreMap> sentences = document.get(SentencesAnnotation.class);

      for (CoreMap sentence : sentences) {
        // traversing the words in the current sentence
        // a CoreLabel is a CoreMap with additional token-specific methods
        for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
          totalWords++;
          String pos = token.tag();
          if (tagFilter.contains(pos)) {
            remainWords++;
            String lemma = token.lemma();
            outText.append(lemma + " ");
            if (nerFilter.contains(token.ner())) {
              nerText.append(token.word() + " ");
            }
          }
        }
      }

      // write the processed text to output file
      FileWriter fw = FileUtil.open(outFilepath);
      fw.append(outText);
      FileUtil.close(fw);

      if (nerOutFile != null) {
        FileWriter fw2 = FileUtil.open(nerOutFile);
        fw2.append(nerText);
        FileUtil.close(fw2);
      }

    } catch (FileNotFoundException e) {
      e.printStackTrace();
    } catch (IOException e) {
      e.printStackTrace();
    }
  }