예제 #1
0
  /**
   * Prints the word annotations of a CAS to standard output as two aligned
   * rows: the covered text of each word on one line and its tag on the next.
   *
   * <p>Rows are emitted in groups of twenty words, each full group followed by
   * a blank line. A trailing, partially filled group is printed without the
   * blank line.
   *
   * @param jcas the CAS whose {@code WordAnnotation} index is iterated
   */
  public static void showSdiWithCategory2(JCas jcas) {
    // Number of words laid out on a single pair of output lines.
    final int wordsPerRow = 20;

    // Builders replace repeated String concatenation inside the loop.
    StringBuilder wordsLine = new StringBuilder();
    StringBuilder catsLine = new StringBuilder();
    int cnt = 0;

    FSIterator<Annotation> it = jcas.getAnnotationIndex(WordAnnotation.type).iterator();
    while (it.hasNext()) {
      WordAnnotation a = (WordAnnotation) it.next();
      cnt++;

      // center(...) yields the cell for the words row at index 0 and the cell
      // for the tags row at index 1 (presumably padded to a common width so
      // the two rows line up; confirm against center's definition).
      String[] cells = center(a.getCoveredText(), a.getTag());
      wordsLine.append(cells[0]).append(" ");
      catsLine.append(cells[1]).append(" ");

      if (cnt == wordsPerRow) {
        System.out.println(wordsLine.toString());
        System.out.println(catsLine.toString());
        System.out.println();

        // Start a fresh row.
        wordsLine.setLength(0);
        catsLine.setLength(0);
        cnt = 0;
      }
    }

    // Flush the last, incomplete row if there is one.
    if (cnt > 0) {
      System.out.println(wordsLine.toString());
      System.out.println(catsLine.toString());
    }
  }
예제 #2
0
 /**
  * Sets both the lemma and the stem of every {@code WordAnnotation} in the CAS
  * to the annotation's covered text.
  *
  * @param cas the CAS whose word annotations are updated in place
  * @throws AnalysisEngineProcessException wrapping any exception raised while
  *     iterating over or updating the annotations
  */
 @Override
 public void process(JCas cas) throws AnalysisEngineProcessException {
   try {
     for (FSIterator<Annotation> words = cas.getAnnotationIndex(WordAnnotation.type).iterator();
         words.hasNext();) {
       WordAnnotation word = (WordAnnotation) words.next();
       // The surface form is used unchanged for both features.
       String surface = word.getCoveredText();
       word.setLemma(surface);
       word.setStem(surface);
     }
   } catch (Exception failure) {
     throw new AnalysisEngineProcessException(failure);
   }
 }
예제 #3
0
 /**
  * Prints every word annotation of a CAS to standard output in the form
  * {@code text_tag}, twelve entries per line separated by single spaces.
  *
  * <p>The output always ends with one additional line terminator, which closes
  * a partially filled last line (or produces an empty line when the word count
  * is a multiple of twelve).
  *
  * @param jcas the CAS whose {@code WordAnnotation} index is iterated
  */
 public static void showSdiWithCategory(JCas jcas) {
   // Number of "text_tag" entries printed before a line break.
   final int wordsPerLine = 12;

   FSIterator<Annotation> it = jcas.getAnnotationIndex(WordAnnotation.type).iterator();
   int wordCnt = 0;
   while (it.hasNext()) {
     wordCnt++;
     WordAnnotation a = (WordAnnotation) it.next();
     System.out.print(a.getCoveredText() + "_" + a.getTag());
     if (wordCnt < wordsPerLine) {
       System.out.print(" ");
     } else {
       System.out.println();
       wordCnt = 0;
     }
   }

   // The iterator is exhausted at this point, so joining its remaining
   // elements could only ever produce an empty string. Emit the final line
   // terminator explicitly instead of going through a no-op join.
   System.out.println();
 }
  /**
   * Runs the Mate POS tagger and then the Mate lemmatizer over all word
   * annotations of the CAS, treated as a single sentence, and writes the
   * resulting tag and lemma back onto each {@code WordAnnotation}.
   *
   * <p>The call is bracketed by start/stop notifications to the
   * "AnalysisEngine" profiler.
   *
   * @param jcas the CAS whose word annotations are tagged and lemmatized
   * @throws AnalysisEngineProcessException declared by the overridden method
   */
  @Override
  public void process(JCas jcas) throws AnalysisEngineProcessException {
    UIMAProfiler.getProfiler("AnalysisEngine").start(this, "process");

    Lemmatizer lemmatizer = mateLemmatizerModel.getEngine();
    Tagger tagger = mateTaggerModel.getEngine();

    // Copy the annotations into a list so they can be addressed by position
    // when the Mate results are written back.
    List<WordAnnotation> words = Lists.newArrayList();
    for (FSIterator<Annotation> cursor = jcas.getAnnotationIndex(WordAnnotation.type).iterator();
        cursor.hasNext();) {
      words.add((WordAnnotation) cursor.next());
    }

    // Prepend two fake words to prevent Mate from misbehaving on the first two
    // real words; the real word forms follow from index 2 onwards.
    String[] forms = new String[words.size() + 2];
    forms[0] = "<root>";
    forms[1] = "<root2>";
    int slot = 2;
    for (WordAnnotation word : words) {
      forms[slot++] = word.getCoveredText();
    }

    SentenceData09 sentence = new SentenceData09();
    sentence.init(forms);

    // POS tagging first, lemmatization on the tagged sentence second.
    sentence = lemmatizer.apply(tagger.apply(sentence));

    // Result position (wordIdx + 1) is written to annotation wordIdx.
    // NOTE(review): two placeholders were prepended, yet results are read
    // starting at position 1; this alignment depends on how Mate's apply()
    // reshapes the sentence. Confirm against Mate's SentenceData09 behavior.
    for (int wordIdx = 0; wordIdx + 1 < sentence.length(); wordIdx++) {
      WordAnnotation word = words.get(wordIdx);
      int matePos = wordIdx + 1;
      word.setTag(sentence.ppos[matePos]);
      word.setLemma(sentence.plemmas[matePos]);
    }

    UIMAProfiler.getProfiler("AnalysisEngine").stop(this, "process");
  }