コード例 #1
0
  /**
   * Runs TreeTagger over already tokenized text and returns one entry per tagged token.
   *
   * <p>Each entry is the token, its part-of-speech tag and its lemma, joined by tab characters.
   * The model is {@code lib/english.par} below {@code treeTaggerFolder}, requested with the
   * {@code :utf8} suffix. {@code treeTaggerFolder} may be configured with or without a trailing
   * path separator.
   *
   * @param tokenized the tokens to tag, in order
   * @return the tab-separated {@code token}, {@code pos} and {@code lemma} of every token the
   *     tagger reported
   * @throws IOException propagated from the TT4J wrapper calls
   * @throws TreeTaggerException propagated from the TT4J wrapper calls
   */
  public List<String> annotatePOSpeech(List<String> tokenized)
      throws IOException, TreeTaggerException {
    // Point TT4J to the TreeTagger installation directory. The executable is expected in the
    // "bin" subdirectory - in this example at "/opt/treetagger/bin/tree-tagger".
    System.setProperty("treetagger.home", treeTaggerFolder);

    // Plain concatenation only worked when the folder already ended in a separator; with
    // "/opt/treetagger" it produced "/opt/treetaggerlib/english.par". Add the separator only when
    // it is missing, so values that worked before (including the empty string) are unchanged.
    final boolean endsWithSeparator =
        treeTaggerFolder.isEmpty()
            || treeTaggerFolder.endsWith("/")
            || treeTaggerFolder.endsWith("\\");
    final String modelFolder = endsWithSeparator ? treeTaggerFolder : treeTaggerFolder + "/";

    final TreeTaggerWrapper<String> tt = new TreeTaggerWrapper<String>();
    final List<String> outputTreeTagger = new ArrayList<String>();

    try {
      tt.setModel(modelFolder + "lib/english.par:utf8");
      tt.setHandler(
          new TokenHandler<String>() {
            public void token(String token, String pos, String lemma) {
              outputTreeTagger.add(token + "\t" + pos + "\t" + lemma);
            }
          });
      tt.process(tokenized);
    } finally {
      // Always release the wrapper, even when loading the model or tagging failed.
      tt.destroy();
    }
    return outputTreeTagger;
  }
コード例 #2
0
  /**
   * Creates the lemmatizer: initialises the term lists, creates the TreeTagger wrapper, and
   * requests the {@code french.par:UTF8} model.
   *
   * <p>The installed token handler appends the lemma of each reported token to {@code termesLem}
   * and its part-of-speech tag to {@code termesPos}. If requesting the model raises an
   * {@link IOException}, the stack trace is printed, the handler is not installed, and the
   * constructor still returns normally.
   *
   * @param ch value stored in the {@code treetagger.home} system property
   */
  public LemmatiseurHandler(String ch) {
    termesPos = new ArrayList();
    termesLem = new ArrayList();
    termes = new ArrayList();
    tt = new TreeTaggerWrapper<String>();

    System.setProperty("treetagger.home", ch);

    // Collects only the lemma and the tag; the surface token is not stored here.
    final TokenHandler<String> collector =
        new TokenHandler<String>() {
          @Override
          public void token(String token, String pos, String lemma) {
            termesLem.add(lemma);
            termesPos.add(pos);
          }
        };

    try {
      tt.setModel("french.par:UTF8");
      tt.setHandler(collector);
    } catch (IOException e) {
      // TODO Auto-generated catch block
      e.printStackTrace();
    }
  }
コード例 #3
0
 /**
  * Delegates to {@code destroy()} on the wrapped tagger {@code tt}.
  *
  * @throws IOException declared for the delegate call
  * @throws TreeTaggerException declared for the delegate call
  */
 public void destroy() throws IOException, TreeTaggerException {
   this.tt.destroy();
 }
コード例 #4
0
 /**
  * Passes the {@code termes} list to {@code process} on the wrapped tagger {@code tt}.
  *
  * @throws IOException declared for the delegate call
  * @throws TreeTaggerException declared for the delegate call
  */
 public void process() throws IOException, TreeTaggerException {
   this.tt.process(this.termes);
 }
コード例 #5
0
  /**
   * Tags every {@code Token} selected from the given JCas and attaches part-of-speech and lemma
   * annotations to it.
   *
   * <p>Annotations are created inside the token handler while the tagger reports tokens, and are
   * added to the indexes only after {@code process} on the tagger has returned. POS annotations
   * are written when {@code writePos} is set and a tag was reported; lemma annotations when
   * {@code writeLemma} is set and a lemma was reported. With {@code internTags} set, the stored
   * strings are interned.
   *
   * @param aJCas the JCas whose tokens are tagged
   * @throws AnalysisEngineProcessException wrapping any {@code TreeTaggerException} or
   *     {@code IOException} raised while tagging
   */
  @Override
  public void process(final JCas aJCas) throws AnalysisEngineProcessException {
    final CAS cas = aJCas.getCas();

    modelProvider.configure(cas);
    posMappingProvider.configure(cas);

    final TreeTaggerWrapper<Token> tagger = modelProvider.getResource();

    try {
      final List<Token> tokenList = new ArrayList<Token>(select(aJCas, Token.class));
      final int tokenCount = tokenList.size();
      final POS[] posAnnotations = new POS[tokenCount];
      final Lemma[] lemmaAnnotations = new Lemma[tokenCount];

      // Number of tokens the handler has seen so far; also the array slot of the next token.
      final AtomicInteger handled = new AtomicInteger(0);

      // Handler that turns each analyzed token into new UIMA annotations.
      final TokenHandler<Token> annotationWriter =
          new TokenHandler<Token>() {
            @Override
            public void token(Token aToken, String aPos, String aLemma) {
              synchronized (cas) {
                final int slot = handled.get();

                // Part of speech
                if (writePos && aPos != null) {
                  final Type tagType = posMappingProvider.getTagType(aPos);
                  final POS posAnno =
                      (POS) cas.createAnnotation(tagType, aToken.getBegin(), aToken.getEnd());
                  String posValue = aPos;
                  if (internTags) {
                    posValue = aPos.intern();
                  }
                  posAnno.setPosValue(posValue);
                  aToken.setPos(posAnno);
                  posAnnotations[slot] = posAnno;
                }

                // Lemma
                if (writeLemma && aLemma != null) {
                  final Lemma lemmaAnno = new Lemma(aJCas, aToken.getBegin(), aToken.getEnd());
                  String lemmaValue = aLemma;
                  if (internTags) {
                    lemmaValue = aLemma.intern();
                  }
                  lemmaAnno.setValue(lemmaValue);
                  aToken.setLemma(lemmaAnno);
                  lemmaAnnotations[slot] = lemmaAnno;
                }

                handled.incrementAndGet();
              }
            }
          };
      tagger.setHandler(annotationWriter);

      tagger.process(tokenList);

      // Index whatever was created for the tokens the handler reported.
      for (int slot = 0; slot < handled.get(); slot++) {
        final POS posAnno = posAnnotations[slot];
        if (posAnno != null) {
          posAnno.addToIndexes();
        }
        final Lemma lemmaAnno = lemmaAnnotations[slot];
        if (lemmaAnno != null) {
          lemmaAnno.addToIndexes();
        }
      }
    } catch (TreeTaggerException e) {
      throw new AnalysisEngineProcessException(e);
    } catch (IOException e) {
      throw new AnalysisEngineProcessException(e);
    }
  }