public List<String> annotatePOSpeech(List<String> tokenized) throws IOException, TreeTaggerException { // Point TT4J to the TreeTagger installation directory. The executable // is expected // in the "bin" subdirectory - in this example at // "/opt/treetagger/bin/tree-tagger" System.setProperty("treetagger.home", treeTaggerFolder); TreeTaggerWrapper<String> tt = new TreeTaggerWrapper<String>(); final List<String> outputTreeTagger = new ArrayList<String>(); try { tt.setModel(treeTaggerFolder + "lib/english.par:utf8"); tt.setHandler( new TokenHandler<String>() { public void token(String token, String pos, String lemma) { outputTreeTagger.add(token + "\t" + pos + "\t" + lemma); } }); tt.process(tokenized); } finally { tt.destroy(); } return outputTreeTagger; }
public LemmatiseurHandler(String ch) { termes = new ArrayList(); termesLem = new ArrayList(); termesPos = new ArrayList(); tt = new TreeTaggerWrapper<String>(); // Lemmatiseur System.setProperty("treetagger.home", ch); try { tt.setModel("french.par:UTF8"); tt.setHandler( new TokenHandler<String>() { @Override public void token(String token, String pos, String lemma) { // System.out.println(token + "\t" + pos + "\t" + lemma); termesLem.add(lemma); termesPos.add(pos); } }); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } }
public void destroy() throws IOException, TreeTaggerException { // Lemmatisation tt.destroy(); }
public void process() throws IOException, TreeTaggerException { // Lemmatisation tt.process(termes); }
@Override public void process(final JCas aJCas) throws AnalysisEngineProcessException { final CAS cas = aJCas.getCas(); modelProvider.configure(cas); posMappingProvider.configure(cas); TreeTaggerWrapper<Token> treetagger = modelProvider.getResource(); try { List<Token> tokens = new ArrayList<Token>(select(aJCas, Token.class)); final POS pos[] = new POS[tokens.size()]; final Lemma lemma[] = new Lemma[tokens.size()]; // Set the handler creating new UIMA annotations from the analyzed // tokens final AtomicInteger count = new AtomicInteger(0); treetagger.setHandler( new TokenHandler<Token>() { @Override public void token(Token aToken, String aPos, String aLemma) { synchronized (cas) { // Add the Part of Speech if (writePos && aPos != null) { Type posTag = posMappingProvider.getTagType(aPos); POS posAnno = (POS) cas.createAnnotation(posTag, aToken.getBegin(), aToken.getEnd()); posAnno.setPosValue(internTags ? aPos.intern() : aPos); aToken.setPos(posAnno); pos[count.get()] = posAnno; } // Add the lemma if (writeLemma && aLemma != null) { Lemma lemmaAnno = new Lemma(aJCas, aToken.getBegin(), aToken.getEnd()); lemmaAnno.setValue(internTags ? aLemma.intern() : aLemma); aToken.setLemma(lemmaAnno); lemma[count.get()] = lemmaAnno; } count.getAndIncrement(); } } }); treetagger.process(tokens); // Add the annotations to the indexes for (int i = 0; i < count.get(); i++) { if (pos[i] != null) { pos[i].addToIndexes(); } if (lemma[i] != null) { lemma[i].addToIndexes(); } } } catch (TreeTaggerException e) { throw new AnalysisEngineProcessException(e); } catch (IOException e) { throw new AnalysisEngineProcessException(e); } }