public List<String> annotatePOSpeech(List<String> tokenized) throws IOException, TreeTaggerException { // Point TT4J to the TreeTagger installation directory. The executable // is expected // in the "bin" subdirectory - in this example at // "/opt/treetagger/bin/tree-tagger" System.setProperty("treetagger.home", treeTaggerFolder); TreeTaggerWrapper<String> tt = new TreeTaggerWrapper<String>(); final List<String> outputTreeTagger = new ArrayList<String>(); try { tt.setModel(treeTaggerFolder + "lib/english.par:utf8"); tt.setHandler( new TokenHandler<String>() { public void token(String token, String pos, String lemma) { outputTreeTagger.add(token + "\t" + pos + "\t" + lemma); } }); tt.process(tokenized); } finally { tt.destroy(); } return outputTreeTagger; }
public LemmatiseurHandler(String ch) { termes = new ArrayList(); termesLem = new ArrayList(); termesPos = new ArrayList(); tt = new TreeTaggerWrapper<String>(); // Lemmatiseur System.setProperty("treetagger.home", ch); try { tt.setModel("french.par:UTF8"); tt.setHandler( new TokenHandler<String>() { @Override public void token(String token, String pos, String lemma) { // System.out.println(token + "\t" + pos + "\t" + lemma); termesLem.add(lemma); termesPos.add(pos); } }); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } }