@Override public void process(JCas jcas) throws AnalysisEngineProcessException { UIMAProfiler.getProfiler("AnalysisEngine").start(this, "process"); Lemmatizer mateLemmatizer = mateLemmatizerModel.getEngine(); Tagger mateTagger = mateTaggerModel.getEngine(); /* * keeps an array of annotations in memory so as to be able * to access them by index. */ List<WordAnnotation> annotations = Lists.newArrayList(); FSIterator<Annotation> it = jcas.getAnnotationIndex(WordAnnotation.type).iterator(); while (it.hasNext()) { WordAnnotation a = (WordAnnotation) it.next(); annotations.add(a); } String[] tokens = new String[annotations.size() + 2]; // preprends to fake words to prevent Mate from bugging on the two first words tokens[0] = "<root>"; tokens[1] = "<root2>"; for (int i = 0; i < annotations.size(); i++) tokens[i + 2] = annotations.get(i).getCoveredText(); SentenceData09 mateSentence = new SentenceData09(); mateSentence.init(tokens); // Run POS tagging mateSentence = mateTagger.apply(mateSentence); // Run lemmatization mateSentence = mateLemmatizer.apply(mateSentence); WordAnnotation wordAnnotation; for (int j = 1; j < mateSentence.length(); j++) { wordAnnotation = annotations.get(j - 1); wordAnnotation.setTag(mateSentence.ppos[j]); wordAnnotation.setLemma(mateSentence.plemmas[j]); } UIMAProfiler.getProfiler("AnalysisEngine").stop(this, "process"); }
/**
 * Produces one lemma annotation per token of the given sentence.
 *
 * <p>The model is started first if {@code modelStarted()} reports that it is
 * not running. The sentence is tokenized with the {@code TOKEN} annotator,
 * the token texts are handed to the lemmatizer behind a leading
 * {@code <root>} entry, and each predicted lemma is returned with the start
 * and end offsets of the token it belongs to.
 *
 * @param sentence the sentence annotation to lemmatize
 * @return the lemma annotations, in token order
 * @throws IncompatibleAnnotationException declared for the annotators called here
 */
public synchronized List<StringAnnotation> annotate(IAnnotation<String> sentence)
    throws IncompatibleAnnotationException {
  if (!modelStarted()) {
    startModel();
  }

  List<? extends IAnnotation<String>> tokens =
      StringAnnotatorEnum.TOKEN.getAnnotator().annotate(sentence);

  // Slot 0 holds the artificial root; token k is stored at slot k + 1.
  String[] forms = new String[tokens.size() + 1];
  forms[0] = "<root>";
  int slot = 1;
  for (IAnnotation<String> token : tokens) {
    forms[slot++] = token.getAnnotation();
  }

  SentenceData09 mateSentence = new SentenceData09();
  mateSentence.init(forms);

  // NOTE(review): the value returned by apply() is ignored and the lemmas
  // are read from the instance passed in -- confirm that the lemmatizer
  // fills plemmas on its argument.
  lemmatizer.apply(mateSentence);

  List<StringAnnotation> lemmas = new ArrayList<StringAnnotation>();
  for (int k = 0; k < tokens.size(); k++) {
    IAnnotation<String> token = tokens.get(k);
    // k + 1 skips the root entry at slot 0.
    lemmas.add(new StringAnnotation(mateSentence.plemmas[k + 1], token.getStart(), token.getEnd()));
  }
  return lemmas;
}
// how to parse a sentences and call the tools public static void main(String[] args) throws IOException { // Create a data container for a sentence SentenceData09 i = new SentenceData09(); if (args.length == 1) { // input might be a sentence: "This is another test ." StringTokenizer st = new StringTokenizer(args[0]); ArrayList<String> forms = new ArrayList<>(); forms.add("<root>"); while (st.hasMoreTokens()) { forms.add(st.nextToken()); } i.init(forms.toArray(new String[0])); } else { // provide a default sentence i.init(new String[] {"<root>", "This", "is", "a", "test", "."}); } // print the forms for (String l : i.forms) { Parser.out.println("form : " + l); } // tell the lemmatizer the location of the model is2.lemmatizer.Options optsLemmatizer = new is2.lemmatizer.Options(new String[] {"-model", "models/lemma-eng.model"}); // create a lemmatizer Lemmatizer lemmatizer = new Lemmatizer(optsLemmatizer); // lemmatize a sentence; the result is stored in the stenenceData09 i i = lemmatizer.apply(i); // output the lemmata for (String l : i.plemmas) { Parser.out.println("lemma : " + l); } // tell the tagger the location of the model is2.tag.Options optsTagger = new is2.tag.Options(new String[] {"-model", "models/tag-eng.model"}); Tagger tagger = new Tagger(optsTagger); // String pos[] =tagger.tag(i.forms, i.lemmas); // i.setPPos(pos); SentenceData09 tagged = tagger.tag(i); for (String p : tagged.ppos) { Parser.out.println("pos " + p); } // initialize the options Options optsParser = new Options(new String[] {"-model", "models/prs-eng-x.model"}); // create a parser Parser parser = new Parser(optsParser); // parse the sentence (you get a copy of the input i) SentenceData09 parse = parser.apply(tagged); Parser.out.println(parse.toString()); // create some trash on the hard drive :-) is2.io.CONLLWriter09 writer = new is2.io.CONLLWriter09("example-out.txt"); writer.write(i); writer.finishWriting(); }