@Override
public void process(JCas jcas) throws AnalysisEngineProcessException {
    UIMAProfiler.getProfiler("AnalysisEngine").start(this, "process");
    Lemmatizer mateLemmatizer = mateLemmatizerModel.getEngine();
    Tagger mateTagger = mateTaggerModel.getEngine();

    /*
     * Keeps a list of annotations in memory so as to be able
     * to access them by index.
     */
    List<WordAnnotation> annotations = Lists.newArrayList();
    FSIterator<Annotation> it = jcas.getAnnotationIndex(WordAnnotation.type).iterator();
    while (it.hasNext()) {
        WordAnnotation a = (WordAnnotation) it.next();
        annotations.add(a);
    }

    String[] tokens = new String[annotations.size() + 2];
    // prepends two fake words to prevent Mate from bugging on the two first words
    tokens[0] = "<root>";
    tokens[1] = "<root2>";
    for (int i = 0; i < annotations.size(); i++)
        tokens[i + 2] = annotations.get(i).getCoveredText();

    SentenceData09 mateSentence = new SentenceData09();
    mateSentence.init(tokens);

    // Run POS tagging
    mateSentence = mateTagger.apply(mateSentence);
    // Run lemmatization
    mateSentence = mateLemmatizer.apply(mateSentence);

    // apply() returns a copy without the leading fake root (cf. the root-stripping
    // SentenceData09 copy constructor below), so sentence index j maps to annotation j-1.
    WordAnnotation wordAnnotation;
    for (int j = 1; j < mateSentence.length(); j++) {
        wordAnnotation = annotations.get(j - 1);
        wordAnnotation.setTag(mateSentence.ppos[j]);
        wordAnnotation.setLemma(mateSentence.plemmas[j]);
    }
    UIMAProfiler.getProfiler("AnalysisEngine").stop(this, "process");
}
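// Aside: with uimaFIT on the classpath, the manual FSIterator loop above can be
// collapsed into a single call. A minimal sketch, not part of the original code;
// it assumes org.apache.uima.fit.util.JCasUtil is available and reuses the same
// WordAnnotation type and Guava Lists helper:
List<WordAnnotation> annotations = Lists.newArrayList(JCasUtil.select(jcas, WordAnnotation.class));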
public synchronized List<StringAnnotation> annotate(IAnnotation<String> sentence)
        throws IncompatibleAnnotationException {
    if (!modelStarted()) {
        startModel();
    }
    SentenceData09 sent = new SentenceData09();
    ArrayList<StringAnnotation> annotations = new ArrayList<StringAnnotation>();
    ArrayList<String> forms = new ArrayList<String>();
    // Mate expects a fake root token in front of the real forms.
    forms.add("<root>");
    List<? extends IAnnotation<String>> tokens =
            StringAnnotatorEnum.TOKEN.getAnnotator().annotate(sentence);
    for (IAnnotation<String> token : tokens) {
        forms.add(token.getAnnotation());
    }
    sent.init(forms.toArray(new String[forms.size()]));
    lemmatizer.apply(sent);
    // plemmas[0] belongs to the fake root, so the real lemmas start at index 1.
    for (int i = 0; i < tokens.size(); i++) {
        StringAnnotation ann = new StringAnnotation(
                sent.plemmas[i + 1],
                tokens.get(i).getStart(),
                tokens.get(i).getEnd());
        annotations.add(ann);
    }
    return annotations;
}
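// For illustration, a hypothetical call site for annotate(). The variable
// "annotator" and the assumption that StringAnnotation implements
// IAnnotation<String> are not in the code above; they are inferred from the
// constructor and getters it uses:
IAnnotation<String> sentence = new StringAnnotation("This is a test .", 0, 16);
List<StringAnnotation> lemmas = annotator.annotate(sentence);
for (StringAnnotation lemma : lemmas) {
    // each result pairs a lemma string with the source token's offsets
    System.out.println(lemma.getAnnotation() + " [" + lemma.getStart() + ", " + lemma.getEnd() + ")");
}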
/**
 * Creates a copy of the input instance without the artificial root token.
 *
 * @param i the instance to copy (index 0 is assumed to hold the root)
 */
public SentenceData09(SentenceData09 i) {
    int length = i.length() - 1;
    forms = new String[length];
    gpos = new String[length];
    ppos = new String[length];
    plemmas = new String[length];
    plabels = new String[length];
    lemmas = new String[length];
    heads = new int[length];
    pheads = new int[length];
    ofeats = new String[length];
    pfeats = new String[length];
    labels = new String[length];
    fillp = new String[length];
    id = new String[length];

    // Shift every column down by one to drop the root at index 0.
    for (int j = 0; j < length; j++) {
        forms[j] = i.forms[j + 1];
        ppos[j] = i.ppos[j + 1];
        gpos[j] = i.gpos[j + 1];
        labels[j] = i.labels[j + 1];
        heads[j] = i.heads[j + 1];
        if (i.pheads != null) pheads[j] = i.pheads[j + 1];
        if (i.plabels != null) plabels[j] = i.plabels[j + 1];
        if (i.lemmas != null) lemmas[j] = i.lemmas[j + 1];
        plemmas[j] = i.plemmas[j + 1];
        if (i.ofeats != null) ofeats[j] = i.ofeats[j + 1];
        if (i.pfeats != null) pfeats[j] = i.pfeats[j + 1];
        if (i.fillp != null) fillp[j] = i.fillp[j + 1];
        if (i.id != null) id[j] = i.id[j + 1];
    }
}
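// A short usage sketch for the root-stripping copy above. The file name is an
// assumption; the input must be fully populated (forms, ppos, labels, heads,
// plemmas, ...) since those arrays are dereferenced unconditionally:
CONLLReader09 reader = new CONLLReader09("gold.conll", -1);
SentenceData09 withRoot = reader.getNext();          // index 0 holds the artificial root
SentenceData09 noRoot = new SentenceData09(withRoot);
// noRoot.forms[0] is now the first real token (withRoot.forms[1]),
// and noRoot.length() == withRoot.length() - 1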
public static Results evaluate(String act_file, String pred_file) throws Exception {
    CONLLReader09 goldReader = new CONLLReader09(act_file, -1);
    CONLLReader09 predictedReader = new CONLLReader09(pred_file, -1);

    int total = 0, corr = 0, corrL = 0;
    int numsent = 0, corrsent = 0, corrsentL = 0;
    SentenceData09 goldInstance = goldReader.getNext();
    SentenceData09 predInstance = predictedReader.getNext();

    while (goldInstance != null) {
        int instanceLength = goldInstance.length();
        if (instanceLength != predInstance.length()) {
            Parser.out.println("Lengths do not match on sentence " + numsent);
        }

        int[] goldHeads = goldInstance.heads;
        String[] goldLabels = goldInstance.labels;
        int[] predHeads = predInstance.pheads;
        String[] predLabels = predInstance.plabels;

        boolean whole = true;
        boolean wholeL = true;

        // NOTE: the first item is the root info added during nextInstance(), so we skip it.
        int punc = 0;
        for (int i = 1; i < instanceLength; i++) {
            if (predHeads[i] == goldHeads[i]) {
                corr++;
                if (goldLabels[i].equals(predLabels[i])) {
                    corrL++;
                } else {
                    // Parser.out.println(numsent + " error gold " + goldLabels[i] + " " + predLabels[i]
                    //     + " head " + goldHeads[i] + " child " + i);
                    wholeL = false;
                }
            } else {
                // Parser.out.println(numsent + " error gold " + goldLabels[i] + " " + predLabels[i]
                //     + " head " + goldHeads[i] + " child " + i);
                whole = false;
                wholeL = false;
            }
        }

        total += ((instanceLength - 1) - punc); // Subtract one to not score the fake root token

        if (whole) {
            corrsent++;
        }
        if (wholeL) {
            corrsentL++;
        }
        numsent++;

        goldInstance = goldReader.getNext();
        predInstance = predictedReader.getNext();
    }

    Results r = new Results();
    r.total = total;
    r.corr = corr;
    r.las = (float) Math.round(((double) corrL / total) * 100000) / 1000;
    r.ula = (float) Math.round(((double) corr / total) * 100000) / 1000;

    Parser.out.print("Total: " + total + " \tCorrect: " + corr + " ");
    Parser.out.println(
        "LAS: " + (double) Math.round(((double) corrL / total) * 100000) / 1000
            + " \tTotal: " + (double) Math.round(((double) corrsentL / numsent) * 100000) / 1000
            + " \tULA: " + (double) Math.round(((double) corr / total) * 100000) / 1000
            + " \tTotal: " + (double) Math.round(((double) corrsent / numsent) * 100000) / 1000);
    return r;
}
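// The "* 100000 ... / 1000" pattern above is fixed-point rounding to three
// decimals; a worked example with assumed counts:
int corrL = 907, total = 1000;
float las = (float) Math.round(((double) corrL / total) * 100000) / 1000;
// 0.907 * 100000 = 90700 -> Math.round keeps 90700 -> / 1000 = 90.7 (LAS in percent)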
public void createWithRoot(SentenceData09 i) {
    int length = i.length();
    int offset = 0;
    // If the input has no artificial root yet, grow by one slot and shift the copy.
    if (!i.forms[0].equals(CONLLReader09.ROOT)) {
        length++;
        offset = -1;
    }

    forms = new String[length];
    gpos = new String[length];
    ppos = new String[length];
    plemmas = new String[length];
    plabels = new String[length];
    lemmas = new String[length];
    heads = new int[length];
    pheads = new int[length];
    ofeats = new String[length];
    pfeats = new String[length];
    labels = new String[length];
    fillp = new String[length];
    id = new String[length];
    feats = new String[forms.length][];

    for (int j = 1; j < length; j++) {
        forms[j] = i.forms[j + offset];
        ppos[j] = i.ppos[j + offset];
        gpos[j] = i.gpos[j + offset];
        labels[j] = i.labels[j + offset];
        heads[j] = i.heads[j + offset];
        if (i.pheads != null) pheads[j] = i.pheads[j + offset];
        if (i.plabels != null) plabels[j] = i.plabels[j + offset];
        if (i.lemmas != null) lemmas[j] = i.lemmas[j + offset];
        plemmas[j] = i.plemmas[j + offset];
        ofeats[j] = i.ofeats[j + offset].equals(CONLLWriter09.DASH) ? "_" : i.ofeats[j + offset];
        if (i.pfeats != null && i.pfeats[j + offset] != null) {
            if (i.pfeats[j + offset].equals(CONLLWriter09.DASH)) {
                feats[j] = null;
            } else {
                feats[j] = i.pfeats[j + offset].split(CONLLReader09.PIPE);
                pfeats[j] = i.pfeats[j + offset];
            }
        }
        if (i.fillp != null) fillp[j] = i.fillp[j + offset];
        if (i.id != null) id[j] = i.id[j + offset];
    }

    // Fill slot 0 with the artificial root.
    forms[0] = CONLLReader09.ROOT;
    plemmas[0] = CONLLReader09.ROOT_LEMMA;
    fillp[0] = "N";
    lemmas[0] = CONLLReader09.ROOT_LEMMA;
    gpos[0] = CONLLReader09.ROOT_POS;
    ppos[0] = CONLLReader09.ROOT_POS;
    labels[0] = CONLLReader09.NO_TYPE;
    heads[0] = -1;
    plabels[0] = CONLLReader09.NO_TYPE;
    pheads[0] = -1;
    ofeats[0] = CONLLReader09.NO_TYPE;
    id[0] = "0";
}
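// And the inverse direction: re-rooting a sentence, e.g. the rootless copy from
// the earlier sketch ("noRoot" is that assumed variable):
SentenceData09 rooted = new SentenceData09();
rooted.createWithRoot(noRoot);
// forms[0] is now CONLLReader09.ROOT and the real tokens sit at indices 1..length()-1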
// How to parse a sentence and call the tools
public static void main(String[] args) throws IOException {
    // Create a data container for a sentence
    SentenceData09 i = new SentenceData09();

    if (args.length == 1) {
        // input might be a sentence: "This is another test ."
        StringTokenizer st = new StringTokenizer(args[0]);
        ArrayList<String> forms = new ArrayList<>();
        forms.add("<root>");
        while (st.hasMoreTokens()) {
            forms.add(st.nextToken());
        }
        i.init(forms.toArray(new String[0]));
    } else {
        // provide a default sentence
        i.init(new String[] {"<root>", "This", "is", "a", "test", "."});
    }

    // print the forms
    for (String l : i.forms) {
        Parser.out.println("form : " + l);
    }

    // tell the lemmatizer the location of the model
    is2.lemmatizer.Options optsLemmatizer =
        new is2.lemmatizer.Options(new String[] {"-model", "models/lemma-eng.model"});
    // create a lemmatizer
    Lemmatizer lemmatizer = new Lemmatizer(optsLemmatizer);
    // lemmatize the sentence; the result is stored in the SentenceData09 i
    i = lemmatizer.apply(i);

    // output the lemmata
    for (String l : i.plemmas) {
        Parser.out.println("lemma : " + l);
    }

    // tell the tagger the location of the model
    is2.tag.Options optsTagger =
        new is2.tag.Options(new String[] {"-model", "models/tag-eng.model"});
    Tagger tagger = new Tagger(optsTagger);
    // String[] pos = tagger.tag(i.forms, i.lemmas);
    // i.setPPos(pos);
    SentenceData09 tagged = tagger.tag(i);
    for (String p : tagged.ppos) {
        Parser.out.println("pos " + p);
    }

    // initialize the parser options
    Options optsParser = new Options(new String[] {"-model", "models/prs-eng-x.model"});
    // create a parser
    Parser parser = new Parser(optsParser);
    // parse the sentence (you get a copy of the input i)
    SentenceData09 parse = parser.apply(tagged);
    Parser.out.println(parse.toString());

    // create some trash on the hard drive :-)
    is2.io.CONLLWriter09 writer = new is2.io.CONLLWriter09("example-out.txt");
    writer.write(i);
    writer.finishWriting();
}
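// Follow-up sketch: instead of relying on toString(), the predicted heads and
// labels can be read back from the parsed copy. pheads/plabels are the same
// predicted columns that evaluate() scores; whether index 0 still holds the
// artificial root after apply() depends on the tool, so every slot is printed:
for (int k = 0; k < parse.length(); k++) {
    Parser.out.println(parse.forms[k] + "\thead=" + parse.pheads[k] + "\tlabel=" + parse.plabels[k]);
}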