@Override public void process(JCas jcas) throws AnalysisEngineProcessException { UIMAProfiler.getProfiler("AnalysisEngine").start(this, "process"); Lemmatizer mateLemmatizer = mateLemmatizerModel.getEngine(); Tagger mateTagger = mateTaggerModel.getEngine(); /* * keeps an array of annotations in memory so as to be able * to access them by index. */ List<WordAnnotation> annotations = Lists.newArrayList(); FSIterator<Annotation> it = jcas.getAnnotationIndex(WordAnnotation.type).iterator(); while (it.hasNext()) { WordAnnotation a = (WordAnnotation) it.next(); annotations.add(a); } String[] tokens = new String[annotations.size() + 2]; // preprends to fake words to prevent Mate from bugging on the two first words tokens[0] = "<root>"; tokens[1] = "<root2>"; for (int i = 0; i < annotations.size(); i++) tokens[i + 2] = annotations.get(i).getCoveredText(); SentenceData09 mateSentence = new SentenceData09(); mateSentence.init(tokens); // Run POS tagging mateSentence = mateTagger.apply(mateSentence); // Run lemmatization mateSentence = mateLemmatizer.apply(mateSentence); WordAnnotation wordAnnotation; for (int j = 1; j < mateSentence.length(); j++) { wordAnnotation = annotations.get(j - 1); wordAnnotation.setTag(mateSentence.ppos[j]); wordAnnotation.setLemma(mateSentence.plemmas[j]); } UIMAProfiler.getProfiler("AnalysisEngine").stop(this, "process"); }
/** * Create an instance without root of the input instance * * @param instance */ public SentenceData09(SentenceData09 i) { int length = i.length() - 1; forms = new String[length]; gpos = new String[length]; ppos = new String[length]; plemmas = new String[length]; plabels = new String[length]; lemmas = new String[length]; heads = new int[length]; pheads = new int[length]; ofeats = new String[length]; pfeats = new String[length]; labels = new String[length]; fillp = new String[length]; id = new String[length]; for (int j = 0; j < length; j++) { forms[j] = i.forms[j + 1]; ppos[j] = i.ppos[j + 1]; gpos[j] = i.gpos[j + 1]; labels[j] = i.labels[j + 1]; heads[j] = i.heads[j + 1]; if (i.pheads != null) pheads[j] = i.pheads[j + 1]; if (i.plabels != null) plabels[j] = i.plabels[j + 1]; if (i.lemmas != null) lemmas[j] = i.lemmas[j + 1]; plemmas[j] = i.plemmas[j + 1]; if (i.ofeats != null) ofeats[j] = i.ofeats[j + 1]; if (i.pfeats != null) pfeats[j] = i.pfeats[j + 1]; if (i.fillp != null) fillp[j] = i.fillp[j + 1]; if (i.id != null) id[j] = i.id[j + 1]; } }
public static Results evaluate(String act_file, String pred_file) throws Exception { CONLLReader09 goldReader = new CONLLReader09(act_file, -1); CONLLReader09 predictedReader = new CONLLReader09(pred_file, -1); int total = 0, corr = 0, corrL = 0; int numsent = 0, corrsent = 0, corrsentL = 0; SentenceData09 goldInstance = goldReader.getNext(); SentenceData09 predInstance = predictedReader.getNext(); while (goldInstance != null) { int instanceLength = goldInstance.length(); if (instanceLength != predInstance.length()) { Parser.out.println("Lengths do not match on sentence " + numsent); } int[] goldHeads = goldInstance.heads; String[] goldLabels = goldInstance.labels; int[] predHeads = predInstance.pheads; String[] predLabels = predInstance.plabels; boolean whole = true; boolean wholeL = true; // NOTE: the first item is the root info added during nextInstance(), so we skip it. int punc = 0; for (int i = 1; i < instanceLength; i++) { if (predHeads[i] == goldHeads[i]) { corr++; if (goldLabels[i].equals(predLabels[i])) { corrL++; } else { // Parser.out.println(numsent+" error gold "+goldLabels[i]+" "+predLabels[i]+" head // "+goldHeads[i]+" child "+i); wholeL = false; } } else { // Parser.out.println(numsent+"error gold "+goldLabels[i]+" "+predLabels[i]+" head // "+goldHeads[i]+" child "+i); whole = false; wholeL = false; } } total += ((instanceLength - 1) - punc); // Subtract one to not score fake root token if (whole) { corrsent++; } if (wholeL) { corrsentL++; } numsent++; goldInstance = goldReader.getNext(); predInstance = predictedReader.getNext(); } Results r = new Results(); r.total = total; r.corr = corr; r.las = (float) Math.round(((double) corrL / total) * 100000) / 1000; r.ula = (float) Math.round(((double) corr / total) * 100000) / 1000; Parser.out.print("Total: " + total + " \tCorrect: " + corr + " "); Parser.out.println( "LAS: " + (double) Math.round(((double) corrL / total) * 100000) / 1000 + " \tTotal: " + (double) Math.round(((double) corrsentL / numsent) * 100000) / 1000 + " \tULA: " + (double) Math.round(((double) corr / total) * 100000) / 1000 + " \tTotal: " + (double) Math.round(((double) corrsent / numsent) * 100000) / 1000); return r; }
public void createWithRoot(SentenceData09 i) { int length = i.length(); int offset = 0; if (!i.forms[0].equals(CONLLReader09.ROOT)) { length++; offset = -1; } forms = new String[length]; gpos = new String[length]; ppos = new String[length]; plemmas = new String[length]; plabels = new String[length]; lemmas = new String[length]; heads = new int[length]; pheads = new int[length]; ofeats = new String[length]; pfeats = new String[length]; labels = new String[length]; fillp = new String[length]; id = new String[length]; feats = new String[forms.length][]; for (int j = 1; j < length; j++) { forms[j] = i.forms[j + offset]; ppos[j] = i.ppos[j + offset]; gpos[j] = i.gpos[j + offset]; labels[j] = i.labels[j + offset]; heads[j] = i.heads[j + offset]; if (i.pheads != null) pheads[j] = i.pheads[j + offset]; if (i.plabels != null) plabels[j] = i.plabels[j + offset]; if (i.lemmas != null) lemmas[j] = i.lemmas[j + offset]; plemmas[j] = i.plemmas[j + offset]; // if (i.ofeats!=null) ofeats[j] = i.ofeats[j+offset]; ofeats[j] = i.ofeats[j + offset].equals(CONLLWriter09.DASH) ? "_" : i.ofeats[j + offset]; // if (i.pfeats!=null) pfeats[j] = i.pfeats[j+offset]; if (i.pfeats != null && i.pfeats[j + offset] != null) { if (i.pfeats[j + offset].equals(CONLLWriter09.DASH)) feats[j] = null; else { feats[j] = i.pfeats[j + offset].split(CONLLReader09.PIPE); // if (info[7].equals(CONLLWriter09.DASH)) it.feats[i]=null; // else { // it.feats[i] =info[7].split(PIPE); pfeats[j] = i.pfeats[j + offset]; // } } } if (i.fillp != null) fillp[j] = i.fillp[j + offset]; if (i.id != null) id[j] = i.id[j + offset]; } forms[0] = CONLLReader09.ROOT; plemmas[0] = CONLLReader09.ROOT_LEMMA; fillp[0] = "N"; lemmas[0] = CONLLReader09.ROOT_LEMMA; gpos[0] = CONLLReader09.ROOT_POS; ppos[0] = CONLLReader09.ROOT_POS; labels[0] = CONLLReader09.NO_TYPE; heads[0] = -1; plabels[0] = CONLLReader09.NO_TYPE; pheads[0] = -1; ofeats[0] = CONLLReader09.NO_TYPE; id[0] = "0"; }