@Override
  public void process(JCas jcas) throws AnalysisEngineProcessException {
    // Tags and lemmatizes every WordAnnotation of the CAS with the Mate engines. All words
    // found in the annotation index are handed to Mate as one single sentence.
    UIMAProfiler.getProfiler("AnalysisEngine").start(this, "process");

    Lemmatizer mateLemmatizer = mateLemmatizerModel.getEngine();
    Tagger mateTagger = mateTaggerModel.getEngine();

    // Snapshot the word annotations into a list so they can be addressed by position later.
    List<WordAnnotation> words = Lists.newArrayList();
    for (FSIterator<Annotation> cursor = jcas.getAnnotationIndex(WordAnnotation.type).iterator();
        cursor.hasNext(); ) {
      words.add((WordAnnotation) cursor.next());
    }

    // Two placeholder tokens are put in front of the real words to prevent Mate from
    // misbehaving on the first two words.
    String[] tokens = new String[words.size() + 2];
    tokens[0] = "<root>";
    tokens[1] = "<root2>";
    int slot = 2;
    for (WordAnnotation word : words) {
      tokens[slot++] = word.getCoveredText();
    }

    SentenceData09 sentence = new SentenceData09();
    sentence.init(tokens);
    sentence = mateTagger.apply(sentence); // POS tagging
    sentence = mateLemmatizer.apply(sentence); // lemmatization

    // Entry k + 1 of the Mate output is written back onto word k.
    // NOTE(review): this mapping presumably relies on apply() returning a sentence that is
    // one entry shorter than the token array built above (leading root removed) - confirm
    // against the Mate Tagger/Lemmatizer implementation.
    for (int k = 0; k + 1 < sentence.length(); k++) {
      WordAnnotation target = words.get(k);
      target.setTag(sentence.ppos[k + 1]);
      target.setLemma(sentence.plemmas[k + 1]);
    }

    UIMAProfiler.getProfiler("AnalysisEngine").stop(this, "process");
  }
Example #2
0
  /**
   * Create an instance without root of the input instance
   *
   * @param instance
   */
  public SentenceData09(SentenceData09 i) {

    int length = i.length() - 1;

    forms = new String[length];
    gpos = new String[length];
    ppos = new String[length];
    plemmas = new String[length];
    plabels = new String[length];
    lemmas = new String[length];
    heads = new int[length];
    pheads = new int[length];
    ofeats = new String[length];
    pfeats = new String[length];
    labels = new String[length];
    fillp = new String[length];
    id = new String[length];

    for (int j = 0; j < length; j++) {
      forms[j] = i.forms[j + 1];
      ppos[j] = i.ppos[j + 1];
      gpos[j] = i.gpos[j + 1];

      labels[j] = i.labels[j + 1];
      heads[j] = i.heads[j + 1];

      if (i.pheads != null) pheads[j] = i.pheads[j + 1];
      if (i.plabels != null) plabels[j] = i.plabels[j + 1];

      if (i.lemmas != null) lemmas[j] = i.lemmas[j + 1];

      plemmas[j] = i.plemmas[j + 1];

      if (i.ofeats != null) ofeats[j] = i.ofeats[j + 1];
      if (i.pfeats != null) pfeats[j] = i.pfeats[j + 1];

      if (i.fillp != null) fillp[j] = i.fillp[j + 1];
      if (i.id != null) id[j] = i.id[j + 1];
    }
  }
  /**
   * Compares predicted dependency heads and labels against gold ones and prints a summary.
   *
   * <p>Sentences are read pairwise from two {@link CONLLReader09} instances. For every entry
   * after index 0 the predicted head ({@code pheads}) is compared with the gold head ({@code
   * heads}); an entry with a matching head additionally counts as labelled-correct when the
   * gold label equals the predicted label ({@code plabels}). Sentence-level counters record
   * sentences without any head error and without any head-or-label error.
   *
   * <p>When a predicted sentence is shorter than its gold counterpart, the missing entries
   * are counted as errors instead of being read past the end of the predicted arrays.
   *
   * @param act_file gold file, handed to {@link CONLLReader09}
   * @param pred_file predicted file, handed to {@link CONLLReader09}
   * @return a {@code Results} object with {@code total}, {@code corr}, {@code las} and {@code
   *     ula} filled in
   * @throws IllegalStateException if the predicted file yields fewer sentences than the gold
   *     file
   * @throws Exception if reading either file fails
   */
  public static Results evaluate(String act_file, String pred_file) throws Exception {

    CONLLReader09 goldReader = new CONLLReader09(act_file, -1);
    CONLLReader09 predictedReader = new CONLLReader09(pred_file, -1);

    int total = 0, corr = 0, corrL = 0;
    int numsent = 0, corrsent = 0, corrsentL = 0;
    SentenceData09 goldInstance = goldReader.getNext();
    SentenceData09 predInstance = predictedReader.getNext();

    while (goldInstance != null) {

      // Fail with a descriptive message rather than a bare NullPointerException below.
      if (predInstance == null) {
        throw new IllegalStateException(
            "Predicted file "
                + pred_file
                + " has no sentence "
                + numsent
                + " although gold file "
                + act_file
                + " does");
      }

      int instanceLength = goldInstance.length();
      int predLength = predInstance.length();

      if (instanceLength != predLength) {
        Parser.out.println("Lengths do not match on sentence " + numsent);
      }

      int[] goldHeads = goldInstance.heads;
      String[] goldLabels = goldInstance.labels;
      int[] predHeads = predInstance.pheads;
      String[] predLabels = predInstance.plabels;

      // Entries missing from a too-short prediction are errors, so such a sentence can never
      // be entirely correct. A longer prediction is still scored on the gold entries only.
      boolean whole = predLength >= instanceLength;
      boolean wholeL = whole;

      // Only entries present on both sides can be compared.
      int comparable = Math.min(instanceLength, predLength);

      // NOTE: the first item is the root info added during nextInstance(), so we skip it.
      for (int i = 1; i < comparable; i++) {
        if (predHeads[i] == goldHeads[i]) {
          corr++;

          if (goldLabels[i].equals(predLabels[i])) {
            corrL++;
          } else {
            wholeL = false;
          }
        } else {
          whole = false;
          wholeL = false;
        }
      }
      total += instanceLength - 1; // Subtract one to not score fake root token

      if (whole) {
        corrsent++;
      }
      if (wholeL) {
        corrsentL++;
      }
      numsent++;

      goldInstance = goldReader.getNext();
      predInstance = predictedReader.getNext();
    }

    Results r = new Results();

    // Scores are percentages rounded to three decimal places.
    r.total = total;
    r.corr = corr;
    r.las = (float) Math.round(((double) corrL / total) * 100000) / 1000;
    r.ula = (float) Math.round(((double) corr / total) * 100000) / 1000;
    Parser.out.print("Total: " + total + " \tCorrect: " + corr + " ");
    Parser.out.println(
        "LAS: "
            + (double) Math.round(((double) corrL / total) * 100000) / 1000
            + " \tTotal: "
            + (double) Math.round(((double) corrsentL / numsent) * 100000) / 1000
            + " \tULA: "
            + (double) Math.round(((double) corr / total) * 100000) / 1000
            + " \tTotal: "
            + (double) Math.round(((double) corrsent / numsent) * 100000) / 1000);

    return r;
  }
Example #4
0
  /**
   * Fills this instance with a copy of the given sentence that starts with a root entry.
   *
   * <p>If the first form of {@code i} already equals {@link CONLLReader09#ROOT}, entries are
   * copied position for position. Otherwise every entry moves up by one so that index 0 is
   * free. In both cases index 0 is then overwritten with the root constants of {@link
   * CONLLReader09}. An empty sentence, or one whose first form is {@code null}, is treated as
   * having no root.
   *
   * <p>The {@code pheads}, {@code plabels}, {@code lemmas}, {@code ofeats}, {@code pfeats},
   * {@code fillp} and {@code id} columns are optional in {@code i}: a missing column (or, for
   * {@code ofeats} and {@code pfeats}, a missing element) leaves the corresponding entry of
   * this instance at its default.
   *
   * @param i the sentence to copy from
   */
  public void createWithRoot(SentenceData09 i) {

    int length = i.length();
    int offset = 0;
    // Make room for a root entry unless the source already starts with one.
    if (length == 0 || !CONLLReader09.ROOT.equals(i.forms[0])) {
      length++;
      offset = -1;
    }

    forms = new String[length];
    gpos = new String[length];
    ppos = new String[length];
    plemmas = new String[length];
    plabels = new String[length];
    lemmas = new String[length];
    heads = new int[length];
    pheads = new int[length];
    ofeats = new String[length];
    pfeats = new String[length];
    labels = new String[length];
    fillp = new String[length];
    id = new String[length];
    feats = new String[forms.length][];

    for (int j = 1; j < length; j++) {
      // Position in the source sentence that lands at position j of this instance.
      final int src = j + offset;

      forms[j] = i.forms[src];
      ppos[j] = i.ppos[src];
      gpos[j] = i.gpos[src];

      labels[j] = i.labels[src];
      heads[j] = i.heads[src];

      if (i.pheads != null) {
        pheads[j] = i.pheads[src];
      }
      if (i.plabels != null) {
        plabels[j] = i.plabels[src];
      }

      if (i.lemmas != null) {
        lemmas[j] = i.lemmas[src];
      }

      plemmas[j] = i.plemmas[src];

      // ofeats is optional like the other feature columns, so guard the array and the
      // element before normalizing the DASH marker to "_".
      if (i.ofeats != null && i.ofeats[src] != null) {
        ofeats[j] = i.ofeats[src].equals(CONLLWriter09.DASH) ? "_" : i.ofeats[src];
      }

      if (i.pfeats != null && i.pfeats[src] != null) {
        if (i.pfeats[src].equals(CONLLWriter09.DASH)) {
          // NOTE(review): a DASH value leaves both feats[j] and pfeats[j] null - confirm
          // that pfeats is not meant to carry the DASH marker through.
          feats[j] = null;
        } else {
          feats[j] = i.pfeats[src].split(CONLLReader09.PIPE);
          pfeats[j] = i.pfeats[src];
        }
      }

      if (i.fillp != null) {
        fillp[j] = i.fillp[src];
      }
      if (i.id != null) {
        id[j] = i.id[src];
      }
    }

    // Index 0 always holds the root entry, whether it was copied over or newly added.
    forms[0] = CONLLReader09.ROOT;
    plemmas[0] = CONLLReader09.ROOT_LEMMA;
    fillp[0] = "N";
    lemmas[0] = CONLLReader09.ROOT_LEMMA;

    gpos[0] = CONLLReader09.ROOT_POS;
    ppos[0] = CONLLReader09.ROOT_POS;
    labels[0] = CONLLReader09.NO_TYPE;
    heads[0] = -1;
    plabels[0] = CONLLReader09.NO_TYPE;
    pheads[0] = -1;
    ofeats[0] = CONLLReader09.NO_TYPE;
    id[0] = "0";
  }