/**
 * Runs Mate POS tagging and lemmatization over the word annotations of a CAS.
 *
 * <p>The covered texts of all {@code WordAnnotation}s, in index iteration order, are sent to
 * the tagger and then to the lemmatizer as a single sentence. The predicted tag and lemma
 * are written back onto the annotations. The whole call is bracketed by the
 * "AnalysisEngine" profiler.
 *
 * @param jcas the CAS whose word annotations are updated in place
 * @throws AnalysisEngineProcessException declared by the signature; not thrown explicitly here
 */
@Override
  public void process(JCas jcas) throws AnalysisEngineProcessException {
    UIMAProfiler.getProfiler("AnalysisEngine").start(this, "process");

    Lemmatizer mateLemmatizer = mateLemmatizerModel.getEngine();
    Tagger mateTagger = mateTaggerModel.getEngine();

    // Snapshot the word annotations into a list so they can be addressed by position later.
    List<WordAnnotation> words = Lists.newArrayList();
    for (FSIterator<Annotation> cursor = jcas.getAnnotationIndex(WordAnnotation.type).iterator();
        cursor.hasNext(); ) {
      words.add((WordAnnotation) cursor.next());
    }

    // Prepend two fake words to prevent Mate from misbehaving on the first two real words.
    String[] tokens = new String[words.size() + 2];
    tokens[0] = "<root>";
    tokens[1] = "<root2>";
    int slot = 2;
    for (WordAnnotation word : words) {
      tokens[slot++] = word.getCoveredText();
    }

    SentenceData09 sentence = new SentenceData09();
    sentence.init(tokens);
    sentence = mateTagger.apply(sentence); // POS tagging
    sentence = mateLemmatizer.apply(sentence); // lemmatization

    // Result entry p (starting at 1) is written onto the word at list position p - 1.
    // NOTE(review): this mapping relies on the length of the sentence returned by the two
    // apply() calls, which is not visible here - confirm against the Mate tools.
    int position = 0;
    while (position + 1 < sentence.length()) {
      WordAnnotation target = words.get(position);
      target.setTag(sentence.ppos[position + 1]);
      target.setLemma(sentence.plemmas[position + 1]);
      position++;
    }

    UIMAProfiler.getProfiler("AnalysisEngine").stop(this, "process");
  }
 /**
  * Produces one lemma annotation per token of the given sentence.
  *
  * <p>The model is started first if it is not running yet. The sentence is tokenized with
  * the {@code TOKEN} annotator, the token texts are handed to the lemmatizer behind an
  * artificial {@code <root>} entry, and each resulting annotation carries the lemma
  * together with the start and end offsets of its token.
  *
  * @param sentence the sentence annotation to tokenize and lemmatize
  * @return the lemma annotations, in token order
  * @throws IncompatibleAnnotationException declared by the signature; presumably raised by
  *     the token annotator - confirm against its contract
  */
 public synchronized List<StringAnnotation> annotate(IAnnotation<String> sentence)
     throws IncompatibleAnnotationException {
   if (!modelStarted()) {
     startModel();
   }

   List<? extends IAnnotation<String>> tokens =
       StringAnnotatorEnum.TOKEN.getAnnotator().annotate(sentence);

   // Slot 0 holds the artificial root; token k occupies slot k + 1.
   String[] forms = new String[tokens.size() + 1];
   forms[0] = "<root>";
   int slot = 1;
   for (IAnnotation<String> token : tokens) {
     forms[slot++] = token.getAnnotation();
   }

   SentenceData09 sent = new SentenceData09();
   sent.init(forms);
   // The returned sentence is discarded; the lemmas are read back from sent below.
   // NOTE(review): this relies on apply() filling sent.plemmas in place - confirm.
   lemmatizer.apply(sent);

   List<StringAnnotation> lemmas = new ArrayList<StringAnnotation>();
   int lemmaIndex = 1; // skip the root entry at index 0
   for (IAnnotation<String> token : tokens) {
     lemmas.add(
         new StringAnnotation(sent.plemmas[lemmaIndex++], token.getStart(), token.getEnd()));
   }
   return lemmas;
 }
Пример #3
0
  /**
   * Create an instance without root of the input instance
   *
   * @param instance
   */
  public SentenceData09(SentenceData09 i) {

    int length = i.length() - 1;

    forms = new String[length];
    gpos = new String[length];
    ppos = new String[length];
    plemmas = new String[length];
    plabels = new String[length];
    lemmas = new String[length];
    heads = new int[length];
    pheads = new int[length];
    ofeats = new String[length];
    pfeats = new String[length];
    labels = new String[length];
    fillp = new String[length];
    id = new String[length];

    for (int j = 0; j < length; j++) {
      forms[j] = i.forms[j + 1];
      ppos[j] = i.ppos[j + 1];
      gpos[j] = i.gpos[j + 1];

      labels[j] = i.labels[j + 1];
      heads[j] = i.heads[j + 1];

      if (i.pheads != null) pheads[j] = i.pheads[j + 1];
      if (i.plabels != null) plabels[j] = i.plabels[j + 1];

      if (i.lemmas != null) lemmas[j] = i.lemmas[j + 1];

      plemmas[j] = i.plemmas[j + 1];

      if (i.ofeats != null) ofeats[j] = i.ofeats[j + 1];
      if (i.pfeats != null) pfeats[j] = i.pfeats[j + 1];

      if (i.fillp != null) fillp[j] = i.fillp[j + 1];
      if (i.id != null) id[j] = i.id[j + 1];
    }
  }
  /**
   * Scores predicted dependency heads and labels against a gold standard.
   *
   * <p>Both files are read sentence by sentence with {@code CONLLReader09}. Index 0 of every
   * sentence is the artificial root and is never scored. A token is correct when its
   * predicted head equals the gold head, and labeled-correct when the labels are equal as
   * well. A summary with the labeled/unlabeled scores per token and per sentence is printed
   * to {@code Parser.out}.
   *
   * <p>Missing predictions are reported on {@code Parser.out} and scored as wrong: this
   * covers a predicted file that ends before the gold file, and a predicted sentence that is
   * shorter than its gold sentence. Predicted sentences beyond the end of the gold file are
   * ignored.
   *
   * @param act_file path of the gold (actual) file
   * @param pred_file path of the file with the predictions
   * @return a {@code Results} with {@code total}, {@code corr}, {@code las} and {@code ula}
   *     set; the two scores are percentages rounded to three decimals
   * @throws Exception declared by the signature; presumably raised by the readers - confirm
   */
  public static Results evaluate(String act_file, String pred_file) throws Exception {

    CONLLReader09 goldReader = new CONLLReader09(act_file, -1);
    CONLLReader09 predictedReader = new CONLLReader09(pred_file, -1);

    int total = 0, corr = 0, corrL = 0;
    int numsent = 0, corrsent = 0, corrsentL = 0;
    SentenceData09 goldInstance = goldReader.getNext();
    SentenceData09 predInstance = predictedReader.getNext();

    while (goldInstance != null) {

      int instanceLength = goldInstance.length();

      if (predInstance == null) {
        // The predicted file ran out of sentences: score the whole gold sentence as wrong
        // instead of failing with a NullPointerException.
        Parser.out.println("No predicted sentence for gold sentence " + numsent);
        total += instanceLength - 1; // Subtract one to not score fake root token
        numsent++;
        goldInstance = goldReader.getNext();
        continue;
      }

      int predictedLength = predInstance.length();
      if (instanceLength != predictedLength) {
        Parser.out.println("Lengths do not match on sentence " + numsent);
      }

      int[] goldHeads = goldInstance.heads;
      String[] goldLabels = goldInstance.labels;
      int[] predHeads = predInstance.pheads;
      String[] predLabels = predInstance.plabels;

      // Only positions present in both sentences can be compared. Gold tokens without a
      // prediction make the sentence incorrect and add nothing to the correct counts.
      // Assumes pheads/plabels hold predInstance.length() entries - TODO confirm.
      int comparable = Math.min(instanceLength, predictedLength);
      boolean whole = comparable == instanceLength;
      boolean wholeL = whole;

      // NOTE: the first item is the root info added during nextInstance(), so we skip it.
      for (int i = 1; i < comparable; i++) {
        if (predHeads[i] == goldHeads[i]) {
          corr++;

          if (goldLabels[i].equals(predLabels[i])) {
            corrL++;
          } else {
            wholeL = false;
          }
        } else {
          whole = false;
          wholeL = false;
        }
      }
      total += instanceLength - 1; // Subtract one to not score fake root token

      if (whole) {
        corrsent++;
      }
      if (wholeL) {
        corrsentL++;
      }
      numsent++;

      goldInstance = goldReader.getNext();
      predInstance = predictedReader.getNext();
    }

    // Scores scaled by 1000 (percent with three decimals). With no tokens or no sentences
    // the ratio is NaN, which Math.round maps to 0.
    long lasScaled = Math.round(((double) corrL / total) * 100000);
    long ulaScaled = Math.round(((double) corr / total) * 100000);
    long sentenceLasScaled = Math.round(((double) corrsentL / numsent) * 100000);
    long sentenceUlaScaled = Math.round(((double) corrsent / numsent) * 100000);

    Results r = new Results();

    r.total = total;
    r.corr = corr;
    r.las = (float) lasScaled / 1000;
    r.ula = (float) ulaScaled / 1000;
    Parser.out.print("Total: " + total + " \tCorrect: " + corr + " ");
    Parser.out.println(
        "LAS: "
            + (double) lasScaled / 1000
            + " \tTotal: "
            + (double) sentenceLasScaled / 1000
            + " \tULA: "
            + (double) ulaScaled / 1000
            + " \tTotal: "
            + (double) sentenceUlaScaled / 1000);

    return r;
  }
Пример #5
0
  /**
   * Fills this instance with a copy of the given instance that starts with a root entry.
   *
   * <p>If the first form of the source is not {@code CONLLReader09.ROOT} (or the source is
   * empty), all entries are shifted by one position to make room for the root; otherwise
   * positions are kept. Index 0 is then overwritten with the root constants of
   * {@code CONLLReader09}. {@code forms}, {@code ppos}, {@code gpos}, {@code labels},
   * {@code heads} and {@code plemmas} of the source are read unconditionally; the other
   * arrays are copied only when present.
   *
   * <p>A {@code pfeats} value equal to {@code CONLLWriter09.DASH} leaves both
   * {@code feats[j]} and {@code pfeats[j]} {@code null}; any other non-null value is copied
   * to {@code pfeats[j]} and split on {@code CONLLReader09.PIPE} into {@code feats[j]}.
   *
   * @param i the instance to copy, with or without a leading root entry
   */
  public void createWithRoot(SentenceData09 i) {

    int length = i.length();
    int offset = 0;
    // An empty source, or a null first form, has no root either, so one is prepended
    // instead of failing on forms[0].
    if (length == 0 || !CONLLReader09.ROOT.equals(i.forms[0])) {
      length++;
      offset = -1;
    }

    forms = new String[length];
    gpos = new String[length];
    ppos = new String[length];
    plemmas = new String[length];
    plabels = new String[length];
    lemmas = new String[length];
    heads = new int[length];
    pheads = new int[length];
    ofeats = new String[length];
    pfeats = new String[length];
    labels = new String[length];
    fillp = new String[length];
    id = new String[length];
    feats = new String[forms.length][];

    // Index 0 is reserved for the root and is filled in after the loop.
    for (int j = 1; j < length; j++) {
      int src = j + offset;

      forms[j] = i.forms[src];
      ppos[j] = i.ppos[src];
      gpos[j] = i.gpos[src];

      labels[j] = i.labels[src];
      heads[j] = i.heads[src];

      if (i.pheads != null) pheads[j] = i.pheads[src];
      if (i.plabels != null) plabels[j] = i.plabels[src];

      if (i.lemmas != null) lemmas[j] = i.lemmas[src];

      plemmas[j] = i.plemmas[src];

      // Guarded like pfeats below: a source without ofeats leaves the entry null
      // instead of raising a NullPointerException.
      if (i.ofeats != null && i.ofeats[src] != null) {
        ofeats[j] = i.ofeats[src].equals(CONLLWriter09.DASH) ? "_" : i.ofeats[src];
      }

      if (i.pfeats != null && i.pfeats[src] != null) {
        if (i.pfeats[src].equals(CONLLWriter09.DASH)) {
          feats[j] = null;
        } else {
          feats[j] = i.pfeats[src].split(CONLLReader09.PIPE);
          pfeats[j] = i.pfeats[src];
        }
      }

      if (i.fillp != null) fillp[j] = i.fillp[src];
      if (i.id != null) id[j] = i.id[src];
    }

    forms[0] = CONLLReader09.ROOT;
    plemmas[0] = CONLLReader09.ROOT_LEMMA;
    fillp[0] = "N";
    lemmas[0] = CONLLReader09.ROOT_LEMMA;

    gpos[0] = CONLLReader09.ROOT_POS;
    ppos[0] = CONLLReader09.ROOT_POS;
    labels[0] = CONLLReader09.NO_TYPE;
    heads[0] = -1;
    plabels[0] = CONLLReader09.NO_TYPE;
    pheads[0] = -1;
    ofeats[0] = CONLLReader09.NO_TYPE;
    id[0] = "0";
  }
  /**
   * Demonstrates how to parse a sentence and call the tools: lemmatizer, tagger and parser.
   *
   * <p>With exactly one argument, that argument is split with a {@code StringTokenizer}
   * (default delimiters) and used as the sentence; otherwise a default sentence is used. In
   * both cases {@code <root>} is placed in front. Forms, lemmas, POS tags and the parse are
   * printed to {@code Parser.out}, and the sentence is written to {@code example-out.txt}.
   *
   * @param args optionally a single sentence such as {@code "This is another test ."}
   * @throws IOException declared by the signature; presumably raised while loading the
   *     models or writing the output file - confirm
   */
  public static void main(String[] args) throws IOException {

    // Token forms of the sentence; position 0 is the artificial root.
    String[] words;
    if (args.length == 1) { // input might be a sentence: "This is another test ."
      ArrayList<String> collected = new ArrayList<>();
      collected.add("<root>");
      for (StringTokenizer tok = new StringTokenizer(args[0]); tok.hasMoreTokens(); ) {
        collected.add(tok.nextToken());
      }
      words = collected.toArray(new String[0]);
    } else {
      // provide a default sentence
      words = new String[] {"<root>", "This", "is", "a", "test", "."};
    }

    // Data container for the sentence
    SentenceData09 sentence = new SentenceData09();
    sentence.init(words);

    // print the forms
    for (int k = 0; k < sentence.forms.length; k++) {
      Parser.out.println("form : " + sentence.forms[k]);
    }

    // Lemmatizer, pointed at the location of its model
    is2.lemmatizer.Options lemmatizerOptions =
        new is2.lemmatizer.Options(new String[] {"-model", "models/lemma-eng.model"});
    Lemmatizer lemmatizer = new Lemmatizer(lemmatizerOptions);

    // lemmatize the sentence and continue with the returned instance
    sentence = lemmatizer.apply(sentence);

    // output the lemmata
    for (int k = 0; k < sentence.plemmas.length; k++) {
      Parser.out.println("lemma : " + sentence.plemmas[k]);
    }

    // Tagger, pointed at the location of its model
    is2.tag.Options taggerOptions =
        new is2.tag.Options(new String[] {"-model", "models/tag-eng.model"});
    Tagger tagger = new Tagger(taggerOptions);

    SentenceData09 tagged = tagger.tag(sentence);
    for (int k = 0; k < tagged.ppos.length; k++) {
      Parser.out.println("pos " + tagged.ppos[k]);
    }

    // Parser, pointed at the location of its model
    Options parserOptions = new Options(new String[] {"-model", "models/prs-eng-x.model"});
    Parser parser = new Parser(parserOptions);

    // parse the tagged sentence and print the result
    SentenceData09 parse = parser.apply(tagged);
    Parser.out.println(parse.toString());

    // Write the sentence to a file on disk.
    // NOTE(review): this writes the lemmatized sentence, not the parser output held in
    // parse - confirm that this is intended.
    is2.io.CONLLWriter09 writer = new is2.io.CONLLWriter09("example-out.txt");
    writer.write(sentence);
    writer.finishWriting();
  }