コード例 #1
0
  /**
   * Runs the Mate POS tagger and lemmatizer over the word annotations of a CAS.
   *
   * <p>Every {@code WordAnnotation} found in the CAS is collected, their covered texts are
   * submitted to Mate as one single sentence (preceded by two placeholder tokens), and the
   * predicted tag and lemma are stored back on the annotations. The whole call is bracketed by
   * a start/stop on the "AnalysisEngine" profiler.
   *
   * @param jcas the CAS whose {@code WordAnnotation}s are read and updated in place
   * @throws AnalysisEngineProcessException declared by the overridden method; nothing in this
   *     body throws it explicitly
   */
  @Override
  public void process(JCas jcas) throws AnalysisEngineProcessException {
    UIMAProfiler.getProfiler("AnalysisEngine").start(this, "process");

    Lemmatizer lemmatizer = mateLemmatizerModel.getEngine();
    Tagger tagger = mateTaggerModel.getEngine();

    // Copy the word annotations into a list so that they can be addressed by position later.
    List<WordAnnotation> words = Lists.newArrayList();
    for (FSIterator<Annotation> cursor = jcas.getAnnotationIndex(WordAnnotation.type).iterator();
        cursor.hasNext(); ) {
      words.add((WordAnnotation) cursor.next());
    }

    // Two fake words are prepended to prevent Mate from misbehaving on the first two real words;
    // the real words therefore start at slot 2.
    String[] forms = new String[words.size() + 2];
    forms[0] = "<root>";
    forms[1] = "<root2>";
    int slot = 2;
    for (WordAnnotation word : words) {
      forms[slot++] = word.getCoveredText();
    }

    SentenceData09 sentence = new SentenceData09();
    sentence.init(forms);

    // POS tagging first, then lemmatization; each step replaces the sentence with its result.
    sentence = tagger.apply(sentence);
    sentence = lemmatizer.apply(sentence);

    // Position j of the processed sentence is written onto annotation j - 1, starting at j = 1.
    // NOTE(review): this alignment presumably relies on apply() returning a sentence that is
    // shifted by one entry relative to the submitted tokens - confirm against the Mate version
    // in use.
    for (int j = 1; j < sentence.length(); j++) {
      WordAnnotation target = words.get(j - 1);
      target.setTag(sentence.ppos[j]);
      target.setLemma(sentence.plemmas[j]);
    }

    UIMAProfiler.getProfiler("AnalysisEngine").stop(this, "process");
  }
コード例 #2
0
 /**
  * Produces one lemma annotation per token of the given sentence.
  *
  * <p>The sentence is tokenized with the {@code TOKEN} annotator, the token texts are handed to
  * the lemmatizer behind an artificial {@code "<root>"} entry, and each predicted lemma is
  * wrapped in a {@code StringAnnotation} carrying the start and end of its token. The model is
  * started first if it is not running yet.
  *
  * @param sentence the sentence annotation to tokenize and lemmatize
  * @return the lemma annotations, in token order
  * @throws IncompatibleAnnotationException declared by the signature; presumably raised by the
  *     token annotator - verify against its contract
  */
 public synchronized List<StringAnnotation> annotate(IAnnotation<String> sentence)
     throws IncompatibleAnnotationException {
   // Start the model if it has not been started yet.
   if (!modelStarted()) {
     startModel();
   }
   SentenceData09 mateSentence = new SentenceData09();

   List<? extends IAnnotation<String>> tokens =
       StringAnnotatorEnum.TOKEN.getAnnotator().annotate(sentence);

   // Slot 0 holds the artificial "<root>" form; the k-th token goes to slot k + 1.
   String[] forms = new String[tokens.size() + 1];
   forms[0] = "<root>";
   int slot = 1;
   for (IAnnotation<String> token : tokens) {
     forms[slot++] = token.getAnnotation();
   }
   mateSentence.init(forms);

   // The return value is not used: the lemmas are read from the sentence object passed in.
   lemmatizer.apply(mateSentence);

   // Read the lemmas back with the same one-slot offset used when filling the forms.
   List<StringAnnotation> lemmas = new ArrayList<StringAnnotation>();
   int position = 1;
   for (IAnnotation<String> token : tokens) {
     String lemma = mateSentence.plemmas[position++];
     lemmas.add(new StringAnnotation(lemma, token.getStart(), token.getEnd()));
   }
   return lemmas;
 }
コード例 #3
0
  /**
   * Demonstrates how to lemmatize, tag and parse a sentence with the tools.
   *
   * <p>The steps run in this order: build the sentence, print its forms, lemmatize and print
   * the lemmata, tag and print the POS tags, parse and print the parse, and finally write a
   * sentence to {@code example-out.txt}. Note that the object written to the file is the one
   * returned by the lemmatizer, not the parse.
   *
   * @param args when exactly one argument is given it is taken as the sentence and split into
   *     tokens on whitespace (e.g. "This is another test ."); otherwise a default sentence is used
   * @throws IOException declared by the signature; presumably raised while loading the models
   *     or writing the output file - verify against the tool constructors
   */
  public static void main(String[] args) throws IOException {

    // Collect the word forms; position 0 is always the artificial "<root>" entry.
    String[] words;
    if (args.length != 1) {
      // no single sentence argument was supplied: provide a default sentence
      words = new String[] {"<root>", "This", "is", "a", "test", "."};
    } else {
      ArrayList<String> collected = new ArrayList<>();
      collected.add("<root>");
      for (StringTokenizer tokenizer = new StringTokenizer(args[0]);
          tokenizer.hasMoreTokens(); ) {
        collected.add(tokenizer.nextToken());
      }
      words = collected.toArray(new String[0]);
    }

    // Data container for the sentence.
    SentenceData09 sentence = new SentenceData09();
    sentence.init(words);

    String[] formValues = sentence.forms;
    for (int k = 0; k < formValues.length; k++) {
      Parser.out.println("form : " + formValues[k]);
    }

    // Lemmatizer: the options tell it where its model is located.
    Lemmatizer lemmatizer =
        new Lemmatizer(
            new is2.lemmatizer.Options(new String[] {"-model", "models/lemma-eng.model"}));

    // From here on 'sentence' refers to whatever the lemmatizer returned.
    sentence = lemmatizer.apply(sentence);

    String[] lemmaValues = sentence.plemmas;
    for (int k = 0; k < lemmaValues.length; k++) {
      Parser.out.println("lemma : " + lemmaValues[k]);
    }

    // Tagger: configured with the location of its model, like the lemmatizer.
    // (The original example also kept a commented-out variant that tagged from the forms and
    // lemmas and stored the result with setPPos.)
    Tagger tagger =
        new Tagger(new is2.tag.Options(new String[] {"-model", "models/tag-eng.model"}));
    SentenceData09 tagged = tagger.tag(sentence);

    String[] posValues = tagged.ppos;
    for (int k = 0; k < posValues.length; k++) {
      Parser.out.println("pos " + posValues[k]);
    }

    // Parser: initialize the options, create the parser, then parse the tagged sentence.
    Parser parser = new Parser(new Options(new String[] {"-model", "models/prs-eng-x.model"}));
    SentenceData09 parsed = parser.apply(tagged);
    Parser.out.println(parsed.toString());

    // Write an output file to disk; the sentence written is the lemmatizer result.
    is2.io.CONLLWriter09 conllWriter = new is2.io.CONLLWriter09("example-out.txt");
    conllWriter.write(sentence);
    conllWriter.finishWriting();
  }