/** * POS tags the parsed tokens by using the pos tagger. Annotations are added based on the posModel * and already created adhoc tags. * * @param tokenList * @param posTagger * @param posModel * @param adhocTags * @param language */ private void posTag( List<Token> tokenList, POSTagger posTagger, TagSet<PosTag> posModel, Map<String, PosTag> adhocTags, String language) { String[] tokenTexts = new String[tokenList.size()]; for (int i = 0; i < tokenList.size(); i++) { tokenTexts[i] = tokenList.get(i).getSpan(); } // get the topK POS tags and props and copy it over to the 2dim Arrays Sequence[] posSequences = posTagger.topKSequences(tokenTexts); // extract the POS tags and props for the current token from the // posSequences. // NOTE: Sequence includes always POS tags for all Tokens. If // less then posSequences.length are available it adds the // best match for all followings. // We do not want such copies. PosTag[] actPos = new PosTag[posSequences.length]; double[] actProp = new double[posSequences.length]; for (int i = 0; i < tokenTexts.length; i++) { Token token = tokenList.get(i); boolean done = false; int j = 0; while (j < posSequences.length && !done) { String p = posSequences[j].getOutcomes().get(i); done = j > 0 && p.equals(actPos[0].getTag()); if (!done) { actPos[j] = getPosTag(posModel, adhocTags, p, language); actProp[j] = posSequences[j].getProbs()[i]; j++; } } // create the POS values token.addAnnotations(POS_ANNOTATION, Value.values(actPos, actProp, j)); } }