/** * Uses the annotations in the CAS and extracts the tokens and their lemmas from the text and * hypothesis views * * @param aJCas The JCas object of the text and hypothesis, after tokenization and lemmatization. * @throws CASException */ private void getTokenAnnotations(JCas aJCas) throws CASException { // Get the text and hypothesis views JCas textView = aJCas.getView(LAP_ImplBase.TEXTVIEW); JCas hypoView = aJCas.getView(LAP_ImplBase.HYPOTHESISVIEW); // Get the tokens textTokens = new ArrayList<Token>(JCasUtil.select(textView, Token.class)); hypoTokens = new ArrayList<Token>(JCasUtil.select(hypoView, Token.class)); }
/**
 * Appends one line to a per-code output file: the document's vpdmfId followed by the
 * whitespace-separated tokens of every non-empty sentence in the CAS.
 *
 * @param jCas CAS containing a single TriageScore plus Sentence/Token annotations
 * @throws AnalysisEngineProcessException if the output file cannot be written
 */
public void process(JCas jCas) throws AnalysisEngineProcessException {
    TriageScore doc = JCasUtil.selectSingle(jCas, TriageScore.class);
    String code = doc.getInOutCode();
    File outFile = new File(baseData.getPath() + "/" + code + ".txt");
    // FIX: the original closed the writer inside the try body, leaking the stream
    // (and buffered output) whenever a write threw before close(). The finally
    // block guarantees the writer is closed on every path.
    PrintWriter out = null;
    try {
        // append mode: multiple documents with the same code share one file
        out = new PrintWriter(new BufferedWriter(new FileWriter(outFile, true)));
        out.print(doc.getVpdmfId() + " ");
        for (Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
            List<Token> tokens = JCasUtil.selectCovered(jCas, Token.class, sentence);
            if (tokens.isEmpty()) {
                continue;
            }
            for (String tokenString : JCasUtil.toText(tokens)) {
                out.print(tokenString + " ");
            }
        }
        out.print("\n");
    } catch (IOException e) {
        throw new AnalysisEngineProcessException(e);
    } finally {
        if (out != null) {
            out.close();
        }
    }
}
/**
 * Converts a UIMA FSList into an ArrayList of the requested annotation type.
 *
 * @param list the feature-structure list to convert
 * @param classType the element type to select from the list
 * @return a new ArrayList containing the selected elements, in list order
 */
public static <T extends TOP> ArrayList<T> fromFSListToCollection(
        FSList list, Class<T> classType) {
    // Select directly into the result list; no intermediate collection needed.
    return new ArrayList<T>(JCasUtil.select(list, classType));
}
@Override public void process(JCas aJCas) throws AnalysisEngineProcessException { // String documentText = aJCas.getDocumentText(); for (TestDocument doc : JCasUtil.select(aJCas, TestDocument.class)) { annotateText(doc.getCoveredText(), aJCas, doc.getBegin()); } for (Question question : JCasUtil.select(aJCas, Question.class)) { annotateText(question.getCoveredText(), aJCas, question.getBegin()); } for (Answer answer : JCasUtil.select(aJCas, Answer.class)) { annotateText(answer.getCoveredText(), aJCas, answer.getBegin()); } try { // this calls Dbpedia remotely so I let it sleep for every document Thread.sleep(1000); } catch (InterruptedException e) { e.printStackTrace(); } }
/**
 * For each annotation of the configured type, collects the lemma of every covered
 * token (falling back to the token's surface text when no Word is indexed for it)
 * and appends the joined lemma string to {@code outEntries}.
 *
 * @param jCas the CAS to process
 * @throws AnalysisEngineProcessException declared by the UIMA contract
 */
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
    beforeProcess(jCas);
    try {
        for (AnnotationFS anno : JCasUtil.select(jCas, annotationType)) {
            List<String> lemmaList = Lists.newLinkedList();
            for (Token tok : JCasUtil.selectCovered(jCas, Token.class, anno)) {
                Word word = token2WordIdx.get(tok);
                // No indexed Word for this token -> use its covered text verbatim.
                String lemma =
                    (word == null)
                        ? tok.getCoveredText()
                        : MorphCasUtils.getOnlyWordform(word).getLemma();
                lemmaList.add(lemma);
            }
            outEntries.add(lemmaJoiner.join(lemmaList));
        }
    } finally {
        // afterProcess must run even when selection/lookup throws.
        afterProcess(jCas);
    }
}
/**
 * Callback invoked when a CAS has been processed by the remote service: reports any
 * exceptions carried by the status, then prints each token with its POS value.
 *
 * @param aCas the processed CAS (may not be null when aStatus is non-null)
 * @param aStatus processing status; the method is a no-op when this is null
 */
public void entityProcessComplete(CAS aCas, EntityProcessStatus aStatus) {
    if (aStatus == null) {
        return;
    }
    if (aStatus.isException()) {
        System.err.println("Error on process CAS call to remote service:");
        List<Exception> exceptions = aStatus.getExceptions();
        // Report every failure; the redundant Throwable cast of the original is gone.
        for (Exception exception : exceptions) {
            exception.printStackTrace();
        }
    }
    try {
        JCas cas = aCas.getJCas();
        for (Token token : JCasUtil.select(cas, Token.class)) {
            System.out.println(token.getCoveredText() + " " + token.getPos().getPosValue());
        }
    } catch (CASException e) {
        e.printStackTrace();
    }
}
/**
 * Runs the Stanford CoreNLP pipeline over the document text and converts its output
 * into UIMA annotations: Tokens (with POS/lemma), phrase-level NamedEntityMentions,
 * Sentences, treebank parse nodes, dependency nodes/relations, and coreference-based
 * NamedEntity groupings.
 *
 * <p>NOTE(review): assumes the CoreNLP pipeline was configured with tokenize, ssplit,
 * pos, lemma, ner, parse, and dcoref annotators — if NER is absent, {@code neTag}
 * below would be null and throw an NPE; confirm against the processor's setup.
 *
 * @param jCas the CAS whose document text is processed and annotated in place
 * @throws AnalysisEngineProcessException declared by the UIMA contract
 */
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
    Annotation document = this.processor.process(jCas.getDocumentText());

    // State for collapsing token-level BIO-less NE tags into phrase spans:
    // a phrase is a maximal run of identical non-"O" tags.
    String lastNETag = "O";
    int lastNEBegin = -1;
    int lastNEEnd = -1;
    for (CoreMap tokenAnn : document.get(TokensAnnotation.class)) {

        // create the token annotation
        int begin = tokenAnn.get(CharacterOffsetBeginAnnotation.class);
        int end = tokenAnn.get(CharacterOffsetEndAnnotation.class);
        String pos = tokenAnn.get(PartOfSpeechAnnotation.class);
        String lemma = tokenAnn.get(LemmaAnnotation.class);
        Token token = new Token(jCas, begin, end);
        token.setPos(pos);
        token.setLemma(lemma);
        token.addToIndexes();

        // hackery to convert token-level named entity tag into phrase-level tag
        String neTag = tokenAnn.get(NamedEntityTagAnnotation.class);
        if (neTag.equals("O") && !lastNETag.equals("O")) {
            // transition non-"O" -> "O": close the phrase that just ended
            NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd);
            ne.setMentionType(lastNETag);
            ne.addToIndexes();
        } else {
            if (lastNETag.equals("O")) {
                // transition "O" -> non-"O" (or "O" -> "O"): start of a new phrase
                lastNEBegin = begin;
            } else if (lastNETag.equals(neTag)) {
                // do nothing - begin was already set
            } else {
                // transition non-"O" -> different non-"O": close old phrase, open new one
                NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd);
                ne.setMentionType(lastNETag);
                ne.addToIndexes();
                lastNEBegin = begin;
            }
            lastNEEnd = end;
        }
        lastNETag = neTag;
    }
    // flush the final phrase if the document ended inside a named entity
    if (!lastNETag.equals("O")) {
        NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd);
        ne.setMentionType(lastNETag);
        ne.addToIndexes();
    }

    // add sentences and trees
    for (CoreMap sentenceAnn : document.get(SentencesAnnotation.class)) {

        // add the sentence annotation
        int sentBegin = sentenceAnn.get(CharacterOffsetBeginAnnotation.class);
        int sentEnd = sentenceAnn.get(CharacterOffsetEndAnnotation.class);
        Sentence sentence = new Sentence(jCas, sentBegin, sentEnd);
        sentence.addToIndexes();

        // add the syntactic tree annotation
        List<CoreLabel> tokenAnns = sentenceAnn.get(TokensAnnotation.class);
        Tree tree = sentenceAnn.get(TreeAnnotation.class);
        if (tree.children().length != 1) {
            throw new RuntimeException("Expected single root node, found " + tree);
        }
        // drop the artificial ROOT wrapper and index token spans from 0
        tree = tree.firstChild();
        tree.indexSpans(0);
        TopTreebankNode root = new TopTreebankNode(jCas);
        root.setTreebankParse(tree.toString());
        // TODO: root.setTerminals(v)
        this.addTreebankNodeToIndexes(root, jCas, tree, tokenAnns);

        // get the dependencies
        SemanticGraph dependencies =
            sentenceAnn.get(CollapsedCCProcessedDependenciesAnnotation.class);

        // convert Stanford nodes to UIMA annotations
        // relies on the UIMA Tokens created above covering this sentence in order,
        // so Stanford's token indices map onto positions in this list
        List<Token> tokens = JCasUtil.selectCovered(jCas, Token.class, sentence);
        Map<IndexedWord, DependencyNode> stanfordToUima =
            new HashMap<IndexedWord, DependencyNode>();
        for (IndexedWord stanfordNode : dependencies.vertexSet()) {
            int indexBegin = stanfordNode.get(BeginIndexAnnotation.class);
            int indexEnd = stanfordNode.get(EndIndexAnnotation.class);
            int tokenBegin = tokens.get(indexBegin).getBegin();
            int tokenEnd = tokens.get(indexEnd - 1).getEnd();
            DependencyNode node;
            if (dependencies.getRoots().contains(stanfordNode)) {
                node = new TopDependencyNode(jCas, tokenBegin, tokenEnd);
            } else {
                node = new DependencyNode(jCas, tokenBegin, tokenEnd);
            }
            stanfordToUima.put(stanfordNode, node);
        }

        // create relation annotations for each Stanford dependency
        ArrayListMultimap<DependencyNode, DependencyRelation> headRelations =
            ArrayListMultimap.create();
        ArrayListMultimap<DependencyNode, DependencyRelation> childRelations =
            ArrayListMultimap.create();
        for (SemanticGraphEdge stanfordEdge : dependencies.edgeIterable()) {
            DependencyRelation relation = new DependencyRelation(jCas);
            DependencyNode head = stanfordToUima.get(stanfordEdge.getGovernor());
            DependencyNode child = stanfordToUima.get(stanfordEdge.getDependent());
            String relationType = stanfordEdge.getRelation().toString();
            if (head == null || child == null || relationType == null) {
                throw new RuntimeException(
                    String.format(
                        "null elements not allowed in relation:\nrelation=%s\nchild=%s\nhead=%s\n",
                        relation, child, head));
            }
            relation.setHead(head);
            relation.setChild(child);
            relation.setRelation(relationType);
            relation.addToIndexes();
            // NOTE(review): keyed "inverted" on purpose, presumably: a node's
            // headRelations are the edges in which it is the child (i.e. pointing
            // to its heads), and vice versa — confirm against the type system.
            headRelations.put(child, relation);
            childRelations.put(head, relation);
        }

        // set the relations for each node annotation
        for (DependencyNode node : stanfordToUima.values()) {
            List<DependencyRelation> heads = headRelations.get(node);
            node.setHeadRelations(new FSArray(jCas, heads == null ? 0 : heads.size()));
            if (heads != null) {
                FSCollectionFactory.fillArrayFS(node.getHeadRelations(), heads);
            }
            List<DependencyRelation> children = childRelations.get(node);
            node.setChildRelations(new FSArray(jCas, children == null ? 0 : children.size()));
            if (children != null) {
                FSCollectionFactory.fillArrayFS(node.getChildRelations(), children);
            }
            node.addToIndexes();
        }
    }

    // map from spans to named entity mentions
    Map<Span, NamedEntityMention> spanMentionMap = new HashMap<Span, NamedEntityMention>();
    for (NamedEntityMention mention : JCasUtil.select(jCas, NamedEntityMention.class)) {
        spanMentionMap.put(new Span(mention.getBegin(), mention.getEnd()), mention);
    }

    // add mentions for all entities identified by the coreference system
    List<NamedEntity> entities = new ArrayList<NamedEntity>();
    List<List<Token>> sentenceTokens = new ArrayList<List<Token>>();
    for (Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
        sentenceTokens.add(JCasUtil.selectCovered(jCas, Token.class, sentence));
    }
    Map<Integer, CorefChain> corefChains = document.get(CorefChainAnnotation.class);
    for (CorefChain chain : corefChains.values()) {
        List<NamedEntityMention> mentions = new ArrayList<NamedEntityMention>();
        for (CorefMention corefMention : chain.getMentionsInTextualOrder()) {

            // figure out the character span of the token
            // CorefMention sentence/token indices are 1-based; endIndex is exclusive,
            // hence the -1 / -2 adjustments
            List<Token> tokens = sentenceTokens.get(corefMention.sentNum - 1);
            int begin = tokens.get(corefMention.startIndex - 1).getBegin();
            int end = tokens.get(corefMention.endIndex - 2).getEnd();

            // use an existing named entity mention when possible; otherwise create a new one
            NamedEntityMention mention = spanMentionMap.get(new Span(begin, end));
            if (mention == null) {
                mention = new NamedEntityMention(jCas, begin, end);
                mention.addToIndexes();
            }
            mentions.add(mention);
        }

        // create an entity for the mentions
        // order mentions by their position in the document text
        Collections.sort(
            mentions,
            new Comparator<NamedEntityMention>() {
                @Override
                public int compare(NamedEntityMention m1, NamedEntityMention m2) {
                    return m1.getBegin() - m2.getBegin();
                }
            });

        // create mentions and add them to entity
        NamedEntity entity = new NamedEntity(jCas);
        entity.setMentions(new FSArray(jCas, mentions.size()));
        int index = 0;
        for (NamedEntityMention mention : mentions) {
            mention.setMentionedEntity(entity);
            entity.setMentions(index, mention);
            index += 1;
        }
        entities.add(entity);
    }

    // add singleton entities for any named entities not picked up by coreference system
    for (NamedEntityMention mention : JCasUtil.select(jCas, NamedEntityMention.class)) {
        if (mention.getMentionedEntity() == null) {
            NamedEntity entity = new NamedEntity(jCas);
            entity.setMentions(new FSArray(jCas, 1));
            entity.setMentions(0, mention);
            mention.setMentionedEntity(entity);
            // NOTE(review): this getter call looks like a no-op — confirm it has
            // no required side effect before removing
            entity.getMentions();
            entities.add(entity);
        }
    }

    // sort entities by document order
    Collections.sort(
        entities,
        new Comparator<NamedEntity>() {
            @Override
            public int compare(NamedEntity o1, NamedEntity o2) {
                return getFirstBegin(o1) - getFirstBegin(o2);
            }

            // earliest begin offset among the entity's mentions
            private int getFirstBegin(NamedEntity entity) {
                int min = Integer.MAX_VALUE;
                for (NamedEntityMention mention :
                    JCasUtil.select(entity.getMentions(), NamedEntityMention.class)) {
                    if (mention.getBegin() < min) {
                        min = mention.getBegin();
                    }
                }
                return min;
            }
        });

    // add entities to document
    for (NamedEntity entity : entities) {
        entity.addToIndexes();
    }
}
/**
 * Adds alignment links between predicate-truth annotations of the text and hypothesis
 * views of a pair CAS. Every ordered combination of PT+/PT-/PT? annotation types gets
 * its own link pass: agreeing pairs are tagged as local entailment, disagreeing pairs
 * as local contradiction, and pairs involving PT? carry no group label.
 *
 * @param aJCas the pair CAS (must contain text and hypothesis views)
 * @throws PairAnnotatorComponentException wrapping any CASException from view access
 */
@Override
public void annotate(JCas aJCas) throws PairAnnotatorComponentException {
    try {
        // create possible group labels instances for this jcas
        StringList localEntailment =
            createStringList(
                aJCas,
                new ArrayList<String>() {
                    private static final long serialVersionUID = 1L;

                    // double-brace initialization: instance initializer of an
                    // anonymous ArrayList subclass
                    {
                        add(GROUP_LABEL_SAME_PREDICATE_TRUTH);
                        add(GROUP_LABEL_LOCAL_ENTAILMENT);
                    }
                });
        StringList localContradiction =
            createStringList(
                aJCas,
                new ArrayList<String>() {
                    private static final long serialVersionUID = 1L;

                    {
                        add(GROUP_LABEL_OPPOSITE_PREDICATE_TRUTH);
                        add(GROUP_LABEL_LOCAL_CONTRADICTION);
                    }
                });
        StringList emptyGroupLabel = new EmptyStringList(aJCas);

        // Get the text and hypothesis views
        textView = aJCas.getView(LAP_ImplBase.TEXTVIEW);
        hypoView = aJCas.getView(LAP_ImplBase.HYPOTHESISVIEW);

        // Record annotations: memoize the per-type selections once so the link
        // passes below don't re-query the CAS indexes
        memoTextAnnots =
            new HashMap<Class<? extends PredicateTruth>, Collection<? extends Annotation>>();
        memoHypoAnnots =
            new HashMap<Class<? extends PredicateTruth>, Collection<? extends Annotation>>();

        for (Class<? extends PredicateTruth> ptType : ptTypes) {
            memoTextAnnots.put(ptType, JCasUtil.select(textView, ptType));
            memoHypoAnnots.put(ptType, JCasUtil.select(hypoView, ptType));
        }

        // add alignment links

        // Agreeing Positive Predicate Truth
        // PT+ <-> PT+
        createPredicateTruthLinks(
            PredicateTruthPositive.class,
            PredicateTruthPositive.class,
            ALIGNER_CONFIDENCE,
            ALIGNER_DIRECTION,
            ALIGNEMNT_TYPE_AGREEING_POSITIVE,
            localEntailment);

        // Agreeing Negative Predicate Truth
        // PT- <-> PT-
        createPredicateTruthLinks(
            PredicateTruthNegative.class,
            PredicateTruthNegative.class,
            ALIGNER_CONFIDENCE,
            ALIGNER_DIRECTION,
            ALIGNEMNT_TYPE_AGREEING_NEGATIVE,
            localEntailment);

        // Disagreeing Predicate Truth
        // PT+ <-> PT-
        createPredicateTruthLinks(
            PredicateTruthPositive.class,
            PredicateTruthNegative.class,
            ALIGNER_CONFIDENCE,
            ALIGNER_DIRECTION,
            ALIGNEMNT_TYPE_DISAGREEING,
            localContradiction);
        // PT- <-> PT+
        createPredicateTruthLinks(
            PredicateTruthNegative.class,
            PredicateTruthPositive.class,
            ALIGNER_CONFIDENCE,
            ALIGNER_DIRECTION,
            ALIGNEMNT_TYPE_DISAGREEING,
            localContradiction);

        // Non Matching Predicate Truth
        // PT+ <-> PT?
        createPredicateTruthLinks(
            PredicateTruthPositive.class,
            PredicateTruthUncertain.class,
            ALIGNER_CONFIDENCE,
            ALIGNER_DIRECTION,
            ALIGNEMNT_TYPE_NON_MATCHING,
            emptyGroupLabel);
        // PT- <-> PT?
        createPredicateTruthLinks(
            PredicateTruthNegative.class,
            PredicateTruthUncertain.class,
            ALIGNER_CONFIDENCE,
            ALIGNER_DIRECTION,
            ALIGNEMNT_TYPE_NON_MATCHING,
            emptyGroupLabel);
        // PT? <-> PT+
        createPredicateTruthLinks(
            PredicateTruthUncertain.class,
            PredicateTruthPositive.class,
            ALIGNER_CONFIDENCE,
            ALIGNER_DIRECTION,
            ALIGNEMNT_TYPE_NON_MATCHING,
            emptyGroupLabel);
        // PT? <-> PT-
        createPredicateTruthLinks(
            PredicateTruthUncertain.class,
            PredicateTruthNegative.class,
            ALIGNER_CONFIDENCE,
            ALIGNER_DIRECTION,
            ALIGNEMNT_TYPE_NON_MATCHING,
            emptyGroupLabel);
    } catch (CASException e) {
        throw new PairAnnotatorComponentException(e);
    }
}
/**
 * End-to-end LAP verification: runs the LAP on a fixed TEXT/HYPOTHESIS pair and
 * checks the generated sentences, tokens, dependencies, and coreference chains of
 * the text view against the EXPECTED_* fixtures.
 *
 * <p>NOTE(review): token/sentence verification pairs generated annotations with the
 * expected fixtures strictly by iteration order — it assumes the CAS index order
 * matches the fixture order, and a count mismatch surfaces as NoSuchElementException
 * rather than a descriptive failure.
 *
 * @throws Exception any verification or LAP failure (logged before rethrow)
 */
@Test
public void test() throws Exception {
    try {
        // Map token infos by ID
        tokensById = new LinkedHashMap<Integer, TestTokenInfo>(EXPECTED_TOKENS.length);
        for (TestTokenInfo info : EXPECTED_TOKENS) {
            tokensById.put(info.id, info);
        }

        // Run LAP
        LAPAccess lap = getLAP();
        JCas mainJcas = lap.generateSingleTHPairCAS(TEXT, HYPOTHESIS);
        // all assertions below are made against the text view only
        JCas jcas = mainJcas.getView(LAP_ImplBase.TEXTVIEW);

        // Verify sentences
        Iterator<TestSentenceInfo> iterSentence = Arrays.asList(EXPECTED_SENTENCES).iterator();
        for (Sentence sentence : JCasUtil.select(jcas, Sentence.class)) {
            verifySentence(sentence, iterSentence.next());
        }

        // Map generated Tokens to infos - by their order!
        expectedByGeneratedToken =
            new LinkedHashMap<Token, TestTokenInfo>(EXPECTED_TOKENS.length);
        Iterator<TestTokenInfo> iterToken = tokensById.values().iterator();
        for (Token token : JCasUtil.select(jcas, Token.class)) {
            expectedByGeneratedToken.put(token, iterToken.next());
        }

        // For each Token, find all its dependencies
        governors =
            new LinkedHashMap<Token, Set<TestDependencyInfo>>(expectedByGeneratedToken.size());
        for (Token token : expectedByGeneratedToken.keySet()) {
            governors.put(token, new HashSet<TestDependencyInfo>());
        }
        for (Dependency dep : JCasUtil.select(jcas, Dependency.class)) {
            // record each dependency on its dependent, keyed by type + governor id
            int governorId = expectedByGeneratedToken.get(dep.getGovernor()).id;
            TestDependencyInfo depInfo =
                new TestDependencyInfo(dep.getDependencyType(), governorId);
            governors.get(dep.getDependent()).add(depInfo);
        }

        // Verify tokens
        for (Entry<Token, TestTokenInfo> entry : expectedByGeneratedToken.entrySet()) {
            verifyToken(entry.getKey(), entry.getValue());
        }

        // Verify coref groups
        corefChainsByFirstMentionStart = new TreeMap<Integer, CoreferenceChain>();
        for (CoreferenceChain chain : JCasUtil.select(jcas, CoreferenceChain.class)) {
            // use this map in order to order chain by a predefined order - the start offset of its
            // first CoreferenceLink
            corefChainsByFirstMentionStart.put(chain.getFirst().getBegin(), chain);
        }
        if (corefChainsByFirstMentionStart.size() != EXPECTED_COREF.length) {
            throw new LAPVerificationException(
                "Bad amount of coreference chains, expected "
                    + EXPECTED_COREF.length
                    + ", got "
                    + corefChainsByFirstMentionStart.size());
        }
        Iterator<TestCorefMentionInfo[]> iterCorefGroups = Arrays.asList(EXPECTED_COREF).iterator();
        for (CoreferenceChain chain : corefChainsByFirstMentionStart.values()) {
            Iterator<TestCorefMentionInfo> iterCoref =
                Arrays.asList(iterCorefGroups.next()).iterator();
            // walk the linked list of mentions in chain order
            for (CoreferenceLink link = chain.getFirst(); link != null; link = link.getNext()) {
                verifyCorefLink(link, iterCoref.next());
            }
        }
    } catch (Exception e) {
        // log the failure in a readable form before letting JUnit report it
        ExceptionUtil.outputException(e, System.out);
        throw e;
    }
}