/**
 * Returns the average number of tokens per chunk (noun phrase) in a view.
 *
 * @param view the view of the JCas
 * @return average token count over all chunks, or 0.0 if the view contains no chunks
 */
private double getAverageNounPhraseTokenLength(JCas view) {
  int totalTokens = 0;
  int chunkCount = 0;
  // Single pass counts both chunks and their covered tokens (the original
  // called JCasUtil.select twice and divided by zero on chunk-less views,
  // silently yielding NaN).
  for (Chunk chunk : JCasUtil.select(view, Chunk.class)) {
    chunkCount++;
    totalTokens += JCasUtil.selectCovered(view, Token.class, chunk).size();
  }
  return chunkCount == 0 ? 0.0 : totalTokens / (double) chunkCount;
}
/**
 * Extracts the ratio of spelling anomalies to tokens in the view.
 *
 * @param view the view of the JCas to inspect
 * @return a single-element feature set holding "SpellingErrorRatio" (0.0 when the view has no tokens)
 */
@Override
public Set<Feature> extract(JCas view) throws TextClassificationException {
  int errorCount = JCasUtil.select(view, SpellingAnomaly.class).size();
  int tokenCount = JCasUtil.select(view, Token.class).size();
  // Guard against division by zero on empty views.
  double errorRatio = tokenCount > 0 ? (double) errorCount / tokenCount : 0.0;
  return new Feature("SpellingErrorRatio", errorRatio).asSet();
}
/**
 * Returns the next sentence, pulling and processing further CASes from the reader as needed.
 *
 * <p>Sentences from the current CAS are cached in {@code sentences}; when that iterator is
 * exhausted, the next CAS is retrieved, run through the analysis engine, and its sentences
 * buffered. Returns {@code null} when the reader has no more CASes, or {@code ""} if reading
 * a CAS fails mid-iteration.
 */
@Override
public synchronized String nextSentence() {
  if (sentences == null || !sentences.hasNext()) {
    try {
      if (getReader().hasNext()) {
        CAS cas = resource.retrieve();
        try {
          getReader().getNext(cas);
        } catch (Exception e) {
          // Best-effort: a failed read is treated as end of data rather than propagated.
          log.warn("Done iterating returning an empty string");
          return "";
        }
        resource.getAnalysisEngine().process(cas);
        List<String> list = new ArrayList<>();
        for (Sentence sentence : JCasUtil.select(cas.getJCas(), Sentence.class)) {
          list.add(sentence.getCoveredText());
        }
        sentences = list.iterator();
        // needs to be next cas
        while (!sentences.hasNext()) {
          // sentence is empty; go to another cas
          // NOTE(review): this branch checks the `reader` field directly while the rest of
          // the method goes through getReader() — confirm they refer to the same object.
          if (reader.hasNext()) {
            cas.reset();
            getReader().getNext(cas);
            resource.getAnalysisEngine().process(cas);
            for (Sentence sentence : JCasUtil.select(cas.getJCas(), Sentence.class)) {
              list.add(sentence.getCoveredText());
            }
            sentences = list.iterator();
          } else return null;
        }
        String ret = sentences.next();
        if (this.getPreProcessor() != null) ret = this.getPreProcessor().preProcess(ret);
        return ret;
      }
      return null;
    } catch (Exception e) {
      throw new RuntimeException(e);
    }
  } else {
    // Fast path: sentences remain buffered from a previous call.
    String ret = sentences.next();
    if (this.getPreProcessor() != null) ret = this.getPreProcessor().preProcess(ret);
    return ret;
  }
}
/**
 * Copies relation candidates (and their covered feature annotations) from the incoming JCas
 * into the aggregate JCas, de-duplicating candidates that cover the same text range.
 *
 * <p>Offsets of copied annotations are shifted so they align with {@code content}, the text
 * accumulated in the aggregate CAS so far.
 */
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
  // Keeping track of the ranges of different relation candidates so we wont have duplicate
  // text snippets for different candidates on the same text
  Map<IndexRange, IndexRange> rangeMappings = new HashMap<IndexRange, IndexRange>();
  if (aggregateJCas == null) aggregateJCas = getEmptyJCas();
  CasCopier copier = new CasCopier(aJCas.getCas(), aggregateJCas.getCas());
  Iterator<RelationCandidate> iter = JCasUtil.iterator(aJCas, RelationCandidate.class);
  while (iter.hasNext()) {
    RelationCandidate candidate = iter.next();
    RelationCandidate candidateCopy = (RelationCandidate) copier.copyFs(candidate);
    // See if we already have this candidate in the aggregate jcas
    IndexRange candidateRange = new IndexRange(candidate);
    // The offset between the old jcas and the new of this relation candidate
    int offset = 0;
    if (rangeMappings.containsKey(candidateRange)) {
      // Duplicate range: reuse the offset of the previously-copied snippet.
      offset = rangeMappings.get(candidateRange).getStart() - candidateRange.getStart();
      updateAnnotation(candidateCopy, offset);
      // No need to copy features, has already been done
    } else {
      // New range: the snippet will be appended at the current end of `content`.
      offset = content.length() - candidateRange.getStart();
      updateAnnotation(candidateCopy, offset);
      rangeMappings.put(candidateRange, new IndexRange(candidateCopy));
      // For every feature we want to copy
      for (Class<? extends Annotation> feature : features) {
        // Iterating over the annotations of this feature type covered by this relation candidate
        for (Annotation annotation : JCasUtil.selectCovered(aJCas, feature, candidate)) {
          Annotation cAnnotation = (Annotation) copier.copyFs(annotation);
          // Updating the indices of the annotation
          updateAnnotation(cAnnotation, offset);
          aggregateJCas.addFsToIndexes(cAnnotation);
        }
      }
      // Adding the text content of the relation candidate to the new cas
      content.append(candidate.getCoveredText());
    }
    aggregateJCas.addFsToIndexes(candidateCopy);
  }
}
/**
 * Writes each non-implicit argument component (claim, premise, backing, rebuttal, refutation)
 * to its own text file, one sentence per line, tokens separated by spaces.
 *
 * <p>Output file names encode the document id, the number of overlapping sentences, the
 * component type, and the component's character offsets.
 */
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
  String documentId = DocumentMetaData.get(aJCas).getDocumentId();
  Class[] types = {Claim.class, Premise.class, Backing.class, Rebuttal.class, Refutation.class};
  for (Class type : types) {
    for (Object o : JCasUtil.select(aJCas, type)) {
      ArgumentComponent argumentComponent = (ArgumentComponent) o;
      // non-implicit components
      int end = argumentComponent.getEnd();
      int begin = argumentComponent.getBegin();
      if (end > begin) {
        List<Sentence> sentences =
            JCasUtil2.selectOverlapping(Sentence.class, argumentComponent, aJCas);
        String filename =
            documentId
                + "_s"
                + sentences.size()
                + "_"
                + argumentComponent.getClass().getSimpleName()
                + "_"
                + begin
                + "_"
                + end
                + ".txt";
        StringBuilder sb = new StringBuilder();
        for (Sentence sentence : sentences) {
          List<String> tokens = new ArrayList<>();
          for (Token token : JCasUtil.selectCovered(Token.class, sentence)) {
            tokens.add(token.getCoveredText());
          }
          sb.append(StringUtils.join(tokens, " "));
          sb.append("\n");
        }
        try {
          // NOTE(review): this FileUtils.write overload uses the platform default charset —
          // consider the (File, CharSequence, Charset) overload; changing it would alter
          // output on non-UTF-8 platforms, so it is only flagged here.
          FileUtils.write(new File(outputFolder, filename), sb.toString().trim());
        } catch (IOException e) {
          throw new AnalysisEngineProcessException(e);
        }
      }
    }
  }
}
/**
 * Scores every answer with the gradient-boosted tree ensemble and re-indexes each answer
 * with its new confidence (sigmoid of the summed tree outputs plus the model prior).
 */
public void process(JCas jcas) throws AnalysisEngineProcessException {
  logger.debug("scoring with model {}", modelName);
  AnswerStats stats = new AnswerStats(jcas);
  List<AnswerScore> scored = new LinkedList<AnswerScore>();
  for (Answer answer : JCasUtil.select(jcas, Answer.class)) {
    AnswerFV featureVector = new AnswerFV(answer, stats);
    double[] features = reorderByLabels(featureVector.getFV());
    // Sum the (rate-scaled) contributions of every tree on top of the prior.
    double score = model.prior;
    for (Tree tree : model.forest) {
      score += model.learning_rate * classifyWithOneTree(features, tree, 0);
    }
    // Squash to (0, 1) with the logistic function.
    score = 1.0 / (1.0 + Math.exp(-score));
    scored.add(new AnswerScore(answer, score));
  }
  /* Reindex the touched answer info(s). */
  for (AnswerScore as : scored) {
    as.a.removeFromIndexes();
    as.a.setConfidence(as.score);
    as.a.addToIndexes();
  }
}
// @Test public void allAggregationStrategies_1segment_expectCorrectRanking() throws Exception { String testDocument = "foo bar baz"; List<Class<? extends AggregationStrategy>> aggregationStrategies = new ArrayList<Class<? extends AggregationStrategy>>(); aggregationStrategies.add(MaximumAggregation.class); for (Class<? extends AggregationStrategy> aggregationStrategy : aggregationStrategies) { AnalysisEngineDescription aed = AnalysisEngineFactory.createPrimitiveDescription( BookIndexPhraseAggregationAnnotator.class); bindResource(aed, RankedPhraseAggregationAnnotator.AGGREGATION_STRATEGY, aggregationStrategy); AnalysisEngine ae = createPrimitive(aed); JCas jcas = setup_1segment(testDocument, ae); ae.process(jcas); List<String> expectedBookIndexPhrases = new ArrayList<String>(); expectedBookIndexPhrases.add("bar"); expectedBookIndexPhrases.add("foo"); expectedBookIndexPhrases.add("baz"); List<String> resultBookIndexPhrases = new ArrayList<String>(); for (BookIndexPhrase b : JCasUtil.select(jcas, BookIndexPhrase.class)) { resultBookIndexPhrases.add(b.getPhrase()); } assertEquals(expectedBookIndexPhrases, resultBookIndexPhrases); } }
@Override public List<Feature> extract(JCas jcas) throws TextClassificationException { double nbToken = 0; double minToken = -1; // Sizes in letter double maxToken = 0; double meanToken = 0; for (Token token : JCasUtil.select(jcas, Token.class)) { nbToken++; if (minToken < 0) { minToken = token.getCoveredText().length(); // gets the size value of the first // token } if (minToken > token.getCoveredText().length()) { minToken = token.getCoveredText().length(); } if (maxToken < token.getCoveredText().length()) { maxToken = token.getCoveredText().length(); } meanToken += token.getCoveredText().length(); } try { meanToken /= nbToken; } catch (Exception e) { meanToken = 0; } List<Feature> featList = new ArrayList<Feature>(); featList.addAll(Arrays.asList(new Feature("nb_" + TOKEN, nbToken))); featList.addAll(Arrays.asList(new Feature("max_" + TOKEN + "_size", maxToken))); featList.addAll(Arrays.asList(new Feature("min_" + TOKEN + "_size", minToken))); featList.addAll(Arrays.asList(new Feature("mean_" + TOKEN + "_size", meanToken))); return featList; }
/**
 * Inserts the document, its metadata, and its entities into Postgres inside one transaction.
 *
 * <p>On any SQL or Baleen failure the transaction is rolled back; errors are reported via the
 * monitor rather than rethrown, so a failed document does not stop the pipeline.
 */
@Override
protected void doProcess(JCas jCas) throws AnalysisEngineProcessException {
  Connection conn = postgresResource.getConnection();
  try {
    // Insert document and metadata into database
    Integer docKey = executeDocInsert(jCas);
    for (Metadata md : JCasUtil.select(jCas, Metadata.class)) {
      executeDocMetadataInsert(docKey, md);
    }
    processEntities(jCas, docKey);
    // Commit only after document, metadata and entities have all been inserted.
    conn.commit();
  } catch (SQLException | BaleenException e) {
    getMonitor().error("Unable to insert document into Postgres database", e);
    if (conn != null) {
      try {
        conn.rollback();
      } catch (SQLException e2) {
        getMonitor()
            .error(
                "Unable to rollback insertion - state of the database may have been left inconsistent",
                e2);
      }
    }
  }
}
/**
 * Writes one HTML table cell listing the given senses (confidence then id, separated by
 * line breaks), greying out any sense not contained in {@code highlightSense}.
 *
 * @param senses senses to render; {@code NA} is written when null or empty
 * @param senseIdClass CSS class for the cell
 * @param highlightSense senses to leave un-dimmed; may be null (nothing is dimmed)
 * @throws IOException if writing to the output fails
 */
@Override
protected void tableCellSenseArray(
    FSArray senses, SenseType senseIdClass, Collection<Sense> highlightSense) throws IOException {
  output.write("\t\t<td class='" + senseIdClass + "'>");
  if (senses == null || senses.size() == 0) {
    output.write(NA);
  } else {
    boolean firstSense = true;
    for (Sense s : JCasUtil.select(senses, Sense.class)) {
      if (!firstSense) { // was `firstSense == false`
        output.write("<br />");
      }
      // Hoisted: the original evaluated this condition twice per sense.
      boolean dimmed = highlightSense != null && !highlightSense.contains(s);
      if (dimmed) {
        output.write("<span style='color: #888'>");
      }
      output.write(String.format("%.2f", s.getConfidence()) + "\t" + s.getId());
      if (dimmed) {
        output.write("</span>");
      }
      firstSense = false;
    }
  }
  output.write("</td>");
  output.newLine();
}
public JCas getPreliminarCas( Analyzer analyzer, JCas emptyCas, String sentenceId, String sentence) { this.preliminaryCas.reset(); /** Without this the annotator fails badly */ sentence = sentence.replaceAll("/", ""); sentence = sentence.replaceAll("~", ""); // Carry out preliminary analysis Analyzable content = new SimpleContent(sentenceId, sentence, ArabicAnalyzer.ARABIC_LAN); analyzer.analyze(this.preliminaryCas, content); // Copy data to a new CAS and use normalized text as DocumentText emptyCas.reset(); emptyCas.setDocumentLanguage(ArabicAnalyzer.ARABIC_LAN); CasCopier.copyCas(this.preliminaryCas.getCas(), emptyCas.getCas(), false); String normalizedText = JCasUtil.selectSingle(this.preliminaryCas, NormalizedText.class).getText(); emptyCas.setDocumentText(normalizedText); return emptyCas; }
/**
 * Renders the test-sense cell: decides whether the best test senses match the gold senses
 * (CORRECT/INCORRECT) or whether gold/test data is missing (GOLDNA/TESTNA), then delegates
 * rendering to {@link #tableCellSenseArray}.
 */
protected void tableCellTestResults(FSArray goldSenseArray, FSArray testSenseArray)
    throws IOException {
  Set<Sense> bestTestSenses = null;
  SenseType senseType;
  if (goldSenseArray == null) {
    senseType = SenseType.GOLDNA;
  } else if (testSenseArray == null || testSenseArray.size() == 0) {
    senseType = SenseType.TESTNA;
  } else {
    // Collect the gold ids, then check every best test sense against them.
    Set<String> goldIds = new TreeSet<String>();
    for (Sense gold : JCasUtil.select(goldSenseArray, Sense.class)) {
      goldIds.add(gold.getId());
    }
    bestTestSenses = getBestSenses(testSenseArray);
    senseType = SenseType.CORRECT;
    for (Sense candidate : bestTestSenses) {
      if (!goldIds.contains(candidate.getId())) {
        senseType = SenseType.INCORRECT;
        break;
      }
    }
  }
  tableCellSenseArray(testSenseArray, senseType, bestTestSenses);
}
/**
 * Logs the document id and text, then logs the covered text of every BrainRegion annotation.
 */
@Override
public void process(JCas cas) throws AnalysisEngineProcessException {
  LOG.debug(getHeaderDocId(cas) + "\t" + cas.getDocumentText());
  for (BrainRegion region : JCasUtil.select(cas, BrainRegion.class)) {
    LOG.debug(region.getCoveredText());
  }
}
/**
 * Extracts the average number of tokens per sentence; emits a numeric missing-value
 * feature when the view contains no sentences.
 */
@Override
public List<Feature> extract(JCas jcas) throws TextClassificationException {
  List<Feature> features = new ArrayList<Feature>();
  double sentenceCount = JCasUtil.select(jcas, Sentence.class).size();
  if (sentenceCount == 0) {
    // No sentences: report an explicit missing value instead of dividing by zero.
    features.add(
        new Feature(FN_TOKENS_PER_SENTENCE, new MissingValue(MissingValueNonNominalType.NUMERIC)));
  } else {
    double tokenCount = JCasUtil.select(jcas, Token.class).size();
    features.add(new Feature(FN_TOKENS_PER_SENTENCE, tokenCount / sentenceCount));
  }
  return features;
}
@Test public void testProcess() throws AnalysisEngineProcessException, ResourceInitializationException { final String text = "The fox jumps over the dog."; jCas.setDocumentText(text); processJCas(); final Collection<Sentence> select = JCasUtil.select(jCas, Sentence.class); final Sentence s1 = select.iterator().next(); final List<Dependency> dependencies = JCasUtil.selectCovered(jCas, Dependency.class, s1); // We could test the output here, but its so model dependent its not // worth it, as long as annotations have been created" // 7 = 6 words + 1 punctuation, each should have a dependency assertEquals(7, dependencies.size()); }
/** Checks the phrase chunker emits the expected four (possibly nested) chunks. */
@Test
public void test() throws AnalysisEngineProcessException, ResourceInitializationException {
  jCas.setDocumentText("The fox jumps over the dog.");
  processJCas();
  final Sentence firstSentence =
      JCasUtil.select(jCas, Sentence.class).iterator().next();
  final List<PhraseChunk> phrases =
      JCasUtil.selectCovered(jCas, PhraseChunk.class, firstSentence);
  Assert.assertEquals(4, phrases.size());
  Assert.assertEquals("The fox", phrases.get(0).getCoveredText());
  Assert.assertEquals("jumps over the dog", phrases.get(1).getCoveredText());
  Assert.assertEquals("over the dog", phrases.get(2).getCoveredText());
  Assert.assertEquals("the dog", phrases.get(3).getCoveredText());
}
/**
 * Collects, for every governor token, the set of dependencies it governs.
 *
 * @param jCas the JCas to read Dependency annotations from
 * @return map from governor token to its outgoing dependencies (empty map if there are none)
 */
public static HashMap<Token, Set<Dependency>> getChildNodesMap(JCas jCas) {
  HashMap<Token, Set<Dependency>> map = new HashMap<Token, Set<Dependency>>();
  for (Dependency dep : JCasUtil.select(jCas, Dependency.class)) {
    // computeIfAbsent replaces the containsKey/put dance: one lookup instead of three.
    map.computeIfAbsent(dep.getGovernor(), governor -> new HashSet<Dependency>()).add(dep);
  }
  return map;
}
/**
 * Creates parenthetical annotations, either within each configured window annotation
 * or — when no window class is set — over the entire document text.
 */
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
  if (windowClass == null) {
    // No window configured: treat the whole document as a single window at offset 0.
    createParentheticals(jCas, jCas.getDocumentText(), 0);
    return;
  }
  for (Annotation window : JCasUtil.select(jCas, windowClass)) {
    createParentheticals(jCas, window.getCoveredText(), window.getBegin());
  }
}
@Override public Set<Feature> extract(JCas view, TextClassificationUnit classificationUnit) throws TextClassificationException { boolean isCompound = false; POS pos = JCasUtil.selectCovered(Token.class, classificationUnit).get(0).getPos(); String word = JCasUtil.selectCovered(Lemma.class, classificationUnit).get(0).getValue().toLowerCase(); // only check for noun compounds if (pos instanceof N) { try { isCompound = isCompound(word); } catch (ResourceInitializationException e) { throw new TextClassificationException(e); } } return new Feature(IS_COMPOUND, isCompound).asSet(); }
public void initializeSentenceBoundaryDetection(JCas jCas) { // System.out.println(getClass().getName() + " being called via a Ruta script.."); Collection<Part> parts = JCasUtil.select(jCas, Part.class); initialize(); for (Part part : parts) { // System.out.println("Section: " + part.getSectionName() + " Part: " + // part.getPartNumber()); if (part != null) { String scope = part.getCoveredText(); if (scope != null && !scope.trim().isEmpty()) { sentenceBoundaryTokenizer(jCas, part, scope); } } } }
/**
 * Demo pipeline: segments a German sample text, runs Stanford NER, dumps the CAS,
 * and prints every named entity found.
 */
public static void main(String[] args) throws Exception {
  JCas jCas = JCasFactory.createJCas();
  jCas.setDocumentLanguage("de");
  jCas.setDocumentText(
      "Die Fossillagerstätte Geiseltal befindet sich im ehemaligen Braunkohlerevier des Geiseltales südlich der Stadt Halle in Sachsen-Anhalt. Sie ist eine bedeutende Fundstelle heute ausgestorbener Pflanzen und Tiere aus der Zeit des Mittleren Eozäns vor 48 bis 41 Millionen Jahren. Im Geiseltal wurde nachweislich seit 1698 erstmals Kohle gefördert, die ersten Fossilien kamen aber erst Anfang des 20. Jahrhunderts eher zufällig zu Tage. Planmäßige wissenschaftliche Ausgrabungen begannen 1925 seitens der Martin-Luther-Universität Halle-Wittenberg. Unterbrochen durch den Zweiten Weltkrieg, können die Untersuchungen in zwei Forschungsphasen untergliedert werden. Aufgrund der zunehmenden Auskohlung der Rohstofflager kamen die Ausgrabungen Mitte der 1980er allmählich zum Erliegen und endeten endgültig zu Beginn des dritten Jahrtausends.");
  // Segment -> NER -> dump; all engines run in order on the same CAS.
  SimplePipeline.runPipeline(
      jCas,
      AnalysisEngineFactory.createEngineDescription(BreakIteratorSegmenter.class),
      AnalysisEngineFactory.createEngineDescription(StanfordNamedEntityRecognizer.class),
      AnalysisEngineFactory.createEngineDescription(CasDumpWriter.class));
  // Print each recognized entity's label and surface text.
  for (NamedEntity ne : JCasUtil.select(jCas, NamedEntity.class)) {
    System.out.println("Found NE: " + ne.getValue() + ", " + ne.getCoveredText());
  }
}
@Override public void process(JCas cas) throws AnalysisEngineProcessException { // Return early if no Constituent is present if (!JCasUtil.exists(cas, Constituent.class)) return; // Lazy loading if (this.classifier == null) { init(); } String tree = ts.serializeTree(RichTree.getConstituencyTree(cas), this.parameterList); String example = "|BT| " + tree + " |ET|"; String questionClass = this.classifier.getMostConfidentModel(example); addQuestionClassAnnotation(cas, questionClass); }
private void processEntities(JCas jCas, Integer docKey) throws SQLException { // Insert entities Map<ReferenceTarget, List<Entity>> coreferenceEntities = new HashMap<>(); for (Entity ent : JCasUtil.select(jCas, Entity.class)) { ReferenceTarget rt = ent.getReferent(); if (rt == null) { rt = new ReferenceTarget(jCas); } List<Entity> entities = coreferenceEntities.getOrDefault(rt, new ArrayList<>()); entities.add(ent); coreferenceEntities.put(rt, entities); } for (List<Entity> entities : coreferenceEntities.values()) { processCoreferencedEntities(docKey, entities); } }
/**
 * Writes the JCas sentence-by-sentence in CoNLL 2006-style format: one token per line with
 * its POS tag and IOB-encoded chunk label, sentences separated by a blank line.
 */
private void convert(JCas aJCas, PrintWriter aOut) {
  Type chunkType = JCasUtil.getType(aJCas, Chunk.class);
  Feature chunkValue = chunkType.getFeatureByBaseName("chunkValue");
  for (Sentence sentence : select(aJCas, Sentence.class)) {
    HashMap<Token, Row> ctokens = new LinkedHashMap<Token, Row>();
    // Tokens
    List<Token> tokens = selectCovered(Token.class, sentence);
    // Chunks
    IobEncoder encoder = new IobEncoder(aJCas.getCas(), chunkType, chunkValue);
    for (int i = 0; i < tokens.size(); i++) {
      Row row = new Row();
      row.id = i + 1;
      row.token = tokens.get(i);
      row.chunk = encoder.encode(tokens.get(i));
      ctokens.put(row.token, row);
    }
    // Write sentence in CONLL 2006 format
    for (Row row : ctokens.values()) {
      String pos = UNUSED;
      if (writePos && (row.token.getPos() != null)) {
        POS posAnno = row.token.getPos();
        pos = posAnno.getPosValue();
      }
      String chunk = UNUSED;
      if (writeChunk && (row.chunk != null)) {
        // NOTE(review): encode() is called a second time here instead of reusing the
        // row.chunk computed above — verify IobEncoder.encode is a pure lookup before
        // simplifying, since a stateful encoder would change output.
        chunk = encoder.encode(row.token);
      }
      aOut.printf("%s %s %s\n", row.token.getCoveredText(), pos, chunk);
    }
    aOut.println();
  }
}
@Test public void testRead() throws Exception { // read List<JCas> l = asList(createReader(MongoCollectionReader.class, BlueUima.PARAM_DB_CONNECTION, conn)); assertEquals(1, l.size()); JCas jCas = l.get(0); assertEquals(UimaTests.TEST_SENTENCE, jCas.getDocumentText()); for (Annotation a : JCasUtil.select(jCas, Annotation.class)) { System.out.println(a); } BiolexiconDictTerm b = selectSingle(jCas, BiolexiconDictTerm.class); assertNotNull(b); assertEquals("theId", b.getEntityId()); Header h = selectSingle(jCas, Header.class); assertNotNull(h); assertEquals("17", h.getDocId()); }
@Override public void process(JCas aJCas) throws AnalysisEngineProcessException { CAS cas = aJCas.getCas(); for (AnnotationFS cover : CasUtil.select(cas, CasUtil.getAnnotationType(cas, annotationType))) { // If there is a constraint, check if it matches if (constraint != null) { JXPathContext ctx = JXPathContext.newContext(cover); boolean match = ctx.iterate(constraint).hasNext(); if (!match) { continue; } } // If the target type is a token, use it directly, otherwise select the covered tokens Collection<Token> tokens; if (cover instanceof Token) { tokens = Collections.singleton((Token) cover); } else { tokens = JCasUtil.selectCovered(aJCas, Token.class, cover); } for (Token token : tokens) { try { String semanticField = semanticFieldResource.getSemanticTag(token); SemanticField semanticFieldAnnotation = new SemanticField(aJCas, token.getBegin(), token.getEnd()); semanticFieldAnnotation.setValue(semanticField); semanticFieldAnnotation.addToIndexes(); } catch (ResourceAccessException e) { throw new AnalysisEngineProcessException(e); } } } }
public synchronized void process(JCas jcas) throws AnalysisEngineProcessException { JCas questionView; try { questionView = jcas; } catch (Exception e) { throw new AnalysisEngineProcessException(e); } QuestionInfo qi = JCasUtil.selectSingle(questionView, QuestionInfo.class); /*{"qId": "...", "sv": "...", "LAT" : [ {...}, {...}, {...}]} */ String line = "{\"qId\": " + "\"" + qi.getQuestionId() + "\"" + ", " + "\"SV\": "; String SVtmp = "["; for (Iterator SVIterator = JCasUtil.select(jcas, SV.class).iterator(); SVIterator.hasNext(); ) { SV sv = (SV) SVIterator.next(); SVtmp += "\"" + sv.getCoveredText() + "\""; if (SVIterator.hasNext()) { SVtmp += ", "; } } SVtmp += "], "; line += SVtmp; line += "\"LAT\": "; String LATtmp = "["; for (Iterator iterator = JCasUtil.select(jcas, LAT.class).iterator(); iterator.hasNext(); ) { LAT l = (LAT) iterator.next(); /*{"synset" : "...", "text" : "...", "specificity" : "..." "type" : "..."}*/ LATtmp += "{"; if (l.getSynset() != 0) { // only add synset when it is not zero LATtmp += "\"synset\": " + "\"" + l.getSynset() + "\", "; } // add the rest LATtmp += "\"text\": \"" + l.getText() + "\"," + " \"specificity\": \"" + l.getSpecificity() + "\", " + "\"type\": " + "\"" + l.getClass().getSimpleName() + "\"}"; // not last, add comma if (iterator.hasNext()) { LATtmp += ", "; } } LATtmp += "], "; line += LATtmp; line += "\"Concept\": "; String Concepttmp = "["; for (Iterator iterator = JCasUtil.select(jcas, Concept.class).iterator(); iterator.hasNext(); ) { Concept c = (Concept) iterator.next(); Concepttmp += "{"; Concepttmp += "\"fullLabel\": \"" + c.getFullLabel().replaceAll("\"", "\\\"") + "\", "; Concepttmp += "\"cookedLabel\": \"" + c.getCookedLabel().replaceAll("\"", "\\\"") + "\", "; Concepttmp += "\"pageID\": \"" + c.getPageID() + "\""; Concepttmp += "}"; // not last, add comma if (iterator.hasNext()) { Concepttmp += ", "; } } Concepttmp += "], "; line += Concepttmp; line += "}"; output(line); // Question q = 
QuestionDashboard.getInstance().get(qi.getQuestionId()); // QuestionDashboard.getInstance().finishQuestion(q); }
/**
 * Recursively converts a UIMA constituency annotation into a Stanford {@code Tree}.
 *
 * <p>Constituent annotations with children become interior nodes labelled with the constituent
 * type. Leaves are Token annotations; each leaf is wrapped in a preterminal node carrying the
 * token's POS value, and the leaf's label records the surrounding whitespace so the original
 * text can be reconstructed.
 *
 * @param root the annotation (Constituent or Token) to convert
 * @param tFact factory used to create tree nodes and leaves
 * @return the Stanford tree rooted at the converted annotation
 */
public static Tree createStanfordTree(Annotation root, TreeFactory tFact) {
  JCas aJCas;
  try {
    aJCas = root.getCAS().getJCas();
  } catch (CASException e) {
    // NOTE(review): the cause `e` is dropped here — consider passing it to the
    // IllegalStateException constructor to preserve the stack trace.
    throw new IllegalStateException("Unable to get JCas from JCas wrapper");
  }
  // define the new (root) node
  Tree rootNode;
  // before we can create a node, we must check if we have any children (we have to know
  // whether to create a node or a leaf - not very dynamic)
  if (root instanceof Constituent && !isLeaf((Constituent) root)) {
    Constituent node = (Constituent) root;
    List<Tree> childNodes = new ArrayList<Tree>();
    // get childNodes from child annotations (recursing into each child)
    FSArray children = node.getChildren();
    for (int i = 0; i < children.size(); i++) {
      childNodes.add(createStanfordTree(node.getChildren(i), tFact));
    }
    // now create the node with its children
    rootNode = tFact.newTreeNode(node.getConstituentType(), childNodes);
  } else {
    // Handle leaf annotations
    // Leafs are always Token-annotations
    // We also have to insert a Preterminal node with the value of the
    // POS-Annotation on the token
    // because the POS is not directly stored within the tree
    Token wordAnnotation = (Token) root;
    // create leaf-node for the tree
    Tree wordNode = tFact.newLeaf(wordAnnotation.getCoveredText());
    // create information about preceding and trailing whitespaces in the leaf node
    StringBuilder preWhitespaces = new StringBuilder();
    StringBuilder trailWhitespaces = new StringBuilder();
    List<Token> precedingTokenList = selectPreceding(aJCas, Token.class, wordAnnotation, 1);
    List<Token> followingTokenList = selectFollowing(aJCas, Token.class, wordAnnotation, 1);
    if (precedingTokenList.size() > 0) {
      Token precedingToken = precedingTokenList.get(0);
      // Gap between the previous token's end and this token's begin = whitespace count.
      int precedingWhitespaces = wordAnnotation.getBegin() - precedingToken.getEnd();
      for (int i = 0; i < precedingWhitespaces; i++) {
        preWhitespaces.append(" ");
      }
    }
    if (followingTokenList.size() > 0) {
      Token followingToken = followingTokenList.get(0);
      int trailingWhitespaces = followingToken.getBegin() - wordAnnotation.getEnd();
      for (int i = 0; i < trailingWhitespaces; i++) {
        trailWhitespaces.append(" ");
      }
    }
    // write whitespace information as CoreAnnotation.BeforeAnnotation and
    // CoreAnnotation.AfterAnnotation to the node add annotation to list and write back to
    // node label
    ((CoreLabel) wordNode.label())
        .set(CoreAnnotations.BeforeAnnotation.class, preWhitespaces.toString());
    ((CoreLabel) wordNode.label())
        .set(CoreAnnotations.AfterAnnotation.class, trailWhitespaces.toString());
    // get POS-annotation
    // get the token that is covered by the POS
    List<POS> coveredPos = JCasUtil.selectCovered(aJCas, POS.class, wordAnnotation);
    // the POS should only cover one token
    assert coveredPos.size() == 1;
    POS pos = coveredPos.get(0);
    // create POS-Node in the tree and attach word-node to it
    rootNode = tFact.newTreeNode(pos.getPosValue(), Arrays.asList((new Tree[] {wordNode})));
  }
  return rootNode;
}
/**
 * Prints the ranked answers from the AnswerHitlist view (text, confidence, and any attached
 * resource IRIs) to stdout, records the same text in {@code final_answer}, and marks the
 * question finished on the dashboard.
 */
public void process(JCas jcas) throws AnalysisEngineProcessException {
  String rtn = "";
  JCas questionView, answerHitlist;
  try {
    questionView = jcas.getView("Question");
    answerHitlist = jcas.getView("AnswerHitlist");
  } catch (Exception e) {
    throw new AnalysisEngineProcessException(e);
  }
  QuestionInfo qi = JCasUtil.selectSingle(questionView, QuestionInfo.class);
  // Answers come back pre-sorted via the "SortedAnswers" index.
  FSIndex idx = answerHitlist.getJFSIndexRepository().getIndex("SortedAnswers");
  FSIterator answers = idx.iterator();
  if (answers.hasNext()) {
    // int counter = 0;
    int i = 1;
    while (answers.hasNext()) {
      Answer answer = (Answer) answers.next();
      StringBuilder sb = new StringBuilder();
      sb.append(i++);
      sb.append(". ");
      sb.append(answer.getText());
      sb.append(" (conf. ");
      sb.append(answer.getConfidence());
      sb.append(")");
      /* PRINT the passages assigned to this answer
      sb.append("\n");
      for(int ID: answer.getPassageIDs().toArray()){
        sb.append(" ");
        sb.append(counter++);
        sb.append(". ");
        sb.append(QuestionDashboard.getInstance().getPassage(ID));
        sb.append(" (");
        sb.append(ID);
        sb.append(")");
        sb.append("\n");
      }
      counter = 0; */
      // Append any resource IRIs attached to the answer.
      if (answer.getResources() != null) {
        for (FeatureStructure resfs : answer.getResources().toArray()) {
          sb.append(" ");
          sb.append(((AnswerResource) resfs).getIri());
        }
      }
      System.out.println(sb.toString());
      rtn = rtn + sb.toString() + "\n";
    }
  } else {
    System.out.println("No answer found.");
    rtn = "No answer found.";
  }
  Question q = QuestionDashboard.getInstance().get(qi.getQuestionId());
  // q.setAnswers(answers); XXX
  QuestionDashboard.getInstance().finishQuestion(q);
  final_answer = rtn;
}
/**
 * Prints the classification result: the document URI and the category assigned to the
 * (single expected) UsenetDocument annotation in the JCas.
 */
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
  UsenetDocument document = JCasUtil.select(jCas, UsenetDocument.class).iterator().next();
  String message =
      "classified " + ViewUriUtil.getURI(jCas) + " as " + document.getCategory() + ".";
  System.out.println(message);
}