/**
 * Walks every annotation of the configured source type, reads a source string from it (the
 * covered text when no source feature is configured, otherwise the string value of that
 * feature), looks that string up in the mapping and, when a target value is found, either
 * calls {@code update(...)} on the annotation or calls {@code create(...)} over the same span.
 *
 * @param cas the CAS to process
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
public void process(JCas cas) throws AnalysisEngineProcessException {
  this.setSource(cas);
  this.setTarget(cas);

  FSIterator<Annotation> candidates = cas.getAnnotationIndex(this.getSourceType()).iterator();
  while (candidates.hasNext()) {
    Annotation current = candidates.next();

    // Without a source feature the lookup key is the text covered by the annotation.
    String key =
        this.getSourceFeature() == null
            ? current.getCoveredText()
            : current.getStringValue(this.getSourceFeature());
    if (key == null) {
      continue;
    }

    String mapped = this.getMapping().get(key);
    if (mapped == null) {
      // No mapping entry for this key: leave the annotation untouched.
      continue;
    }

    if (this.update().booleanValue()) {
      this.update(cas, current, this.getTargetFeature(), mapped);
    } else {
      this.create(cas, this.getTargetFeature(), current.getBegin(), current.getEnd(), mapped);
    }
  }
}
/** * Metodo per il process della Cas generata con l'upload del documenti in 'Nuovo pattern'. * * @param cas cas da analizzare * @throws SQLException eccezione durante l'estrazione dei dati */ private void processPatternAttachmentCas(final JCas cas) throws SQLException { HpmDao dao = new HpmDao(); // il valore della property è l'hpmPatternId String kpeopleTagPattern = getKpeopleTagPattern(cas); // recupero e salvo su db gli attachments associati alla creazione pattern List<Document> docs = new ArrayList<Document>(); // recupero l'email dell'utente che ha generato il pattern String email = getUserAuthorAnnotation(cas).getEmail(); AnnotationIndex<Annotation> aaIdx = cas.getAnnotationIndex(AttachmentAnnotation.type); FSIterator<Annotation> itAa = aaIdx.iterator(); while (itAa.hasNext()) { AttachmentAnnotation aa = (AttachmentAnnotation) itAa.next(); Document document = new Document(); document.setAttachmentType(new AttachmentType(2)); document.setGuid(aa.getUrlAttachment()); document.setHashcode(aa.getHashcode()); document.setTemplate(false); document.setHpmAttachmentId(aa.getId()); document.setName(aa.getAttachmentName()); docs.add(document); } docs = dao.savePatternDocument(docs, email, kpeopleTagPattern); }
/**
 * Invokes {@code clean(cas, annotation)} for every {@code SingleWordTermAnnotation} in the CAS.
 *
 * @param cas the CAS whose single-word terms are visited
 */
private void clean(JCas cas) {
  for (FSIterator<Annotation> terms =
          cas.getAnnotationIndex(SingleWordTermAnnotation.type).iterator();
      terms.hasNext(); ) {
    this.clean(cas, (SingleWordTermAnnotation) terms.next());
  }
}
/**
 * Logs, at INFO level, the URI of the first {@code SourceDocumentInformation} annotation found
 * in the CAS. Does nothing when the CAS holds no such annotation.
 *
 * @param cas the CAS to inspect
 */
private void display(JCas cas) {
  FSIterator<Annotation> sources =
      cas.getAnnotationIndex(SourceDocumentInformation.type).iterator();
  if (!sources.hasNext()) {
    return;
  }
  // Only the first entry is reported.
  SourceDocumentInformation info = (SourceDocumentInformation) sources.next();
  this.getContext().getLogger().log(Level.INFO, "Cleaning terms of " + info.getUri());
}
/**
 * Adds to {@code getAnnotations()} every {@code TermAnnotation} whose occurrence count is less
 * than or equal to the configured threshold.
 *
 * @param cas the CAS whose term annotations are examined
 */
private void select(JCas cas) {
  FSIterator<Annotation> terms = cas.getAnnotationIndex(TermAnnotation.type).iterator();
  while (terms.hasNext()) {
    TermAnnotation term = (TermAnnotation) terms.next();
    boolean withinThreshold = term.getOccurrences() <= this.getThreshold().intValue();
    if (withinThreshold) {
      this.getAnnotations().add(term);
    }
  }
}
/**
 * Sets both the lemma and the stem of every {@code WordAnnotation} to the annotation's covered
 * text.
 *
 * @param cas the CAS to process
 * @throws AnalysisEngineProcessException wrapping any exception raised while processing
 */
@Override
public void process(JCas cas) throws AnalysisEngineProcessException {
  try {
    for (FSIterator<Annotation> words = cas.getAnnotationIndex(WordAnnotation.type).iterator();
        words.hasNext(); ) {
      WordAnnotation word = (WordAnnotation) words.next();
      String surface = word.getCoveredText();
      word.setLemma(surface);
      word.setStem(surface);
    }
  } catch (Exception failure) {
    throw new AnalysisEngineProcessException(failure);
  }
}
/**
 * Adds the properties found in the CAS to the event: every {@code PropertyAnnotation} is logged
 * at debug level and its key/value pair is put into {@code event.getProperties()}.
 *
 * @param event the event to be saved
 * @param cas the CAS object
 */
private void addProperties(final Event event, final JCas cas) {
  for (FSIterator<Annotation> cursor = cas.getAnnotationIndex(PropertyAnnotation.type).iterator();
      cursor.hasNext(); ) {
    final PropertyAnnotation property = (PropertyAnnotation) cursor.next();
    final String key = property.getKey();
    final String value = property.getValue();

    logger.debug("Aggiunta proprietà\nKey: " + key + "\nValue: " + value);
    event.getProperties().put(key, value);
  }
}
/**
 * For each {@code TermComponentAnnotation} returned by the subiterator over {@code annotation},
 * collects the {@code TermComponentAnnotation}s returned by the subiterator over that component,
 * then removes all collected annotations from the indexes.
 *
 * <p>Removal is deferred until both iterations have finished (presumably so the index is not
 * modified while it is being iterated).
 *
 * @param cas the CAS holding the annotations
 * @param annotation the single-word term whose nested components are cleaned
 */
private void clean(JCas cas, SingleWordTermAnnotation annotation) {
  AnnotationIndex<Annotation> components = cas.getAnnotationIndex(TermComponentAnnotation.type);
  Set<TermComponentAnnotation> doomed = new HashSet<TermComponentAnnotation>();

  // Phase 1: gather the nested components.
  for (FSIterator<Annotation> outer = components.subiterator(annotation); outer.hasNext(); ) {
    TermComponentAnnotation parent = (TermComponentAnnotation) outer.next();
    for (FSIterator<Annotation> inner = components.subiterator(parent); inner.hasNext(); ) {
      doomed.add((TermComponentAnnotation) inner.next());
    }
  }

  // Phase 2: drop everything gathered above.
  for (TermComponentAnnotation nested : doomed) {
    nested.removeFromIndexes();
  }
}
@Override protected void generateAnnotations(JCas jcas) throws AnalysisEngineProcessException, FeaturePathException { // CAS is necessary to retrieve values CAS currCAS = jcas.getCas(); for (String path : paths) { // Separate Typename and featurepath String[] segments = path.split("/", 2); String typeName = segments[0]; // Try to get the type from the typesystem of the CAS Type t = currCAS.getTypeSystem().getType(typeName); if (t == null) { throw new IllegalStateException("Type [" + typeName + "] not found in type system"); } // get an fpi object and initialize it // initialize the FeaturePathInfo with the corresponding part initializeFeaturePathInfoFrom(fp, segments); // get the annotations AnnotationIndex<?> idx = currCAS.getAnnotationIndex(t); FSIterator<?> iterator = idx.iterator(); while (iterator.hasNext()) { AnnotationFS fs = (AnnotationFS) iterator.next(); try { if (this.filterFeaturePath != null) { // check annotation filter condition if (this.filterFeaturePathInfo.match(fs, this.filterCondition)) { createStemAnnotation(jcas, fs); } } else { // no annotation filter specified createStemAnnotation(jcas, fs); } } catch (AnalysisEngineProcessException e) { // TODO Auto-generated catch block throw new IllegalStateException("error occured while creating a stem annotation", e); } } } }
/**
 * Retrieves the value of the kpeopletagpattern property.
 *
 * <p>Scans every {@code PropertyAnnotation} in the CAS; when several annotations carry the key
 * {@code "kpeopletagpattern"}, the value of the last one visited is returned. Annotations whose
 * key is {@code null} are skipped.
 *
 * @param cas the CAS to analyze
 * @return the value of the kpeopletagpattern property, or {@code null} if no annotation has
 *     that key
 */
private String getKpeopleTagPattern(final JCas cas) {
  AnnotationIndex<Annotation> annIdx = cas.getAnnotationIndex(PropertyAnnotation.type);
  FSIterator<Annotation> it = annIdx.iterator();
  String kpeopletagpattern = null;
  while (it.hasNext()) {
    PropertyAnnotation annotation = (PropertyAnnotation) it.next();
    // Constant-first comparison: a property annotation without a key must not cause an NPE.
    if ("kpeopletagpattern".equals(annotation.getKey())) {
      kpeopletagpattern = annotation.getValue();
    }
  }
  return kpeopletagpattern;
}
/**
 * For every {@code TermAnnotation} that has a variants array, adds the occurrences, frequency
 * and specificity of each variant to the term's own values and writes the totals back to the
 * term. Terms without variants are left unchanged.
 *
 * @param cas the CAS whose term annotations are adjusted
 */
private void adjust(JCas cas) {
  FSIterator<Annotation> terms = cas.getAnnotationIndex(TermAnnotation.type).iterator();
  while (terms.hasNext()) {
    TermAnnotation term = (TermAnnotation) terms.next();
    if (term.getVariants() == null) {
      continue;
    }

    // Start from the term's own figures and accumulate those of its variants.
    int totalOccurrences = term.getOccurrences();
    double totalFrequency = term.getFrequency();
    double totalSpecificity = term.getSpecificity();

    final int variantCount = term.getVariants().size();
    for (int v = 0; v < variantCount; v++) {
      totalOccurrences += term.getVariants(v).getOccurrences();
      totalFrequency += term.getVariants(v).getFrequency();
      totalSpecificity += term.getVariants(v).getSpecificity();
    }

    term.setOccurrences(totalOccurrences);
    term.setFrequency(totalFrequency);
    term.setSpecificity(totalSpecificity);
  }
}
/** * aggiunge oggetti di tipo Document all'event. Nel caso di Communication associo anche la email * ai documenti. * * @param event oggetto a cui associare i Document * @param email email da salvare * @param cas cas da elaborare */ private void addDocuments(final Event event, final Email email, final JCas cas) { AnnotationIndex<Annotation> aaIdx = cas.getAnnotationIndex(AttachmentAnnotation.type); FSIterator<Annotation> itAa = aaIdx.iterator(); while (itAa.hasNext()) { AttachmentAnnotation aa = (AttachmentAnnotation) itAa.next(); Document document = new Document(); document.setAttachmentType(new AttachmentType(2)); document.setGuid(aa.getUrlAttachment()); document.setHashcode(aa.getHashcode()); document.setAuthor(aa.getAuthor()); document.setTemplate(false); document.setHpmAttachmentId(aa.getId()); document.setName(aa.getAttachmentName()); event.getAttachments().add(document); // associo il documento alla mail (per il legame EMAIL-DOCUMENT) if (email != null) { email.getDocuments().add(document); } } }
/**
 * Outputs trigrams for an input annotation.
 *
 * <p>Slides a three-token window over the tokens returned by the subiterator for
 * {@code annotation}. Whenever both earlier tokens of the window pass
 * {@code tokenIsInAnnotation}, an {@code NGram} spanning the three tokens is created and added
 * to the indexes. During the first two iterations the earlier slots are still {@code null};
 * {@code tokenIsInAnnotation} is relied on to reject them (presumably — confirm).
 *
 * @param annotation the annotation whose tokens are grouped into trigrams
 * @param aJCas the CAS the tokens are read from and the n-grams are added to
 */
public void extractNgramsFromAnnotation(Annotation annotation, JCas aJCas) {
  AnnotationIndex<Annotation> tokenIndex = aJCas.getAnnotationIndex(Token.type);
  Iterator<Annotation> cursor = tokenIndex.subiterator(annotation);

  Annotation oldest = null;
  Annotation middle = null;
  Annotation newest = null;

  while (cursor.hasNext()) {
    // Shift the window by one token.
    oldest = middle;
    middle = newest;
    newest = cursor.next();

    if (!tokenIsInAnnotation(annotation, middle) || !tokenIsInAnnotation(annotation, oldest)) {
      continue;
    }

    NGram trigram = new NGram(aJCas);
    FSArray elements = new FSArray(aJCas, 3);
    elements.set(0, oldest);
    elements.set(1, middle);
    elements.set(2, newest);

    trigram.setBegin(oldest.getBegin());
    trigram.setEnd(newest.getEnd());
    trigram.setElements(elements);
    trigram.setElementType("edu.cmu.deiis.types.Token");
    trigram.setConfidence(1D);
    trigram.setCasProcessorId(PROCESSOR_ID);
    trigram.addToIndexes();
  }
}
/**
 * Runs the HtmlAnnotator followed by the HtmlConverter (with child1/child2/child3 configured as
 * gap-inducing tags and "$" as gap text) over a small XML-like document and checks the text of
 * the modified view as well as the covered text of the four TAG annotations in it.
 */
@Test
public void test() throws Exception {
  final String html =
      "<Parent>\n"
          + "<Child1>Some content</Child1>\n"
          + "<Child2 attribute=“someValue” />\n"
          + "<Child3>More content.</Child3>\n"
          + "</Parent>\n";

  // Locate both descriptors, falling back to the fully qualified resource path.
  final ClassLoader loader = HtmlAnnotator.class.getClassLoader();
  URL annotatorUrl = loader.getResource("HtmlAnnotator.xml");
  if (annotatorUrl == null) {
    annotatorUrl = loader.getResource("org/apache/uima/ruta/engine/HtmlAnnotator.xml");
  }
  URL converterUrl = loader.getResource("HtmlConverter.xml");
  if (converterUrl == null) {
    converterUrl = loader.getResource("org/apache/uima/ruta/engine/HtmlConverter.xml");
  }

  // Annotator engine.
  ResourceSpecifier annotatorSpec =
      UIMAFramework.getXMLParser().parseResourceSpecifier(new XMLInputSource(annotatorUrl));
  AnalysisEngine annotator = UIMAFramework.produceAnalysisEngine(annotatorSpec);
  annotator.setConfigParameterValue(HtmlAnnotator.PARAM_ONLY_CONTENT, false);
  annotator.reconfigure();

  // Converter engine.
  ResourceSpecifier converterSpec =
      UIMAFramework.getXMLParser().parseResourceSpecifier(new XMLInputSource(converterUrl));
  AnalysisEngine converter = UIMAFramework.produceAnalysisEngine(converterSpec);
  converter.setConfigParameterValue(HtmlConverter.PARAM_SKIP_WHITESPACES, false);
  converter.setConfigParameterValue(HtmlConverter.PARAM_PROCESS_ALL, true);
  converter.setConfigParameterValue(
      HtmlConverter.PARAM_GAP_INDUCING_TAGS, new String[] {"child1", "child2", "child3"});
  converter.setConfigParameterValue(HtmlConverter.PARAM_GAP_TEXT, "$");
  converter.reconfigure();

  CAS cas = annotator.newCAS();
  Type tagType = cas.getTypeSystem().getType(HtmlAnnotator.NAMESPACE + "TAG");

  cas.setDocumentText(html);
  annotator.process(cas);
  converter.process(cas);

  CAS plainTextCas = cas.getView(HtmlConverter.DEFAULT_MODIFIED_VIEW);
  assertEquals("$Some content$$More content.", plainTextCas.getDocumentText());

  AnnotationIndex<AnnotationFS> tags = plainTextCas.getAnnotationIndex(tagType);
  FSIterator<AnnotationFS> tagCursor = tags.iterator();
  assertEquals(4, tags.size());

  // Covered text of the tags, in index order.
  String[] expectedTexts = {
    "$Some content$$More content.", "$Some content", "$", "$More content."
  };
  for (String expected : expectedTexts) {
    assertEquals(expected, tagCursor.next().getCoveredText());
  }

  cas.release();
}
@Test public void testExpandOffsets() throws Exception { String html = "<Parent>\n"; html += "<Child1>Some content</Child1>\n"; html += "<Child2 attribute=“someValue” />\n"; html += "<Child3>More content.</Child3>\n"; html += "</Parent>\n"; URL urlA = HtmlAnnotator.class.getClassLoader().getResource("HtmlAnnotator.xml"); if (urlA == null) { urlA = HtmlAnnotator.class .getClassLoader() .getResource("org/apache/uima/ruta/engine/HtmlAnnotator.xml"); } URL urlC = HtmlAnnotator.class.getClassLoader().getResource("HtmlConverter.xml"); if (urlC == null) { urlC = HtmlAnnotator.class .getClassLoader() .getResource("org/apache/uima/ruta/engine/HtmlConverter.xml"); } XMLInputSource inA = new XMLInputSource(urlA); ResourceSpecifier specifierA = UIMAFramework.getXMLParser().parseResourceSpecifier(inA); AnalysisEngine aeA = UIMAFramework.produceAnalysisEngine(specifierA); aeA.setConfigParameterValue(HtmlAnnotator.PARAM_ONLY_CONTENT, false); aeA.reconfigure(); XMLInputSource inC = new XMLInputSource(urlC); ResourceSpecifier specifierC = UIMAFramework.getXMLParser().parseResourceSpecifier(inC); AnalysisEngine aeC = UIMAFramework.produceAnalysisEngine(specifierC); aeC.setConfigParameterValue(HtmlConverter.PARAM_SKIP_WHITESPACES, false); aeC.setConfigParameterValue(HtmlConverter.PARAM_PROCESS_ALL, true); aeC.setConfigParameterValue(HtmlConverter.PARAM_EXPAND_OFFSETS, true); aeC.reconfigure(); CAS cas = aeA.newCAS(); Type tagType = cas.getTypeSystem().getType(HtmlAnnotator.NAMESPACE + "TAG"); Feature expandedFeature = tagType.getFeatureByBaseName("expandedOffsets"); AnnotationIndex<AnnotationFS> ai = null; FSIterator<AnnotationFS> iterator = null; cas.setDocumentText(html); aeA.process(cas); aeC.process(cas); CAS plainTextCas = cas.getView(HtmlConverter.DEFAULT_MODIFIED_VIEW); assertEquals("Some contentMore content.", plainTextCas.getDocumentText()); ai = plainTextCas.getAnnotationIndex(tagType); iterator = ai.iterator(); assertEquals(4, ai.size()); AnnotationFS next = null; next = 
iterator.next(); assertEquals(false, next.getBooleanValue(expandedFeature)); assertEquals("Some contentMore content.", next.getCoveredText()); next = iterator.next(); assertEquals(false, next.getBooleanValue(expandedFeature)); assertEquals("Some content", next.getCoveredText()); next = iterator.next(); boolean b1 = next.getBooleanValue(expandedFeature); assertEquals("More content.", next.getCoveredText()); next = iterator.next(); boolean b2 = next.getBooleanValue(expandedFeature); assertEquals("More content.", next.getCoveredText()); // for one of these two annotation (with same offsets) the feature must be set to true assertEquals(true, b1 || b2); cas.release(); }