@Override
public void process(JCas cas) throws AnalysisEngineProcessException {
  this.setSource(cas);
  this.setTarget(cas);
  AnnotationIndex<Annotation> index = cas.getAnnotationIndex(this.getSourceType());
  FSIterator<Annotation> iter = index.iterator();
  while (iter.hasNext()) {
    Annotation annotation = iter.next();
    String source = null;
    if (this.getSourceFeature() == null) {
      source = annotation.getCoveredText();
    } else {
      source = annotation.getStringValue(this.getSourceFeature());
    }
    if (source != null) {
      String target = this.getMapping().get(source);
      if (target != null) {
        if (this.update().booleanValue()) {
          this.update(cas, annotation, this.getTargetFeature(), target);
        } else {
          this.create(
              cas, this.getTargetFeature(), annotation.getBegin(), annotation.getEnd(), target);
        }
      }
    }
  }
}
public static Optional<SourceDocumentInformation> getSourceDocumentAnnotation(JCas jCas) {
  FSIterator<Annotation> iterator =
      jCas.getAnnotationIndex(SourceDocumentInformation.type).iterator();
  if (iterator.hasNext()) {
    return Optional.of((SourceDocumentInformation) iterator.next());
  } else {
    return Optional.absent();
  }
}
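A minimal caller sketch for the helper above, assuming Guava's Optional (as implied by Optional.absent()); the jcas variable and the "(no uri)" fallback are illustrative only, not part of the original code.

// Hypothetical usage: fall back to a placeholder when the CAS has no
// SourceDocumentInformation annotation.
Optional<SourceDocumentInformation> sdi = getSourceDocumentAnnotation(jcas);
String uri = sdi.isPresent() ? sdi.get().getUri() : "(no uri)";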
public void process(JCas aJCas) throws AnalysisEngineProcessException {
  if (!init) {
    try {
      initialize();
    } catch (ResourceInitializationException e) {
      // Fail the analysis engine if lazy initialization does not succeed
      throw new AnalysisEngineProcessException(e);
    }
    init = true;
  }
  // Retrieve the filename of the input file from the CAS
  FSIterator<Annotation> it = aJCas.getAnnotationIndex(Product.type).iterator();
  File outFile = null;
  if (it.hasNext()) {
    Product fileLoc = (Product) it.next();
    outFile = new File(mOutputDir, fileLoc.getName() + ".xml");
  }
  if (outFile == null) {
    outFile = new File(mOutputDir, "doc" + mDocNum++ + ".xml");
  }
  // Serialize the CAS in XCAS format and write it to the output file
  try {
    writeXCas(aJCas.getCas(), outFile);
  } catch (IOException e) {
    System.err.println("Could not write to output file");
    e.printStackTrace();
  } catch (SAXException e) {
    System.err.println("SAX failure while serializing the CAS");
    e.printStackTrace();
  }
}
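The writeXCas helper called above is not shown in this snippet. A minimal sketch of what it might look like, following the usual UIMA XCASSerializer/XMLSerializer pattern; the method signature and stream handling here are assumptions, not the original implementation.

// Hypothetical helper: serialize a CAS in XCAS format to the given file.
private void writeXCas(CAS aCas, File name) throws IOException, SAXException {
  try (FileOutputStream out = new FileOutputStream(name)) {
    XCASSerializer ser = new XCASSerializer(aCas.getTypeSystem());
    XMLSerializer xmlSer = new XMLSerializer(out, false); // false = compact output
    ser.serialize(aCas, xmlSer.getContentHandler());
  }
}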
/**
 * Returns the input file referenced by the CAS. Assumes the CAS contains a
 * SourceDocumentInformation annotation (set by FileSystemCollectionReader or
 * documentAnalyzer.sh); returns null otherwise.
 */
public static File retrieveSourceDocumentFile(JCas aJCas) throws AnalysisEngineProcessException {
  FSIterator<Annotation> sourceDocumentInformationFSIterator =
      aJCas
          .getAnnotationIndex(
              JCasSofaViewUtils.getJCasType(aJCas, DEFAULT_SOURCE_DOCUMENT_INFORMATION_ANNOTATION))
          .iterator();
  File inFile = null;
  if (sourceDocumentInformationFSIterator.hasNext()) {
    SourceDocumentInformation theSourceDocumentInformation =
        (SourceDocumentInformation) sourceDocumentInformationFSIterator.next();
    try {
      inFile = new File(new URL(theSourceDocumentInformation.getUri()).getPath());
    } catch (MalformedURLException e) {
      // The document URI is not a valid URL: propagate as a processing failure
      String errmsg = "Error: malformed source document URL";
      throw new AnalysisEngineProcessException(errmsg, new Object[] {}, e);
    }
  }
  return inFile;
}
public static void showSdiWithCategory2(JCas jcas) {
  String wordsLine = "";
  String catsLine = "";
  int cnt = 0;
  FSIterator<Annotation> it = jcas.getAnnotationIndex(WordAnnotation.type).iterator();
  while (it.hasNext()) {
    cnt += 1;
    WordAnnotation a = (WordAnnotation) it.next();
    String[] strings = center(a.getCoveredText(), a.getTag());
    wordsLine += strings[0] + " ";
    catsLine += strings[1] + " ";
    if (cnt == 20) {
      // Flush the current block of 20 words and their categories
      System.out.println(wordsLine);
      System.out.println(catsLine);
      System.out.println();
      wordsLine = "";
      catsLine = "";
      cnt = 0;
    }
  }
  if (cnt > 0) {
    System.out.println(wordsLine);
    System.out.println(catsLine);
  }
}
/**
 * Processes the CAS generated by the document upload in 'Nuovo pattern'.
 *
 * @param cas the CAS to analyze
 * @throws SQLException if an error occurs while extracting the data
 */
private void processPatternAttachmentCas(final JCas cas) throws SQLException {
  HpmDao dao = new HpmDao();
  // The property value is the hpmPatternId
  String kpeopleTagPattern = getKpeopleTagPattern(cas);
  // Collect the attachments associated with the pattern creation and persist them
  List<Document> docs = new ArrayList<Document>();
  // Retrieve the email of the user who created the pattern
  String email = getUserAuthorAnnotation(cas).getEmail();
  AnnotationIndex<Annotation> aaIdx = cas.getAnnotationIndex(AttachmentAnnotation.type);
  FSIterator<Annotation> itAa = aaIdx.iterator();
  while (itAa.hasNext()) {
    AttachmentAnnotation aa = (AttachmentAnnotation) itAa.next();
    Document document = new Document();
    document.setAttachmentType(new AttachmentType(2));
    document.setGuid(aa.getUrlAttachment());
    document.setHashcode(aa.getHashcode());
    document.setTemplate(false);
    document.setHpmAttachmentId(aa.getId());
    document.setName(aa.getAttachmentName());
    docs.add(document);
  }
  docs = dao.savePatternDocument(docs, email, kpeopleTagPattern);
}
/**
 * Returns the ActionTypeAnnotation annotation.
 *
 * @param cas the CAS to process
 * @return the annotation of type ActionTypeAnnotation
 */
private ActionTypeAnnotation getActionTypeAnnotation(final JCas cas) {
  Type annotationType =
      cas.getTypeSystem().getType(ActionTypeAnnotation.class.getCanonicalName());
  FSIterator<Annotation> it = cas.getAnnotationIndex(annotationType).iterator();
  // Assumes the CAS contains at least one ActionTypeAnnotation
  return (ActionTypeAnnotation) it.next();
}
public static void showJCas(JCas jcas) {
  FSIterator<Annotation> it = jcas.getAnnotationIndex().iterator();
  Map<String, MutableInt> counters = new TreeMap<String, MutableInt>();
  int total = 0;
  while (it.hasNext()) {
    total += 1;
    String annoType = "rien";
    try {
      Annotation annotation = (Annotation) it.next();
      annoType = annotation.getType().getName();
    } catch (NullPointerException e) {
      // Skip the broken annotation and count it under the exception's class name
      it.moveToNext();
      annoType = e.getClass().getCanonicalName();
    }
    if (counters.get(annoType) == null) {
      counters.put(annoType, new MutableInt(1));
    } else {
      counters.get(annoType).increment();
    }
  }
  System.out.println(
      "Total annotations in JCas (ID: " + System.identityHashCode(jcas) + "): " + total);
  for (String annoType : counters.keySet()) {
    System.out.println(annoType + ": " + counters.get(annoType));
  }
}
private void clean(JCas cas) {
  AnnotationIndex<Annotation> index = cas.getAnnotationIndex(SingleWordTermAnnotation.type);
  FSIterator<Annotation> iterator = index.iterator();
  while (iterator.hasNext()) {
    SingleWordTermAnnotation annotation = (SingleWordTermAnnotation) iterator.next();
    this.clean(cas, annotation);
  }
}
private void display(JCas cas) {
  AnnotationIndex<Annotation> index = cas.getAnnotationIndex(SourceDocumentInformation.type);
  FSIterator<Annotation> iterator = index.iterator();
  if (iterator.hasNext()) {
    SourceDocumentInformation sdi = (SourceDocumentInformation) iterator.next();
    this.getContext().getLogger().log(Level.INFO, "Cleaning terms of " + sdi.getUri());
  }
}
public static int countType(JCas jcas, int type) {
  FSIterator<Annotation> it = jcas.getAnnotationIndex(type).iterator();
  int cnt = 0;
  while (it.hasNext()) {
    cnt++;
    it.next();
  }
  return cnt;
}
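Where uimaFIT is on the classpath, the same count can be obtained more concisely with JCasUtil.select; a minimal sketch, assuming the WordAnnotation JCas type used elsewhere in these snippets (any JCas class would do).

// Hypothetical equivalent using uimaFIT (org.apache.uima.fit.util.JCasUtil):
// counts all WordAnnotation instances in the default view.
int wordCount = JCasUtil.select(jcas, WordAnnotation.class).size();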
private void select(JCas cas) {
  AnnotationIndex<Annotation> index = cas.getAnnotationIndex(TermAnnotation.type);
  FSIterator<Annotation> iterator = index.iterator();
  while (iterator.hasNext()) {
    TermAnnotation annotation = (TermAnnotation) iterator.next();
    if (annotation.getOccurrences() <= this.getThreshold().intValue()) {
      this.getAnnotations().add(annotation);
    }
  }
}
private static String getResult(JCas jcas, String sourceLang, String interLang) {
  FSIterator<Annotation> iter = jcas.getAnnotationIndex(Target.type).iterator();
  String result = "";
  // Iterate over all Target annotations; the content of the last one is returned
  while (iter.isValid()) {
    Target transText = (Target) iter.get();
    result = transText.getContent();
    iter.moveToNext();
  }
  return result;
}
public static String getTermSuiteCasFileName(JCas jcas) {
  FSIterator<Annotation> it = jcas.getAnnotationIndex(SourceDocumentInformation.type).iterator();
  if (it.hasNext()) {
    SourceDocumentInformation sdi = (SourceDocumentInformation) it.next();
    Iterator<String> iterator =
        Splitter.on("/").split(sdi.getUri() == null ? "(no uri)" : sdi.getUri()).iterator();
    // Keep the last path segment as the file name
    String name = null;
    while (iterator.hasNext()) {
      name = iterator.next();
    }
    return name;
  } else {
    return null;
  }
}
/**
 * Returns the BodyAnnotation annotation.
 *
 * @param cas the CAS to process
 * @return the annotation of type BodyAnnotation, or null if none is present
 */
private BodyAnnotation getBodyAnnotation(final JCas cas) {
  Type type = cas.getTypeSystem().getType(BodyAnnotation.class.getCanonicalName());
  BodyAnnotation ann = null;
  FSIterator<Annotation> it = cas.getAnnotationIndex(type).iterator();
  if (it.hasNext()) {
    ann = (BodyAnnotation) it.next();
  }
  return ann;
}
public static void showTermFreq(JCas jcas, int num) {
  FSIterator<Annotation> it = jcas.getAnnotationIndex(TermOccAnnotation.type).iterator();
  int cnt = 0;
  while (it.hasNext()) {
    cnt += 1;
    TermOccAnnotation annotation = (TermOccAnnotation) it.next();
    if (cnt == num) {
      System.out.println("TermOccAnnotation n°" + num + ": " + annotation);
      break;
    }
  }
}
/**
 * TODO:
 *   1. Construct the global word dictionary.
 *   2. Keep the word frequency for each sentence.
 *
 * <p>Creates two dictionaries, queryDictionary and answerDictionary.
 *
 * <p>queryDictionary is a list of maps whose keys are the words in the question and whose values
 * are the counts of each word in the question sentence. Similarly, answerDictionary is a list of
 * maps whose keys are the words in the answer and whose values are the counts of each word in the
 * answer sentence.
 */
@Override
public void processCas(CAS aCas) throws ResourceProcessException {
  JCas jcas;
  try {
    jcas = aCas.getJCas();
  } catch (CASException e) {
    throw new ResourceProcessException(e);
  }
  FSIterator<Annotation> it = jcas.getAnnotationIndex(Document.type).iterator();
  if (it.hasNext()) {
    Document doc = (Document) it.next();
    // Previous annotators are expected to have populated the token list in the CAS
    FSList fsTokenList = doc.getTokenList();
    ArrayList<Token> tokenList = Utils.fromFSListToCollection(fsTokenList, Token.class);
    HashMap<String, Integer> myMap = new HashMap<String, Integer>();
    HashMap<String, Integer> myMap2 = new HashMap<String, Integer>();
    // A relevance value of 99 marks a question: fill QuesqIdList, QuesrelList and queryDictionary
    if (doc.getRelevanceValue() == 99) {
      QuesqIdList.add(doc.getQueryID());
      QuesrelList.add(doc.getRelevanceValue());
      for (int k = 0; k < tokenList.size(); k++) {
        myMap.put(tokenList.get(k).getText(), tokenList.get(k).getFrequency());
      }
      queryDictionary.add(myMap);
    } else {
      // Otherwise it is an answer: fill AnsqIdList, AnsrelList and answerDictionary
      AnsqIdList.add(doc.getQueryID());
      AnsrelList.add(doc.getRelevanceValue());
      for (int k = 0; k < tokenList.size(); k++) {
        myMap2.put(tokenList.get(k).getText(), tokenList.get(k).getFrequency());
      }
      answerDictionary.add(myMap2);
      // A relevance value of 1 marks a gold answer
      if (1 == doc.getRelevanceValue()) {
        GoldAnswerStringList.put(doc.getQueryID(), doc.getText());
      }
    }
  }
}
/**
 * Adds the properties present in the CAS to the event.
 *
 * @param event the event to save
 * @param cas the CAS object
 */
private void addProperties(final Event event, final JCas cas) {
  AnnotationIndex<Annotation> annIdx = cas.getAnnotationIndex(PropertyAnnotation.type);
  FSIterator<Annotation> it = annIdx.iterator();
  while (it.hasNext()) {
    PropertyAnnotation annotation = (PropertyAnnotation) it.next();
    String key = annotation.getKey();
    String value = annotation.getValue();
    logger.debug("Adding property\nKey: " + key + "\nValue: " + value);
    event.getProperties().put(key, value);
  }
}
@Override
public void process(JCas cas) throws AnalysisEngineProcessException {
  try {
    AnnotationIndex<Annotation> index = cas.getAnnotationIndex(WordAnnotation.type);
    FSIterator<Annotation> iterator = index.iterator();
    while (iterator.hasNext()) {
      WordAnnotation annotation = (WordAnnotation) iterator.next();
      // Identity normalization: use the covered text as both lemma and stem
      String norm = annotation.getCoveredText();
      annotation.setLemma(norm);
      annotation.setStem(norm);
    }
  } catch (Exception e) {
    throw new AnalysisEngineProcessException(e);
  }
}
/**
 * Extracts the value of the emailBody field for the Email object.
 *
 * @param cas the CAS to process
 * @return the field value (empty string if no BodyAnnotation is present)
 */
private String getEmailBody(final JCas cas) {
  Type annotationType = cas.getTypeSystem().getType(BodyAnnotation.class.getCanonicalName());
  FSIterator<Annotation> it = cas.getAnnotationIndex(annotationType).iterator();
  String emailBody = "";
  if (it.hasNext()) {
    BodyAnnotation ann = (BodyAnnotation) it.next();
    String value = ann.getValue();
    if (value != null) {
      emailBody = value;
    }
  }
  return emailBody;
}
public static void showSdiWithCategory(JCas jcas) {
  FSIterator<Annotation> it = jcas.getAnnotationIndex(WordAnnotation.type).iterator();
  int wordCnt = 0;
  while (it.hasNext()) {
    wordCnt++;
    WordAnnotation a = (WordAnnotation) it.next();
    System.out.print(a.getCoveredText() + "_" + a.getTag());
    if (wordCnt < 12) {
      System.out.print(" ");
    } else {
      // Start a new line every 12 words
      System.out.println();
      wordCnt = 0;
    }
  }
  // Terminate the last (possibly partial) line; the iterator is exhausted here,
  // so the original Joiner.on(" ").join(it) could only ever print an empty string.
  System.out.println();
}
@Override
protected void generateAnnotations(JCas jcas)
    throws AnalysisEngineProcessException, FeaturePathException {
  // The CAS is necessary to retrieve values
  CAS currCAS = jcas.getCas();
  for (String path : paths) {
    // Separate the type name from the feature path
    String[] segments = path.split("/", 2);
    String typeName = segments[0];
    // Try to get the type from the type system of the CAS
    Type t = currCAS.getTypeSystem().getType(typeName);
    if (t == null) {
      throw new IllegalStateException("Type [" + typeName + "] not found in type system");
    }
    // Initialize the FeaturePathInfo with the corresponding part
    initializeFeaturePathInfoFrom(fp, segments);
    // Get the annotations
    AnnotationIndex<?> idx = currCAS.getAnnotationIndex(t);
    FSIterator<?> iterator = idx.iterator();
    while (iterator.hasNext()) {
      AnnotationFS fs = (AnnotationFS) iterator.next();
      try {
        if (this.filterFeaturePath != null) {
          // Check the annotation filter condition
          if (this.filterFeaturePathInfo.match(fs, this.filterCondition)) {
            createStemAnnotation(jcas, fs);
          }
        } else {
          // No annotation filter specified
          createStemAnnotation(jcas, fs);
        }
      } catch (AnalysisEngineProcessException e) {
        throw new IllegalStateException("error occurred while creating a stem annotation", e);
      }
    }
  }
}
@Override
public void process(JCas jcas) throws AnalysisEngineProcessException {
  UIMAProfiler.getProfiler("AnalysisEngine").start(this, "process");
  Lemmatizer mateLemmatizer = mateLemmatizerModel.getEngine();
  Tagger mateTagger = mateTaggerModel.getEngine();
  /*
   * Keep the annotations in a list so that they can be accessed by index.
   */
  List<WordAnnotation> annotations = Lists.newArrayList();
  FSIterator<Annotation> it = jcas.getAnnotationIndex(WordAnnotation.type).iterator();
  while (it.hasNext()) {
    WordAnnotation a = (WordAnnotation) it.next();
    annotations.add(a);
  }
  String[] tokens = new String[annotations.size() + 2];
  // Prepend two fake tokens so that Mate does not mishandle the first two real words
  tokens[0] = "<root>";
  tokens[1] = "<root2>";
  for (int i = 0; i < annotations.size(); i++) {
    tokens[i + 2] = annotations.get(i).getCoveredText();
  }
  SentenceData09 mateSentence = new SentenceData09();
  mateSentence.init(tokens);
  // Run POS tagging
  mateSentence = mateTagger.apply(mateSentence);
  // Run lemmatization
  mateSentence = mateLemmatizer.apply(mateSentence);
  // Copy the predicted tags and lemmas back onto the word annotations
  WordAnnotation wordAnnotation;
  for (int j = 1; j < mateSentence.length(); j++) {
    wordAnnotation = annotations.get(j - 1);
    wordAnnotation.setTag(mateSentence.ppos[j]);
    wordAnnotation.setLemma(mateSentence.plemmas[j]);
  }
  UIMAProfiler.getProfiler("AnalysisEngine").stop(this, "process");
}
/**
 * Retrieves the value of the kpeopletagpattern property.
 *
 * @param cas the CAS to analyze
 * @return the value of the kpeopletagpattern property
 */
private String getKpeopleTagPattern(final JCas cas) {
  AnnotationIndex<Annotation> annIdx = cas.getAnnotationIndex(PropertyAnnotation.type);
  FSIterator<Annotation> it = annIdx.iterator();
  String kpeopletagpattern = null;
  while (it.hasNext()) {
    PropertyAnnotation annotation = (PropertyAnnotation) it.next();
    String key = annotation.getKey();
    String value = annotation.getValue();
    if (key.equals("kpeopletagpattern")) {
      kpeopletagpattern = value;
    }
  }
  return kpeopletagpattern;
}
@Override
public void doProcess(JCas aJCas) throws AnalysisEngineProcessException {
  List<Entity> toRemove = new ArrayList<Entity>();
  FSIterator<Annotation> iter = aJCas.getAnnotationIndex(Entity.type).iterator();
  while (iter.hasNext()) {
    Entity e = (Entity) iter.next();
    if (e.getConfidence() < confidenceThreshold
        && (!ignoreZeroConfidence || e.getConfidence() > 0.0)) {
      toRemove.add(e);
      getMonitor()
          .debug(
              "Low confidence entity found (ID: {}) - this entity will be removed",
              e.getInternalId());
    }
  }
  removeFromJCasIndex(toRemove);
}
private void adjust(JCas cas) {
  AnnotationIndex<Annotation> index = cas.getAnnotationIndex(TermAnnotation.type);
  FSIterator<Annotation> iterator = index.iterator();
  while (iterator.hasNext()) {
    TermAnnotation annotation = (TermAnnotation) iterator.next();
    if (annotation.getVariants() != null) {
      // Aggregate occurrences, frequency and specificity over the term and its variants
      int occ = annotation.getOccurrences();
      double freq = annotation.getFrequency();
      double spec = annotation.getSpecificity();
      for (int i = 0; i < annotation.getVariants().size(); i++) {
        occ += annotation.getVariants(i).getOccurrences();
        freq += annotation.getVariants(i).getFrequency();
        spec += annotation.getVariants(i).getSpecificity();
      }
      annotation.setOccurrences(occ);
      annotation.setFrequency(freq);
      annotation.setSpecificity(spec);
    }
  }
}
/**
 * Sets the fieldTo field for the Email object.
 *
 * @param email the email to populate
 * @param cas the CAS from which to extract the annotations
 */
private void addEmailTo(final Email email, final JCas cas) {
  Type annotationType =
      cas.getTypeSystem().getType(UserReceiverToAnnotation.class.getCanonicalName());
  FSIterator<Annotation> it = cas.getAnnotationIndex(annotationType).iterator();
  // Use a map to avoid inserting duplicate email addresses
  Hashtable<String, String> emailMap = new Hashtable<String, String>();
  while (it.hasNext()) {
    UserReceiverToAnnotation ann = (UserReceiverToAnnotation) it.next();
    emailMap.put(ann.getEmail(), "Y");
  }
  Enumeration<String> en = emailMap.keys();
  while (en.hasMoreElements()) {
    String element = en.nextElement();
    email.getEmailTo().add(element);
  }
}
public static void main(String[] args)
    throws IOException, InvalidXMLException, CASException, ResourceInitializationException,
        SAXException {
  JCas jcas = null;
  // Read the annotator descriptor
  XMLParser xmlParser = UIMAFramework.getXMLParser();
  XMLInputSource in = new XMLInputSource("desc/ej4/MetricsAnnotatorPipeline.xml");
  // Create an analysis engine description from the descriptor
  AnalysisEngineDescription tsDesc = xmlParser.parseAnalysisEngineDescription(in);
  // Obtain the CAS
  jcas = CasCreationUtils.createCas(tsDesc).getJCas();
  if (jcas != null) {
    // Deserialize the annotations from a file
    FileInputStream inputStream = new FileInputStream("resources/annotation.xmi");
    XmiCasDeserializer.deserialize(inputStream, jcas.getCas());
    // Obtain the document text
    String sofaString = jcas.getDocumentText();
    System.out.println(sofaString);
    // Use the annotations from the file
    FSIterator<Annotation> it = jcas.getAnnotationIndex(Metric.type).iterator();
    while (it.isValid()) {
      Metric metric = (Metric) it.get();
      Number number = metric.getNumber();
      Unit unit = metric.getUnit();
      Double value =
          (number.getIsDouble())
              ? number.getAbsoluteDoubleValue()
              : Double.valueOf(number.getAbsoluteIntegerValue());
      System.out.println("===================");
      System.out.println("Metric: " + metric.getCoveredText());
      System.out.println("Real value: " + value * number.getSign() * unit.getMultiplier());
      System.out.println("Base unit: " + unit.getBaseUnit());
      it.moveToNext();
    }
  }
}
/**
 * Processes the CAS which was populated by the Text Analysis Engines. <br>
 * In this case, the CAS is converted to XMI and written into the output file.
 *
 * @param aCAS a CAS which has been populated by the TAEs
 * @throws ResourceProcessException if there is an error in processing the Resource
 * @see org.apache.uima.collection.base_cpm.CasObjectProcessor#processCas(org.apache.uima.cas.CAS)
 */
public void processCas(CAS aCAS) throws ResourceProcessException {
  String modelFileName = null;
  JCas jcas;
  try {
    jcas = aCAS.getJCas();
  } catch (CASException e) {
    throw new ResourceProcessException(e);
  }
  // Retrieve the filename of the input file from the CAS
  FSIterator<Annotation> it = jcas.getAnnotationIndex(SourceDocumentInformation.type).iterator();
  File outFile = null;
  if (it.hasNext()) {
    SourceDocumentInformation fileLoc = (SourceDocumentInformation) it.next();
    File inFile;
    try {
      inFile = new File(new URL(fileLoc.getUri()).getPath());
      String outFileName = inFile.getName();
      if (fileLoc.getOffsetInSource() > 0) {
        outFileName += ("_" + fileLoc.getOffsetInSource());
      }
      outFileName += ".xmi";
      outFile = new File(mOutputDir, outFileName);
      modelFileName = mOutputDir.getAbsolutePath() + "/" + inFile.getName() + ".ecore";
    } catch (MalformedURLException e1) {
      // Invalid URL: fall back to the default naming below
    }
  }
  if (outFile == null) {
    outFile = new File(mOutputDir, "doc" + mDocNum++ + ".xmi");
  }
  // Serialize the CAS as XMI and write it to the output file
  try {
    writeXmi(jcas.getCas(), outFile, modelFileName);
  } catch (IOException e) {
    throw new ResourceProcessException(e);
  } catch (SAXException e) {
    throw new ResourceProcessException(e);
  }
}
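The writeXmi helper referenced above is not shown in this snippet. A minimal sketch of one possible implementation using UIMA's XmiCasSerializer and XMLSerializer; the signature mirrors the call above, and ignoring modelFileName (used for ecore output in the original) is an assumption of this sketch.

// Hypothetical helper: serialize a CAS as XMI to the given file.
// The modelFileName parameter is ignored here; the original may use it for ecore output.
private void writeXmi(CAS aCas, File name, String modelFileName) throws IOException, SAXException {
  try (FileOutputStream out = new FileOutputStream(name)) {
    XmiCasSerializer ser = new XmiCasSerializer(aCas.getTypeSystem());
    XMLSerializer xmlSer = new XMLSerializer(out, false); // false = compact output
    ser.serialize(aCas, xmlSer.getContentHandler());
  }
}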
/**
 * Uses ABNER to analyze words and assign a confidence value.
 *
 * @param args sentences to be processed
 * @param arg0 the JCas holding the NameTag annotations
 */
public static void abnerNER(String[] args, JCas arg0) {
  HashMap<String, Double> confMap = new HashMap<String, Double>();
  /* Use ABNER to find gene names in the input sentences. */
  Tagger t = new Tagger();
  for (int i = 1; i < args.length; ++i) {
    String s = args[i];
    String[][] ents = t.getEntities(s);
    /* Store the entities recognized by ABNER in the map. */
    for (int j = 0; j < ents[0].length; j++) {
      confMap.put(ents[0][j], 1.0);
    }
  }
  FSIterator<org.apache.uima.jcas.tcas.Annotation> ite =
      arg0.getAnnotationIndex(NameTag.type).iterator();
  while (ite.hasNext()) {
    // Words selected by LingPipe
    NameTag tag = (NameTag) ite.next();
    String name = tag.getText();
    // Confidence is 1 for words selected by both LingPipe and ABNER, 0 otherwise
    if (confMap.containsKey(name)) {
      tag.setConfidenceAbner(1.0);
    } else {
      tag.setConfidenceAbner(0.0);
    }
  }
}