private void improveFrenchSentences(JCas jcas) {
  HashSet<de.unihd.dbs.uima.types.heideltime.Sentence> hsRemoveAnnotations =
      new HashSet<de.unihd.dbs.uima.types.heideltime.Sentence>();
  HashSet<de.unihd.dbs.uima.types.heideltime.Sentence> hsAddAnnotations =
      new HashSet<de.unihd.dbs.uima.types.heideltime.Sentence>();
  // Abbreviations that may wrongly start a new sentence after an over-eager split,
  // e.g. "av. J.-C." (before Christ): such sentence pairs are merged back into one.
  HashSet<String> hsSentenceBeginnings = new HashSet<String>();
  hsSentenceBeginnings.add("J.-C.");
  hsSentenceBeginnings.add("J-C.");
  hsSentenceBeginnings.add("NSJC");

  boolean changes = true;
  while (changes) {
    changes = false;
    FSIndex annoHeidelSentences =
        jcas.getAnnotationIndex(de.unihd.dbs.uima.types.heideltime.Sentence.type);
    FSIterator iterHeidelSent = annoHeidelSentences.iterator();
    while (iterHeidelSent.hasNext()) {
      de.unihd.dbs.uima.types.heideltime.Sentence s1 =
          (de.unihd.dbs.uima.types.heideltime.Sentence) iterHeidelSent.next();
      if ((s1.getCoveredText().endsWith("av."))
          || (s1.getCoveredText().endsWith("Av."))
          || (s1.getCoveredText().endsWith("apr."))
          || (s1.getCoveredText().endsWith("Apr."))
          || (s1.getCoveredText().endsWith("avant."))
          || (s1.getCoveredText().endsWith("Avant."))) {
        if (iterHeidelSent.hasNext()) {
          de.unihd.dbs.uima.types.heideltime.Sentence s2 =
              (de.unihd.dbs.uima.types.heideltime.Sentence) iterHeidelSent.next();
          iterHeidelSent.moveToPrevious();
          for (String beg : hsSentenceBeginnings) {
            if (s2.getCoveredText().startsWith(beg)) {
              // merge s1 and s2 into a single sentence annotation s3
              de.unihd.dbs.uima.types.heideltime.Sentence s3 =
                  new de.unihd.dbs.uima.types.heideltime.Sentence(jcas);
              s3.setBegin(s1.getBegin());
              s3.setEnd(s2.getEnd());
              hsAddAnnotations.add(s3);
              hsRemoveAnnotations.add(s1);
              hsRemoveAnnotations.add(s2);
              changes = true;
              break;
            }
          }
        }
      }
    }
    for (de.unihd.dbs.uima.types.heideltime.Sentence s : hsRemoveAnnotations) {
      s.removeFromIndexes(jcas);
    }
    hsRemoveAnnotations.clear();
    for (de.unihd.dbs.uima.types.heideltime.Sentence s : hsAddAnnotations) {
      s.addToIndexes(jcas);
    }
    hsAddAnnotations.clear();
  }
}
private void clean(JCas cas, SingleWordTermAnnotation annotation) {
  // Collect and remove every TermComponentAnnotation that is nested inside another
  // component of the given single-word term.
  Set<TermComponentAnnotation> delete = new HashSet<TermComponentAnnotation>();
  AnnotationIndex<Annotation> index = cas.getAnnotationIndex(TermComponentAnnotation.type);
  FSIterator<Annotation> iterator = index.subiterator(annotation);
  while (iterator.hasNext()) {
    TermComponentAnnotation component = (TermComponentAnnotation) iterator.next();
    FSIterator<Annotation> subiterator = index.subiterator(component);
    while (subiterator.hasNext()) {
      delete.add((TermComponentAnnotation) subiterator.next());
    }
  }
  for (TermComponentAnnotation del : delete) {
    del.removeFromIndexes();
  }
}
/**
 * Processes the CAS generated by the document upload in 'Nuovo pattern'.
 *
 * @param cas the CAS to analyze
 * @throws SQLException if an error occurs while extracting the data
 */
private void processPatternAttachmentCas(final JCas cas) throws SQLException {
  HpmDao dao = new HpmDao();
  // the property value is the hpmPatternId
  String kpeopleTagPattern = getKpeopleTagPattern(cas);
  // retrieve and persist to the DB the attachments associated with the pattern creation
  List<Document> docs = new ArrayList<Document>();
  // retrieve the email address of the user who generated the pattern
  String email = getUserAuthorAnnotation(cas).getEmail();
  AnnotationIndex<Annotation> aaIdx = cas.getAnnotationIndex(AttachmentAnnotation.type);
  FSIterator<Annotation> itAa = aaIdx.iterator();
  while (itAa.hasNext()) {
    AttachmentAnnotation aa = (AttachmentAnnotation) itAa.next();
    Document document = new Document();
    document.setAttachmentType(new AttachmentType(2));
    document.setGuid(aa.getUrlAttachment());
    document.setHashcode(aa.getHashcode());
    document.setTemplate(false);
    document.setHpmAttachmentId(aa.getId());
    document.setName(aa.getAttachmentName());
    docs.add(document);
  }
  docs = dao.savePatternDocument(docs, email, kpeopleTagPattern);
}
public static Optional<SourceDocumentInformation> getSourceDocumentAnnotation(JCas jCas) {
  FSIterator<Annotation> iterator =
      jCas.getAnnotationIndex(SourceDocumentInformation.type).iterator();
  if (iterator.hasNext()) return Optional.of((SourceDocumentInformation) iterator.next());
  else return Optional.absent();
}
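/*
 * Hedged usage sketch (not part of the original sources): shows one way the Guava Optional
 * returned by getSourceDocumentAnnotation above might be consumed. The helper name
 * getSourceDocumentUriOrDefault and the "(no uri)" fallback are illustrative assumptions.
 */
public static String getSourceDocumentUriOrDefault(JCas jCas) {
  Optional<SourceDocumentInformation> sdi = getSourceDocumentAnnotation(jCas);
  // fall back to a placeholder when no SourceDocumentInformation annotation is indexed
  return sdi.isPresent() ? sdi.get().getUri() : "(no uri)";
}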
public static void showJCas(JCas jcas) {
  FSIterator<Annotation> it = jcas.getAnnotationIndex().iterator();
  Map<String, MutableInt> counters = new TreeMap<String, MutableInt>();
  int total = 0;
  while (it.hasNext()) {
    total += 1;
    String annoType = "rien";
    try {
      Annotation annotation = (Annotation) it.next();
      annoType = annotation.getType().getName();
    } catch (NullPointerException e) {
      // skip the offending feature structure but still count it under the exception name
      it.moveToNext();
      annoType = e.getClass().getCanonicalName();
    }
    if (counters.get(annoType) == null) {
      counters.put(annoType, new MutableInt(1));
    } else {
      counters.get(annoType).increment();
    }
  }
  System.out.println(
      "Total annotations in JCas (ID: " + System.identityHashCode(jcas) + "): " + total);
  for (String annoType : counters.keySet()) {
    System.out.println(annoType + ": " + counters.get(annoType));
  }
}
@Override
public void process(JCas cas) throws AnalysisEngineProcessException {
  this.setSource(cas);
  this.setTarget(cas);
  AnnotationIndex<Annotation> index = cas.getAnnotationIndex(this.getSourceType());
  FSIterator<Annotation> iter = index.iterator();
  while (iter.hasNext()) {
    Annotation annotation = iter.next();
    // the source value is either the covered text or the configured source feature
    String source = null;
    if (this.getSourceFeature() == null) {
      source = annotation.getCoveredText();
    } else {
      source = annotation.getStringValue(this.getSourceFeature());
    }
    if (source != null) {
      String target = this.getMapping().get(source);
      if (target != null) {
        if (this.update().booleanValue()) {
          // update the target feature of the existing annotation
          this.update(cas, annotation, this.getTargetFeature(), target);
        } else {
          // create a new annotation carrying the mapped value
          this.create(
              cas, this.getTargetFeature(), annotation.getBegin(), annotation.getEnd(), target);
        }
      }
    }
  }
}
/**
 * Returns the input file from the CAS. Assumes that the CAS carries a SourceDocumentInformation
 * annotation (set by FileSystemCollectionReader or documentAnalyzer.sh); returns null otherwise.
 */
public static File retrieveSourceDocumentFile(JCas aJCas) throws AnalysisEngineProcessException {
  FSIterator<Annotation> sourceDocumentInformationFSIterator =
      aJCas
          .getAnnotationIndex(
              JCasSofaViewUtils.getJCasType(aJCas, DEFAULT_SOURCE_DOCUMENT_INFORMATION_ANNOTATION))
          .iterator();
  File inFile = null;
  if (sourceDocumentInformationFSIterator.hasNext()) {
    SourceDocumentInformation theSourceDocumentInformation =
        (SourceDocumentInformation) sourceDocumentInformationFSIterator.next();
    try {
      inFile = new File(new URL(theSourceDocumentInformation.getUri()).getPath());
    } catch (MalformedURLException e) {
      // invalid URL: fail the analysis engine with a descriptive message
      String errmsg = "Error: MalformedURLException !";
      throw new AnalysisEngineProcessException(errmsg, new Object[] {}, e);
    }
  }
  return inFile;
}
/**
 * Runs ABNER NER over the given sentences and sets the ABNER confidence on the NameTag
 * annotations already present in the CAS.
 *
 * @param args sentences to be processed
 * @param arg0 the JCas holding the NameTag annotations
 */
public static void abnerNER(String[] args, JCas arg0) {
  HashMap<String, Double> confMap = new HashMap<String, Double>();
  // use ABNER to find gene names in the sentences
  Tagger t = new Tagger();
  for (int i = 1; i < args.length; ++i) {
    String s = args[i];
    String[][] ents = t.getEntities(s);
    // store the entity strings selected by ABNER
    for (int j = 0; j < ents[0].length; j++) {
      confMap.put(ents[0][j], 1.0);
    }
  }
  FSIterator<org.apache.uima.jcas.tcas.Annotation> ite =
      arg0.getAnnotationIndex(NameTag.type).iterator();
  while (ite.hasNext()) {
    // get the words selected by LingPipe
    NameTag tag = (NameTag) ite.next();
    String name = tag.getText();
    // set the confidence to 1 for words selected by both LingPipe and ABNER, 0 otherwise
    if (confMap.containsKey(name)) {
      tag.setConfidenceAbner(1.0);
    } else {
      tag.setConfidenceAbner(0.0);
    }
  }
}
public void process(JCas aJCas) throws AnalysisEngineProcessException {
  // lazy initialization on the first call
  if (!init) {
    try {
      initialize();
    } catch (ResourceInitializationException e) {
      e.printStackTrace();
    }
    init = true;
  }
  // retrieve the filename of the input file from the CAS
  FSIterator it = aJCas.getAnnotationIndex(Product.type).iterator();
  File outFile = null;
  if (it.hasNext()) {
    Product fileLoc = (Product) it.next();
    outFile = new File(mOutputDir, fileLoc.getName() + ".xml");
  }
  if (outFile == null) {
    outFile = new File(mOutputDir, "doc" + mDocNum++ + ".xml");
  }
  // serialize the XCAS and write it to the output file
  try {
    writeXCas(aJCas.getCas(), outFile);
  } catch (IOException e) {
    System.err.println("Could not write to output file");
    e.printStackTrace();
  } catch (SAXException e) {
    System.out.println("SAX Failure");
    e.printStackTrace();
  }
}
public static void showSdiWithCategory2(JCas jcas) {
  String wordsLine = "";
  String catsLine = "";
  int cnt = 0;
  FSIterator<Annotation> it = jcas.getAnnotationIndex(WordAnnotation.type).iterator();
  while (it.hasNext()) {
    cnt += 1;
    WordAnnotation a = (WordAnnotation) it.next();
    // pad the word and its tag so that they line up column-wise
    String[] strings = center(a.getCoveredText(), a.getTag());
    wordsLine += strings[0] + " ";
    catsLine += strings[1] + " ";
    // flush every 20 words: one line of words followed by one line of tags
    if (cnt == 20) {
      System.out.println(wordsLine);
      System.out.println(catsLine);
      System.out.println();
      wordsLine = "";
      catsLine = "";
      cnt = 0;
    }
  }
  if (cnt > 0) {
    System.out.println(wordsLine);
    System.out.println(catsLine);
  }
}
private void clean(JCas cas) {
  AnnotationIndex<Annotation> index = cas.getAnnotationIndex(SingleWordTermAnnotation.type);
  FSIterator<Annotation> iterator = index.iterator();
  while (iterator.hasNext()) {
    SingleWordTermAnnotation annotation = (SingleWordTermAnnotation) iterator.next();
    this.clean(cas, annotation);
  }
}
private void display(JCas cas) {
  AnnotationIndex<Annotation> index = cas.getAnnotationIndex(SourceDocumentInformation.type);
  FSIterator<Annotation> iterator = index.iterator();
  if (iterator.hasNext()) {
    SourceDocumentInformation sdi = (SourceDocumentInformation) iterator.next();
    this.getContext().getLogger().log(Level.INFO, "Cleaning terms of " + sdi.getUri());
  }
}
public static int countType(JCas jcas, int type) {
  FSIterator<Annotation> it = jcas.getAnnotationIndex(type).iterator();
  int cnt = 0;
  while (it.hasNext()) {
    cnt++;
    it.next();
  }
  return cnt;
}
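/*
 * Hedged usage sketch (not part of the original sources): countType expects the integer
 * "type" constant generated by JCasGen for an annotation class, e.g. WordAnnotation.type
 * as used elsewhere in this file. The method name below is purely illustrative.
 */
public static void printWordAnnotationCount(JCas jcas) {
  // count every WordAnnotation currently indexed in the CAS
  int words = countType(jcas, WordAnnotation.type);
  System.out.println("WordAnnotation count: " + words);
}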
@Override
public void process(final JCas jCas) throws AnalysisEngineProcessException {
  beforeRuleProcessing(jCas);
  if (!this.labelFeature.equals(NO_SET_LABEL)) {
    // Must set labels
    FSIterator<Annotation> it = jCas.getAnnotationIndex(getIteratedType(jCas)).iterator();
    Feature feat = this.getIteratedType(jCas).getFeatureByBaseName(this.labelFeature);
    while (it.hasNext()) {
      Annotation word = (Annotation) it.next();
      word.setStringValue(feat, this.resource.getMatchingLabelString(word));
    }
  }
  RecognitionHandler recognitionHandler =
      new RecognitionHandler() {
        @Override
        public void recognizedEpisode(RegexOccurrence episode) {
          ruleMatched(jCas, episode);
        }
      };
  // prepare every rule automaton and register the recognition handler
  for (final Rule rule : this.resource.getRules()) {
    rule.getAutomaton().setAllowOverlappingInstances(this.allowOverlappingOccurrences);
    rule.getAutomaton().addRecognitionHandler(recognitionHandler);
    rule.getAutomaton().reset();
  }
  // feed the iterated annotations to every automaton, one annotation at a time
  FSIterator<Annotation> it = jCas.getAnnotationIndex(getIteratedType(jCas)).iterator();
  while (it.hasNext()) {
    Annotation word = (Annotation) it.next();
    boolean allRulesFailed = true;
    for (Rule rule : this.resource.getRules()) {
      rule.getAutomaton().nextAnnotation(word);
      allRulesFailed &= rule.getAutomaton().currentInstancesNum() == 0;
    }
    if (allRulesFailed) allRulesFailed(jCas);
  }
  for (Rule rule : this.resource.getRules()) rule.getAutomaton().finish();
  for (final Rule rule : this.resource.getRules())
    rule.getAutomaton().removeRecognitionHandler(recognitionHandler);
  afterRuleProcessing(jCas);
}
private void select(JCas cas) {
  AnnotationIndex<Annotation> index = cas.getAnnotationIndex(TermAnnotation.type);
  FSIterator<Annotation> iterator = index.iterator();
  while (iterator.hasNext()) {
    TermAnnotation annotation = (TermAnnotation) iterator.next();
    // select terms whose occurrence count is at or below the configured threshold
    if (annotation.getOccurrences() <= this.getThreshold().intValue()) {
      this.getAnnotations().add(annotation);
    }
  }
}
public static String getTermSuiteCasFileName(JCas jcas) {
  FSIterator<Annotation> it = jcas.getAnnotationIndex(SourceDocumentInformation.type).iterator();
  if (it.hasNext()) {
    SourceDocumentInformation sdi = (SourceDocumentInformation) it.next();
    // keep only the last segment of the URI path
    Iterator<String> iterator =
        Splitter.on("/").split(sdi.getUri() == null ? "(no uri)" : sdi.getUri()).iterator();
    String name = null;
    while (iterator.hasNext()) name = iterator.next();
    return name;
  } else return null;
}
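/*
 * Hedged usage sketch (not part of the original sources): for a SourceDocumentInformation
 * URI such as "file:/corpus/doc42.txt", getTermSuiteCasFileName returns the last path
 * segment, here "doc42.txt", and null when no SourceDocumentInformation is indexed.
 * The example URI and the method name below are illustrative assumptions.
 */
public static void logCasFileName(JCas jcas) {
  // name is null when the CAS carries no SourceDocumentInformation annotation
  String name = getTermSuiteCasFileName(jcas);
  System.out.println("Processing CAS for file: " + name);
}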
/**
 * Returns the BodyAnnotation annotation.
 *
 * @param cas the CAS to process
 * @return the annotation of type BodyAnnotation, or null if none is present
 */
private BodyAnnotation getBodyAnnotation(final JCas cas) {
  Type type = cas.getTypeSystem().getType(BodyAnnotation.class.getCanonicalName());
  BodyAnnotation ann = null;
  FSIterator<Annotation> it = cas.getAnnotationIndex(type).iterator();
  if (it.hasNext()) {
    ann = (BodyAnnotation) it.next();
  }
  return ann;
}
public static void showTermFreq(JCas jcas, int num) {
  FSIterator<Annotation> it = jcas.getAnnotationIndex(TermOccAnnotation.type).iterator();
  int cnt = 0;
  // print the num-th TermOccAnnotation in index order
  while (it.hasNext()) {
    cnt += 1;
    TermOccAnnotation annotation = (TermOccAnnotation) it.next();
    if (cnt == num) {
      System.out.println("TermOccAnnotation n°" + num + ": " + annotation);
      break;
    }
  }
}
/**
 * Builds two dictionaries, queryDictionary and answerDictionary.
 *
 * <p>queryDictionary is a list of maps whose keys are the words of a question sentence and whose
 * values are the counts of those words in the sentence. Similarly, answerDictionary is a list of
 * maps whose keys are the words of an answer sentence and whose values are the counts of those
 * words in the sentence.
 *
 * <p>TODO: 1. construct the global word dictionary; 2. keep the word frequency for each sentence.
 */
@Override
public void processCas(CAS aCas) throws ResourceProcessException {
  JCas jcas;
  try {
    jcas = aCas.getJCas();
  } catch (CASException e) {
    throw new ResourceProcessException(e);
  }
  FSIterator it = jcas.getAnnotationIndex(Document.type).iterator();
  if (it.hasNext()) {
    Document doc = (Document) it.next();
    // Make sure that your previous annotators have populated this in the CAS
    FSList fsTokenList = doc.getTokenList();
    ArrayList<Token> tokenList = Utils.fromFSListToCollection(fsTokenList, Token.class);
    HashMap<String, Integer> myMap = new HashMap<String, Integer>();
    HashMap<String, Integer> myMap2 = new HashMap<String, Integer>();
    // a relevance value of 99 marks a question: fill QuesqIdList, QuesrelList and queryDictionary
    if (doc.getRelevanceValue() == 99) {
      QuesqIdList.add(doc.getQueryID());
      QuesrelList.add(doc.getRelevanceValue());
      for (int k = 0; k < tokenList.size(); k++) {
        myMap.put(tokenList.get(k).getText(), tokenList.get(k).getFrequency());
      }
      queryDictionary.add(myMap);
    } else {
      // otherwise it is an answer: fill AnsqIdList, AnsrelList and answerDictionary
      AnsqIdList.add(doc.getQueryID());
      AnsrelList.add(doc.getRelevanceValue());
      for (int k = 0; k < tokenList.size(); k++) {
        myMap2.put(tokenList.get(k).getText(), tokenList.get(k).getFrequency());
      }
      answerDictionary.add(myMap2);
      // a relevance value of 1 marks a gold-standard answer
      if (1 == doc.getRelevanceValue()) {
        GoldAnswerStringList.put(doc.getQueryID(), doc.getText());
      }
    }
  }
}
@Override
public void process(JCas cas) throws AnalysisEngineProcessException {
  try {
    AnnotationIndex<Annotation> index = cas.getAnnotationIndex(WordAnnotation.type);
    FSIterator<Annotation> iterator = index.iterator();
    while (iterator.hasNext()) {
      WordAnnotation annotation = (WordAnnotation) iterator.next();
      // use the covered text as both lemma and stem
      String norm = annotation.getCoveredText();
      annotation.setLemma(norm);
      annotation.setStem(norm);
    }
  } catch (Exception e) {
    throw new AnalysisEngineProcessException(e);
  }
}
/**
 * Adds the properties that are present.
 *
 * @param event the event to save
 * @param cas the CAS object
 */
private void addProperties(final Event event, final JCas cas) {
  AnnotationIndex<Annotation> annIdx = cas.getAnnotationIndex(PropertyAnnotation.type);
  FSIterator<Annotation> it = annIdx.iterator();
  while (it.hasNext()) {
    PropertyAnnotation annotation = (PropertyAnnotation) it.next();
    String key = annotation.getKey();
    String value = annotation.getValue();
    logger.debug("Added property\nKey: " + key + "\nValue: " + value);
    event.getProperties().put(key, value);
  }
}
/**
 * Retrieves the value for the emailBody field of the Email object.
 *
 * @param cas the CAS to process
 * @return the field value, or an empty string if no BodyAnnotation is present
 */
private String getEmailBody(final JCas cas) {
  Type annotationType = cas.getTypeSystem().getType(BodyAnnotation.class.getCanonicalName());
  FSIterator<Annotation> it = cas.getAnnotationIndex(annotationType).iterator();
  String emailBody = "";
  if (it.hasNext()) {
    BodyAnnotation ann = (BodyAnnotation) it.next();
    String value = ann.getValue();
    if (value != null) {
      emailBody = value;
    }
  }
  return emailBody;
}
public static void showSdiWithCategory(JCas jcas) {
  FSIterator<Annotation> it = jcas.getAnnotationIndex(WordAnnotation.type).iterator();
  int wordCnt = 0;
  while (it.hasNext()) {
    wordCnt++;
    WordAnnotation a = (WordAnnotation) it.next();
    System.out.print(a.getCoveredText() + "_" + a.getTag());
    // print 12 word_tag pairs per line
    if (wordCnt < 12) {
      System.out.print(" ");
    } else {
      System.out.println();
      wordCnt = 0;
    }
  }
  // the iterator is exhausted at this point, so this only prints a trailing empty line
  System.out.println(Joiner.on(" ").join(it));
}
@Override
protected void generateAnnotations(JCas jcas)
    throws AnalysisEngineProcessException, FeaturePathException {
  // the CAS is necessary to retrieve values
  CAS currCAS = jcas.getCas();
  for (String path : paths) {
    // separate the type name from the feature path
    String[] segments = path.split("/", 2);
    String typeName = segments[0];
    // try to get the type from the type system of the CAS
    Type t = currCAS.getTypeSystem().getType(typeName);
    if (t == null) {
      throw new IllegalStateException("Type [" + typeName + "] not found in type system");
    }
    // initialize the FeaturePathInfo with the corresponding part
    initializeFeaturePathInfoFrom(fp, segments);
    // get the annotations
    AnnotationIndex<?> idx = currCAS.getAnnotationIndex(t);
    FSIterator<?> iterator = idx.iterator();
    while (iterator.hasNext()) {
      AnnotationFS fs = (AnnotationFS) iterator.next();
      try {
        if (this.filterFeaturePath != null) {
          // check the annotation filter condition
          if (this.filterFeaturePathInfo.match(fs, this.filterCondition)) {
            createStemAnnotation(jcas, fs);
          }
        } else {
          // no annotation filter specified
          createStemAnnotation(jcas, fs);
        }
      } catch (AnalysisEngineProcessException e) {
        throw new IllegalStateException("error occurred while creating a stem annotation", e);
      }
    }
  }
}
@Override
public void process(JCas jcas) throws AnalysisEngineProcessException {
  UIMAProfiler.getProfiler("AnalysisEngine").start(this, "process");
  Lemmatizer mateLemmatizer = mateLemmatizerModel.getEngine();
  Tagger mateTagger = mateTaggerModel.getEngine();
  /*
   * keeps an array of annotations in memory so as to be able
   * to access them by index.
   */
  List<WordAnnotation> annotations = Lists.newArrayList();
  FSIterator<Annotation> it = jcas.getAnnotationIndex(WordAnnotation.type).iterator();
  while (it.hasNext()) {
    WordAnnotation a = (WordAnnotation) it.next();
    annotations.add(a);
  }
  String[] tokens = new String[annotations.size() + 2];
  // prepend two fake words to prevent Mate from bugging on the first two words
  tokens[0] = "<root>";
  tokens[1] = "<root2>";
  for (int i = 0; i < annotations.size(); i++)
    tokens[i + 2] = annotations.get(i).getCoveredText();
  SentenceData09 mateSentence = new SentenceData09();
  mateSentence.init(tokens);
  // run POS tagging
  mateSentence = mateTagger.apply(mateSentence);
  // run lemmatization
  mateSentence = mateLemmatizer.apply(mateSentence);
  WordAnnotation wordAnnotation;
  for (int j = 1; j < mateSentence.length(); j++) {
    wordAnnotation = annotations.get(j - 1);
    wordAnnotation.setTag(mateSentence.ppos[j]);
    wordAnnotation.setLemma(mateSentence.plemmas[j]);
  }
  UIMAProfiler.getProfiler("AnalysisEngine").stop(this, "process");
}
/**
 * Retrieves the value of the kpeopletagpattern property.
 *
 * @param cas the CAS to analyze
 * @return the value of the kpeopletagpattern property
 */
private String getKpeopleTagPattern(final JCas cas) {
  AnnotationIndex<Annotation> annIdx = cas.getAnnotationIndex(PropertyAnnotation.type);
  FSIterator<Annotation> it = annIdx.iterator();
  String kpeopletagpattern = null;
  while (it.hasNext()) {
    PropertyAnnotation annotation = (PropertyAnnotation) it.next();
    String key = annotation.getKey();
    String value = annotation.getValue();
    if (key.equals("kpeopletagpattern")) {
      kpeopletagpattern = value;
    }
  }
  return kpeopletagpattern;
}
@Override
public void doProcess(JCas aJCas) throws AnalysisEngineProcessException {
  List<Entity> toRemove = new ArrayList<Entity>();
  FSIterator<Annotation> iter = aJCas.getAnnotationIndex(Entity.type).iterator();
  while (iter.hasNext()) {
    Entity e = (Entity) iter.next();
    if (e.getConfidence() < confidenceThreshold
        && (!ignoreZeroConfidence || e.getConfidence() > 0.0)) {
      toRemove.add(e);
      getMonitor()
          .debug(
              "Low confidence entity found (ID: {}) - this entity will be removed",
              e.getInternalId());
    }
  }
  removeFromJCasIndex(toRemove);
}
/**
 * Populates the To field of the Email object.
 *
 * @param email the email to populate
 * @param cas the CAS from which the annotations are extracted
 */
private void addEmailTo(final Email email, final JCas cas) {
  Type annotationType =
      cas.getTypeSystem().getType(UserReceiverToAnnotation.class.getCanonicalName());
  FSIterator<Annotation> it = cas.getAnnotationIndex(annotationType).iterator();
  // use a map to avoid inserting duplicate email addresses
  Hashtable<String, String> emailMap = new Hashtable<String, String>();
  while (it.hasNext()) {
    UserReceiverToAnnotation ann = (UserReceiverToAnnotation) it.next();
    emailMap.put(ann.getEmail(), "Y");
  }
  Enumeration<String> en = emailMap.keys();
  while (en.hasMoreElements()) {
    String element = en.nextElement();
    email.getEmailTo().add(element);
  }
}
private void adjust(JCas cas) {
  AnnotationIndex<Annotation> index = cas.getAnnotationIndex(TermAnnotation.type);
  FSIterator<Annotation> iterator = index.iterator();
  while (iterator.hasNext()) {
    TermAnnotation annotation = (TermAnnotation) iterator.next();
    if (annotation.getVariants() != null) {
      // fold the occurrences, frequency and specificity of the variants into the base term
      int occ = annotation.getOccurrences();
      double freq = annotation.getFrequency();
      double spec = annotation.getSpecificity();
      for (int i = 0; i < annotation.getVariants().size(); i++) {
        occ += annotation.getVariants(i).getOccurrences();
        freq += annotation.getVariants(i).getFrequency();
        spec += annotation.getVariants(i).getSpecificity();
      }
      annotation.setOccurrences(occ);
      annotation.setFrequency(freq);
      annotation.setSpecificity(spec);
    }
  }
}
/**
 * Processes the CAS which was populated by the TextAnalysisEngines. <br>
 * In this case, the CAS is converted to XMI and written into the output file.
 *
 * @param aCAS a CAS which has been populated by the TAEs
 * @throws ResourceProcessException if there is an error in processing the Resource
 * @see org.apache.uima.collection.base_cpm.CasObjectProcessor#processCas(org.apache.uima.cas.CAS)
 */
public void processCas(CAS aCAS) throws ResourceProcessException {
  String modelFileName = null;
  JCas jcas;
  try {
    jcas = aCAS.getJCas();
  } catch (CASException e) {
    throw new ResourceProcessException(e);
  }
  // retrieve the filename of the input file from the CAS
  FSIterator it = jcas.getAnnotationIndex(SourceDocumentInformation.type).iterator();
  File outFile = null;
  if (it.hasNext()) {
    SourceDocumentInformation fileLoc = (SourceDocumentInformation) it.next();
    File inFile;
    try {
      inFile = new File(new URL(fileLoc.getUri()).getPath());
      String outFileName = inFile.getName();
      if (fileLoc.getOffsetInSource() > 0) {
        outFileName += ("_" + fileLoc.getOffsetInSource());
      }
      outFileName += ".xmi";
      outFile = new File(mOutputDir, outFileName);
      modelFileName = mOutputDir.getAbsolutePath() + "/" + inFile.getName() + ".ecore";
    } catch (MalformedURLException e1) {
      // invalid URL, use default processing below
    }
  }
  if (outFile == null) {
    outFile = new File(mOutputDir, "doc" + mDocNum++ + ".xmi");
  }
  // serialize the CAS as XMI and write it to the output file
  try {
    writeXmi(jcas.getCas(), outFile, modelFileName);
  } catch (IOException e) {
    throw new ResourceProcessException(e);
  } catch (SAXException e) {
    throw new ResourceProcessException(e);
  }
}