@Override public void execute() throws ExecutionException { interrupted = false; // quit if setup failed if (gracefulExit) { gracefulExit("Plugin was not initialised correctly. Exiting gracefully ... "); return; } AnnotationSet inputAS = (inputASName == null || inputASName.trim().length() == 0) ? document.getAnnotations() : document.getAnnotations(inputASName); AnnotationSet outputAS = (outputASName == null || outputASName.trim().length() == 0) ? document.getAnnotations() : document.getAnnotations(outputASName); AnnotationSet sentenceAS = null; if (sentenceType != null && !sentenceType.isEmpty()) { sentenceAS = inputAS.get(sentenceType); } // Document content String docContent = document.getContent().toString(); int docLen = docContent.length(); // For matching purposes replace all whitespace characters with a single space docContent = docContent.replaceAll("[\\s\\xA0\\u2007\\u202F]", " "); fireStatusChanged("Locating anatomy, disease and procedure mentions in " + document.getName()); fireProgressChanged(0); if (sentenceAS != null) { for (Annotation sentence : sentenceAS) { Long sentStartOffset = sentence.getStartNode().getOffset(); Long sentEndOffset = sentence.getEndNode().getOffset(); // Converting the sentence to lower case prevents the need to use case-insenstive regex // matching, which should give a small performance boost String sentenceContent = docContent .substring(sentStartOffset.intValue(), sentEndOffset.intValue()) .toLowerCase(Locale.ENGLISH); if (diseaseType != null && !diseaseType.isEmpty()) { doMatch( patternMap.get("disease_suffix"), sentenceContent, inputAS, outputAS, "suffDisease", sentStartOffset, docLen); doMatch( patternMap.get("disease_abbrevs"), sentenceContent, inputAS, outputAS, "preDisease", sentStartOffset, docLen); doMatch( patternMap.get("disease_named_syndrome"), sentenceContent, inputAS, outputAS, "namedDisease", sentStartOffset, docLen); doMatch( patternMap.get("disease_sense"), sentenceContent, inputAS, outputAS, 
"tmpDiseaseSense", sentStartOffset, docLen); doMatch( patternMap.get("disease_sense_context"), sentenceContent, inputAS, outputAS, "tmpDiseaseSenseContext", sentStartOffset, docLen); doMatch( patternMap.get("disease_generic_context"), sentenceContent, inputAS, outputAS, "poDisease", sentStartOffset, docLen); doMatch( patternMap.get("disease_anatomy_context"), sentenceContent, inputAS, outputAS, "tmpDisease", sentStartOffset, docLen); } if (procedureType != null && !procedureType.isEmpty()) { doMatch( patternMap.get("procedure_suffix"), sentenceContent, inputAS, outputAS, "poProcedure", sentStartOffset, docLen); doMatch( patternMap.get("procedure_key"), sentenceContent, inputAS, outputAS, "poProcedure", sentStartOffset, docLen); doMatch( patternMap.get("procedure_anatomy_context"), sentenceContent, inputAS, outputAS, "tmpProcedure", sentStartOffset, docLen); } if (symptomType != null && !symptomType.isEmpty()) { doMatch( patternMap.get("symptom_key"), sentenceContent, inputAS, outputAS, "poSymptom", sentStartOffset, docLen); } if (testType != null && !testType.isEmpty()) { doMatch( patternMap.get("test_key"), sentenceContent, inputAS, outputAS, "poTest", sentStartOffset, docLen); } if (anatomyType != null && !anatomyType.isEmpty()) { doMatch( patternMap.get("anatomy_suffix_adjective"), sentenceContent, inputAS, outputAS, "tmpAnatSuffAdj", sentStartOffset, docLen); doMatch( patternMap.get("anatomy_suffix"), sentenceContent, inputAS, outputAS, "tmpAnatSuff", sentStartOffset, docLen); doMatch( patternMap.get("anatomy_prefix"), sentenceContent, inputAS, outputAS, "tmpAnatPre", sentStartOffset, docLen); doMatch( patternMap.get("anatomy_position"), sentenceContent, inputAS, outputAS, "tmpAnatPos", sentStartOffset, docLen); doMatch( patternMap.get("anatomy_space_region_junction"), sentenceContent, inputAS, outputAS, "tmpAnatSpace", sentStartOffset, docLen); doMatch( patternMap.get("anatomy_part_adjective"), sentenceContent, inputAS, outputAS, "tmpAnatAdj", sentStartOffset, 
docLen); doMatch( patternMap.get("anatomy_latin_noun"), sentenceContent, inputAS, outputAS, "tmpAnatLatin", sentStartOffset, docLen); doMatch( patternMap.get("anatomy_muscle"), sentenceContent, inputAS, outputAS, "tmpAnatMuscle", sentStartOffset, docLen); doMatch( patternMap.get("anatomy_part"), sentenceContent, inputAS, outputAS, "tmpAnatPart", sentStartOffset, docLen); doMatch( patternMap.get("anatomy_fluid"), sentenceContent, inputAS, outputAS, "tmpAnatFluid", sentStartOffset, docLen); } } // Run JAPE transducer to clean up the output fireStatusChanged( "Processing anatomical, disease and procedure mentions in " + document.getName()); try { japeTransducer.setDocument(document); japeTransducer.setInputASName(inputASName); japeTransducer.setOutputASName(outputASName); japeTransducer.addProgressListener(this); japeTransducer.execute(); } catch (ExecutionException re) { gate.util.Err.println("Unable to run " + japeURL); gracefulExit = true; } finally { japeTransducer.setDocument(null); } // rename temporary annotations if (!debug) { renameAnnotations(outputAS, "tmpAnatomicalTerm", anatomyType); renameAnnotations(outputAS, "suffDisease", diseaseType); renameAnnotations(outputAS, "poDisease", diseaseType); renameAnnotations(outputAS, "preDisease", diseaseType); renameAnnotations(outputAS, "poProcedure", procedureType); renameAnnotations(outputAS, "poSymptom", symptomType); renameAnnotations(outputAS, "poTest", testType); } } else { gracefulExit("No sentences to process!"); } // want list of disease key words plus symptoms such as oedema? or just diseases fireProcessFinished(); } // end execute()
/**
 * Interrupts this resource and forwards the interrupt to the wrapped JAPE transducer.
 *
 * <p>The transducer is only created inside {@code init()}, so it can still be {@code null} if
 * this method is called before initialisation has run or after it failed part-way. In that
 * case only the superclass interrupt is performed instead of throwing a
 * {@link NullPointerException}.
 */
@Override
public synchronized void interrupt() {
  super.interrupt();
  if (japeTransducer != null) {
    japeTransducer.interrupt();
  }
}
@Override public Resource init() throws ResourceInstantiationException { gracefulExit = false; if (configFileURL == null) { gracefulExit = true; gate.util.Err.println("No configuration file provided!"); } if (japeURL == null) { gracefulExit = true; gate.util.Err.println("No JAPE grammar file provided!"); } // create the init params for the JAPE transducer FeatureMap params = Factory.newFeatureMap(); params.put(Transducer.TRANSD_GRAMMAR_URL_PARAMETER_NAME, japeURL); // Code borrowed from Mark Greenwood's Measurements PR if (japeTransducer == null) { // if this is the first time we are running init then actually create a // new transducer as we don't already have one FeatureMap hidden = Factory.newFeatureMap(); Gate.setHiddenAttribute(hidden, true); japeTransducer = (Transducer) Factory.createResource("gate.creole.Transducer", params, hidden); } else { // we are being run through a call to reInit so simply re-init the // underlying JAPE transducer japeTransducer.setParameterValues(params); japeTransducer.reInit(); } ConfigReader config = new ConfigReader(configFileURL); gracefulExit = config.config(); try { HashMap<String, String> options = config.getOptions(); patternMap = new HashMap<String, Pattern>(); addSuffixPattern("disease_suffix", options); addWordPattern("disease_abbrevs", options); addWordPattern("disease_sense", options); addWordExtraPattern("disease_sense_context", options); addPossessiveWordPattern("disease_named_syndrome", options); addWordExtraPattern("disease_generic_context", options); addWordExtraPattern("disease_anatomy_context", options); addSuffixPluralPattern("procedure_suffix", options); addWordPluralPattern("procedure_key", options); addWordExtraPattern("procedure_anatomy_context", options); addWordPluralPattern("symptom_key", options); addWordPattern("test_key", options); addSuffixPattern("anatomy_suffix_adjective", options); addSuffixPattern("anatomy_suffix", options); addPrefixPattern("anatomy_prefix", options); 
addWordPattern("anatomy_position", options); addWordPluralPattern("anatomy_space_region_junction", options); addWordPattern("anatomy_part_adjective", options); addWordPattern("anatomy_latin_noun", options); addWordPattern("anatomy_muscle", options); addWordPluralPattern("anatomy_part", options); addWordPluralPattern("anatomy_fluid", options); } catch (NullPointerException ne) { gracefulExit = true; gate.util.Err.println( "Missing or unset configuration options. Please check configuration file."); } return this; } // end init()