/**
 * Registers a suffix pattern, with an optional trailing plural "s", under {@code key}.
 *
 * <p>The value stored in {@code options} for {@code key} is spliced verbatim into the regular
 * expression as the suffix group, preceded by a word-character prefix of at least
 * {@code minPrefixLength} characters. Nothing is registered when the option is absent.
 *
 * @param key name of the option to read, and the key the compiled pattern is stored under
 * @param options option values keyed by option name
 */
private void addSuffixPluralPattern(String key, HashMap<String, String> options) {
  final String suffixes = options.get(key);
  if (suffixes == null) {
    // No such option: leave patternMap untouched.
    return;
  }

  // Group 1 is the prefix, group 2 the suffix; both are anchored on word boundaries.
  final String regex = "\\b(\\w{" + minPrefixLength + ",})(" + suffixes + ")s?\\b";
  patternMap.put(key, Pattern.compile(regex));
}
/**
 * Registers a whole-word pattern under {@code key}.
 *
 * <p>The value stored in {@code options} for {@code key} is spliced verbatim into the regular
 * expression as a single capturing group delimited by word boundaries. Nothing is registered
 * when the option is absent.
 *
 * @param key name of the option to read, and the key the compiled pattern is stored under
 * @param options option values keyed by option name
 */
private void addWordPattern(String key, HashMap<String, String> options) {
  final String words = options.get(key);
  if (words == null) {
    // No such option: leave patternMap untouched.
    return;
  }

  final String regex = "\\b(" + words + ")\\b";
  patternMap.put(key, Pattern.compile(regex));
}
@Override public void execute() throws ExecutionException { interrupted = false; // quit if setup failed if (gracefulExit) { gracefulExit("Plugin was not initialised correctly. Exiting gracefully ... "); return; } AnnotationSet inputAS = (inputASName == null || inputASName.trim().length() == 0) ? document.getAnnotations() : document.getAnnotations(inputASName); AnnotationSet outputAS = (outputASName == null || outputASName.trim().length() == 0) ? document.getAnnotations() : document.getAnnotations(outputASName); AnnotationSet sentenceAS = null; if (sentenceType != null && !sentenceType.isEmpty()) { sentenceAS = inputAS.get(sentenceType); } // Document content String docContent = document.getContent().toString(); int docLen = docContent.length(); // For matching purposes replace all whitespace characters with a single space docContent = docContent.replaceAll("[\\s\\xA0\\u2007\\u202F]", " "); fireStatusChanged("Locating anatomy, disease and procedure mentions in " + document.getName()); fireProgressChanged(0); if (sentenceAS != null) { for (Annotation sentence : sentenceAS) { Long sentStartOffset = sentence.getStartNode().getOffset(); Long sentEndOffset = sentence.getEndNode().getOffset(); // Converting the sentence to lower case prevents the need to use case-insenstive regex // matching, which should give a small performance boost String sentenceContent = docContent .substring(sentStartOffset.intValue(), sentEndOffset.intValue()) .toLowerCase(Locale.ENGLISH); if (diseaseType != null && !diseaseType.isEmpty()) { doMatch( patternMap.get("disease_suffix"), sentenceContent, inputAS, outputAS, "suffDisease", sentStartOffset, docLen); doMatch( patternMap.get("disease_abbrevs"), sentenceContent, inputAS, outputAS, "preDisease", sentStartOffset, docLen); doMatch( patternMap.get("disease_named_syndrome"), sentenceContent, inputAS, outputAS, "namedDisease", sentStartOffset, docLen); doMatch( patternMap.get("disease_sense"), sentenceContent, inputAS, outputAS, 
"tmpDiseaseSense", sentStartOffset, docLen); doMatch( patternMap.get("disease_sense_context"), sentenceContent, inputAS, outputAS, "tmpDiseaseSenseContext", sentStartOffset, docLen); doMatch( patternMap.get("disease_generic_context"), sentenceContent, inputAS, outputAS, "poDisease", sentStartOffset, docLen); doMatch( patternMap.get("disease_anatomy_context"), sentenceContent, inputAS, outputAS, "tmpDisease", sentStartOffset, docLen); } if (procedureType != null && !procedureType.isEmpty()) { doMatch( patternMap.get("procedure_suffix"), sentenceContent, inputAS, outputAS, "poProcedure", sentStartOffset, docLen); doMatch( patternMap.get("procedure_key"), sentenceContent, inputAS, outputAS, "poProcedure", sentStartOffset, docLen); doMatch( patternMap.get("procedure_anatomy_context"), sentenceContent, inputAS, outputAS, "tmpProcedure", sentStartOffset, docLen); } if (symptomType != null && !symptomType.isEmpty()) { doMatch( patternMap.get("symptom_key"), sentenceContent, inputAS, outputAS, "poSymptom", sentStartOffset, docLen); } if (testType != null && !testType.isEmpty()) { doMatch( patternMap.get("test_key"), sentenceContent, inputAS, outputAS, "poTest", sentStartOffset, docLen); } if (anatomyType != null && !anatomyType.isEmpty()) { doMatch( patternMap.get("anatomy_suffix_adjective"), sentenceContent, inputAS, outputAS, "tmpAnatSuffAdj", sentStartOffset, docLen); doMatch( patternMap.get("anatomy_suffix"), sentenceContent, inputAS, outputAS, "tmpAnatSuff", sentStartOffset, docLen); doMatch( patternMap.get("anatomy_prefix"), sentenceContent, inputAS, outputAS, "tmpAnatPre", sentStartOffset, docLen); doMatch( patternMap.get("anatomy_position"), sentenceContent, inputAS, outputAS, "tmpAnatPos", sentStartOffset, docLen); doMatch( patternMap.get("anatomy_space_region_junction"), sentenceContent, inputAS, outputAS, "tmpAnatSpace", sentStartOffset, docLen); doMatch( patternMap.get("anatomy_part_adjective"), sentenceContent, inputAS, outputAS, "tmpAnatAdj", sentStartOffset, 
docLen); doMatch( patternMap.get("anatomy_latin_noun"), sentenceContent, inputAS, outputAS, "tmpAnatLatin", sentStartOffset, docLen); doMatch( patternMap.get("anatomy_muscle"), sentenceContent, inputAS, outputAS, "tmpAnatMuscle", sentStartOffset, docLen); doMatch( patternMap.get("anatomy_part"), sentenceContent, inputAS, outputAS, "tmpAnatPart", sentStartOffset, docLen); doMatch( patternMap.get("anatomy_fluid"), sentenceContent, inputAS, outputAS, "tmpAnatFluid", sentStartOffset, docLen); } } // Run JAPE transducer to clean up the output fireStatusChanged( "Processing anatomical, disease and procedure mentions in " + document.getName()); try { japeTransducer.setDocument(document); japeTransducer.setInputASName(inputASName); japeTransducer.setOutputASName(outputASName); japeTransducer.addProgressListener(this); japeTransducer.execute(); } catch (ExecutionException re) { gate.util.Err.println("Unable to run " + japeURL); gracefulExit = true; } finally { japeTransducer.setDocument(null); } // rename temporary annotations if (!debug) { renameAnnotations(outputAS, "tmpAnatomicalTerm", anatomyType); renameAnnotations(outputAS, "suffDisease", diseaseType); renameAnnotations(outputAS, "poDisease", diseaseType); renameAnnotations(outputAS, "preDisease", diseaseType); renameAnnotations(outputAS, "poProcedure", procedureType); renameAnnotations(outputAS, "poSymptom", symptomType); renameAnnotations(outputAS, "poTest", testType); } } else { gracefulExit("No sentences to process!"); } // want list of disease key words plus symptoms such as oedema? or just diseases fireProcessFinished(); } // end execute()