private static boolean retainTerm(LoomTerm term) { if (termRetainer.isEmpty()) { // There are no term labels to compare. return termRetainer.add(term); } LoomTerm retainedTerm = termRetainer.get(0); if (retainedTerm.matchLabel(term)) { return termRetainer.add(term); } return false; }
/** * Construct an ontology {@link LoomTerm}, by reading a line of text that contains: label type * ontologyID virtualOntologyID. * * @throws IOException */ private static LoomTerm readTerm() throws IOException { LoomTerm iTerm = null; String iLine = iReader.readLine(); while (iLine != null) { log.trace("iLine = " + iLine); // Skip blank or comment lines if (iLine.length() == 0 || iLine.startsWith("#")) { iLine = iReader.readLine(); continue; } iTerm = new LoomTerm(); iTerm.parseTerm(iLine, delimiter); break; } return iTerm; }
/** * @param args args[0] may contain a file path for input, which can be used when calling this * class from a shell script. */ public static void main(String[] args) { try { // Configure input file(s). if (args.length > 0) { iFileName = args[0]; } createInputStream(iFileName); // Configure output file(s). if (args.length > 1) { oFileName = args[1]; } if (oFileName != null) { // Create the first of N output files. createOutputStream(oFileName); } log.trace("iFileName = " + iFileName); // Populate an array of term labels to be ignored. These are used in // processMatchingTerms(). skipLabels.add("none"); skipLabels.add("other"); skipLabels.add("unidentified"); skipLabels.add("unknown"); skipLabels.add("unspecified"); // The label 'all' is often poorly used in ontologies, so // it's vague and often generates poor matches in LOOM. skipLabels.add("all"); // The label 'chair' has been found to match 'chairman' // or 'chairperson' erroneously. skipLabels.add("chair"); // Skip matches between 'cell' and 'cellulartelephone' cellphone[0] = "cellulartelephone"; cellphone[1] = "cell"; /* * Process sorted terms. The algorithm reads N consecutive lines * that have matching labels and reports all their matching terms. */ boolean retain = true; while (retain) { LoomTerm iTerm = readTerm(); if (iTerm == null) { // At the EOF for the terms to be processed. break; } log.trace("iTerm = " + iTerm.toString()); // Retain this term if it's label matches a retained term. retain = retainTerm(iTerm); if (!retain) { // This iTerm doesn't match any retained terms, so // process and clear the terms already retained. processMatchingTerms(); // Start a new retention set. retain = retainTerm(iTerm); } log.trace("retain = " + retain); } // Process any remaining set of retained terms (at EOF). processMatchingTerms(); } catch (Exception e) { log.fatal(e.toString()); e.printStackTrace(); System.exit(1); } }
private static void processMatchingTerms() throws Exception { // Report all the matches between terms. LoomTerm term1 = null; LoomTerm term2 = null; while (!termRetainer.isEmpty()) { term1 = termRetainer.remove(0); Iterator<LoomTerm> itr = termRetainer.iterator(); while (itr.hasNext()) { term2 = itr.next(); // Don't report any matches within an ontology. if (term1.matchOntID(term2)) { log.trace("Skipping terms, same ontology:"); log.trace("... {}", term1.toString()); log.trace("... {}", term2.toString()); continue; } // Detect any exact match for term URI. if (term1.matchURL(term2)) { // This is handled by the UriMatchMappings process log.trace("Skipping terms, same URL:"); log.trace("... {}", term1.toString()); log.trace("... {}", term2.toString()); continue; } // Don't report matches between synonyms. if (term1.isSkosAltLabel() && term2.isSkosAltLabel()) { log.trace("Skipping terms, both altLabel:"); log.trace("... {}", term1.toString()); log.trace("... {}", term2.toString()); continue; } // Ignore some term labels. if (skipLabels(term1, term2) || skipCellPhone(term1, term2)) { log.trace("Skipping terms, a label is problematic:"); log.trace("... {}", term1.toString()); log.trace("... {}", term2.toString()); continue; } createLoomMappings(term1, term2); } } // Should not be necessary, but just to be sure! termRetainer.clear(); // Do we need a new output file? if (oFileName != null && oMapCount > oMapCountMax) { createOutputStream(oFileName); oMapCount = 0; } }