private void runAnalysis( String sourceOntologyFile, String sourceOntologyName, String targetOntologyFile, String targetOntologyName, String referenceAlignmentFile) { LOG.info("Loading ontology " + sourceOntologyFile); // load source ontology Ontology sourceOntology = null; try { OntologyDefinition sourceOntDef = new OntologyDefinition( true, sourceOntologyFile, OntologyLanguage.OWL, OntologySyntax.RDFXML); OntoTreeBuilder sourceBuilder = new OntoTreeBuilder(sourceOntDef); sourceBuilder.build(); sourceOntology = sourceBuilder.getOntology(); } catch (Exception e) { e.printStackTrace(); } LOG.info("Loading ontology " + targetOntologyFile); // load target ontology Ontology targetOntology = null; try { OntologyDefinition targetOntDef = new OntologyDefinition( true, targetOntologyFile, OntologyLanguage.OWL, OntologySyntax.RDFXML); OntoTreeBuilder targetBuilder = new OntoTreeBuilder(targetOntDef); targetBuilder.build(); targetOntology = targetBuilder.getOntology(); } catch (Exception e) { e.printStackTrace(); } // set the settings for the matcher matcherToAnalyze.setSourceOntology(sourceOntology); matcherToAnalyze.setTargetOntology(targetOntology); matcherToAnalyze.setPerformSelection(false); if (matcherToAnalyze.needsParam()) matcherToAnalyze.setParam(prefParams); matcherToAnalyze.setMaxSourceAlign(prefSourceCardinality); matcherToAnalyze.setMaxTargetAlign(prefTargetCardinality); try { matcherToAnalyze.match(); } catch (Exception e) { LOG.error("Analysis aborted.", e); return; } // load the reference file ReferenceAlignmentParameters refParam = new ReferenceAlignmentParameters(); refParam.onlyEquivalence = true; refParam.fileName = referenceAlignmentFile; refParam.format = ReferenceAlignmentMatcher.OAEI; AbstractMatcher referenceAlignmentMatcher; try { referenceAlignmentMatcher = MatcherFactory.getMatcherInstance(ReferenceAlignmentMatcher.class); } catch (MatcherNotFoundException e1) { LOG.error("Analysis aborted.", e1); return; } referenceAlignmentMatcher.setParam(refParam); referenceAlignmentMatcher.setSourceOntology(sourceOntology); referenceAlignmentMatcher.setTargetOntology(targetOntology); try { referenceAlignmentMatcher.match(); } catch (Exception e) { LOG.error("Analysis aborted.", e); return; } // open the output files File outputPrecision = new File( outputDirectory + "/" + outputPrefix + "-" + sourceOntologyName + "-" + targetOntologyName + "-precision.txt"); File outputRecall = new File( outputDirectory + "/" + outputPrefix + "-" + sourceOntologyName + "-" + targetOntologyName + "-recall.txt"); File outputFMeasure = new File( outputDirectory + "/" + outputPrefix + "-" + sourceOntologyName + "-" + targetOntologyName + "-fmeasure.txt"); File outputMaxFM = new File( outputDirectory + "/" + outputPrefix + "-" + sourceOntologyName + "-" + targetOntologyName + "-max-fmeasure.txt"); try { BufferedWriter writerPrecision = new BufferedWriter(new FileWriter(outputPrecision)); BufferedWriter writerRecall = new BufferedWriter(new FileWriter(outputRecall)); BufferedWriter writerFMeasure = new BufferedWriter(new FileWriter(outputFMeasure)); BufferedWriter writerMaxFM = new BufferedWriter(new FileWriter(outputMaxFM)); // ok, we ran the matcher, now do the threshold analysis double maxFMeasure = 0.0; double maxFMTh = 0.0; for (double currentThreshold = prefStartThreshold; currentThreshold < prefEndThreshold; currentThreshold += prefThresholdIncrement) { currentThreshold = Utility.roundDouble(currentThreshold, 4); LOG.info("Selecting with threshold = " + currentThreshold); matcherToAnalyze.getParam().threshold = currentThreshold; matcherToAnalyze.select(); ReferenceEvaluationData currentEvaluation = ReferenceEvaluator.compare( matcherToAnalyze.getAlignment(), referenceAlignmentMatcher.getAlignment()); writerPrecision.write( currentThreshold + "," + Utility.roundDouble(currentEvaluation.getPrecision(), 2) + "\n"); writerRecall.write( currentThreshold + "," + Utility.roundDouble(currentEvaluation.getRecall(), 2) + "\n"); writerFMeasure.write( currentThreshold + "," + Utility.roundDouble(currentEvaluation.getFmeasure(), 2) + "\n"); LOG.info( "Results: (precision, recall, f-measure) = (" + Utility.roundDouble(currentEvaluation.getPrecision() * 100.0d, 2) + ", " + Utility.roundDouble(currentEvaluation.getRecall() * 100.0d, 2) + ", " + Utility.roundDouble(currentEvaluation.getFmeasure() * 100.0d, 2) + ")"); LOG.info( " : (found mappings, correct mappings, reference mappings) = (" + currentEvaluation.getFound() + ", " + currentEvaluation.getCorrect() + ", " + currentEvaluation.getExist() + ")"); if (maxFMeasure < currentEvaluation.getFmeasure()) { maxFMeasure = currentEvaluation.getFmeasure(); maxFMTh = Utility.roundDouble(currentThreshold * 100f, 4); } } writerMaxFM.write( Utility.roundDouble(maxFMTh, 2) + ", " + Utility.roundDouble(maxFMeasure, 2)); writerPrecision.close(); writerRecall.close(); writerFMeasure.close(); writerMaxFM.close(); } catch (IOException e) { // cannot create files e.printStackTrace(); return; } // analysis done }
/** * TODO: Check to see if this method could be merged with the work done in the {@link * ThresholdAnalysis#runAnalysis()} method. * * @param reference it can be either an Alignment<Mapping> or a List<MatchingPair> */ public static ThresholdAnalysisData thresholdAnalysis( AbstractMatcher toBeEvaluated, Object reference, double[] thresholds, boolean removeDuplicates) { Alignment<Mapping> referenceSet = null; List<MatchingPair> referencePairs = null; if (reference instanceof Alignment) referenceSet = (Alignment<Mapping>) reference; else if (reference instanceof List) referencePairs = (List<MatchingPair>) reference; else return null; double step = 0.05; if (thresholds == null) thresholds = Utility.getDoubleArray(0.0d, 0.01d, 101); ReferenceEvaluationData maxrd = null; ReferenceEvaluationData rd; Alignment<Mapping> evaluateSet; double maxTh = step; double sumPrecision = 0; double sumRecall = 0; double sumFmeasure = 0; int sumFound = 0; int sumCorrect = 0; String matcherName = toBeEvaluated.getName(); ThresholdAnalysisData tad = new ThresholdAnalysisData(thresholds); tad.setMatcherName(matcherName); String report = matcherName + "\n\n"; double th; report += "Threshold:\tFound\tCorrect\tReference\tPrecision\tRecall\tF-Measure\n"; // output the info to the console for easy copy/pasting // System.out.println("Threshold, " + // "Precision, " + // "Recall, " + // "F-Measure" ); for (int t = 0; t < thresholds.length; t++) { th = thresholds[t]; toBeEvaluated.getParam().threshold = th; toBeEvaluated.select(); evaluateSet = toBeEvaluated.getAlignment(); if (referenceSet != null) rd = ReferenceEvaluator.compare(evaluateSet, referenceSet); else { Alignment<Mapping> alignment = toBeEvaluated.getAlignment(); List<MatchingPair> pairs = AlignmentUtilities.alignmentToMatchingPairs(alignment); AlignmentUtilities.removeDuplicates(referencePairs); AlignmentUtilities.removeDuplicates(pairs); rd = AlignmentUtilities.compare(pairs, referencePairs); tad.addEvaluationData(rd); } report += Utility.getNoDecimalPercentFromDouble(th) + "\t" + rd.getMeasuresLine(); sumPrecision += rd.getPrecision(); sumRecall += rd.getRecall(); sumFmeasure += rd.getFmeasure(); sumFound += rd.getFound(); sumCorrect += rd.getCorrect(); // output this information to the console for easy copy/pasting // TODO: make a button to be // able to copy/paste this info // System.out.println(Double.toString(th) + ", " + // Double.toString(rd.getPrecision()) + ", " + // Double.toString(rd.getRecall()) + ", " + // Double.toString(rd.getFmeasure()) ); if (maxrd == null || maxrd.getFmeasure() < rd.getFmeasure()) { maxrd = rd; maxTh = th; } } toBeEvaluated.getParam().threshold = maxTh; toBeEvaluated.select(); toBeEvaluated.setRefEvaluation(maxrd); report += "Best Run:\n"; report += Utility.getNoDecimalPercentFromDouble(maxTh) + "\t" + maxrd.getMeasuresLine(); sumPrecision /= thresholds.length; sumRecall /= thresholds.length; sumFmeasure /= thresholds.length; sumFound /= thresholds.length; sumCorrect /= thresholds.length; report += "Average:\t" + sumFound + "\t" + sumCorrect + "\t" + maxrd.getExist() + "\t" + Utility.getOneDecimalPercentFromDouble(sumPrecision) + "\t" + Utility.getOneDecimalPercentFromDouble(sumRecall) + "\t" + Utility.getOneDecimalPercentFromDouble(sumFmeasure) + "\n\n"; tad.setReport(report); return tad; }
/** This is a single matcher mode. The matcher should have been already executed. */ private void runSingleAnalysis() { // load the reference file ReferenceAlignmentParameters refParam = new ReferenceAlignmentParameters(); refParam.onlyEquivalence = true; refParam.fileName = singleRunReferenceAlignment; refParam.format = ReferenceAlignmentMatcher.OAEI; AbstractMatcher referenceAlignmentMatcher; try { referenceAlignmentMatcher = MatcherFactory.getMatcherInstance(ReferenceAlignmentMatcher.class); } catch (MatcherNotFoundException e1) { LOG.error("Analysis aborted.", e1); return; } referenceAlignmentMatcher.setParam(refParam); try { referenceAlignmentMatcher.match(); } catch (Exception e) { LOG.error("Analysis aborted.", e); return; } String sourceOntologyName = Core.getInstance().getSourceOntology().getTitle(); String targetOntologyName = Core.getInstance().getTargetOntology().getTitle(); File outputPrecision, outputRecall, outputFMeasure, outputMaxFM; // open the output files if (prefFilenameOntologyNames) { outputPrecision = new File( outputDirectory + "/" + outputPrefix + "-" + sourceOntologyName + "-" + targetOntologyName + "-precision.txt"); outputRecall = new File( outputDirectory + "/" + outputPrefix + "-" + sourceOntologyName + "-" + targetOntologyName + "-recall.txt"); outputFMeasure = new File( outputDirectory + "/" + outputPrefix + "-" + sourceOntologyName + "-" + targetOntologyName + "-fmeasure.txt"); outputMaxFM = new File( outputDirectory + "/" + outputPrefix + "-" + sourceOntologyName + "-" + targetOntologyName + "-max-fmeasure.txt"); } else { outputPrecision = new File(outputDirectory + "/" + outputPrefix + "-" + "-precision.txt"); outputRecall = new File(outputDirectory + "/" + outputPrefix + "-" + "-recall.txt"); outputFMeasure = new File(outputDirectory + "/" + outputPrefix + "-" + "-fmeasure.txt"); outputMaxFM = new File(outputDirectory + "/" + outputPrefix + "-" + "-max-fmeasure.txt"); } try { BufferedWriter writerPrecision = new BufferedWriter(new FileWriter(outputPrecision)); BufferedWriter writerRecall = new BufferedWriter(new FileWriter(outputRecall)); BufferedWriter writerFMeasure = new BufferedWriter(new FileWriter(outputFMeasure)); BufferedWriter writerMaxFM = new BufferedWriter(new FileWriter(outputMaxFM)); // ok, we ran the matcher, now do the threshold analysis double maxFMeasure = 0.0; double maxFMTh = 0.0; for (double currentThreshold = prefStartThreshold; currentThreshold < prefEndThreshold; currentThreshold += prefThresholdIncrement) { currentThreshold = Utility.roundDouble(currentThreshold, 4); LOG.info("Selecting with threshold = " + currentThreshold); matcherToAnalyze.getParam().threshold = currentThreshold; matcherToAnalyze.select(); ReferenceEvaluationData currentEvaluation = ReferenceEvaluator.compare( matcherToAnalyze.getAlignment(), referenceAlignmentMatcher.getAlignment()); double th = Utility.roundDouble(currentThreshold * 100f, 4); writerPrecision.write( th + "," + Utility.roundDouble(currentEvaluation.getPrecision() * 100.0d, 2) + "\n"); writerRecall.write( th + "," + Utility.roundDouble(currentEvaluation.getRecall() * 100.0d, 2) + "\n"); writerFMeasure.write( th + "," + Utility.roundDouble(currentEvaluation.getFmeasure() * 100.0d, 2) + "\n"); LOG.info( "Results: (precision, recall, f-measure) = (" + Utility.roundDouble(currentEvaluation.getPrecision() * 100.0d, 2) + ", " + Utility.roundDouble(currentEvaluation.getRecall() * 100.0d, 2) + ", " + Utility.roundDouble(currentEvaluation.getFmeasure() * 100.0d, 2) + ")"); LOG.info( " : (found mappings, correct mappings, reference mappings) = (" + currentEvaluation.getFound() + ", " + currentEvaluation.getCorrect() + ", " + currentEvaluation.getExist() + ")"); writerPrecision.flush(); writerRecall.flush(); writerFMeasure.flush(); if (maxFMeasure < currentEvaluation.getFmeasure()) { maxFMeasure = currentEvaluation.getFmeasure(); maxFMTh = Utility.roundDouble(currentThreshold * 100f, 4); } } writerMaxFM.write(maxFMTh + ", " + Utility.roundDouble(maxFMeasure * 100.0d, 2)); writerPrecision.close(); writerRecall.close(); writerFMeasure.close(); writerMaxFM.close(); } catch (IOException e) { // cannot create files e.printStackTrace(); return; } }