/**
 * Verifies that the default rule-based heuristics classify every sample delivery-status
 * message under {@code /heuristics/<reason>/} as the {@link Reason} named by its parent
 * directory.
 *
 * @throws Exception if a test resource cannot be resolved or a message cannot be parsed
 */
public void testDefault() throws Exception {
    Heuristics heuristics = RuleBasedHeuristics.getDefaultInstance();
    FileSystemManager fsm = VFS.getManager();
    // ".root" is a marker resource; its parent directory is the root of the sample tree
    FileObject root =
        fsm.resolveFile(
                RuleBasedHeuristicsTest.class.getResource("/heuristics/.root").toExternalForm())
            .getParent();
    for (Reason reason : Reason.values()) {
        // each reason has a directory (lower-case name) holding sample DSN messages
        for (FileObject file : root.resolveFile(reason.toString().toLowerCase()).getChildren()) {
            DeliveryStatus ds;
            // try-with-resources: previously the stream leaked if the DeliveryStatus
            // constructor threw before the explicit close() was reached
            try (InputStream is = file.getContent().getInputStream()) {
                ds = new DeliveryStatus(is);
            }
            assertEquals(
                "Reason for " + file,
                reason,
                heuristics.getReason(ds.getPerRecipientParts()[0].getDiagnostic()));
        }
    }
}
// exact computation for 5 heuristics; each one adapted to super class learning;
// each one takes the noise parameter into account
/**
 * Exactly computes the quality of {@code description} under the configured heuristic.
 *
 * <p>Return convention used throughout this learning problem:
 * <ul>
 *   <li>{@code -1} — "too weak": even the best possible refinement of the description
 *       cannot satisfy the noise threshold;</li>
 *   <li>{@code 0} — the termination time expired before the computation finished
 *       (checked after every instance test);</li>
 *   <li>otherwise — the heuristic value (Jaccard, A-measure, F-measure, predictive
 *       accuracy or generalised F-measure).</li>
 * </ul>
 *
 * @param description the class expression to evaluate
 * @param noise tolerated fraction of uncovered positives, in [0, 1]
 * @return the heuristic value, or -1 (too weak), or 0 (timeout)
 */
public double getAccuracyOrTooWeakExact(Description description, double noise) {
    nanoStartTime = System.nanoTime();
    if (heuristic.equals(HeuristicType.JACCARD)) {
        // computing R(A): class instances covered by the description
        TreeSet<Individual> coveredInstancesSet = new TreeSet<Individual>();
        for (Individual ind : classInstances) {
            if (getReasoner().hasType(description, ind)) {
                coveredInstancesSet.add(ind);
            }
            if (terminationTimeExpired()) {
                return 0;
            }
        }
        // if even the optimal case (no additional instances covered) is not sufficient,
        // the concept is too weak
        if (coveredInstancesSet.size() / (double) classInstances.size() <= 1 - noise) {
            return -1;
        }
        // computing R(C) restricted to relevant instances (the super class instances)
        TreeSet<Individual> additionalInstancesSet = new TreeSet<Individual>();
        for (Individual ind : superClassInstances) {
            if (getReasoner().hasType(description, ind)) {
                additionalInstancesSet.add(ind);
            }
            if (terminationTimeExpired()) {
                return 0;
            }
        }
        Set<Individual> union = Helper.union(classInstancesSet, additionalInstancesSet);
        return Heuristics.getJaccardCoefficient(coveredInstancesSet.size(), union.size());
    } else if (heuristic.equals(HeuristicType.AMEASURE)
            || heuristic.equals(HeuristicType.FMEASURE)
            || heuristic.equals(HeuristicType.PRED_ACC)) {
        // computing R(C) restricted to relevant instances
        int additionalInstances = 0;
        for (Individual ind : superClassInstances) {
            if (getReasoner().hasType(description, ind)) {
                additionalInstances++;
            }
            if (terminationTimeExpired()) {
                return 0;
            }
        }
        // computing R(A)
        int coveredInstances = 0;
        for (Individual ind : classInstances) {
            if (getReasoner().hasType(description, ind)) {
                coveredInstances++;
            }
            if (terminationTimeExpired()) {
                return 0;
            }
        }
        double recall = coveredInstances / (double) classInstances.size();
        // noise computation is incorrect (kept here as a reminder why the naive
        // recall cut-off is NOT applied):
        // if(recall < 1 - noise) {
        //     return -1;
        // }
        double precision =
            (additionalInstances + coveredInstances == 0)
                ? 0
                : coveredInstances / (double) (coveredInstances + additionalInstances);
        if (heuristic.equals(HeuristicType.AMEASURE)) {
            // best reachable concept has same recall and precision 1:
            // 1/(t+1) * (t*r + 1)
            if ((coverageFactor * recall + 1) / (double) (coverageFactor + 1) < (1 - noise)) {
                return -1;
            } else {
                return Heuristics.getAScore(recall, precision, coverageFactor);
            }
        } else if (heuristic.equals(HeuristicType.FMEASURE)) {
            // best reachable concept has same recall and precision 1
            if (((1 + Math.sqrt(coverageFactor)) * recall) / (Math.sqrt(coverageFactor) + 1)
                    < 1 - noise) {
                return -1;
            } else {
                return getFMeasure(recall, precision);
            }
        } else if (heuristic.equals(HeuristicType.PRED_ACC)) {
            // optimistic bound: all super-class instances classified correctly
            if ((coverageFactor * coveredInstances + superClassInstances.size())
                    / (double) (coverageFactor * classInstances.size() + superClassInstances.size())
                    < 1 - noise) {
                return -1;
            } else {
                // correctly classified divided by all examples
                return (coverageFactor * coveredInstances
                        + superClassInstances.size()
                        - additionalInstances)
                    / (double) (coverageFactor * classInstances.size() + superClassInstances.size());
            }
        }
        // return heuristic.equals(HeuristicType.FMEASURE) ? getFMeasure(recall, precision) :
        //     getAccuracy(recall, precision);
    } else if (heuristic.equals(HeuristicType.GEN_FMEASURE)) {
        // implementation is based on:
        // http://sunsite.informatik.rwth-aachen.de/Publications/CEUR-WS/Vol-426/swap2008_submission_14.pdf
        // default negation should be turned off when using fast instance checker
        // compute I_C (negated and non-negated concepts separately)
        TreeSet<Individual> icPos = new TreeSet<Individual>();
        TreeSet<Individual> icNeg = new TreeSet<Individual>();
        Description descriptionNeg = new Negation(description);
        // loop through all relevant instances
        for (Individual ind : classAndSuperClassInstances) {
            if (getReasoner().hasType(description, ind)) {
                icPos.add(ind);
            } else if (getReasoner().hasType(descriptionNeg, ind)) {
                icNeg.add(ind);
            }
            if (terminationTimeExpired()) {
                return 0;
            }
        }
        // semantic precision:
        // first compute I_C \cap Cn(DC)
        // it seems that in our setting, we can ignore Cn, because the examples (class
        // instances) are already part of the background knowledge
        Set<Individual> tmp1Pos = Helper.intersection(icPos, classInstancesSet);
        Set<Individual> tmp1Neg = Helper.intersection(icNeg, negatedClassInstances);
        int tmp1Size = tmp1Pos.size() + tmp1Neg.size();
        // Cn(I_C) \cap D_C is the same set if we ignore Cn ...
        int icSize = icPos.size() + icNeg.size();
        double prec = (icSize == 0) ? 0 : tmp1Size / (double) icSize;
        double rec = tmp1Size / (double) (classInstances.size() + negatedClassInstances.size());
        // too weak: see F-measure above
        // => the F-measure cut-off does not work for generalised F-measure, because even
        // very general concepts do not have a recall of 1;
        // we only return too weak if there is (essentially) no recall
        if (rec <= 0.0000001) {
            return -1;
        }
        return getFMeasure(rec, prec);
    }
    throw new Error("ClassLearningProblem error: not implemented");
}
// instead of using the standard operation, we use optimisation // and approximation here public double getAccuracyOrTooWeakApprox(Description description, double noise) { if (heuristic.equals(HeuristicType.FMEASURE)) { // we abort when there are too many uncovered positives int maxNotCovered = (int) Math.ceil(noise * classInstances.size()); int instancesCovered = 0; int instancesNotCovered = 0; for (Individual ind : classInstances) { if (getReasoner().hasType(description, ind)) { instancesCovered++; } else { instancesNotCovered++; if (instancesNotCovered > maxNotCovered) { return -1; } } } double recall = instancesCovered / (double) classInstances.size(); int testsPerformed = 0; int instancesDescription = 0; for (Individual ind : superClassInstances) { if (getReasoner().hasType(description, ind)) { instancesDescription++; } testsPerformed++; // check whether approximation is sufficiently accurate double[] approx = Heuristics.getFScoreApproximation( instancesCovered, recall, coverageFactor, superClassInstances.size(), testsPerformed, instancesDescription); if (approx[1] < approxDelta) { return approx[0]; } } // standard computation (no approximation) double precision = instancesCovered / (double) (instancesDescription + instancesCovered); // if(instancesCovered + instancesDescription == 0) { // precision = 0; // } return Heuristics.getFScore(recall, precision, coverageFactor); } else if (heuristic.equals(HeuristicType.AMEASURE)) { // the F-MEASURE implementation is now separate (different optimisation // strategy) // we abort when there are too many uncovered positives int maxNotCovered = (int) Math.ceil(noise * classInstances.size()); int instancesCovered = 0; int instancesNotCovered = 0; int total = 0; boolean estimatedA = false; double lowerBorderA = 0; int lowerEstimateA = 0; double upperBorderA = 1; int upperEstimateA = classInstances.size(); for (Individual ind : classInstances) { if (getReasoner().hasType(description, ind)) { instancesCovered++; } else { 
instancesNotCovered++; if (instancesNotCovered > maxNotCovered) { return -1; } } // approximation step (starting after 10 tests) total = instancesCovered + instancesNotCovered; if (total > 10) { // compute confidence interval double p1 = p1(instancesCovered, total); double p2 = p3(p1, total); lowerBorderA = Math.max(0, p1 - p2); upperBorderA = Math.min(1, p1 + p2); double size = upperBorderA - lowerBorderA; // if the interval has a size smaller than 10%, we can be confident if (size < 2 * approxDelta) { // we have to distinguish the cases that the accuracy limit is // below, within, or above the limit and that the mean is below // or above the limit double mean = instancesCovered / (double) total; // we can estimate the best possible concept to reach with downward refinement // by setting precision to 1 and recall = mean stays as it is double optimumEstimate = heuristic.equals(HeuristicType.FMEASURE) ? ((1 + Math.sqrt(coverageFactor)) * mean) / (Math.sqrt(coverageFactor) + 1) : (coverageFactor * mean + 1) / (double) (coverageFactor + 1); // if the mean is greater than the required minimum, we can accept; // we also accept if the interval is small and close to the minimum // (worst case is to accept a few inaccurate descriptions) if (optimumEstimate > 1 - noise - 0.03) { // || (upperBorderA > mean && size < 0.03)) { instancesCovered = (int) (instancesCovered / (double) total * classInstances.size()); upperEstimateA = (int) (upperBorderA * classInstances.size()); lowerEstimateA = (int) (lowerBorderA * classInstances.size()); estimatedA = true; break; } // reject only if the upper border is far away (we are very // certain not to lose a potential solution) // if(upperBorderA + 0.1 < 1-noise) { double optimumEstimateUpperBorder = heuristic.equals(HeuristicType.FMEASURE) ? 
((1 + Math.sqrt(coverageFactor)) * (upperBorderA + 0.1)) / (Math.sqrt(coverageFactor) + 1) : (coverageFactor * (upperBorderA + 0.1) + 1) / (double) (coverageFactor + 1); if (optimumEstimateUpperBorder < 1 - noise) { return -1; } } } } double recall = instancesCovered / (double) classInstances.size(); // MonitorFactory.add("estimatedA","count", estimatedA ? 1 : 0); // MonitorFactory.add("aInstances","count", total); // we know that a definition candidate is always subclass of the // intersection of all super classes, so we test only the relevant instances // (leads to undesired effects for descriptions not following this rule, // but improves performance a lot); // for learning a superclass of a defined class, similar observations apply; int testsPerformed = 0; int instancesDescription = 0; // boolean estimatedB = false; for (Individual ind : superClassInstances) { if (getReasoner().hasType(description, ind)) { instancesDescription++; } testsPerformed++; if (testsPerformed > 10) { // compute confidence interval double p1 = p1(instancesDescription, testsPerformed); double p2 = p3(p1, testsPerformed); double lowerBorder = Math.max(0, p1 - p2); double upperBorder = Math.min(1, p1 + p2); int lowerEstimate = (int) (lowerBorder * superClassInstances.size()); int upperEstimate = (int) (upperBorder * superClassInstances.size()); double size; if (estimatedA) { // size = 1/(coverageFactor+1) * (coverageFactor * (upperBorderA-lowerBorderA) + // Math.sqrt(upperEstimateA/(upperEstimateA+lowerEstimate)) + // Math.sqrt(lowerEstimateA/(lowerEstimateA+upperEstimate))); size = heuristic.equals(HeuristicType.FMEASURE) ? 
getFMeasure( upperBorderA, upperEstimateA / (double) (upperEstimateA + lowerEstimate)) - getFMeasure( lowerBorderA, lowerEstimateA / (double) (lowerEstimateA + upperEstimate)) : Heuristics.getAScore( upperBorderA, upperEstimateA / (double) (upperEstimateA + lowerEstimate), coverageFactor) - Heuristics.getAScore( lowerBorderA, lowerEstimateA / (double) (lowerEstimateA + upperEstimate), coverageFactor); } else { // size = 1/(coverageFactor+1) * (coverageFactor * coverage + // Math.sqrt(instancesCovered/(instancesCovered+lowerEstimate)) + // Math.sqrt(instancesCovered/(instancesCovered+upperEstimate))); size = heuristic.equals(HeuristicType.FMEASURE) ? getFMeasure( recall, instancesCovered / (double) (instancesCovered + lowerEstimate)) - getFMeasure( recall, instancesCovered / (double) (instancesCovered + upperEstimate)) : Heuristics.getAScore( recall, instancesCovered / (double) (instancesCovered + lowerEstimate), coverageFactor) - Heuristics.getAScore( recall, instancesCovered / (double) (instancesCovered + upperEstimate), coverageFactor); } if (size < 0.1) { // System.out.println(instancesDescription + " of " + testsPerformed); // System.out.println("interval from " + lowerEstimate + " to " + upperEstimate); // System.out.println("size: " + size); // estimatedB = true; // calculate total number of instances instancesDescription = (int) (instancesDescription / (double) testsPerformed * superClassInstances.size()); break; } } } // since we measured/estimated accuracy only on instances outside A (superClassInstances // does not include instances of A), we need to add it in the denominator double precision = instancesCovered / (double) (instancesDescription + instancesCovered); if (instancesCovered + instancesDescription == 0) { precision = 0; } return heuristic.equals(HeuristicType.FMEASURE) ? 
getFMeasure(recall, precision) : Heuristics.getAScore(recall, precision, coverageFactor); } else if (heuristic.equals(HeuristicType.FMEASURE)) { int maxNotCovered = (int) Math.ceil(noise * classInstances.size()); int notCoveredPos = 0; // int notCoveredNeg = 0; int posClassifiedAsPos = 0; int negClassifiedAsNeg = 0; int nrOfPosChecks = 0; int nrOfNegChecks = 0; // special case: we test positive and negative examples in turn Iterator<Individual> itPos = classInstances.iterator(); Iterator<Individual> itNeg = superClassInstances.iterator(); do { // in each loop we pick 0 or 1 positives and 0 or 1 negative // and classify it if (itPos.hasNext()) { Individual posExample = itPos.next(); // System.out.println(posExample); if (getReasoner().hasType(description, posExample)) { posClassifiedAsPos++; } else { notCoveredPos++; } nrOfPosChecks++; // take noise into account if (notCoveredPos > maxNotCovered) { return -1; } } if (itNeg.hasNext()) { Individual negExample = itNeg.next(); if (!getReasoner().hasType(description, negExample)) { negClassifiedAsNeg++; } nrOfNegChecks++; } // compute how accurate our current approximation is and return if it is sufficiently // accurate double approx[] = Heuristics.getPredAccApproximation( classInstances.size(), superClassInstances.size(), 1, nrOfPosChecks, posClassifiedAsPos, nrOfNegChecks, negClassifiedAsNeg); if (approx[1] < approxDelta) { // System.out.println(approx[0]); return approx[0]; } } while (itPos.hasNext() || itNeg.hasNext()); double ret = Heuristics.getPredictiveAccuracy( classInstances.size(), superClassInstances.size(), posClassifiedAsPos, negClassifiedAsNeg, 1); return ret; } else { throw new Error("Approximation for " + heuristic + " not implemented."); } }
/**
 * Computes a full {@link ClassScore} (covered/uncovered/additional instance sets, recall,
 * precision, accuracy and — if enabled — consistency information) for the description.
 *
 * @param description the class expression to score
 * @return the score object; includes consistency/entailment flags when
 *     {@code checkConsistency} is set
 */
@Override
public ClassScore computeScore(Description description) {
    // TODO: reuse code to ensure that we never return inconsistent results
    // between getAccuracy, getAccuracyOrTooWeak and computeScore
    // overhang: super class instances wrongly covered by the description
    Set<Individual> additionalInstances = new TreeSet<Individual>();
    for (Individual ind : superClassInstances) {
        if (getReasoner().hasType(description, ind)) {
            additionalInstances.add(ind);
        }
    }
    // coverage: class instances covered by the description
    Set<Individual> coveredInstances = new TreeSet<Individual>();
    for (Individual ind : classInstances) {
        if (getReasoner().hasType(description, ind)) {
            coveredInstances.add(ind);
        }
    }
    double recall = coveredInstances.size() / (double) classInstances.size();
    double precision =
        (additionalInstances.size() + coveredInstances.size() == 0)
            ? 0
            : coveredInstances.size()
                / (double) (coveredInstances.size() + additionalInstances.size());
    double acc = 0;
    if (heuristic.equals(HeuristicType.FMEASURE)) {
        acc = getFMeasure(recall, precision);
    } else if (heuristic.equals(HeuristicType.AMEASURE)) {
        acc = Heuristics.getAScore(recall, precision, coverageFactor);
    } else {
        // TODO: some superfluous instance checks are required to compute accuracy =>
        // move accuracy computation here if possible
        // NOTE(review): noise is hard-coded to 1 here, so the "too weak" (-1) result can
        // never trigger — confirm this is intentional for score computation
        acc = getAccuracyOrTooWeakExact(description, 1);
    }
    // for each description with less than 100% coverage, we check whether it
    // leads to an inconsistent knowledge base
    if (checkConsistency) {
        // we check whether the axiom already follows from the knowledge base
        boolean followsFromKB = followsFromKB(description);
        // workaround due to a bug (see
        // http://sourceforge.net/tracker/?func=detail&aid=2866610&group_id=203619&atid=986319):
        // if the axiom follows, then the knowledge base remains consistent
        boolean isConsistent = followsFromKB || isConsistent(description);
        return new ClassScore(
            coveredInstances,
            Helper.difference(classInstancesSet, coveredInstances),
            recall,
            additionalInstances,
            precision,
            acc,
            isConsistent,
            followsFromKB);
    } else {
        return new ClassScore(
            coveredInstances,
            Helper.difference(classInstancesSet, coveredInstances),
            recall,
            additionalInstances,
            precision,
            acc);
    }
}
// exact computation for 5 heuristics; each one adapted to super class learning; // each one takes the noise parameter into account public double getAccuracyOrTooWeakExact(OWLClassExpression description, double noise) { // System.out.println(description); nanoStartTime = System.nanoTime(); if (heuristic.equals(HeuristicType.JACCARD)) { // computing R(A) TreeSet<OWLIndividual> coveredInstancesSet = new TreeSet<OWLIndividual>(); for (OWLIndividual ind : classInstances) { if (getReasoner().hasType(description, ind)) { coveredInstancesSet.add(ind); } if (terminationTimeExpired()) { return 0; } } // if even the optimal case (no additional instances covered) is not sufficient, // the concept is too weak if (coveredInstancesSet.size() / (double) classInstances.size() <= 1 - noise) { return -1; } // computing R(C) restricted to relevant instances TreeSet<OWLIndividual> additionalInstancesSet = new TreeSet<OWLIndividual>(); for (OWLIndividual ind : superClassInstances) { if (getReasoner().hasType(description, ind)) { additionalInstancesSet.add(ind); } if (terminationTimeExpired()) { return 0; } } Set<OWLIndividual> union = Helper.union(classInstancesSet, additionalInstancesSet); return Heuristics.getJaccardCoefficient(coveredInstancesSet.size(), union.size()); } else if (heuristic.equals(HeuristicType.AMEASURE) || heuristic.equals(HeuristicType.FMEASURE) || heuristic.equals(HeuristicType.PRED_ACC)) { int additionalInstances = 0; int coveredInstances = 0; if (reasoner.getClass().isAssignableFrom(SPARQLReasoner.class)) { // R(C) String query = "SELECT (COUNT(DISTINCT(?s)) AS ?cnt) WHERE {" + "?s a ?sup . ?classToDescribe <http://www.w3.org/2000/01/rdf-schema#subClassOf> ?sup . 
" + converter.convert("?s", description) + "FILTER NOT EXISTS {?s a ?classToDescribe}}"; ParameterizedSparqlString template = new ParameterizedSparqlString(query); // System.err.println(converter.convert("?s", description)); // template.setIri("cls", description.asOWLClass().toStringID()); template.setIri("classToDescribe", classToDescribe.toStringID()); QueryExecution qe = ((SPARQLReasoner) reasoner) .getQueryExecutionFactory() .createQueryExecution(template.toString()); additionalInstances = qe.execSelect().next().getLiteral("cnt").getInt(); // R(A) OWLObjectIntersectionOf ce = df.getOWLObjectIntersectionOf(classToDescribe, description); coveredInstances = ((SPARQLReasoner) reasoner).getPopularityOf(ce); // System.out.println(coveredInstances); // System.out.println(additionalInstances); } else { // computing R(C) restricted to relevant instances if (useInstanceChecks) { for (OWLIndividual ind : superClassInstances) { if (getReasoner().hasType(description, ind)) { additionalInstances++; } if (terminationTimeExpired()) { return 0; } } } else { SortedSet<OWLIndividual> individuals = getReasoner().getIndividuals(description); individuals.retainAll(superClassInstances); additionalInstances = individuals.size(); } // computing R(A) if (useInstanceChecks) { for (OWLIndividual ind : classInstances) { if (getReasoner().hasType(description, ind)) { coveredInstances++; } if (terminationTimeExpired()) { return 0; } } } else { SortedSet<OWLIndividual> individuals = getReasoner().getIndividuals(description); individuals.retainAll(classInstances); coveredInstances = individuals.size(); } } // System.out.println(description + ":" + coveredInstances + "/" + classInstances.size()); double recall = coveredInstances / (double) classInstances.size(); // noise computation is incorrect // if(recall < 1 - noise) { // return -1; // } double precision = (additionalInstances + coveredInstances == 0) ? 
0 : coveredInstances / (double) (coveredInstances + additionalInstances); if (heuristic.equals(HeuristicType.AMEASURE)) { // best reachable concept has same recall and precision 1: // 1/t+1 * (t*r + 1) if ((coverageFactor * recall + 1) / (coverageFactor + 1) < (1 - noise)) { return -1; } else { return Heuristics.getAScore(recall, precision, coverageFactor); } } else if (heuristic.equals(HeuristicType.FMEASURE)) { // best reachable concept has same recall and precision 1: if (((1 + Math.sqrt(coverageFactor)) * recall) / (Math.sqrt(coverageFactor) + 1) < 1 - noise) { return -1; } else { return Heuristics.getFScore(recall, precision, coverageFactor); } } else if (heuristic.equals(HeuristicType.PRED_ACC)) { if ((coverageFactor * coveredInstances + superClassInstances.size()) / (coverageFactor * classInstances.size() + superClassInstances.size()) < 1 - noise) { return -1; } else { // correctly classified divided by all examples return (coverageFactor * coveredInstances + superClassInstances.size() - additionalInstances) / (coverageFactor * classInstances.size() + superClassInstances.size()); } } // return heuristic.equals(HeuristicType.FMEASURE) ? 
getFMeasure(recall, precision) : // getAccuracy(recall, precision); } else if (heuristic.equals(HeuristicType.GEN_FMEASURE)) { // implementation is based on: // http://sunsite.informatik.rwth-aachen.de/Publications/CEUR-WS/Vol-426/swap2008_submission_14.pdf // default negation should be turned off when using fast instance checker // compute I_C (negated and non-negated concepts separately) TreeSet<OWLIndividual> icPos = new TreeSet<OWLIndividual>(); TreeSet<OWLIndividual> icNeg = new TreeSet<OWLIndividual>(); OWLClassExpression descriptionNeg = df.getOWLObjectComplementOf(description); // loop through all relevant instances for (OWLIndividual ind : classAndSuperClassInstances) { if (getReasoner().hasType(description, ind)) { icPos.add(ind); } else if (getReasoner().hasType(descriptionNeg, ind)) { icNeg.add(ind); } if (terminationTimeExpired()) { return 0; } } // semantic precision // first compute I_C \cap Cn(DC) // it seems that in our setting, we can ignore Cn, because the examples (class instances) // are already part of the background knowledge Set<OWLIndividual> tmp1Pos = Helper.intersection(icPos, classInstancesSet); Set<OWLIndividual> tmp1Neg = Helper.intersection(icNeg, negatedClassInstances); int tmp1Size = tmp1Pos.size() + tmp1Neg.size(); // Cn(I_C) \cap D_C is the same set if we ignore Cn ... int icSize = icPos.size() + icNeg.size(); double prec = (icSize == 0) ? 
0 : tmp1Size / (double) icSize; double rec = tmp1Size / (double) (classInstances.size() + negatedClassInstances.size()); // System.out.println(description); // System.out.println("I_C pos: " + icPos); // System.out.println("I_C neg: " + icNeg); // System.out.println("class instances: " + classInstances); // System.out.println("negated class instances: " + negatedClassInstances); // System.out.println(prec); // System.out.println(rec); // System.out.println(coverageFactor); // too weak: see F-measure above // => does not work for generalised F-measure, because even very general // concepts do not have a recall of 1 // if(((1+Math.sqrt(coverageFactor))*rec)/(Math.sqrt(coverageFactor)+1)<1-noise) { // return -1; // } // we only return too weak if there is no recall if (rec <= 0.0000001) { return -1; } return getFMeasure(rec, prec); } throw new Error("ClassLearningProblem error: not implemented"); }
@Override public ClassScore computeScore(OWLClassExpression description, double noise) { // TODO: reuse code to ensure that we never return inconsistent results // between getAccuracy, getAccuracyOrTooWeak and computeScore Set<OWLIndividual> additionalInstances = new TreeSet<OWLIndividual>(); Set<OWLIndividual> coveredInstances = new TreeSet<OWLIndividual>(); int additionalInstancesCnt = 0; int coveredInstancesCnt = 0; if (reasoner.getClass().isAssignableFrom(SPARQLReasoner.class)) { // R(C) String query = "SELECT (COUNT(DISTINCT(?s)) AS ?cnt) WHERE {" + "?s a ?sup . ?classToDescribe <http://www.w3.org/2000/01/rdf-schema#subClassOf> ?sup . " + converter.convert("?s", description) + "FILTER NOT EXISTS {?s a ?classToDescribe}}"; ParameterizedSparqlString template = new ParameterizedSparqlString(query); // System.err.println(converter.convert("?s", description)); // template.setIri("cls", description.asOWLClass().toStringID()); template.setIri("classToDescribe", classToDescribe.toStringID()); QueryExecution qe = ((SPARQLReasoner) reasoner) .getQueryExecutionFactory() .createQueryExecution(template.toString()); additionalInstancesCnt = qe.execSelect().next().getLiteral("cnt").getInt(); // R(A) OWLObjectIntersectionOf ce = df.getOWLObjectIntersectionOf(classToDescribe, description); coveredInstancesCnt = ((SPARQLReasoner) reasoner).getPopularityOf(ce); } else { // overhang for (OWLIndividual ind : superClassInstances) { if (getReasoner().hasType(description, ind)) { additionalInstances.add(ind); } } // coverage for (OWLIndividual ind : classInstances) { if (getReasoner().hasType(description, ind)) { coveredInstances.add(ind); } } additionalInstancesCnt = additionalInstances.size(); coveredInstancesCnt = coveredInstances.size(); } double recall = coveredInstancesCnt / (double) classInstances.size(); double precision = (additionalInstancesCnt + coveredInstancesCnt == 0) ? 
0 : coveredInstancesCnt / (double) (coveredInstancesCnt + additionalInstancesCnt); // for each OWLClassExpression with less than 100% coverage, we check whether it is // leads to an inconsistent knowledge base double acc = 0; if (heuristic.equals(HeuristicType.FMEASURE)) { acc = Heuristics.getFScore(recall, precision, coverageFactor); } else if (heuristic.equals(HeuristicType.AMEASURE)) { acc = Heuristics.getAScore(recall, precision, coverageFactor); } else { // TODO: some superfluous instance checks are required to compute accuracy => // move accuracy computation here if possible acc = getAccuracyOrTooWeakExact(description, noise); } if (checkConsistency) { // we check whether the axiom already follows from the knowledge base // boolean followsFromKB = reasoner.isSuperClassOf(description, classToDescribe); // boolean followsFromKB = equivalence ? reasoner.isEquivalentClass(description, // classToDescribe) : reasoner.isSuperClassOf(description, classToDescribe); boolean followsFromKB = followsFromKB(description); // workaround due to a bug (see // http://sourceforge.net/tracker/?func=detail&aid=2866610&group_id=203619&atid=986319) // boolean isConsistent = coverage >= 0.999999 || isConsistent(description); // (if the axiom follows, then the knowledge base remains consistent) boolean isConsistent = followsFromKB || isConsistent(description); // double acc = useFMeasure ? getFMeasure(coverage, protusion) : getAccuracy(coverage, // protusion); return new ClassScore( coveredInstances, Helper.difference(classInstancesSet, coveredInstances), recall, additionalInstances, precision, acc, isConsistent, followsFromKB); } else { return new ClassScore( coveredInstances, Helper.difference(classInstancesSet, coveredInstances), recall, additionalInstances, precision, acc); } }
/**
 * Returns the estimated total path cost through this node: the cost accumulated so far
 * (stored in the bits of {@code m_costAndState} above the low two — presumably the low
 * bits hold state flags; confirm against the field's writers) plus the heuristic
 * estimate from this node to {@code p_target}.
 *
 * @param p_target the goal node
 * @param p_heuristics the heuristic used to estimate the remaining cost
 * @return accumulated cost plus estimated remaining cost
 */
public int getEstimatedTotalCost(final T p_target, final Heuristics<T> p_heuristics) {
    @SuppressWarnings("unchecked")
    final T self = (T) this;
    final int costSoFar = m_costAndState >>> 2;
    return costSoFar + p_heuristics.estimateCost(self, p_target);
}