// exact computation for 5 heuristics; each one adapted to super class learning; // each one takes the noise parameter into account public double getAccuracyOrTooWeakExact(Description description, double noise) { nanoStartTime = System.nanoTime(); if (heuristic.equals(HeuristicType.JACCARD)) { // computing R(A) TreeSet<Individual> coveredInstancesSet = new TreeSet<Individual>(); for (Individual ind : classInstances) { if (getReasoner().hasType(description, ind)) { coveredInstancesSet.add(ind); } if (terminationTimeExpired()) { return 0; } } // if even the optimal case (no additional instances covered) is not sufficient, // the concept is too weak if (coveredInstancesSet.size() / (double) classInstances.size() <= 1 - noise) { return -1; } // computing R(C) restricted to relevant instances TreeSet<Individual> additionalInstancesSet = new TreeSet<Individual>(); for (Individual ind : superClassInstances) { if (getReasoner().hasType(description, ind)) { additionalInstancesSet.add(ind); } if (terminationTimeExpired()) { return 0; } } Set<Individual> union = Helper.union(classInstancesSet, additionalInstancesSet); return Heuristics.getJaccardCoefficient(coveredInstancesSet.size(), union.size()); } else if (heuristic.equals(HeuristicType.AMEASURE) || heuristic.equals(HeuristicType.FMEASURE) || heuristic.equals(HeuristicType.PRED_ACC)) { // computing R(C) restricted to relevant instances int additionalInstances = 0; for (Individual ind : superClassInstances) { if (getReasoner().hasType(description, ind)) { additionalInstances++; } if (terminationTimeExpired()) { return 0; } } // computing R(A) int coveredInstances = 0; for (Individual ind : classInstances) { if (getReasoner().hasType(description, ind)) { coveredInstances++; } if (terminationTimeExpired()) { return 0; } } double recall = coveredInstances / (double) classInstances.size(); // noise computation is incorrect // if(recall < 1 - noise) { // return -1; // } double precision = (additionalInstances + coveredInstances == 0) ? 0 : coveredInstances / (double) (coveredInstances + additionalInstances); if (heuristic.equals(HeuristicType.AMEASURE)) { // best reachable concept has same recall and precision 1: // 1/t+1 * (t*r + 1) if ((coverageFactor * recall + 1) / (double) (coverageFactor + 1) < (1 - noise)) { return -1; } else { return Heuristics.getAScore(recall, precision, coverageFactor); } } else if (heuristic.equals(HeuristicType.FMEASURE)) { // best reachable concept has same recall and precision 1: if (((1 + Math.sqrt(coverageFactor)) * recall) / (Math.sqrt(coverageFactor) + 1) < 1 - noise) { return -1; } else { return getFMeasure(recall, precision); } } else if (heuristic.equals(HeuristicType.PRED_ACC)) { if ((coverageFactor * coveredInstances + superClassInstances.size()) / (double) (coverageFactor * classInstances.size() + superClassInstances.size()) < 1 - noise) { return -1; } else { // correctly classified divided by all examples return (coverageFactor * coveredInstances + superClassInstances.size() - additionalInstances) / (double) (coverageFactor * classInstances.size() + superClassInstances.size()); } } // return heuristic.equals(HeuristicType.FMEASURE) ? 
getFMeasure(recall, precision) : // getAccuracy(recall, precision); } else if (heuristic.equals(HeuristicType.GEN_FMEASURE)) { // implementation is based on: // http://sunsite.informatik.rwth-aachen.de/Publications/CEUR-WS/Vol-426/swap2008_submission_14.pdf // default negation should be turned off when using fast instance checker // compute I_C (negated and non-negated concepts separately) TreeSet<Individual> icPos = new TreeSet<Individual>(); TreeSet<Individual> icNeg = new TreeSet<Individual>(); Description descriptionNeg = new Negation(description); // loop through all relevant instances for (Individual ind : classAndSuperClassInstances) { if (getReasoner().hasType(description, ind)) { icPos.add(ind); } else if (getReasoner().hasType(descriptionNeg, ind)) { icNeg.add(ind); } if (terminationTimeExpired()) { return 0; } } // semantic precision // first compute I_C \cap Cn(DC) // it seems that in our setting, we can ignore Cn, because the examples (class instances) // are already part of the background knowledge Set<Individual> tmp1Pos = Helper.intersection(icPos, classInstancesSet); Set<Individual> tmp1Neg = Helper.intersection(icNeg, negatedClassInstances); int tmp1Size = tmp1Pos.size() + tmp1Neg.size(); // Cn(I_C) \cap D_C is the same set if we ignore Cn ... int icSize = icPos.size() + icNeg.size(); double prec = (icSize == 0) ? 0 : tmp1Size / (double) icSize; double rec = tmp1Size / (double) (classInstances.size() + negatedClassInstances.size()); // System.out.println(description); // System.out.println("I_C pos: " + icPos); // System.out.println("I_C neg: " + icNeg); // System.out.println("class instances: " + classInstances); // System.out.println("negated class instances: " + negatedClassInstances); // System.out.println(prec); // System.out.println(rec); // System.out.println(coverageFactor); // too weak: see F-measure above // => does not work for generalised F-measure, because even very general // concepts do not have a recall of 1 // if(((1+Math.sqrt(coverageFactor))*rec)/(Math.sqrt(coverageFactor)+1)<1-noise) { // return -1; // } // we only return too weak if there is no recall if (rec <= 0.0000001) { return -1; } return getFMeasure(rec, prec); } throw new Error("ClassLearningProblem error: not implemented"); }
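  // The exact computation above repeatedly calls terminationTimeExpired() and returns 0 once the
  // per-call time budget is used up (nanoStartTime is set at the start of the method). The helper
  // itself is not shown in this section; the method below is only an illustrative sketch of such a
  // check (hypothetical name, assuming a maxExecutionTimeInSeconds field), not the original
  // implementation.
  private boolean terminationTimeExpiredSketch() {
    long elapsedNanos = System.nanoTime() - nanoStartTime;
    // compare the elapsed time against the (assumed) maximum execution time in seconds
    return elapsedNanos > maxExecutionTimeInSeconds * 1_000_000_000L;
  }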
// instead of using the standard operation, we use optimisation // and approximation here public double getAccuracyOrTooWeakApprox(Description description, double noise) { if (heuristic.equals(HeuristicType.FMEASURE)) { // we abort when there are too many uncovered positives int maxNotCovered = (int) Math.ceil(noise * classInstances.size()); int instancesCovered = 0; int instancesNotCovered = 0; for (Individual ind : classInstances) { if (getReasoner().hasType(description, ind)) { instancesCovered++; } else { instancesNotCovered++; if (instancesNotCovered > maxNotCovered) { return -1; } } } double recall = instancesCovered / (double) classInstances.size(); int testsPerformed = 0; int instancesDescription = 0; for (Individual ind : superClassInstances) { if (getReasoner().hasType(description, ind)) { instancesDescription++; } testsPerformed++; // check whether approximation is sufficiently accurate double[] approx = Heuristics.getFScoreApproximation( instancesCovered, recall, coverageFactor, superClassInstances.size(), testsPerformed, instancesDescription); if (approx[1] < approxDelta) { return approx[0]; } } // standard computation (no approximation) double precision = instancesCovered / (double) (instancesDescription + instancesCovered); // if(instancesCovered + instancesDescription == 0) { // precision = 0; // } return Heuristics.getFScore(recall, precision, coverageFactor); } else if (heuristic.equals(HeuristicType.AMEASURE)) { // the F-MEASURE implementation is now separate (different optimisation // strategy) // we abort when there are too many uncovered positives int maxNotCovered = (int) Math.ceil(noise * classInstances.size()); int instancesCovered = 0; int instancesNotCovered = 0; int total = 0; boolean estimatedA = false; double lowerBorderA = 0; int lowerEstimateA = 0; double upperBorderA = 1; int upperEstimateA = classInstances.size(); for (Individual ind : classInstances) { if (getReasoner().hasType(description, ind)) { instancesCovered++; } else { instancesNotCovered++; if (instancesNotCovered > maxNotCovered) { return -1; } } // approximation step (starting after 10 tests) total = instancesCovered + instancesNotCovered; if (total > 10) { // compute confidence interval double p1 = p1(instancesCovered, total); double p2 = p3(p1, total); lowerBorderA = Math.max(0, p1 - p2); upperBorderA = Math.min(1, p1 + p2); double size = upperBorderA - lowerBorderA; // if the interval has a size smaller than 10%, we can be confident if (size < 2 * approxDelta) { // we have to distinguish the cases that the accuracy limit is // below, within, or above the limit and that the mean is below // or above the limit double mean = instancesCovered / (double) total; // we can estimate the best possible concept to reach with downward refinement // by setting precision to 1 and recall = mean stays as it is double optimumEstimate = heuristic.equals(HeuristicType.FMEASURE) ? 
((1 + Math.sqrt(coverageFactor)) * mean) / (Math.sqrt(coverageFactor) + 1) : (coverageFactor * mean + 1) / (double) (coverageFactor + 1); // if the mean is greater than the required minimum, we can accept; // we also accept if the interval is small and close to the minimum // (worst case is to accept a few inaccurate descriptions) if (optimumEstimate > 1 - noise - 0.03) { // || (upperBorderA > mean && size < 0.03)) { instancesCovered = (int) (instancesCovered / (double) total * classInstances.size()); upperEstimateA = (int) (upperBorderA * classInstances.size()); lowerEstimateA = (int) (lowerBorderA * classInstances.size()); estimatedA = true; break; } // reject only if the upper border is far away (we are very // certain not to lose a potential solution) // if(upperBorderA + 0.1 < 1-noise) { double optimumEstimateUpperBorder = heuristic.equals(HeuristicType.FMEASURE) ? ((1 + Math.sqrt(coverageFactor)) * (upperBorderA + 0.1)) / (Math.sqrt(coverageFactor) + 1) : (coverageFactor * (upperBorderA + 0.1) + 1) / (double) (coverageFactor + 1); if (optimumEstimateUpperBorder < 1 - noise) { return -1; } } } } double recall = instancesCovered / (double) classInstances.size(); // MonitorFactory.add("estimatedA","count", estimatedA ? 1 : 0); // MonitorFactory.add("aInstances","count", total); // we know that a definition candidate is always subclass of the // intersection of all super classes, so we test only the relevant instances // (leads to undesired effects for descriptions not following this rule, // but improves performance a lot); // for learning a superclass of a defined class, similar observations apply; int testsPerformed = 0; int instancesDescription = 0; // boolean estimatedB = false; for (Individual ind : superClassInstances) { if (getReasoner().hasType(description, ind)) { instancesDescription++; } testsPerformed++; if (testsPerformed > 10) { // compute confidence interval double p1 = p1(instancesDescription, testsPerformed); double p2 = p3(p1, testsPerformed); double lowerBorder = Math.max(0, p1 - p2); double upperBorder = Math.min(1, p1 + p2); int lowerEstimate = (int) (lowerBorder * superClassInstances.size()); int upperEstimate = (int) (upperBorder * superClassInstances.size()); double size; if (estimatedA) { // size = 1/(coverageFactor+1) * (coverageFactor * (upperBorderA-lowerBorderA) + // Math.sqrt(upperEstimateA/(upperEstimateA+lowerEstimate)) + // Math.sqrt(lowerEstimateA/(lowerEstimateA+upperEstimate))); size = heuristic.equals(HeuristicType.FMEASURE) ? getFMeasure( upperBorderA, upperEstimateA / (double) (upperEstimateA + lowerEstimate)) - getFMeasure( lowerBorderA, lowerEstimateA / (double) (lowerEstimateA + upperEstimate)) : Heuristics.getAScore( upperBorderA, upperEstimateA / (double) (upperEstimateA + lowerEstimate), coverageFactor) - Heuristics.getAScore( lowerBorderA, lowerEstimateA / (double) (lowerEstimateA + upperEstimate), coverageFactor); } else { // size = 1/(coverageFactor+1) * (coverageFactor * coverage + // Math.sqrt(instancesCovered/(instancesCovered+lowerEstimate)) + // Math.sqrt(instancesCovered/(instancesCovered+upperEstimate))); size = heuristic.equals(HeuristicType.FMEASURE) ? 
getFMeasure(recall, instancesCovered / (double) (instancesCovered + lowerEstimate))
                  - getFMeasure(recall, instancesCovered / (double) (instancesCovered + upperEstimate))
                : Heuristics.getAScore(recall, instancesCovered / (double) (instancesCovered + lowerEstimate), coverageFactor)
                  - Heuristics.getAScore(recall, instancesCovered / (double) (instancesCovered + upperEstimate), coverageFactor);
          }
          if (size < 0.1) {
            // System.out.println(instancesDescription + " of " + testsPerformed);
            // System.out.println("interval from " + lowerEstimate + " to " + upperEstimate);
            // System.out.println("size: " + size);
            // estimatedB = true;
            // calculate total number of instances
            instancesDescription =
                (int) (instancesDescription / (double) testsPerformed * superClassInstances.size());
            break;
          }
        }
      }
      // since we measured/estimated accuracy only on instances outside A (superClassInstances
      // does not include instances of A), we need to add it in the denominator
      double precision = instancesCovered / (double) (instancesDescription + instancesCovered);
      if (instancesCovered + instancesDescription == 0) {
        precision = 0;
      }
      return heuristic.equals(HeuristicType.FMEASURE)
          ? getFMeasure(recall, precision)
          : Heuristics.getAScore(recall, precision, coverageFactor);
    } else if (heuristic.equals(HeuristicType.PRED_ACC)) {
      // approximation of predictive accuracy; we test positive and negative examples in turn
      int maxNotCovered = (int) Math.ceil(noise * classInstances.size());
      int notCoveredPos = 0;
      // int notCoveredNeg = 0;
      int posClassifiedAsPos = 0;
      int negClassifiedAsNeg = 0;
      int nrOfPosChecks = 0;
      int nrOfNegChecks = 0;
      Iterator<Individual> itPos = classInstances.iterator();
      Iterator<Individual> itNeg = superClassInstances.iterator();
      do {
        // in each loop we pick at most one positive and one negative example and classify them
        if (itPos.hasNext()) {
          Individual posExample = itPos.next();
          // System.out.println(posExample);
          if (getReasoner().hasType(description, posExample)) {
            posClassifiedAsPos++;
          } else {
            notCoveredPos++;
          }
          nrOfPosChecks++;
          // take noise into account
          if (notCoveredPos > maxNotCovered) {
            return -1;
          }
        }
        if (itNeg.hasNext()) {
          Individual negExample = itNeg.next();
          if (!getReasoner().hasType(description, negExample)) {
            negClassifiedAsNeg++;
          }
          nrOfNegChecks++;
        }
        // compute how accurate our current approximation is and return it if it is sufficiently accurate
        double[] approx = Heuristics.getPredAccApproximation(
            classInstances.size(), superClassInstances.size(), 1,
            nrOfPosChecks, posClassifiedAsPos, nrOfNegChecks, negClassifiedAsNeg);
        if (approx[1] < approxDelta) {
          // System.out.println(approx[0]);
          return approx[0];
        }
      } while (itPos.hasNext() || itNeg.hasNext());
      double ret = Heuristics.getPredictiveAccuracy(
          classInstances.size(), superClassInstances.size(), posClassifiedAsPos, negClassifiedAsNeg, 1);
      return ret;
    } else {
      throw new Error("Approximation for " + heuristic + " not implemented.");
    }
  }
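  // The two approximation loops above use p1(success, total) and p3(p1, total) as the centre and
  // half-width of a 95% confidence interval for a binomial proportion (lowerBorder = max(0, p1 - p2),
  // upperBorder = min(1, p1 + p2)). Those helpers are defined elsewhere in the class; the sketch
  // below shows one standard Wald-style choice that is consistent with how the values are combined
  // above. The actual p1/p3 may use a different estimator, so the names are suffixed to mark them
  // as illustrative only.
  private static double p1Sketch(int success, int total) {
    return success / (double) total;
  }

  private static double p3Sketch(double p1, int total) {
    // 1.96 is the z-value for a 95% confidence level
    return 1.96 * Math.sqrt(p1 * (1 - p1) / total);
  }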
  @Override
  public ClassScore computeScore(Description description) {
    // TODO: reuse code to ensure that we never return inconsistent results
    // between getAccuracy, getAccuracyOrTooWeak and computeScore

    // overhang
    Set<Individual> additionalInstances = new TreeSet<Individual>();
    for (Individual ind : superClassInstances) {
      if (getReasoner().hasType(description, ind)) {
        additionalInstances.add(ind);
      }
    }

    // coverage
    Set<Individual> coveredInstances = new TreeSet<Individual>();
    for (Individual ind : classInstances) {
      if (getReasoner().hasType(description, ind)) {
        coveredInstances.add(ind);
      }
    }

    double recall = coveredInstances.size() / (double) classInstances.size();
    double precision = (additionalInstances.size() + coveredInstances.size() == 0)
        ? 0
        : coveredInstances.size() / (double) (coveredInstances.size() + additionalInstances.size());

    // for each description with less than 100% coverage, we check whether it
    // leads to an inconsistent knowledge base
    double acc = 0;
    if (heuristic.equals(HeuristicType.FMEASURE)) {
      acc = getFMeasure(recall, precision);
    } else if (heuristic.equals(HeuristicType.AMEASURE)) {
      acc = Heuristics.getAScore(recall, precision, coverageFactor);
    } else {
      // TODO: some superfluous instance checks are required to compute accuracy =>
      // move accuracy computation here if possible
      acc = getAccuracyOrTooWeakExact(description, 1);
    }

    if (checkConsistency) {
      // we check whether the axiom already follows from the knowledge base
      // boolean followsFromKB = reasoner.isSuperClassOf(description, classToDescribe);
      // boolean followsFromKB = equivalence ? reasoner.isEquivalentClass(description,
      // classToDescribe) : reasoner.isSuperClassOf(description, classToDescribe);
      boolean followsFromKB = followsFromKB(description);
      // workaround due to a bug (see
      // http://sourceforge.net/tracker/?func=detail&aid=2866610&group_id=203619&atid=986319)
      // boolean isConsistent = coverage >= 0.999999 || isConsistent(description);
      // (if the axiom follows, then the knowledge base remains consistent)
      boolean isConsistent = followsFromKB || isConsistent(description);
      // double acc = useFMeasure ? getFMeasure(coverage, protusion) : getAccuracy(coverage, protusion);
      return new ClassScore(coveredInstances, Helper.difference(classInstancesSet, coveredInstances),
          recall, additionalInstances, precision, acc, isConsistent, followsFromKB);
    } else {
      return new ClassScore(coveredInstances, Helper.difference(classInstancesSet, coveredInstances),
          recall, additionalInstances, precision, acc);
    }
  }
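  // The "too weak" shortcut in getAccuracyOrTooWeakExact evaluates
  // (coverageFactor * recall + 1) / (coverageFactor + 1), i.e. the A-measure with precision fixed
  // to 1, the best value reachable by downward refinement. This is consistent with reading
  // Heuristics.getAScore as a coverage-weighted arithmetic mean of recall and precision; the method
  // below is only an illustration of that reading, not the library implementation.
  private static double aScoreSketch(double recall, double precision, double beta) {
    // beta (the coverageFactor) weights recall relative to precision
    return (beta * recall + precision) / (beta + 1);
  }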
  @Override
  public void init() throws ComponentInitException {
    // classToDescribe = new NamedClass(configurator.getClassToDescribe().toString());
    // useApproximations = configurator.getUseApproximations();
    // String accM = configurator.getAccuracyMethod();
    // if(accM.equals("standard")) {
    //   heuristic = HeuristicType.AMEASURE;
    // } else if(accM.equals("fmeasure")) {
    //   heuristic = HeuristicType.FMEASURE;
    // } else if(accM.equals("generalised_fmeasure")) {
    //   heuristic = HeuristicType.GEN_FMEASURE;
    // } else if(accM.equals("jaccard")) {
    //   heuristic = HeuristicType.JACCARD;
    // } else if(accM.equals("pred_acc")) {
    //   heuristic = HeuristicType.PRED_ACC;
    // }

    if (useApproximations && heuristic.equals(HeuristicType.PRED_ACC)) {
      System.err.println(
          "Approximating predictive accuracy is an experimental feature. USE IT AT YOUR OWN RISK. "
              + "If you are considering using it for anything serious, please extend the unit tests at "
              + "org.dllearner.test.junit.HeuristicTests first to verify that it works.");
    }

    if (useApproximations
        && !(heuristic.equals(HeuristicType.PRED_ACC)
            || heuristic.equals(HeuristicType.AMEASURE)
            || heuristic.equals(HeuristicType.FMEASURE))) {
      throw new ComponentInitException(
          "Approximations are only supported for predictive accuracy, F-measure and the standard measure (A-measure). They are not supported for \""
              + heuristic + "\".");
    }

    // useFMeasure = configurator.getAccuracyMethod().equals("fmeasure");
    // approxDelta = configurator.getApproxAccuracy();

    if (!getReasoner().getNamedClasses().contains(classToDescribe)) {
      throw new ComponentInitException(
          "The class \"" + classToDescribe + "\" does not exist. Make sure you spelled it correctly.");
    }

    classInstances = new LinkedList<Individual>(getReasoner().getIndividuals(classToDescribe));
    // sanity check
    if (classInstances.size() == 0) {
      throw new ComponentInitException(
          "Class " + classToDescribe + " has 0 instances according to \""
              + ComponentManager.getInstance().getComponentName(getReasoner().getClass())
              + "\". Cannot perform class learning with 0 instances.");
    }
    classInstancesSet = new TreeSet<Individual>(classInstances);

    // equivalence = (configurator.getType().equals("equivalence"));
    // maxExecutionTimeInSeconds = configurator.getMaxExecutionTimeInSeconds();

    if (equivalence) {
      coverageFactor = betaEq;
    } else {
      coverageFactor = betaSC;
    }

    // we compute the instances of the super class to perform optimisations later on
    Set<Description> superClasses = getReasoner().getClassHierarchy().getSuperClasses(classToDescribe);
    TreeSet<Individual> superClassInstancesTmp = new TreeSet<Individual>(getReasoner().getIndividuals());
    for (Description superClass : superClasses) {
      superClassInstancesTmp.retainAll(getReasoner().getIndividuals(superClass));
    }
    // we create one list, which includes instances of the class (an instance of the class is also
    // an instance of all super classes) ...
    classAndSuperClassInstances = new LinkedList<Individual>(superClassInstancesTmp);
    // ... and a second list not including them
    superClassInstancesTmp.removeAll(classInstances);
    // since we use the instance list for approximations, we want to avoid
    // any bias through URI names, so we shuffle the list once pseudo-randomly
    superClassInstances = new LinkedList<Individual>(superClassInstancesTmp);
    Random rand = new Random(1);
    Collections.shuffle(classInstances, rand);
    Collections.shuffle(superClassInstances, rand);

    if (heuristic.equals(HeuristicType.GEN_FMEASURE)) {
      Description classToDescribeNeg = new Negation(classToDescribe);
      negatedClassInstances = new TreeSet<Individual>();
      for (Individual ind : superClassInstances) {
        if (getReasoner().hasType(classToDescribeNeg, ind)) {
          negatedClassInstances.add(ind);
        }
      }
      // System.out.println("negated class instances: " + negatedClassInstances);
    }
    // System.out.println(classInstances.size() + " " + superClassInstances.size());
  }
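  // The two instance lists built at the end of init() are related as follows:
  // classAndSuperClassInstances contains every individual that is an instance of all super classes
  // of classToDescribe (which, by inference, includes the instances of classToDescribe itself),
  // while superClassInstances is the same collection with the class's own instances removed.
  // The generic helper below is only an illustrative sketch of that split, not part of the
  // original class.
  private static <T> java.util.List<T> withoutClassInstancesSketch(
      java.util.Collection<T> instancesOfAllSuperClasses,
      java.util.Collection<T> instancesOfClassToDescribe) {
    java.util.List<T> result = new java.util.LinkedList<T>(instancesOfAllSuperClasses);
    // keep only the individuals that lie outside the class to describe
    result.removeAll(instancesOfClassToDescribe);
    return result;
  }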
// exact computation for 5 heuristics; each one adapted to super class learning; // each one takes the noise parameter into account public double getAccuracyOrTooWeakExact(OWLClassExpression description, double noise) { // System.out.println(description); nanoStartTime = System.nanoTime(); if (heuristic.equals(HeuristicType.JACCARD)) { // computing R(A) TreeSet<OWLIndividual> coveredInstancesSet = new TreeSet<OWLIndividual>(); for (OWLIndividual ind : classInstances) { if (getReasoner().hasType(description, ind)) { coveredInstancesSet.add(ind); } if (terminationTimeExpired()) { return 0; } } // if even the optimal case (no additional instances covered) is not sufficient, // the concept is too weak if (coveredInstancesSet.size() / (double) classInstances.size() <= 1 - noise) { return -1; } // computing R(C) restricted to relevant instances TreeSet<OWLIndividual> additionalInstancesSet = new TreeSet<OWLIndividual>(); for (OWLIndividual ind : superClassInstances) { if (getReasoner().hasType(description, ind)) { additionalInstancesSet.add(ind); } if (terminationTimeExpired()) { return 0; } } Set<OWLIndividual> union = Helper.union(classInstancesSet, additionalInstancesSet); return Heuristics.getJaccardCoefficient(coveredInstancesSet.size(), union.size()); } else if (heuristic.equals(HeuristicType.AMEASURE) || heuristic.equals(HeuristicType.FMEASURE) || heuristic.equals(HeuristicType.PRED_ACC)) { int additionalInstances = 0; int coveredInstances = 0; if (reasoner.getClass().isAssignableFrom(SPARQLReasoner.class)) { // R(C) String query = "SELECT (COUNT(DISTINCT(?s)) AS ?cnt) WHERE {" + "?s a ?sup . ?classToDescribe <http://www.w3.org/2000/01/rdf-schema#subClassOf> ?sup . " + converter.convert("?s", description) + "FILTER NOT EXISTS {?s a ?classToDescribe}}"; ParameterizedSparqlString template = new ParameterizedSparqlString(query); // System.err.println(converter.convert("?s", description)); // template.setIri("cls", description.asOWLClass().toStringID()); template.setIri("classToDescribe", classToDescribe.toStringID()); QueryExecution qe = ((SPARQLReasoner) reasoner) .getQueryExecutionFactory() .createQueryExecution(template.toString()); additionalInstances = qe.execSelect().next().getLiteral("cnt").getInt(); // R(A) OWLObjectIntersectionOf ce = df.getOWLObjectIntersectionOf(classToDescribe, description); coveredInstances = ((SPARQLReasoner) reasoner).getPopularityOf(ce); // System.out.println(coveredInstances); // System.out.println(additionalInstances); } else { // computing R(C) restricted to relevant instances if (useInstanceChecks) { for (OWLIndividual ind : superClassInstances) { if (getReasoner().hasType(description, ind)) { additionalInstances++; } if (terminationTimeExpired()) { return 0; } } } else { SortedSet<OWLIndividual> individuals = getReasoner().getIndividuals(description); individuals.retainAll(superClassInstances); additionalInstances = individuals.size(); } // computing R(A) if (useInstanceChecks) { for (OWLIndividual ind : classInstances) { if (getReasoner().hasType(description, ind)) { coveredInstances++; } if (terminationTimeExpired()) { return 0; } } } else { SortedSet<OWLIndividual> individuals = getReasoner().getIndividuals(description); individuals.retainAll(classInstances); coveredInstances = individuals.size(); } } // System.out.println(description + ":" + coveredInstances + "/" + classInstances.size()); double recall = coveredInstances / (double) classInstances.size(); // noise computation is incorrect // if(recall < 1 - noise) { // return -1; // } double 
precision = (additionalInstances + coveredInstances == 0) ? 0 : coveredInstances / (double) (coveredInstances + additionalInstances); if (heuristic.equals(HeuristicType.AMEASURE)) { // best reachable concept has same recall and precision 1: // 1/t+1 * (t*r + 1) if ((coverageFactor * recall + 1) / (coverageFactor + 1) < (1 - noise)) { return -1; } else { return Heuristics.getAScore(recall, precision, coverageFactor); } } else if (heuristic.equals(HeuristicType.FMEASURE)) { // best reachable concept has same recall and precision 1: if (((1 + Math.sqrt(coverageFactor)) * recall) / (Math.sqrt(coverageFactor) + 1) < 1 - noise) { return -1; } else { return Heuristics.getFScore(recall, precision, coverageFactor); } } else if (heuristic.equals(HeuristicType.PRED_ACC)) { if ((coverageFactor * coveredInstances + superClassInstances.size()) / (coverageFactor * classInstances.size() + superClassInstances.size()) < 1 - noise) { return -1; } else { // correctly classified divided by all examples return (coverageFactor * coveredInstances + superClassInstances.size() - additionalInstances) / (coverageFactor * classInstances.size() + superClassInstances.size()); } } // return heuristic.equals(HeuristicType.FMEASURE) ? getFMeasure(recall, precision) : // getAccuracy(recall, precision); } else if (heuristic.equals(HeuristicType.GEN_FMEASURE)) { // implementation is based on: // http://sunsite.informatik.rwth-aachen.de/Publications/CEUR-WS/Vol-426/swap2008_submission_14.pdf // default negation should be turned off when using fast instance checker // compute I_C (negated and non-negated concepts separately) TreeSet<OWLIndividual> icPos = new TreeSet<OWLIndividual>(); TreeSet<OWLIndividual> icNeg = new TreeSet<OWLIndividual>(); OWLClassExpression descriptionNeg = df.getOWLObjectComplementOf(description); // loop through all relevant instances for (OWLIndividual ind : classAndSuperClassInstances) { if (getReasoner().hasType(description, ind)) { icPos.add(ind); } else if (getReasoner().hasType(descriptionNeg, ind)) { icNeg.add(ind); } if (terminationTimeExpired()) { return 0; } } // semantic precision // first compute I_C \cap Cn(DC) // it seems that in our setting, we can ignore Cn, because the examples (class instances) // are already part of the background knowledge Set<OWLIndividual> tmp1Pos = Helper.intersection(icPos, classInstancesSet); Set<OWLIndividual> tmp1Neg = Helper.intersection(icNeg, negatedClassInstances); int tmp1Size = tmp1Pos.size() + tmp1Neg.size(); // Cn(I_C) \cap D_C is the same set if we ignore Cn ... int icSize = icPos.size() + icNeg.size(); double prec = (icSize == 0) ? 0 : tmp1Size / (double) icSize; double rec = tmp1Size / (double) (classInstances.size() + negatedClassInstances.size()); // System.out.println(description); // System.out.println("I_C pos: " + icPos); // System.out.println("I_C neg: " + icNeg); // System.out.println("class instances: " + classInstances); // System.out.println("negated class instances: " + negatedClassInstances); // System.out.println(prec); // System.out.println(rec); // System.out.println(coverageFactor); // too weak: see F-measure above // => does not work for generalised F-measure, because even very general // concepts do not have a recall of 1 // if(((1+Math.sqrt(coverageFactor))*rec)/(Math.sqrt(coverageFactor)+1)<1-noise) { // return -1; // } // we only return too weak if there is no recall if (rec <= 0.0000001) { return -1; } return getFMeasure(rec, prec); } throw new Error("ClassLearningProblem error: not implemented"); }
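  // For illustration only: with classToDescribe bound to the hypothetical IRI
  // <http://example.org/Person>, the parameterized query assembled in the SPARQLReasoner branch
  // above resolves to roughly the following (the triple patterns produced by
  // converter.convert("?s", description) depend on the candidate class expression and are elided):
  //
  //   SELECT (COUNT(DISTINCT(?s)) AS ?cnt) WHERE {
  //     ?s a ?sup .
  //     <http://example.org/Person> <http://www.w3.org/2000/01/rdf-schema#subClassOf> ?sup .
  //     ... patterns for the candidate class expression ...
  //     FILTER NOT EXISTS { ?s a <http://example.org/Person> }
  //   }
  //
  // In other words, it counts individuals of the super classes that satisfy the candidate
  // expression but are not instances of the class being described, i.e. the "additional"
  // instances R(C) restricted to the relevant individuals.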
  @Override
  public ClassScore computeScore(OWLClassExpression description, double noise) {
    // TODO: reuse code to ensure that we never return inconsistent results
    // between getAccuracy, getAccuracyOrTooWeak and computeScore
    Set<OWLIndividual> additionalInstances = new TreeSet<OWLIndividual>();
    Set<OWLIndividual> coveredInstances = new TreeSet<OWLIndividual>();

    int additionalInstancesCnt = 0;
    int coveredInstancesCnt = 0;

    if (reasoner.getClass().isAssignableFrom(SPARQLReasoner.class)) {
      // R(C)
      String query = "SELECT (COUNT(DISTINCT(?s)) AS ?cnt) WHERE {"
          + "?s a ?sup . ?classToDescribe <http://www.w3.org/2000/01/rdf-schema#subClassOf> ?sup . "
          + converter.convert("?s", description)
          + "FILTER NOT EXISTS {?s a ?classToDescribe}}";
      ParameterizedSparqlString template = new ParameterizedSparqlString(query);
      // System.err.println(converter.convert("?s", description));
      // template.setIri("cls", description.asOWLClass().toStringID());
      template.setIri("classToDescribe", classToDescribe.toStringID());
      QueryExecution qe = ((SPARQLReasoner) reasoner)
          .getQueryExecutionFactory()
          .createQueryExecution(template.toString());
      additionalInstancesCnt = qe.execSelect().next().getLiteral("cnt").getInt();

      // R(A)
      OWLObjectIntersectionOf ce = df.getOWLObjectIntersectionOf(classToDescribe, description);
      coveredInstancesCnt = ((SPARQLReasoner) reasoner).getPopularityOf(ce);
    } else {
      // overhang
      for (OWLIndividual ind : superClassInstances) {
        if (getReasoner().hasType(description, ind)) {
          additionalInstances.add(ind);
        }
      }
      // coverage
      for (OWLIndividual ind : classInstances) {
        if (getReasoner().hasType(description, ind)) {
          coveredInstances.add(ind);
        }
      }
      additionalInstancesCnt = additionalInstances.size();
      coveredInstancesCnt = coveredInstances.size();
    }

    double recall = coveredInstancesCnt / (double) classInstances.size();
    double precision = (additionalInstancesCnt + coveredInstancesCnt == 0)
        ? 0
        : coveredInstancesCnt / (double) (coveredInstancesCnt + additionalInstancesCnt);

    // for each OWLClassExpression with less than 100% coverage, we check whether it
    // leads to an inconsistent knowledge base
    double acc = 0;
    if (heuristic.equals(HeuristicType.FMEASURE)) {
      acc = Heuristics.getFScore(recall, precision, coverageFactor);
    } else if (heuristic.equals(HeuristicType.AMEASURE)) {
      acc = Heuristics.getAScore(recall, precision, coverageFactor);
    } else {
      // TODO: some superfluous instance checks are required to compute accuracy =>
      // move accuracy computation here if possible
      acc = getAccuracyOrTooWeakExact(description, noise);
    }

    if (checkConsistency) {
      // we check whether the axiom already follows from the knowledge base
      // boolean followsFromKB = reasoner.isSuperClassOf(description, classToDescribe);
      // boolean followsFromKB = equivalence ? reasoner.isEquivalentClass(description,
      // classToDescribe) : reasoner.isSuperClassOf(description, classToDescribe);
      boolean followsFromKB = followsFromKB(description);
      // workaround due to a bug (see
      // http://sourceforge.net/tracker/?func=detail&aid=2866610&group_id=203619&atid=986319)
      // boolean isConsistent = coverage >= 0.999999 || isConsistent(description);
      // (if the axiom follows, then the knowledge base remains consistent)
      boolean isConsistent = followsFromKB || isConsistent(description);
      // double acc = useFMeasure ? getFMeasure(coverage, protusion) : getAccuracy(coverage, protusion);
      return new ClassScore(coveredInstances, Helper.difference(classInstancesSet, coveredInstances),
          recall, additionalInstances, precision, acc, isConsistent, followsFromKB);
    } else {
      return new ClassScore(coveredInstances, Helper.difference(classInstancesSet, coveredInstances),
          recall, additionalInstances, precision, acc);
    }
  }
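  // A small worked example of the score computation above, with hypothetical numbers: assume the
  // class to describe has 100 instances, a candidate expression covers 80 of them and additionally
  // covers 20 individuals of the super classes. Then recall = 80/100 = 0.8 and
  // precision = 80/(80+20) = 0.8; with coverageFactor = 1 the A-measure (weighted mean of recall
  // and precision) is (1*0.8 + 0.8)/(1+1) = 0.8, and the F-score combines the same two values
  // harmonically. The method below just replays that arithmetic and is not part of the original class.
  private static void scoreExampleSketch() {
    int classInstanceCount = 100;      // |A|, hypothetical
    int coveredInstanceCount = 80;     // |R(A)|, hypothetical
    int additionalInstanceCount = 20;  // covered super class individuals outside A, hypothetical
    double recall = coveredInstanceCount / (double) classInstanceCount;
    double precision = coveredInstanceCount / (double) (coveredInstanceCount + additionalInstanceCount);
    double coverageFactor = 1.0;
    double aMeasure = (coverageFactor * recall + precision) / (coverageFactor + 1);
    System.out.println(recall + " " + precision + " " + aMeasure); // prints 0.8 0.8 0.8
  }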