/**
 * Returns one minus the fraction of the union of both sets that lies outside their
 * intersection, i.e. {@code 1 - (|union| - |intersection|) / |union|}.
 *
 * <p>When the union is empty the division has a zero divisor, so the result is not a finite
 * number.
 *
 * @param set1 first set of individuals
 * @param set2 second set of individuals
 * @return the value described above
 */
@SuppressWarnings("unused")
private double getInverseJaccardDistance(
        TreeSet<OWLIndividual> set1, TreeSet<OWLIndividual> set2) {
    Set<OWLIndividual> shared = Helper.intersection(set1, set2);
    Set<OWLIndividual> combined = Helper.union(set1, set2);

    // Number of individuals that occur in exactly one of the two sets.
    int exclusiveCount = combined.size() - shared.size();
    double exclusiveShare = exclusiveCount / (double) combined.size();
    return 1 - exclusiveShare;
}
/**
 * Renders the collected reasoning statistics as a multi-line report.
 *
 * <p>The retrieval, instance check, subsumption hierarchy and subsumption check sections are
 * only included when the corresponding counter is greater than zero; the overall reasoning
 * time line is always appended last.
 *
 * @return the report, each entry terminated by a newline
 */
@Override
public String toString() {
    StringBuilder report = new StringBuilder();

    if (nrOfRetrievals > 0) {
        report.append("number of retrievals: ").append(nrOfRetrievals).append("\n");
        report.append("retrieval reasoning time: ")
                .append(Helper.prettyPrintNanoSeconds(retrievalReasoningTimeNs))
                .append(" ( ")
                .append(Helper.prettyPrintNanoSeconds(getTimePerRetrievalNs()))
                .append(" per retrieval)")
                .append("\n");
    }

    if (nrOfInstanceChecks > 0) {
        report.append("number of instance checks: ")
                .append(nrOfInstanceChecks)
                .append(" (")
                .append(nrOfMultiInstanceChecks)
                .append(" multiple)\n");
        report.append("instance check reasoning time: ")
                .append(Helper.prettyPrintNanoSeconds(instanceCheckReasoningTimeNs))
                .append(" ( ")
                .append(Helper.prettyPrintNanoSeconds(getTimePerInstanceCheckNs()))
                .append(" per instance check)\n");
    }

    if (nrOfSubsumptionHierarchyQueries > 0) {
        report.append("subsumption hierarchy queries: ")
                .append(nrOfSubsumptionHierarchyQueries)
                .append("\n");
    }

    if (nrOfSubsumptionChecks > 0) {
        report.append("(complex) subsumption checks: ")
                .append(nrOfSubsumptionChecks)
                .append(" (")
                .append(nrOfMultiSubsumptionChecks)
                .append(" multiple)\n");
        report.append("subsumption reasoning time: ")
                .append(Helper.prettyPrintNanoSeconds(subsumptionReasoningTimeNs))
                .append(" ( ")
                .append(Helper.prettyPrintNanoSeconds(getTimePerSubsumptionCheckNs()))
                .append(" per subsumption check)\n");
    }

    report.append("overall reasoning time: ")
            .append(Helper.prettyPrintNanoSeconds(overallReasoningTimeNs))
            .append("\n");
    return report.toString();
}
// exact computation for 5 heuristics; each one adapted to super class learning; // each one takes the noise parameter into account public double getAccuracyOrTooWeakExact(Description description, double noise) { nanoStartTime = System.nanoTime(); if (heuristic.equals(HeuristicType.JACCARD)) { // computing R(A) TreeSet<Individual> coveredInstancesSet = new TreeSet<Individual>(); for (Individual ind : classInstances) { if (getReasoner().hasType(description, ind)) { coveredInstancesSet.add(ind); } if (terminationTimeExpired()) { return 0; } } // if even the optimal case (no additional instances covered) is not sufficient, // the concept is too weak if (coveredInstancesSet.size() / (double) classInstances.size() <= 1 - noise) { return -1; } // computing R(C) restricted to relevant instances TreeSet<Individual> additionalInstancesSet = new TreeSet<Individual>(); for (Individual ind : superClassInstances) { if (getReasoner().hasType(description, ind)) { additionalInstancesSet.add(ind); } if (terminationTimeExpired()) { return 0; } } Set<Individual> union = Helper.union(classInstancesSet, additionalInstancesSet); return Heuristics.getJaccardCoefficient(coveredInstancesSet.size(), union.size()); } else if (heuristic.equals(HeuristicType.AMEASURE) || heuristic.equals(HeuristicType.FMEASURE) || heuristic.equals(HeuristicType.PRED_ACC)) { // computing R(C) restricted to relevant instances int additionalInstances = 0; for (Individual ind : superClassInstances) { if (getReasoner().hasType(description, ind)) { additionalInstances++; } if (terminationTimeExpired()) { return 0; } } // computing R(A) int coveredInstances = 0; for (Individual ind : classInstances) { if (getReasoner().hasType(description, ind)) { coveredInstances++; } if (terminationTimeExpired()) { return 0; } } double recall = coveredInstances / (double) classInstances.size(); // noise computation is incorrect // if(recall < 1 - noise) { // return -1; // } double precision = (additionalInstances + coveredInstances 
== 0) ? 0 : coveredInstances / (double) (coveredInstances + additionalInstances); if (heuristic.equals(HeuristicType.AMEASURE)) { // best reachable concept has same recall and precision 1: // 1/t+1 * (t*r + 1) if ((coverageFactor * recall + 1) / (double) (coverageFactor + 1) < (1 - noise)) { return -1; } else { return Heuristics.getAScore(recall, precision, coverageFactor); } } else if (heuristic.equals(HeuristicType.FMEASURE)) { // best reachable concept has same recall and precision 1: if (((1 + Math.sqrt(coverageFactor)) * recall) / (Math.sqrt(coverageFactor) + 1) < 1 - noise) { return -1; } else { return getFMeasure(recall, precision); } } else if (heuristic.equals(HeuristicType.PRED_ACC)) { if ((coverageFactor * coveredInstances + superClassInstances.size()) / (double) (coverageFactor * classInstances.size() + superClassInstances.size()) < 1 - noise) { return -1; } else { // correctly classified divided by all examples return (coverageFactor * coveredInstances + superClassInstances.size() - additionalInstances) / (double) (coverageFactor * classInstances.size() + superClassInstances.size()); } } // return heuristic.equals(HeuristicType.FMEASURE) ? 
getFMeasure(recall, precision) : // getAccuracy(recall, precision); } else if (heuristic.equals(HeuristicType.GEN_FMEASURE)) { // implementation is based on: // http://sunsite.informatik.rwth-aachen.de/Publications/CEUR-WS/Vol-426/swap2008_submission_14.pdf // default negation should be turned off when using fast instance checker // compute I_C (negated and non-negated concepts separately) TreeSet<Individual> icPos = new TreeSet<Individual>(); TreeSet<Individual> icNeg = new TreeSet<Individual>(); Description descriptionNeg = new Negation(description); // loop through all relevant instances for (Individual ind : classAndSuperClassInstances) { if (getReasoner().hasType(description, ind)) { icPos.add(ind); } else if (getReasoner().hasType(descriptionNeg, ind)) { icNeg.add(ind); } if (terminationTimeExpired()) { return 0; } } // semantic precision // first compute I_C \cap Cn(DC) // it seems that in our setting, we can ignore Cn, because the examples (class instances) // are already part of the background knowledge Set<Individual> tmp1Pos = Helper.intersection(icPos, classInstancesSet); Set<Individual> tmp1Neg = Helper.intersection(icNeg, negatedClassInstances); int tmp1Size = tmp1Pos.size() + tmp1Neg.size(); // Cn(I_C) \cap D_C is the same set if we ignore Cn ... int icSize = icPos.size() + icNeg.size(); double prec = (icSize == 0) ? 
0 : tmp1Size / (double) icSize; double rec = tmp1Size / (double) (classInstances.size() + negatedClassInstances.size()); // System.out.println(description); // System.out.println("I_C pos: " + icPos); // System.out.println("I_C neg: " + icNeg); // System.out.println("class instances: " + classInstances); // System.out.println("negated class instances: " + negatedClassInstances); // System.out.println(prec); // System.out.println(rec); // System.out.println(coverageFactor); // too weak: see F-measure above // => does not work for generalised F-measure, because even very general // concepts do not have a recall of 1 // if(((1+Math.sqrt(coverageFactor))*rec)/(Math.sqrt(coverageFactor)+1)<1-noise) { // return -1; // } // we only return too weak if there is no recall if (rec <= 0.0000001) { return -1; } return getFMeasure(rec, prec); } throw new Error("ClassLearningProblem error: not implemented"); }
@Override public ClassScore computeScore(Description description) { // TODO: reuse code to ensure that we never return inconsistent results // between getAccuracy, getAccuracyOrTooWeak and computeScore // overhang Set<Individual> additionalInstances = new TreeSet<Individual>(); for (Individual ind : superClassInstances) { if (getReasoner().hasType(description, ind)) { additionalInstances.add(ind); } } // coverage Set<Individual> coveredInstances = new TreeSet<Individual>(); for (Individual ind : classInstances) { if (getReasoner().hasType(description, ind)) { coveredInstances.add(ind); } } double recall = coveredInstances.size() / (double) classInstances.size(); double precision = (additionalInstances.size() + coveredInstances.size() == 0) ? 0 : coveredInstances.size() / (double) (coveredInstances.size() + additionalInstances.size()); // for each description with less than 100% coverage, we check whether it is // leads to an inconsistent knowledge base double acc = 0; if (heuristic.equals(HeuristicType.FMEASURE)) { acc = getFMeasure(recall, precision); } else if (heuristic.equals(HeuristicType.AMEASURE)) { acc = Heuristics.getAScore(recall, precision, coverageFactor); } else { // TODO: some superfluous instance checks are required to compute accuracy => // move accuracy computation here if possible acc = getAccuracyOrTooWeakExact(description, 1); } if (checkConsistency) { // we check whether the axiom already follows from the knowledge base // boolean followsFromKB = reasoner.isSuperClassOf(description, classToDescribe); // boolean followsFromKB = equivalence ? 
reasoner.isEquivalentClass(description, // classToDescribe) : reasoner.isSuperClassOf(description, classToDescribe); boolean followsFromKB = followsFromKB(description); // workaround due to a bug (see // http://sourceforge.net/tracker/?func=detail&aid=2866610&group_id=203619&atid=986319) // boolean isConsistent = coverage >= 0.999999 || isConsistent(description); // (if the axiom follows, then the knowledge base remains consistent) boolean isConsistent = followsFromKB || isConsistent(description); // double acc = useFMeasure ? getFMeasure(coverage, protusion) : getAccuracy(coverage, // protusion); return new ClassScore( coveredInstances, Helper.difference(classInstancesSet, coveredInstances), recall, additionalInstances, precision, acc, isConsistent, followsFromKB); } else { return new ClassScore( coveredInstances, Helper.difference(classInstancesSet, coveredInstances), recall, additionalInstances, precision, acc); } }
// exact computation for 5 heuristics; each one adapted to super class learning; // each one takes the noise parameter into account public double getAccuracyOrTooWeakExact(OWLClassExpression description, double noise) { // System.out.println(description); nanoStartTime = System.nanoTime(); if (heuristic.equals(HeuristicType.JACCARD)) { // computing R(A) TreeSet<OWLIndividual> coveredInstancesSet = new TreeSet<OWLIndividual>(); for (OWLIndividual ind : classInstances) { if (getReasoner().hasType(description, ind)) { coveredInstancesSet.add(ind); } if (terminationTimeExpired()) { return 0; } } // if even the optimal case (no additional instances covered) is not sufficient, // the concept is too weak if (coveredInstancesSet.size() / (double) classInstances.size() <= 1 - noise) { return -1; } // computing R(C) restricted to relevant instances TreeSet<OWLIndividual> additionalInstancesSet = new TreeSet<OWLIndividual>(); for (OWLIndividual ind : superClassInstances) { if (getReasoner().hasType(description, ind)) { additionalInstancesSet.add(ind); } if (terminationTimeExpired()) { return 0; } } Set<OWLIndividual> union = Helper.union(classInstancesSet, additionalInstancesSet); return Heuristics.getJaccardCoefficient(coveredInstancesSet.size(), union.size()); } else if (heuristic.equals(HeuristicType.AMEASURE) || heuristic.equals(HeuristicType.FMEASURE) || heuristic.equals(HeuristicType.PRED_ACC)) { int additionalInstances = 0; int coveredInstances = 0; if (reasoner.getClass().isAssignableFrom(SPARQLReasoner.class)) { // R(C) String query = "SELECT (COUNT(DISTINCT(?s)) AS ?cnt) WHERE {" + "?s a ?sup . ?classToDescribe <http://www.w3.org/2000/01/rdf-schema#subClassOf> ?sup . 
" + converter.convert("?s", description) + "FILTER NOT EXISTS {?s a ?classToDescribe}}"; ParameterizedSparqlString template = new ParameterizedSparqlString(query); // System.err.println(converter.convert("?s", description)); // template.setIri("cls", description.asOWLClass().toStringID()); template.setIri("classToDescribe", classToDescribe.toStringID()); QueryExecution qe = ((SPARQLReasoner) reasoner) .getQueryExecutionFactory() .createQueryExecution(template.toString()); additionalInstances = qe.execSelect().next().getLiteral("cnt").getInt(); // R(A) OWLObjectIntersectionOf ce = df.getOWLObjectIntersectionOf(classToDescribe, description); coveredInstances = ((SPARQLReasoner) reasoner).getPopularityOf(ce); // System.out.println(coveredInstances); // System.out.println(additionalInstances); } else { // computing R(C) restricted to relevant instances if (useInstanceChecks) { for (OWLIndividual ind : superClassInstances) { if (getReasoner().hasType(description, ind)) { additionalInstances++; } if (terminationTimeExpired()) { return 0; } } } else { SortedSet<OWLIndividual> individuals = getReasoner().getIndividuals(description); individuals.retainAll(superClassInstances); additionalInstances = individuals.size(); } // computing R(A) if (useInstanceChecks) { for (OWLIndividual ind : classInstances) { if (getReasoner().hasType(description, ind)) { coveredInstances++; } if (terminationTimeExpired()) { return 0; } } } else { SortedSet<OWLIndividual> individuals = getReasoner().getIndividuals(description); individuals.retainAll(classInstances); coveredInstances = individuals.size(); } } // System.out.println(description + ":" + coveredInstances + "/" + classInstances.size()); double recall = coveredInstances / (double) classInstances.size(); // noise computation is incorrect // if(recall < 1 - noise) { // return -1; // } double precision = (additionalInstances + coveredInstances == 0) ? 
0 : coveredInstances / (double) (coveredInstances + additionalInstances); if (heuristic.equals(HeuristicType.AMEASURE)) { // best reachable concept has same recall and precision 1: // 1/t+1 * (t*r + 1) if ((coverageFactor * recall + 1) / (coverageFactor + 1) < (1 - noise)) { return -1; } else { return Heuristics.getAScore(recall, precision, coverageFactor); } } else if (heuristic.equals(HeuristicType.FMEASURE)) { // best reachable concept has same recall and precision 1: if (((1 + Math.sqrt(coverageFactor)) * recall) / (Math.sqrt(coverageFactor) + 1) < 1 - noise) { return -1; } else { return Heuristics.getFScore(recall, precision, coverageFactor); } } else if (heuristic.equals(HeuristicType.PRED_ACC)) { if ((coverageFactor * coveredInstances + superClassInstances.size()) / (coverageFactor * classInstances.size() + superClassInstances.size()) < 1 - noise) { return -1; } else { // correctly classified divided by all examples return (coverageFactor * coveredInstances + superClassInstances.size() - additionalInstances) / (coverageFactor * classInstances.size() + superClassInstances.size()); } } // return heuristic.equals(HeuristicType.FMEASURE) ? 
getFMeasure(recall, precision) : // getAccuracy(recall, precision); } else if (heuristic.equals(HeuristicType.GEN_FMEASURE)) { // implementation is based on: // http://sunsite.informatik.rwth-aachen.de/Publications/CEUR-WS/Vol-426/swap2008_submission_14.pdf // default negation should be turned off when using fast instance checker // compute I_C (negated and non-negated concepts separately) TreeSet<OWLIndividual> icPos = new TreeSet<OWLIndividual>(); TreeSet<OWLIndividual> icNeg = new TreeSet<OWLIndividual>(); OWLClassExpression descriptionNeg = df.getOWLObjectComplementOf(description); // loop through all relevant instances for (OWLIndividual ind : classAndSuperClassInstances) { if (getReasoner().hasType(description, ind)) { icPos.add(ind); } else if (getReasoner().hasType(descriptionNeg, ind)) { icNeg.add(ind); } if (terminationTimeExpired()) { return 0; } } // semantic precision // first compute I_C \cap Cn(DC) // it seems that in our setting, we can ignore Cn, because the examples (class instances) // are already part of the background knowledge Set<OWLIndividual> tmp1Pos = Helper.intersection(icPos, classInstancesSet); Set<OWLIndividual> tmp1Neg = Helper.intersection(icNeg, negatedClassInstances); int tmp1Size = tmp1Pos.size() + tmp1Neg.size(); // Cn(I_C) \cap D_C is the same set if we ignore Cn ... int icSize = icPos.size() + icNeg.size(); double prec = (icSize == 0) ? 
0 : tmp1Size / (double) icSize; double rec = tmp1Size / (double) (classInstances.size() + negatedClassInstances.size()); // System.out.println(description); // System.out.println("I_C pos: " + icPos); // System.out.println("I_C neg: " + icNeg); // System.out.println("class instances: " + classInstances); // System.out.println("negated class instances: " + negatedClassInstances); // System.out.println(prec); // System.out.println(rec); // System.out.println(coverageFactor); // too weak: see F-measure above // => does not work for generalised F-measure, because even very general // concepts do not have a recall of 1 // if(((1+Math.sqrt(coverageFactor))*rec)/(Math.sqrt(coverageFactor)+1)<1-noise) { // return -1; // } // we only return too weak if there is no recall if (rec <= 0.0000001) { return -1; } return getFMeasure(rec, prec); } throw new Error("ClassLearningProblem error: not implemented"); }
@Override public ClassScore computeScore(OWLClassExpression description, double noise) { // TODO: reuse code to ensure that we never return inconsistent results // between getAccuracy, getAccuracyOrTooWeak and computeScore Set<OWLIndividual> additionalInstances = new TreeSet<OWLIndividual>(); Set<OWLIndividual> coveredInstances = new TreeSet<OWLIndividual>(); int additionalInstancesCnt = 0; int coveredInstancesCnt = 0; if (reasoner.getClass().isAssignableFrom(SPARQLReasoner.class)) { // R(C) String query = "SELECT (COUNT(DISTINCT(?s)) AS ?cnt) WHERE {" + "?s a ?sup . ?classToDescribe <http://www.w3.org/2000/01/rdf-schema#subClassOf> ?sup . " + converter.convert("?s", description) + "FILTER NOT EXISTS {?s a ?classToDescribe}}"; ParameterizedSparqlString template = new ParameterizedSparqlString(query); // System.err.println(converter.convert("?s", description)); // template.setIri("cls", description.asOWLClass().toStringID()); template.setIri("classToDescribe", classToDescribe.toStringID()); QueryExecution qe = ((SPARQLReasoner) reasoner) .getQueryExecutionFactory() .createQueryExecution(template.toString()); additionalInstancesCnt = qe.execSelect().next().getLiteral("cnt").getInt(); // R(A) OWLObjectIntersectionOf ce = df.getOWLObjectIntersectionOf(classToDescribe, description); coveredInstancesCnt = ((SPARQLReasoner) reasoner).getPopularityOf(ce); } else { // overhang for (OWLIndividual ind : superClassInstances) { if (getReasoner().hasType(description, ind)) { additionalInstances.add(ind); } } // coverage for (OWLIndividual ind : classInstances) { if (getReasoner().hasType(description, ind)) { coveredInstances.add(ind); } } additionalInstancesCnt = additionalInstances.size(); coveredInstancesCnt = coveredInstances.size(); } double recall = coveredInstancesCnt / (double) classInstances.size(); double precision = (additionalInstancesCnt + coveredInstancesCnt == 0) ? 
0 : coveredInstancesCnt / (double) (coveredInstancesCnt + additionalInstancesCnt); // for each OWLClassExpression with less than 100% coverage, we check whether it is // leads to an inconsistent knowledge base double acc = 0; if (heuristic.equals(HeuristicType.FMEASURE)) { acc = Heuristics.getFScore(recall, precision, coverageFactor); } else if (heuristic.equals(HeuristicType.AMEASURE)) { acc = Heuristics.getAScore(recall, precision, coverageFactor); } else { // TODO: some superfluous instance checks are required to compute accuracy => // move accuracy computation here if possible acc = getAccuracyOrTooWeakExact(description, noise); } if (checkConsistency) { // we check whether the axiom already follows from the knowledge base // boolean followsFromKB = reasoner.isSuperClassOf(description, classToDescribe); // boolean followsFromKB = equivalence ? reasoner.isEquivalentClass(description, // classToDescribe) : reasoner.isSuperClassOf(description, classToDescribe); boolean followsFromKB = followsFromKB(description); // workaround due to a bug (see // http://sourceforge.net/tracker/?func=detail&aid=2866610&group_id=203619&atid=986319) // boolean isConsistent = coverage >= 0.999999 || isConsistent(description); // (if the axiom follows, then the knowledge base remains consistent) boolean isConsistent = followsFromKB || isConsistent(description); // double acc = useFMeasure ? getFMeasure(coverage, protusion) : getAccuracy(coverage, // protusion); return new ClassScore( coveredInstances, Helper.difference(classInstancesSet, coveredInstances), recall, additionalInstances, precision, acc, isConsistent, followsFromKB); } else { return new ClassScore( coveredInstances, Helper.difference(classInstancesSet, coveredInstances), recall, additionalInstances, precision, acc); } }