// exact computation for 5 heuristics; each one adapted to super class learning; // each one takes the noise parameter into account public double getAccuracyOrTooWeakExact(OWLClassExpression description, double noise) { // System.out.println(description); nanoStartTime = System.nanoTime(); if (heuristic.equals(HeuristicType.JACCARD)) { // computing R(A) TreeSet<OWLIndividual> coveredInstancesSet = new TreeSet<OWLIndividual>(); for (OWLIndividual ind : classInstances) { if (getReasoner().hasType(description, ind)) { coveredInstancesSet.add(ind); } if (terminationTimeExpired()) { return 0; } } // if even the optimal case (no additional instances covered) is not sufficient, // the concept is too weak if (coveredInstancesSet.size() / (double) classInstances.size() <= 1 - noise) { return -1; } // computing R(C) restricted to relevant instances TreeSet<OWLIndividual> additionalInstancesSet = new TreeSet<OWLIndividual>(); for (OWLIndividual ind : superClassInstances) { if (getReasoner().hasType(description, ind)) { additionalInstancesSet.add(ind); } if (terminationTimeExpired()) { return 0; } } Set<OWLIndividual> union = Helper.union(classInstancesSet, additionalInstancesSet); return Heuristics.getJaccardCoefficient(coveredInstancesSet.size(), union.size()); } else if (heuristic.equals(HeuristicType.AMEASURE) || heuristic.equals(HeuristicType.FMEASURE) || heuristic.equals(HeuristicType.PRED_ACC)) { int additionalInstances = 0; int coveredInstances = 0; if (reasoner.getClass().isAssignableFrom(SPARQLReasoner.class)) { // R(C) String query = "SELECT (COUNT(DISTINCT(?s)) AS ?cnt) WHERE {" + "?s a ?sup . ?classToDescribe <http://www.w3.org/2000/01/rdf-schema#subClassOf> ?sup . " + converter.convert("?s", description) + "FILTER NOT EXISTS {?s a ?classToDescribe}}"; ParameterizedSparqlString template = new ParameterizedSparqlString(query); // System.err.println(converter.convert("?s", description)); // template.setIri("cls", description.asOWLClass().toStringID()); template.setIri("classToDescribe", classToDescribe.toStringID()); QueryExecution qe = ((SPARQLReasoner) reasoner) .getQueryExecutionFactory() .createQueryExecution(template.toString()); additionalInstances = qe.execSelect().next().getLiteral("cnt").getInt(); // R(A) OWLObjectIntersectionOf ce = df.getOWLObjectIntersectionOf(classToDescribe, description); coveredInstances = ((SPARQLReasoner) reasoner).getPopularityOf(ce); // System.out.println(coveredInstances); // System.out.println(additionalInstances); } else { // computing R(C) restricted to relevant instances if (useInstanceChecks) { for (OWLIndividual ind : superClassInstances) { if (getReasoner().hasType(description, ind)) { additionalInstances++; } if (terminationTimeExpired()) { return 0; } } } else { SortedSet<OWLIndividual> individuals = getReasoner().getIndividuals(description); individuals.retainAll(superClassInstances); additionalInstances = individuals.size(); } // computing R(A) if (useInstanceChecks) { for (OWLIndividual ind : classInstances) { if (getReasoner().hasType(description, ind)) { coveredInstances++; } if (terminationTimeExpired()) { return 0; } } } else { SortedSet<OWLIndividual> individuals = getReasoner().getIndividuals(description); individuals.retainAll(classInstances); coveredInstances = individuals.size(); } } // System.out.println(description + ":" + coveredInstances + "/" + classInstances.size()); double recall = coveredInstances / (double) classInstances.size(); // noise computation is incorrect // if(recall < 1 - noise) { // return -1; // } double precision = (additionalInstances + coveredInstances == 0) ? 0 : coveredInstances / (double) (coveredInstances + additionalInstances); if (heuristic.equals(HeuristicType.AMEASURE)) { // best reachable concept has same recall and precision 1: // 1/t+1 * (t*r + 1) if ((coverageFactor * recall + 1) / (coverageFactor + 1) < (1 - noise)) { return -1; } else { return Heuristics.getAScore(recall, precision, coverageFactor); } } else if (heuristic.equals(HeuristicType.FMEASURE)) { // best reachable concept has same recall and precision 1: if (((1 + Math.sqrt(coverageFactor)) * recall) / (Math.sqrt(coverageFactor) + 1) < 1 - noise) { return -1; } else { return Heuristics.getFScore(recall, precision, coverageFactor); } } else if (heuristic.equals(HeuristicType.PRED_ACC)) { if ((coverageFactor * coveredInstances + superClassInstances.size()) / (coverageFactor * classInstances.size() + superClassInstances.size()) < 1 - noise) { return -1; } else { // correctly classified divided by all examples return (coverageFactor * coveredInstances + superClassInstances.size() - additionalInstances) / (coverageFactor * classInstances.size() + superClassInstances.size()); } } // return heuristic.equals(HeuristicType.FMEASURE) ? getFMeasure(recall, precision) : // getAccuracy(recall, precision); } else if (heuristic.equals(HeuristicType.GEN_FMEASURE)) { // implementation is based on: // http://sunsite.informatik.rwth-aachen.de/Publications/CEUR-WS/Vol-426/swap2008_submission_14.pdf // default negation should be turned off when using fast instance checker // compute I_C (negated and non-negated concepts separately) TreeSet<OWLIndividual> icPos = new TreeSet<OWLIndividual>(); TreeSet<OWLIndividual> icNeg = new TreeSet<OWLIndividual>(); OWLClassExpression descriptionNeg = df.getOWLObjectComplementOf(description); // loop through all relevant instances for (OWLIndividual ind : classAndSuperClassInstances) { if (getReasoner().hasType(description, ind)) { icPos.add(ind); } else if (getReasoner().hasType(descriptionNeg, ind)) { icNeg.add(ind); } if (terminationTimeExpired()) { return 0; } } // semantic precision // first compute I_C \cap Cn(DC) // it seems that in our setting, we can ignore Cn, because the examples (class instances) // are already part of the background knowledge Set<OWLIndividual> tmp1Pos = Helper.intersection(icPos, classInstancesSet); Set<OWLIndividual> tmp1Neg = Helper.intersection(icNeg, negatedClassInstances); int tmp1Size = tmp1Pos.size() + tmp1Neg.size(); // Cn(I_C) \cap D_C is the same set if we ignore Cn ... int icSize = icPos.size() + icNeg.size(); double prec = (icSize == 0) ? 0 : tmp1Size / (double) icSize; double rec = tmp1Size / (double) (classInstances.size() + negatedClassInstances.size()); // System.out.println(description); // System.out.println("I_C pos: " + icPos); // System.out.println("I_C neg: " + icNeg); // System.out.println("class instances: " + classInstances); // System.out.println("negated class instances: " + negatedClassInstances); // System.out.println(prec); // System.out.println(rec); // System.out.println(coverageFactor); // too weak: see F-measure above // => does not work for generalised F-measure, because even very general // concepts do not have a recall of 1 // if(((1+Math.sqrt(coverageFactor))*rec)/(Math.sqrt(coverageFactor)+1)<1-noise) { // return -1; // } // we only return too weak if there is no recall if (rec <= 0.0000001) { return -1; } return getFMeasure(rec, prec); } throw new Error("ClassLearningProblem error: not implemented"); }
@Override public ClassScore computeScore(OWLClassExpression description, double noise) { // TODO: reuse code to ensure that we never return inconsistent results // between getAccuracy, getAccuracyOrTooWeak and computeScore Set<OWLIndividual> additionalInstances = new TreeSet<OWLIndividual>(); Set<OWLIndividual> coveredInstances = new TreeSet<OWLIndividual>(); int additionalInstancesCnt = 0; int coveredInstancesCnt = 0; if (reasoner.getClass().isAssignableFrom(SPARQLReasoner.class)) { // R(C) String query = "SELECT (COUNT(DISTINCT(?s)) AS ?cnt) WHERE {" + "?s a ?sup . ?classToDescribe <http://www.w3.org/2000/01/rdf-schema#subClassOf> ?sup . " + converter.convert("?s", description) + "FILTER NOT EXISTS {?s a ?classToDescribe}}"; ParameterizedSparqlString template = new ParameterizedSparqlString(query); // System.err.println(converter.convert("?s", description)); // template.setIri("cls", description.asOWLClass().toStringID()); template.setIri("classToDescribe", classToDescribe.toStringID()); QueryExecution qe = ((SPARQLReasoner) reasoner) .getQueryExecutionFactory() .createQueryExecution(template.toString()); additionalInstancesCnt = qe.execSelect().next().getLiteral("cnt").getInt(); // R(A) OWLObjectIntersectionOf ce = df.getOWLObjectIntersectionOf(classToDescribe, description); coveredInstancesCnt = ((SPARQLReasoner) reasoner).getPopularityOf(ce); } else { // overhang for (OWLIndividual ind : superClassInstances) { if (getReasoner().hasType(description, ind)) { additionalInstances.add(ind); } } // coverage for (OWLIndividual ind : classInstances) { if (getReasoner().hasType(description, ind)) { coveredInstances.add(ind); } } additionalInstancesCnt = additionalInstances.size(); coveredInstancesCnt = coveredInstances.size(); } double recall = coveredInstancesCnt / (double) classInstances.size(); double precision = (additionalInstancesCnt + coveredInstancesCnt == 0) ? 0 : coveredInstancesCnt / (double) (coveredInstancesCnt + additionalInstancesCnt); // for each OWLClassExpression with less than 100% coverage, we check whether it is // leads to an inconsistent knowledge base double acc = 0; if (heuristic.equals(HeuristicType.FMEASURE)) { acc = Heuristics.getFScore(recall, precision, coverageFactor); } else if (heuristic.equals(HeuristicType.AMEASURE)) { acc = Heuristics.getAScore(recall, precision, coverageFactor); } else { // TODO: some superfluous instance checks are required to compute accuracy => // move accuracy computation here if possible acc = getAccuracyOrTooWeakExact(description, noise); } if (checkConsistency) { // we check whether the axiom already follows from the knowledge base // boolean followsFromKB = reasoner.isSuperClassOf(description, classToDescribe); // boolean followsFromKB = equivalence ? reasoner.isEquivalentClass(description, // classToDescribe) : reasoner.isSuperClassOf(description, classToDescribe); boolean followsFromKB = followsFromKB(description); // workaround due to a bug (see // http://sourceforge.net/tracker/?func=detail&aid=2866610&group_id=203619&atid=986319) // boolean isConsistent = coverage >= 0.999999 || isConsistent(description); // (if the axiom follows, then the knowledge base remains consistent) boolean isConsistent = followsFromKB || isConsistent(description); // double acc = useFMeasure ? getFMeasure(coverage, protusion) : getAccuracy(coverage, // protusion); return new ClassScore( coveredInstances, Helper.difference(classInstancesSet, coveredInstances), recall, additionalInstances, precision, acc, isConsistent, followsFromKB); } else { return new ClassScore( coveredInstances, Helper.difference(classInstancesSet, coveredInstances), recall, additionalInstances, precision, acc); } }