Ejemplo n.º 1
0
  // exact computation for 5 heuristics; each one adapted to super class learning;
  // each one takes the noise parameter into account
  public double getAccuracyOrTooWeakExact(OWLClassExpression description, double noise) {
    // System.out.println(description);
    nanoStartTime = System.nanoTime();

    if (heuristic.equals(HeuristicType.JACCARD)) {

      // computing R(A)
      TreeSet<OWLIndividual> coveredInstancesSet = new TreeSet<OWLIndividual>();
      for (OWLIndividual ind : classInstances) {
        if (getReasoner().hasType(description, ind)) {
          coveredInstancesSet.add(ind);
        }
        if (terminationTimeExpired()) {
          return 0;
        }
      }

      // if even the optimal case (no additional instances covered) is not sufficient,
      // the concept is too weak
      if (coveredInstancesSet.size() / (double) classInstances.size() <= 1 - noise) {
        return -1;
      }

      // computing R(C) restricted to relevant instances
      TreeSet<OWLIndividual> additionalInstancesSet = new TreeSet<OWLIndividual>();
      for (OWLIndividual ind : superClassInstances) {
        if (getReasoner().hasType(description, ind)) {
          additionalInstancesSet.add(ind);
        }
        if (terminationTimeExpired()) {
          return 0;
        }
      }

      Set<OWLIndividual> union = Helper.union(classInstancesSet, additionalInstancesSet);
      return Heuristics.getJaccardCoefficient(coveredInstancesSet.size(), union.size());

    } else if (heuristic.equals(HeuristicType.AMEASURE)
        || heuristic.equals(HeuristicType.FMEASURE)
        || heuristic.equals(HeuristicType.PRED_ACC)) {

      int additionalInstances = 0;
      int coveredInstances = 0;

      if (reasoner.getClass().isAssignableFrom(SPARQLReasoner.class)) {
        // R(C)
        String query =
            "SELECT (COUNT(DISTINCT(?s)) AS ?cnt) WHERE {"
                + "?s a ?sup . ?classToDescribe <http://www.w3.org/2000/01/rdf-schema#subClassOf> ?sup . "
                + converter.convert("?s", description)
                + "FILTER NOT EXISTS {?s a ?classToDescribe}}";
        ParameterizedSparqlString template = new ParameterizedSparqlString(query);
        // System.err.println(converter.convert("?s", description));
        // template.setIri("cls", description.asOWLClass().toStringID());
        template.setIri("classToDescribe", classToDescribe.toStringID());

        QueryExecution qe =
            ((SPARQLReasoner) reasoner)
                .getQueryExecutionFactory()
                .createQueryExecution(template.toString());
        additionalInstances = qe.execSelect().next().getLiteral("cnt").getInt();

        // R(A)
        OWLObjectIntersectionOf ce = df.getOWLObjectIntersectionOf(classToDescribe, description);
        coveredInstances = ((SPARQLReasoner) reasoner).getPopularityOf(ce);

        // System.out.println(coveredInstances);
        // System.out.println(additionalInstances);
      } else {
        // computing R(C) restricted to relevant instances
        if (useInstanceChecks) {
          for (OWLIndividual ind : superClassInstances) {
            if (getReasoner().hasType(description, ind)) {
              additionalInstances++;
            }
            if (terminationTimeExpired()) {
              return 0;
            }
          }
        } else {
          SortedSet<OWLIndividual> individuals = getReasoner().getIndividuals(description);
          individuals.retainAll(superClassInstances);
          additionalInstances = individuals.size();
        }

        // computing R(A)
        if (useInstanceChecks) {
          for (OWLIndividual ind : classInstances) {
            if (getReasoner().hasType(description, ind)) {
              coveredInstances++;
            }
            if (terminationTimeExpired()) {
              return 0;
            }
          }
        } else {
          SortedSet<OWLIndividual> individuals = getReasoner().getIndividuals(description);
          individuals.retainAll(classInstances);
          coveredInstances = individuals.size();
        }
      }

      //			System.out.println(description + ":" + coveredInstances + "/" + classInstances.size());
      double recall = coveredInstances / (double) classInstances.size();

      // noise computation is incorrect
      //			if(recall < 1 - noise) {
      //				return -1;
      //			}

      double precision =
          (additionalInstances + coveredInstances == 0)
              ? 0
              : coveredInstances / (double) (coveredInstances + additionalInstances);

      if (heuristic.equals(HeuristicType.AMEASURE)) {
        // best reachable concept has same recall and precision 1:
        // 1/t+1 * (t*r + 1)
        if ((coverageFactor * recall + 1) / (coverageFactor + 1) < (1 - noise)) {
          return -1;
        } else {
          return Heuristics.getAScore(recall, precision, coverageFactor);
        }
      } else if (heuristic.equals(HeuristicType.FMEASURE)) {
        // best reachable concept has same recall and precision 1:
        if (((1 + Math.sqrt(coverageFactor)) * recall) / (Math.sqrt(coverageFactor) + 1)
            < 1 - noise) {
          return -1;
        } else {
          return Heuristics.getFScore(recall, precision, coverageFactor);
        }
      } else if (heuristic.equals(HeuristicType.PRED_ACC)) {
        if ((coverageFactor * coveredInstances + superClassInstances.size())
                / (coverageFactor * classInstances.size() + superClassInstances.size())
            < 1 - noise) {
          return -1;
        } else {
          // correctly classified divided by all examples
          return (coverageFactor * coveredInstances
                  + superClassInstances.size()
                  - additionalInstances)
              / (coverageFactor * classInstances.size() + superClassInstances.size());
        }
      }

      //			return heuristic.equals(HeuristicType.FMEASURE) ? getFMeasure(recall, precision) :
      // getAccuracy(recall, precision);
    } else if (heuristic.equals(HeuristicType.GEN_FMEASURE)) {

      // implementation is based on:
      // http://sunsite.informatik.rwth-aachen.de/Publications/CEUR-WS/Vol-426/swap2008_submission_14.pdf
      // default negation should be turned off when using fast instance checker

      // compute I_C (negated and non-negated concepts separately)
      TreeSet<OWLIndividual> icPos = new TreeSet<OWLIndividual>();
      TreeSet<OWLIndividual> icNeg = new TreeSet<OWLIndividual>();
      OWLClassExpression descriptionNeg = df.getOWLObjectComplementOf(description);
      // loop through all relevant instances
      for (OWLIndividual ind : classAndSuperClassInstances) {
        if (getReasoner().hasType(description, ind)) {
          icPos.add(ind);
        } else if (getReasoner().hasType(descriptionNeg, ind)) {
          icNeg.add(ind);
        }
        if (terminationTimeExpired()) {
          return 0;
        }
      }

      // semantic precision
      // first compute I_C \cap Cn(DC)
      // it seems that in our setting, we can ignore Cn, because the examples (class instances)
      // are already part of the background knowledge
      Set<OWLIndividual> tmp1Pos = Helper.intersection(icPos, classInstancesSet);
      Set<OWLIndividual> tmp1Neg = Helper.intersection(icNeg, negatedClassInstances);
      int tmp1Size = tmp1Pos.size() + tmp1Neg.size();

      // Cn(I_C) \cap D_C is the same set if we ignore Cn ...

      int icSize = icPos.size() + icNeg.size();
      double prec = (icSize == 0) ? 0 : tmp1Size / (double) icSize;
      double rec = tmp1Size / (double) (classInstances.size() + negatedClassInstances.size());

      //			System.out.println(description);

      //			System.out.println("I_C pos: " + icPos);
      //			System.out.println("I_C neg: " + icNeg);
      //			System.out.println("class instances: " + classInstances);
      //			System.out.println("negated class instances: " + negatedClassInstances);

      //			System.out.println(prec);
      //			System.out.println(rec);
      //			System.out.println(coverageFactor);

      // too weak: see F-measure above
      // => does not work for generalised F-measure, because even very general
      // concepts do not have a recall of 1
      //			if(((1+Math.sqrt(coverageFactor))*rec)/(Math.sqrt(coverageFactor)+1)<1-noise) {
      //				return -1;
      //			}
      // we only return too weak if there is no recall
      if (rec <= 0.0000001) {
        return -1;
      }

      return getFMeasure(rec, prec);
    }

    throw new Error("ClassLearningProblem error: not implemented");
  }
Ejemplo n.º 2
0
  @Override
  public ClassScore computeScore(OWLClassExpression description, double noise) {

    // TODO: reuse code to ensure that we never return inconsistent results
    // between getAccuracy, getAccuracyOrTooWeak and computeScore
    Set<OWLIndividual> additionalInstances = new TreeSet<OWLIndividual>();
    Set<OWLIndividual> coveredInstances = new TreeSet<OWLIndividual>();

    int additionalInstancesCnt = 0;
    int coveredInstancesCnt = 0;

    if (reasoner.getClass().isAssignableFrom(SPARQLReasoner.class)) {
      // R(C)
      String query =
          "SELECT (COUNT(DISTINCT(?s)) AS ?cnt) WHERE {"
              + "?s a ?sup . ?classToDescribe <http://www.w3.org/2000/01/rdf-schema#subClassOf> ?sup . "
              + converter.convert("?s", description)
              + "FILTER NOT EXISTS {?s a ?classToDescribe}}";
      ParameterizedSparqlString template = new ParameterizedSparqlString(query);
      // System.err.println(converter.convert("?s", description));
      // template.setIri("cls", description.asOWLClass().toStringID());
      template.setIri("classToDescribe", classToDescribe.toStringID());

      QueryExecution qe =
          ((SPARQLReasoner) reasoner)
              .getQueryExecutionFactory()
              .createQueryExecution(template.toString());
      additionalInstancesCnt = qe.execSelect().next().getLiteral("cnt").getInt();

      // R(A)
      OWLObjectIntersectionOf ce = df.getOWLObjectIntersectionOf(classToDescribe, description);
      coveredInstancesCnt = ((SPARQLReasoner) reasoner).getPopularityOf(ce);
    } else {
      // overhang
      for (OWLIndividual ind : superClassInstances) {
        if (getReasoner().hasType(description, ind)) {
          additionalInstances.add(ind);
        }
      }

      // coverage
      for (OWLIndividual ind : classInstances) {
        if (getReasoner().hasType(description, ind)) {
          coveredInstances.add(ind);
        }
      }
      additionalInstancesCnt = additionalInstances.size();
      coveredInstancesCnt = coveredInstances.size();
    }

    double recall = coveredInstancesCnt / (double) classInstances.size();
    double precision =
        (additionalInstancesCnt + coveredInstancesCnt == 0)
            ? 0
            : coveredInstancesCnt / (double) (coveredInstancesCnt + additionalInstancesCnt);
    // for each OWLClassExpression with less than 100% coverage, we check whether it is
    // leads to an inconsistent knowledge base

    double acc = 0;
    if (heuristic.equals(HeuristicType.FMEASURE)) {
      acc = Heuristics.getFScore(recall, precision, coverageFactor);
    } else if (heuristic.equals(HeuristicType.AMEASURE)) {
      acc = Heuristics.getAScore(recall, precision, coverageFactor);
    } else {
      // TODO: some superfluous instance checks are required to compute accuracy =>
      // move accuracy computation here if possible
      acc = getAccuracyOrTooWeakExact(description, noise);
    }

    if (checkConsistency) {

      // we check whether the axiom already follows from the knowledge base
      //			boolean followsFromKB = reasoner.isSuperClassOf(description, classToDescribe);

      //			boolean followsFromKB = equivalence ? reasoner.isEquivalentClass(description,
      // classToDescribe) : reasoner.isSuperClassOf(description, classToDescribe);
      boolean followsFromKB = followsFromKB(description);

      // workaround due to a bug (see
      // http://sourceforge.net/tracker/?func=detail&aid=2866610&group_id=203619&atid=986319)
      //			boolean isConsistent = coverage >= 0.999999 || isConsistent(description);
      // (if the axiom follows, then the knowledge base remains consistent)
      boolean isConsistent = followsFromKB || isConsistent(description);

      //			double acc = useFMeasure ? getFMeasure(coverage, protusion) : getAccuracy(coverage,
      // protusion);
      return new ClassScore(
          coveredInstances,
          Helper.difference(classInstancesSet, coveredInstances),
          recall,
          additionalInstances,
          precision,
          acc,
          isConsistent,
          followsFromKB);

    } else {
      return new ClassScore(
          coveredInstances,
          Helper.difference(classInstancesSet, coveredInstances),
          recall,
          additionalInstances,
          precision,
          acc);
    }
  }