/**
 * Returns one minus the Jaccard distance of two individual sets, i.e. the share of the union
 * that is also contained in the intersection.
 *
 * @param set1 first set of individuals
 * @param set2 second set of individuals
 * @return a value between 0 (no common element) and 1 (same elements); 1 if the union of both
 *     sets is empty
 */
@SuppressWarnings("unused")
 private double getInverseJaccardDistance(
     TreeSet<OWLIndividual> set1, TreeSet<OWLIndividual> set2) {
   Set<OWLIndividual> intersection = Helper.intersection(set1, set2);
   Set<OWLIndividual> union = Helper.union(set1, set2);
   if (union.isEmpty()) {
     // Without this guard the division below is 0 / 0.0 and the method returns NaN.
     // Two empty sets are treated as identical (distance 0), hence the inverse is 1.
     return 1;
   }
   return 1 - (union.size() - intersection.size()) / (double) union.size();
 }
示例#2
0
  /**
   * Exact score computation for five heuristics (JACCARD, AMEASURE, FMEASURE, PRED_ACC and
   * GEN_FMEASURE), each one adapted to super class learning.
   *
   * @param description the class description to score
   * @param noise noise level; the JACCARD, AMEASURE, FMEASURE and PRED_ACC branches compare
   *     their best reachable value against {@code 1 - noise} to decide whether the description
   *     is too weak
   * @return {@code -1} if the description is too weak, {@code 0} if the termination time expired
   *     while instances were being checked, otherwise the value of the configured heuristic
   * @throws Error if the configured heuristic is none of the five handled here
   */
  public double getAccuracyOrTooWeakExact(Description description, double noise) {

    nanoStartTime = System.nanoTime();

    if (heuristic.equals(HeuristicType.JACCARD)) {

      // R(A): the class instances covered by the description
      TreeSet<Individual> hits = new TreeSet<Individual>();
      for (Individual candidate : classInstances) {
        boolean covered = getReasoner().hasType(description, candidate);
        if (covered) {
          hits.add(candidate);
        }
        if (terminationTimeExpired()) {
          return 0;
        }
      }

      // optimal case: no additional instance is covered; if even that ratio does not
      // exceed the threshold, the description is too weak
      double bestCase = hits.size() / (double) classInstances.size();
      if (bestCase <= 1 - noise) {
        return -1;
      }

      // R(C), restricted to the relevant instances: covered super class instances
      TreeSet<Individual> extras = new TreeSet<Individual>();
      for (Individual candidate : superClassInstances) {
        boolean covered = getReasoner().hasType(description, candidate);
        if (covered) {
          extras.add(candidate);
        }
        if (terminationTimeExpired()) {
          return 0;
        }
      }

      int unionSize = Helper.union(classInstancesSet, extras).size();
      return Heuristics.getJaccardCoefficient(hits.size(), unionSize);

    } else if (heuristic.equals(HeuristicType.AMEASURE)
        || heuristic.equals(HeuristicType.FMEASURE)
        || heuristic.equals(HeuristicType.PRED_ACC)) {

      // R(C), restricted to the relevant instances: count covered super class instances
      int extraCount = 0;
      for (Individual candidate : superClassInstances) {
        if (getReasoner().hasType(description, candidate)) {
          extraCount += 1;
        }
        if (terminationTimeExpired()) {
          return 0;
        }
      }

      // R(A): count covered class instances
      int hitCount = 0;
      for (Individual candidate : classInstances) {
        if (getReasoner().hasType(description, candidate)) {
          hitCount += 1;
        }
        if (terminationTimeExpired()) {
          return 0;
        }
      }

      double recall = hitCount / (double) classInstances.size();

      // A plain "recall < 1 - noise" cut-off is deliberately not applied here; the original
      // author marked that noise computation as incorrect.

      double precision;
      if (extraCount + hitCount == 0) {
        precision = 0;
      } else {
        precision = hitCount / (double) (hitCount + extraCount);
      }

      if (heuristic.equals(HeuristicType.AMEASURE)) {
        // best reachable concept has the same recall and precision 1: 1/(t+1) * (t*r + 1)
        double bestReachable = (coverageFactor * recall + 1) / (double) (coverageFactor + 1);
        if (bestReachable < (1 - noise)) {
          return -1;
        }
        return Heuristics.getAScore(recall, precision, coverageFactor);
      }

      if (heuristic.equals(HeuristicType.FMEASURE)) {
        // best reachable concept has the same recall and precision 1
        double bestReachable =
            ((1 + Math.sqrt(coverageFactor)) * recall) / (Math.sqrt(coverageFactor) + 1);
        if (bestReachable < 1 - noise) {
          return -1;
        }
        return getFMeasure(recall, precision);
      }

      if (heuristic.equals(HeuristicType.PRED_ACC)) {
        // weighted number of all examples; shared by the bound and by the final score
        double weightedTotal =
            (double) (coverageFactor * classInstances.size() + superClassInstances.size());
        if ((coverageFactor * hitCount + superClassInstances.size()) / weightedTotal
            < 1 - noise) {
          return -1;
        }
        // correctly classified examples divided by all examples
        return (coverageFactor * hitCount + superClassInstances.size() - extraCount)
            / weightedTotal;
      }

    } else if (heuristic.equals(HeuristicType.GEN_FMEASURE)) {

      // implementation is based on:
      // http://sunsite.informatik.rwth-aachen.de/Publications/CEUR-WS/Vol-426/swap2008_submission_14.pdf
      // default negation should be turned off when using fast instance checker

      // I_C, collected separately for the description and for its negation
      TreeSet<Individual> positives = new TreeSet<Individual>();
      TreeSet<Individual> negatives = new TreeSet<Individual>();
      Description negated = new Negation(description);
      for (Individual candidate : classAndSuperClassInstances) {
        if (getReasoner().hasType(description, candidate)) {
          positives.add(candidate);
        } else if (getReasoner().hasType(negated, candidate)) {
          negatives.add(candidate);
        }
        if (terminationTimeExpired()) {
          return 0;
        }
      }

      // semantic precision: I_C \cap Cn(DC); Cn is ignored because the examples (class
      // instances) are already part of the background knowledge, and Cn(I_C) \cap D_C is
      // then the same set
      Set<Individual> agreeingPos = Helper.intersection(positives, classInstancesSet);
      Set<Individual> agreeingNeg = Helper.intersection(negatives, negatedClassInstances);
      int agreeing = agreeingPos.size() + agreeingNeg.size();

      int retrieved = positives.size() + negatives.size();
      double prec;
      if (retrieved == 0) {
        prec = 0;
      } else {
        prec = agreeing / (double) retrieved;
      }
      double rec = agreeing / (double) (classInstances.size() + negatedClassInstances.size());

      // The F-measure style bound is not used for the generalised F-measure, because even
      // very general concepts do not have a recall of 1; too weak is only reported when
      // there is no recall at all.
      if (rec <= 0.0000001) {
        return -1;
      }

      return getFMeasure(rec, prec);
    }

    throw new Error("ClassLearningProblem error: not implemented");
  }
  /**
   * Exact score computation for five heuristics (JACCARD, AMEASURE, FMEASURE, PRED_ACC and
   * GEN_FMEASURE), each one adapted to super class learning.
   *
   * <p>For AMEASURE, FMEASURE and PRED_ACC the instance counts are obtained from SPARQL queries
   * when the reasoner is a {@code SPARQLReasoner}; otherwise they come from per-individual
   * instance checks ({@code useInstanceChecks}) or from a single instance retrieval.
   *
   * @param description the class expression to score
   * @param noise noise level; the JACCARD, AMEASURE, FMEASURE and PRED_ACC branches compare
   *     their best reachable value against {@code 1 - noise} to decide whether the expression
   *     is too weak
   * @return {@code -1} if the expression is too weak, {@code 0} if the termination time expired
   *     while instances were being checked, otherwise the value of the configured heuristic
   * @throws Error if the configured heuristic is none of the five handled here
   */
  public double getAccuracyOrTooWeakExact(OWLClassExpression description, double noise) {
    nanoStartTime = System.nanoTime();

    if (heuristic.equals(HeuristicType.JACCARD)) {

      // computing R(A)
      TreeSet<OWLIndividual> coveredInstancesSet = new TreeSet<OWLIndividual>();
      for (OWLIndividual ind : classInstances) {
        if (getReasoner().hasType(description, ind)) {
          coveredInstancesSet.add(ind);
        }
        if (terminationTimeExpired()) {
          return 0;
        }
      }

      // if even the optimal case (no additional instances covered) is not sufficient,
      // the concept is too weak
      // NOTE(review): this bound uses "<=" while the other heuristics below use "<" - confirm
      // that rejecting a ratio of exactly 1 - noise is intended.
      if (coveredInstancesSet.size() / (double) classInstances.size() <= 1 - noise) {
        return -1;
      }

      // computing R(C) restricted to relevant instances
      TreeSet<OWLIndividual> additionalInstancesSet = new TreeSet<OWLIndividual>();
      for (OWLIndividual ind : superClassInstances) {
        if (getReasoner().hasType(description, ind)) {
          additionalInstancesSet.add(ind);
        }
        if (terminationTimeExpired()) {
          return 0;
        }
      }

      Set<OWLIndividual> union = Helper.union(classInstancesSet, additionalInstancesSet);
      return Heuristics.getJaccardCoefficient(coveredInstancesSet.size(), union.size());

    } else if (heuristic.equals(HeuristicType.AMEASURE)
        || heuristic.equals(HeuristicType.FMEASURE)
        || heuristic.equals(HeuristicType.PRED_ACC)) {

      int additionalInstances = 0;
      int coveredInstances = 0;

      // instanceof (rather than getClass().isAssignableFrom(...)) so that subclasses of
      // SPARQLReasoner take this path too and the casts below can never fail
      if (reasoner instanceof SPARQLReasoner) {
        SPARQLReasoner sparqlReasoner = (SPARQLReasoner) reasoner;

        // R(C): instances of a super class of the class to describe that match the
        // description but are not instances of the class to describe itself
        String query =
            "SELECT (COUNT(DISTINCT(?s)) AS ?cnt) WHERE {"
                + "?s a ?sup . ?classToDescribe <http://www.w3.org/2000/01/rdf-schema#subClassOf> ?sup . "
                + converter.convert("?s", description)
                + "FILTER NOT EXISTS {?s a ?classToDescribe}}";
        ParameterizedSparqlString template = new ParameterizedSparqlString(query);
        template.setIri("classToDescribe", classToDescribe.toStringID());

        QueryExecution qe =
            sparqlReasoner.getQueryExecutionFactory().createQueryExecution(template.toString());
        try {
          additionalInstances = qe.execSelect().next().getLiteral("cnt").getInt();
        } finally {
          // release the query execution even if reading the result fails
          qe.close();
        }

        // R(A)
        OWLObjectIntersectionOf ce = df.getOWLObjectIntersectionOf(classToDescribe, description);
        coveredInstances = sparqlReasoner.getPopularityOf(ce);
      } else if (useInstanceChecks) {
        // computing R(C) restricted to relevant instances
        for (OWLIndividual ind : superClassInstances) {
          if (getReasoner().hasType(description, ind)) {
            additionalInstances++;
          }
          if (terminationTimeExpired()) {
            return 0;
          }
        }

        // computing R(A)
        for (OWLIndividual ind : classInstances) {
          if (getReasoner().hasType(description, ind)) {
            coveredInstances++;
          }
          if (terminationTimeExpired()) {
            return 0;
          }
        }
      } else {
        // Retrieve the instances of the description once and count the overlaps without
        // modifying the returned set: it belongs to the reasoner and may be shared, cached
        // or unmodifiable.
        SortedSet<OWLIndividual> individuals = getReasoner().getIndividuals(description);
        for (OWLIndividual ind : individuals) {
          // R(C) restricted to relevant instances
          if (superClassInstances.contains(ind)) {
            additionalInstances++;
          }
          // R(A)
          if (classInstances.contains(ind)) {
            coveredInstances++;
          }
        }
      }

      double recall = coveredInstances / (double) classInstances.size();

      // A plain "recall < 1 - noise" cut-off is deliberately not applied here; the original
      // author marked that noise computation as incorrect.

      double precision =
          (additionalInstances + coveredInstances == 0)
              ? 0
              : coveredInstances / (double) (coveredInstances + additionalInstances);

      if (heuristic.equals(HeuristicType.AMEASURE)) {
        // best reachable concept has same recall and precision 1:
        // 1/(t+1) * (t*r + 1)
        if ((coverageFactor * recall + 1) / (coverageFactor + 1) < (1 - noise)) {
          return -1;
        } else {
          return Heuristics.getAScore(recall, precision, coverageFactor);
        }
      } else if (heuristic.equals(HeuristicType.FMEASURE)) {
        // best reachable concept has same recall and precision 1:
        if (((1 + Math.sqrt(coverageFactor)) * recall) / (Math.sqrt(coverageFactor) + 1)
            < 1 - noise) {
          return -1;
        } else {
          return Heuristics.getFScore(recall, precision, coverageFactor);
        }
      } else if (heuristic.equals(HeuristicType.PRED_ACC)) {
        // explicit double denominator so the quotients can never be truncated by integer
        // division, whatever the declared type of coverageFactor is
        double weightedTotal =
            (double) (coverageFactor * classInstances.size() + superClassInstances.size());
        if ((coverageFactor * coveredInstances + superClassInstances.size()) / weightedTotal
            < 1 - noise) {
          return -1;
        } else {
          // correctly classified divided by all examples
          return (coverageFactor * coveredInstances
                  + superClassInstances.size()
                  - additionalInstances)
              / weightedTotal;
        }
      }

    } else if (heuristic.equals(HeuristicType.GEN_FMEASURE)) {

      // implementation is based on:
      // http://sunsite.informatik.rwth-aachen.de/Publications/CEUR-WS/Vol-426/swap2008_submission_14.pdf
      // default negation should be turned off when using fast instance checker

      // compute I_C (negated and non-negated concepts separately)
      TreeSet<OWLIndividual> icPos = new TreeSet<OWLIndividual>();
      TreeSet<OWLIndividual> icNeg = new TreeSet<OWLIndividual>();
      OWLClassExpression descriptionNeg = df.getOWLObjectComplementOf(description);
      // loop through all relevant instances
      for (OWLIndividual ind : classAndSuperClassInstances) {
        if (getReasoner().hasType(description, ind)) {
          icPos.add(ind);
        } else if (getReasoner().hasType(descriptionNeg, ind)) {
          icNeg.add(ind);
        }
        if (terminationTimeExpired()) {
          return 0;
        }
      }

      // semantic precision
      // first compute I_C \cap Cn(DC)
      // it seems that in our setting, we can ignore Cn, because the examples (class instances)
      // are already part of the background knowledge
      Set<OWLIndividual> tmp1Pos = Helper.intersection(icPos, classInstancesSet);
      Set<OWLIndividual> tmp1Neg = Helper.intersection(icNeg, negatedClassInstances);
      int tmp1Size = tmp1Pos.size() + tmp1Neg.size();

      // Cn(I_C) \cap D_C is the same set if we ignore Cn ...

      int icSize = icPos.size() + icNeg.size();
      double prec = (icSize == 0) ? 0 : tmp1Size / (double) icSize;
      double rec = tmp1Size / (double) (classInstances.size() + negatedClassInstances.size());

      // The F-measure style bound is not used for the generalised F-measure, because even
      // very general concepts do not have a recall of 1; too weak is only reported when
      // there is no recall at all.
      if (rec <= 0.0000001) {
        return -1;
      }

      return getFMeasure(rec, prec);
    }

    throw new Error("ClassLearningProblem error: not implemented");
  }