public void testDefault() throws Exception {
   Heuristics heuristics = RuleBasedHeuristics.getDefaultInstance();
   FileSystemManager fsm = VFS.getManager();
   FileObject root =
       fsm.resolveFile(
               RuleBasedHeuristicsTest.class.getResource("/heuristics/.root").toExternalForm())
           .getParent();
   for (Reason reason : Reason.values()) {
     for (FileObject file : root.resolveFile(reason.toString().toLowerCase()).getChildren()) {
        try (InputStream is = file.getContent().getInputStream()) {
          DeliveryStatus ds = new DeliveryStatus(is);
          assertEquals(
              "Reason for " + file,
              reason,
              heuristics.getReason(ds.getPerRecipientParts()[0].getDiagnostic()));
        }
     }
   }
 }
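The test above drives the rule-based classifier end to end over sample DSN files. As a minimal follow-up sketch (assuming the same RuleBasedHeuristics, DeliveryStatus and Reason API the test uses; the helper name classifyBounce is invented for illustration), a single delivery-status stream could be classified like this:

public static Reason classifyBounce(InputStream deliveryStatusStream) throws Exception {
  // look up the default rule set and parse the delivery-status part
  Heuristics heuristics = RuleBasedHeuristics.getDefaultInstance();
  DeliveryStatus ds = new DeliveryStatus(deliveryStatusStream);
  // classify the diagnostic of the first recipient, exactly as the test does
  return heuristics.getReason(ds.getPerRecipientParts()[0].getDiagnostic());
}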
Example #2
  // exact computation for 5 heuristics; each one adapted to super class learning;
  // each one takes the noise parameter into account
  public double getAccuracyOrTooWeakExact(Description description, double noise) {

    nanoStartTime = System.nanoTime();

    if (heuristic.equals(HeuristicType.JACCARD)) {

      // computing R(A)
      TreeSet<Individual> coveredInstancesSet = new TreeSet<Individual>();
      for (Individual ind : classInstances) {
        if (getReasoner().hasType(description, ind)) {
          coveredInstancesSet.add(ind);
        }
        if (terminationTimeExpired()) {
          return 0;
        }
      }

      // if even the optimal case (no additional instances covered) is not sufficient,
      // the concept is too weak
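      // (e.g. with |A| = 100 class instances and 70 of them covered, noise = 0.2 gives the
      // threshold 1 - noise = 0.8; even a refinement adding no further instances cannot
      // exceed a Jaccard value of 70/100 = 0.7, so the description can be pruned here)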
      if (coveredInstancesSet.size() / (double) classInstances.size() <= 1 - noise) {
        return -1;
      }

      // computing R(C) restricted to relevant instances
      TreeSet<Individual> additionalInstancesSet = new TreeSet<Individual>();
      for (Individual ind : superClassInstances) {
        if (getReasoner().hasType(description, ind)) {
          additionalInstancesSet.add(ind);
        }
        if (terminationTimeExpired()) {
          return 0;
        }
      }

      Set<Individual> union = Helper.union(classInstancesSet, additionalInstancesSet);
      return Heuristics.getJaccardCoefficient(coveredInstancesSet.size(), union.size());

    } else if (heuristic.equals(HeuristicType.AMEASURE)
        || heuristic.equals(HeuristicType.FMEASURE)
        || heuristic.equals(HeuristicType.PRED_ACC)) {

      // computing R(C) restricted to relevant instances
      int additionalInstances = 0;
      for (Individual ind : superClassInstances) {
        if (getReasoner().hasType(description, ind)) {
          additionalInstances++;
        }
        if (terminationTimeExpired()) {
          return 0;
        }
      }

      // computing R(A)
      int coveredInstances = 0;
      for (Individual ind : classInstances) {
        if (getReasoner().hasType(description, ind)) {
          coveredInstances++;
        }
        if (terminationTimeExpired()) {
          return 0;
        }
      }

      double recall = coveredInstances / (double) classInstances.size();

      // noise computation is incorrect
      //			if(recall < 1 - noise) {
      //				return -1;
      //			}

      double precision =
          (additionalInstances + coveredInstances == 0)
              ? 0
              : coveredInstances / (double) (coveredInstances + additionalInstances);

      if (heuristic.equals(HeuristicType.AMEASURE)) {
        // best reachable concept has same recall and precision 1:
        // 1/(t+1) * (t*r + 1)
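        // i.e. with t = coverageFactor, keeping recall = r and pushing precision to 1 gives
        // the largest A-score a downward refinement can reach; if even that optimum is below
        // 1 - noise, the description and all its refinements are too weak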
        if ((coverageFactor * recall + 1) / (double) (coverageFactor + 1) < (1 - noise)) {
          return -1;
        } else {
          return Heuristics.getAScore(recall, precision, coverageFactor);
        }
      } else if (heuristic.equals(HeuristicType.FMEASURE)) {
        // best reachable concept has same recall and precision 1:
        if (((1 + Math.sqrt(coverageFactor)) * recall) / (Math.sqrt(coverageFactor) + 1)
            < 1 - noise) {
          return -1;
        } else {
          return getFMeasure(recall, precision);
        }
      } else if (heuristic.equals(HeuristicType.PRED_ACC)) {
        if ((coverageFactor * coveredInstances + superClassInstances.size())
                / (double) (coverageFactor * classInstances.size() + superClassInstances.size())
            < 1 - noise) {
          return -1;
        } else {
          // correctly classified divided by all examples
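          // coveredInstances are the correctly classified positives (weighted by the coverage
          // factor); superClassInstances.size() - additionalInstances are the relevant
          // negatives that the description correctly leaves out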
          return (coverageFactor * coveredInstances
                  + superClassInstances.size()
                  - additionalInstances)
              / (double) (coverageFactor * classInstances.size() + superClassInstances.size());
        }
      }

      //			return heuristic.equals(HeuristicType.FMEASURE) ? getFMeasure(recall, precision) :
      // getAccuracy(recall, precision);
    } else if (heuristic.equals(HeuristicType.GEN_FMEASURE)) {

      // implementation is based on:
      // http://sunsite.informatik.rwth-aachen.de/Publications/CEUR-WS/Vol-426/swap2008_submission_14.pdf
      // default negation should be turned off when using fast instance checker

      // compute I_C (negated and non-negated concepts separately)
      TreeSet<Individual> icPos = new TreeSet<Individual>();
      TreeSet<Individual> icNeg = new TreeSet<Individual>();
      Description descriptionNeg = new Negation(description);
      // loop through all relevant instances
      for (Individual ind : classAndSuperClassInstances) {
        if (getReasoner().hasType(description, ind)) {
          icPos.add(ind);
        } else if (getReasoner().hasType(descriptionNeg, ind)) {
          icNeg.add(ind);
        }
        if (terminationTimeExpired()) {
          return 0;
        }
      }

      // semantic precision
      // first compute I_C \cap Cn(DC)
      // it seems that in our setting, we can ignore Cn, because the examples (class instances)
      // are already part of the background knowledge
      Set<Individual> tmp1Pos = Helper.intersection(icPos, classInstancesSet);
      Set<Individual> tmp1Neg = Helper.intersection(icNeg, negatedClassInstances);
      int tmp1Size = tmp1Pos.size() + tmp1Neg.size();

      // Cn(I_C) \cap D_C is the same set if we ignore Cn ...

      int icSize = icPos.size() + icNeg.size();
      double prec = (icSize == 0) ? 0 : tmp1Size / (double) icSize;
      double rec = tmp1Size / (double) (classInstances.size() + negatedClassInstances.size());
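      // i.e. prec = |I_C ∩ D_C| / |I_C| and rec = |I_C ∩ D_C| / |D_C|, where D_C consists of
      // the class instances together with the negated class instances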

      //			System.out.println(description);

      //			System.out.println("I_C pos: " + icPos);
      //			System.out.println("I_C neg: " + icNeg);
      //			System.out.println("class instances: " + classInstances);
      //			System.out.println("negated class instances: " + negatedClassInstances);

      //			System.out.println(prec);
      //			System.out.println(rec);
      //			System.out.println(coverageFactor);

      // too weak: see F-measure above
      // => does not work for generalised F-measure, because even very general
      // concepts do not have a recall of 1
      //			if(((1+Math.sqrt(coverageFactor))*rec)/(Math.sqrt(coverageFactor)+1)<1-noise) {
      //				return -1;
      //			}
      // we only return too weak if there is no recall
      if (rec <= 0.0000001) {
        return -1;
      }

      return getFMeasure(rec, prec);
    }

    throw new Error("ClassLearningProblem error: not implemented");
  }
Example #3
  // instead of using the standard operation, we use optimisation
  // and approximation here
  public double getAccuracyOrTooWeakApprox(Description description, double noise) {
    if (heuristic.equals(HeuristicType.FMEASURE)) {
      // we abort when there are too many uncovered positives
      int maxNotCovered = (int) Math.ceil(noise * classInstances.size());
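      // (e.g. 200 class instances and noise = 0.1 allow at most ceil(0.1 * 200) = 20
      // uncovered positives before the description is rejected as too weak)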
      int instancesCovered = 0;
      int instancesNotCovered = 0;

      for (Individual ind : classInstances) {
        if (getReasoner().hasType(description, ind)) {
          instancesCovered++;
        } else {
          instancesNotCovered++;
          if (instancesNotCovered > maxNotCovered) {
            return -1;
          }
        }
      }

      double recall = instancesCovered / (double) classInstances.size();

      int testsPerformed = 0;
      int instancesDescription = 0;

      for (Individual ind : superClassInstances) {

        if (getReasoner().hasType(description, ind)) {
          instancesDescription++;
        }
        testsPerformed++;

        // check whether approximation is sufficiently accurate
        double[] approx =
            Heuristics.getFScoreApproximation(
                instancesCovered,
                recall,
                coverageFactor,
                superClassInstances.size(),
                testsPerformed,
                instancesDescription);
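        // approx[0] holds the current F-score estimate and approx[1] how far off that
        // estimate may still be; once this uncertainty drops below approxDelta we stop
        // testing and return the estimate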
        if (approx[1] < approxDelta) {
          return approx[0];
        }
      }

      // standard computation (no approximation)
      double precision =
          (instancesCovered + instancesDescription == 0)
              ? 0
              : instancesCovered / (double) (instancesDescription + instancesCovered);
      return Heuristics.getFScore(recall, precision, coverageFactor);

    } else if (heuristic.equals(HeuristicType.AMEASURE)) {
      // the F-MEASURE implementation is now separate (different optimisation
      // strategy)

      // we abort when there are too many uncovered positives
      int maxNotCovered = (int) Math.ceil(noise * classInstances.size());
      int instancesCovered = 0;
      int instancesNotCovered = 0;
      int total = 0;
      boolean estimatedA = false;

      double lowerBorderA = 0;
      int lowerEstimateA = 0;
      double upperBorderA = 1;
      int upperEstimateA = classInstances.size();

      for (Individual ind : classInstances) {
        if (getReasoner().hasType(description, ind)) {
          instancesCovered++;
        } else {
          instancesNotCovered++;
          if (instancesNotCovered > maxNotCovered) {
            return -1;
          }
        }

        // approximation step (starting after 10 tests)
        total = instancesCovered + instancesNotCovered;
        if (total > 10) {
          // compute confidence interval
          double p1 = p1(instancesCovered, total);
          double p2 = p3(p1, total);
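          // p1(..) estimates the fraction of covered class instances from the sample seen so
          // far and p3(..) the half-width of its confidence interval, so the true recall is
          // assumed to lie within [p1 - p2, p1 + p2]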
          lowerBorderA = Math.max(0, p1 - p2);
          upperBorderA = Math.min(1, p1 + p2);
          double size = upperBorderA - lowerBorderA;
          // if the interval is smaller than 2 * approxDelta (e.g. 10% for approxDelta = 0.05),
          // we can be confident
          if (size < 2 * approxDelta) {
            // we have to distinguish the cases that the accuracy limit is
            // below, within, or above the limit and that the mean is below
            // or above the limit
            double mean = instancesCovered / (double) total;

            // we can estimate the best possible concept reachable by downward refinement
            // by setting precision to 1 while the recall stays at the observed mean
            double optimumEstimate =
                heuristic.equals(HeuristicType.FMEASURE)
                    ? ((1 + Math.sqrt(coverageFactor)) * mean) / (Math.sqrt(coverageFactor) + 1)
                    : (coverageFactor * mean + 1) / (double) (coverageFactor + 1);

            // if the mean is greater than the required minimum, we can accept;
            // we also accept if the interval is small and close to the minimum
            // (worst case is to accept a few inaccurate descriptions)
            if (optimumEstimate > 1 - noise - 0.03) {
              //								|| (upperBorderA > mean && size < 0.03)) {
              instancesCovered = (int) (instancesCovered / (double) total * classInstances.size());
              upperEstimateA = (int) (upperBorderA * classInstances.size());
              lowerEstimateA = (int) (lowerBorderA * classInstances.size());
              estimatedA = true;
              break;
            }

            // reject only if the upper border is far away (we are very
            // certain not to lose a potential solution)
            //						if(upperBorderA + 0.1 < 1-noise) {
            double optimumEstimateUpperBorder =
                heuristic.equals(HeuristicType.FMEASURE)
                    ? ((1 + Math.sqrt(coverageFactor)) * (upperBorderA + 0.1))
                        / (Math.sqrt(coverageFactor) + 1)
                    : (coverageFactor * (upperBorderA + 0.1) + 1) / (double) (coverageFactor + 1);
            if (optimumEstimateUpperBorder < 1 - noise) {
              return -1;
            }
          }
        }
      }

      double recall = instancesCovered / (double) classInstances.size();

      //			MonitorFactory.add("estimatedA","count", estimatedA ? 1 : 0);
      //			MonitorFactory.add("aInstances","count", total);

      // we know that a definition candidate is always subclass of the
      // intersection of all super classes, so we test only the relevant instances
      // (leads to undesired effects for descriptions not following this rule,
      // but improves performance a lot);
      // for learning a superclass of a defined class, similar observations apply;

      int testsPerformed = 0;
      int instancesDescription = 0;
      //			boolean estimatedB = false;

      for (Individual ind : superClassInstances) {

        if (getReasoner().hasType(description, ind)) {
          instancesDescription++;
        }

        testsPerformed++;

        if (testsPerformed > 10) {

          // compute confidence interval
          double p1 = p1(instancesDescription, testsPerformed);
          double p2 = p3(p1, testsPerformed);
          double lowerBorder = Math.max(0, p1 - p2);
          double upperBorder = Math.min(1, p1 + p2);
          int lowerEstimate = (int) (lowerBorder * superClassInstances.size());
          int upperEstimate = (int) (upperBorder * superClassInstances.size());

          double size;
          if (estimatedA) {
            //						size = 1/(coverageFactor+1) * (coverageFactor * (upperBorderA-lowerBorderA) +
            // Math.sqrt(upperEstimateA/(upperEstimateA+lowerEstimate)) +
            // Math.sqrt(lowerEstimateA/(lowerEstimateA+upperEstimate)));
            size =
                heuristic.equals(HeuristicType.FMEASURE)
                    ? getFMeasure(
                            upperBorderA,
                            upperEstimateA / (double) (upperEstimateA + lowerEstimate))
                        - getFMeasure(
                            lowerBorderA,
                            lowerEstimateA / (double) (lowerEstimateA + upperEstimate))
                    : Heuristics.getAScore(
                            upperBorderA,
                            upperEstimateA / (double) (upperEstimateA + lowerEstimate),
                            coverageFactor)
                        - Heuristics.getAScore(
                            lowerBorderA,
                            lowerEstimateA / (double) (lowerEstimateA + upperEstimate),
                            coverageFactor);
          } else {
            //						size = 1/(coverageFactor+1) * (coverageFactor * coverage +
            // Math.sqrt(instancesCovered/(instancesCovered+lowerEstimate)) +
            // Math.sqrt(instancesCovered/(instancesCovered+upperEstimate)));
            size =
                heuristic.equals(HeuristicType.FMEASURE)
                    ? getFMeasure(
                            recall, instancesCovered / (double) (instancesCovered + lowerEstimate))
                        - getFMeasure(
                            recall, instancesCovered / (double) (instancesCovered + upperEstimate))
                    : Heuristics.getAScore(
                            recall,
                            instancesCovered / (double) (instancesCovered + lowerEstimate),
                            coverageFactor)
                        - Heuristics.getAScore(
                            recall,
                            instancesCovered / (double) (instancesCovered + upperEstimate),
                            coverageFactor);
          }

          if (size < 0.1) {
            //						System.out.println(instancesDescription + " of " + testsPerformed);
            //						System.out.println("interval from " + lowerEstimate + " to " + upperEstimate);
            //						System.out.println("size: " + size);

            //						estimatedB = true;
            // calculate total number of instances
            instancesDescription =
                (int) (instancesDescription / (double) testsPerformed * superClassInstances.size());
            break;
          }
        }
      }

      // since we measured/estimated accuracy only on instances outside A (superClassInstances
      // does not include instances of A), we need to add it in the denominator
      double precision = instancesCovered / (double) (instancesDescription + instancesCovered);
      if (instancesCovered + instancesDescription == 0) {
        precision = 0;
      }

      return heuristic.equals(HeuristicType.FMEASURE)
          ? getFMeasure(recall, precision)
          : Heuristics.getAScore(recall, precision, coverageFactor);

    } else if (heuristic.equals(HeuristicType.PRED_ACC)) {
      // predictive accuracy; the F-measure case is already handled in the first branch above
      int maxNotCovered = (int) Math.ceil(noise * classInstances.size());

      int notCoveredPos = 0;
      //			int notCoveredNeg = 0;

      int posClassifiedAsPos = 0;
      int negClassifiedAsNeg = 0;

      int nrOfPosChecks = 0;
      int nrOfNegChecks = 0;

      // special case: we test positive and negative examples in turn
      Iterator<Individual> itPos = classInstances.iterator();
      Iterator<Individual> itNeg = superClassInstances.iterator();

      do {
        // in each loop we pick 0 or 1 positives and 0 or 1 negative
        // and classify it

        if (itPos.hasNext()) {
          Individual posExample = itPos.next();
          //					System.out.println(posExample);

          if (getReasoner().hasType(description, posExample)) {
            posClassifiedAsPos++;
          } else {
            notCoveredPos++;
          }
          nrOfPosChecks++;

          // take noise into account
          if (notCoveredPos > maxNotCovered) {
            return -1;
          }
        }

        if (itNeg.hasNext()) {
          Individual negExample = itNeg.next();
          if (!getReasoner().hasType(description, negExample)) {
            negClassifiedAsNeg++;
          }
          nrOfNegChecks++;
        }

        // compute how accurate our current approximation is and return if it is sufficiently
        // accurate
        double approx[] =
            Heuristics.getPredAccApproximation(
                classInstances.size(),
                superClassInstances.size(),
                1,
                nrOfPosChecks,
                posClassifiedAsPos,
                nrOfNegChecks,
                negClassifiedAsNeg);
        if (approx[1] < approxDelta) {
          //					System.out.println(approx[0]);
          return approx[0];
        }

      } while (itPos.hasNext() || itNeg.hasNext());

      double ret =
          Heuristics.getPredictiveAccuracy(
              classInstances.size(),
              superClassInstances.size(),
              posClassifiedAsPos,
              negClassifiedAsNeg,
              1);
      return ret;
    } else {
      throw new Error("Approximation for " + heuristic + " not implemented.");
    }
  }
Example #4
  @Override
  public ClassScore computeScore(Description description) {

    // TODO: reuse code to ensure that we never return inconsistent results
    // between getAccuracy, getAccuracyOrTooWeak and computeScore

    // overhang
    Set<Individual> additionalInstances = new TreeSet<Individual>();
    for (Individual ind : superClassInstances) {
      if (getReasoner().hasType(description, ind)) {
        additionalInstances.add(ind);
      }
    }

    // coverage
    Set<Individual> coveredInstances = new TreeSet<Individual>();
    for (Individual ind : classInstances) {
      if (getReasoner().hasType(description, ind)) {
        coveredInstances.add(ind);
      }
    }

    double recall = coveredInstances.size() / (double) classInstances.size();
    double precision =
        (additionalInstances.size() + coveredInstances.size() == 0)
            ? 0
            : coveredInstances.size()
                / (double) (coveredInstances.size() + additionalInstances.size());
    // for each description with less than 100% coverage, we check whether it
    // leads to an inconsistent knowledge base

    double acc = 0;
    if (heuristic.equals(HeuristicType.FMEASURE)) {
      acc = getFMeasure(recall, precision);
    } else if (heuristic.equals(HeuristicType.AMEASURE)) {
      acc = Heuristics.getAScore(recall, precision, coverageFactor);
    } else {
      // TODO: some superfluous instance checks are required to compute accuracy =>
      // move accuracy computation here if possible
      acc = getAccuracyOrTooWeakExact(description, 1);
    }

    if (checkConsistency) {

      // we check whether the axiom already follows from the knowledge base
      //			boolean followsFromKB = reasoner.isSuperClassOf(description, classToDescribe);

      //			boolean followsFromKB = equivalence ? reasoner.isEquivalentClass(description,
      // classToDescribe) : reasoner.isSuperClassOf(description, classToDescribe);
      boolean followsFromKB = followsFromKB(description);

      // workaround due to a bug (see
      // http://sourceforge.net/tracker/?func=detail&aid=2866610&group_id=203619&atid=986319)
      //			boolean isConsistent = coverage >= 0.999999 || isConsistent(description);
      // (if the axiom follows, then the knowledge base remains consistent)
      boolean isConsistent = followsFromKB || isConsistent(description);

      //			double acc = useFMeasure ? getFMeasure(coverage, protusion) : getAccuracy(coverage,
      // protusion);
      return new ClassScore(
          coveredInstances,
          Helper.difference(classInstancesSet, coveredInstances),
          recall,
          additionalInstances,
          precision,
          acc,
          isConsistent,
          followsFromKB);

    } else {
      return new ClassScore(
          coveredInstances,
          Helper.difference(classInstancesSet, coveredInstances),
          recall,
          additionalInstances,
          precision,
          acc);
    }
  }
Example #5
  // exact computation for 5 heuristics; each one adapted to super class learning;
  // each one takes the noise parameter into account
  public double getAccuracyOrTooWeakExact(OWLClassExpression description, double noise) {
    // System.out.println(description);
    nanoStartTime = System.nanoTime();

    if (heuristic.equals(HeuristicType.JACCARD)) {

      // computing R(A)
      TreeSet<OWLIndividual> coveredInstancesSet = new TreeSet<OWLIndividual>();
      for (OWLIndividual ind : classInstances) {
        if (getReasoner().hasType(description, ind)) {
          coveredInstancesSet.add(ind);
        }
        if (terminationTimeExpired()) {
          return 0;
        }
      }

      // if even the optimal case (no additional instances covered) is not sufficient,
      // the concept is too weak
      if (coveredInstancesSet.size() / (double) classInstances.size() <= 1 - noise) {
        return -1;
      }

      // computing R(C) restricted to relevant instances
      TreeSet<OWLIndividual> additionalInstancesSet = new TreeSet<OWLIndividual>();
      for (OWLIndividual ind : superClassInstances) {
        if (getReasoner().hasType(description, ind)) {
          additionalInstancesSet.add(ind);
        }
        if (terminationTimeExpired()) {
          return 0;
        }
      }

      Set<OWLIndividual> union = Helper.union(classInstancesSet, additionalInstancesSet);
      return Heuristics.getJaccardCoefficient(coveredInstancesSet.size(), union.size());

    } else if (heuristic.equals(HeuristicType.AMEASURE)
        || heuristic.equals(HeuristicType.FMEASURE)
        || heuristic.equals(HeuristicType.PRED_ACC)) {

      int additionalInstances = 0;
      int coveredInstances = 0;

      if (reasoner instanceof SPARQLReasoner) {
        // R(C)
        String query =
            "SELECT (COUNT(DISTINCT(?s)) AS ?cnt) WHERE {"
                + "?s a ?sup . ?classToDescribe <http://www.w3.org/2000/01/rdf-schema#subClassOf> ?sup . "
                + converter.convert("?s", description)
                + "FILTER NOT EXISTS {?s a ?classToDescribe}}";
        ParameterizedSparqlString template = new ParameterizedSparqlString(query);
        // System.err.println(converter.convert("?s", description));
        // template.setIri("cls", description.asOWLClass().toStringID());
        template.setIri("classToDescribe", classToDescribe.toStringID());

        QueryExecution qe =
            ((SPARQLReasoner) reasoner)
                .getQueryExecutionFactory()
                .createQueryExecution(template.toString());
        additionalInstances = qe.execSelect().next().getLiteral("cnt").getInt();

        // R(A)
        OWLObjectIntersectionOf ce = df.getOWLObjectIntersectionOf(classToDescribe, description);
        coveredInstances = ((SPARQLReasoner) reasoner).getPopularityOf(ce);

        // System.out.println(coveredInstances);
        // System.out.println(additionalInstances);
      } else {
        // computing R(C) restricted to relevant instances
        if (useInstanceChecks) {
          for (OWLIndividual ind : superClassInstances) {
            if (getReasoner().hasType(description, ind)) {
              additionalInstances++;
            }
            if (terminationTimeExpired()) {
              return 0;
            }
          }
        } else {
          SortedSet<OWLIndividual> individuals = getReasoner().getIndividuals(description);
          individuals.retainAll(superClassInstances);
          additionalInstances = individuals.size();
        }

        // computing R(A)
        if (useInstanceChecks) {
          for (OWLIndividual ind : classInstances) {
            if (getReasoner().hasType(description, ind)) {
              coveredInstances++;
            }
            if (terminationTimeExpired()) {
              return 0;
            }
          }
        } else {
          SortedSet<OWLIndividual> individuals = getReasoner().getIndividuals(description);
          individuals.retainAll(classInstances);
          coveredInstances = individuals.size();
        }
      }

      //			System.out.println(description + ":" + coveredInstances + "/" + classInstances.size());
      double recall = coveredInstances / (double) classInstances.size();

      // noise computation is incorrect
      //			if(recall < 1 - noise) {
      //				return -1;
      //			}

      double precision =
          (additionalInstances + coveredInstances == 0)
              ? 0
              : coveredInstances / (double) (coveredInstances + additionalInstances);

      if (heuristic.equals(HeuristicType.AMEASURE)) {
        // best reachable concept has same recall and precision 1:
        // 1/(t+1) * (t*r + 1)
        if ((coverageFactor * recall + 1) / (coverageFactor + 1) < (1 - noise)) {
          return -1;
        } else {
          return Heuristics.getAScore(recall, precision, coverageFactor);
        }
      } else if (heuristic.equals(HeuristicType.FMEASURE)) {
        // best reachable concept has same recall and precision 1:
        if (((1 + Math.sqrt(coverageFactor)) * recall) / (Math.sqrt(coverageFactor) + 1)
            < 1 - noise) {
          return -1;
        } else {
          return Heuristics.getFScore(recall, precision, coverageFactor);
        }
      } else if (heuristic.equals(HeuristicType.PRED_ACC)) {
        if ((coverageFactor * coveredInstances + superClassInstances.size())
                / (coverageFactor * classInstances.size() + superClassInstances.size())
            < 1 - noise) {
          return -1;
        } else {
          // correctly classified divided by all examples
          return (coverageFactor * coveredInstances
                  + superClassInstances.size()
                  - additionalInstances)
              / (coverageFactor * classInstances.size() + superClassInstances.size());
        }
      }

      //			return heuristic.equals(HeuristicType.FMEASURE) ? getFMeasure(recall, precision) :
      // getAccuracy(recall, precision);
    } else if (heuristic.equals(HeuristicType.GEN_FMEASURE)) {

      // implementation is based on:
      // http://sunsite.informatik.rwth-aachen.de/Publications/CEUR-WS/Vol-426/swap2008_submission_14.pdf
      // default negation should be turned off when using fast instance checker

      // compute I_C (negated and non-negated concepts separately)
      TreeSet<OWLIndividual> icPos = new TreeSet<OWLIndividual>();
      TreeSet<OWLIndividual> icNeg = new TreeSet<OWLIndividual>();
      OWLClassExpression descriptionNeg = df.getOWLObjectComplementOf(description);
      // loop through all relevant instances
      for (OWLIndividual ind : classAndSuperClassInstances) {
        if (getReasoner().hasType(description, ind)) {
          icPos.add(ind);
        } else if (getReasoner().hasType(descriptionNeg, ind)) {
          icNeg.add(ind);
        }
        if (terminationTimeExpired()) {
          return 0;
        }
      }

      // semantic precision
      // first compute I_C \cap Cn(DC)
      // it seems that in our setting, we can ignore Cn, because the examples (class instances)
      // are already part of the background knowledge
      Set<OWLIndividual> tmp1Pos = Helper.intersection(icPos, classInstancesSet);
      Set<OWLIndividual> tmp1Neg = Helper.intersection(icNeg, negatedClassInstances);
      int tmp1Size = tmp1Pos.size() + tmp1Neg.size();

      // Cn(I_C) \cap D_C is the same set if we ignore Cn ...

      int icSize = icPos.size() + icNeg.size();
      double prec = (icSize == 0) ? 0 : tmp1Size / (double) icSize;
      double rec = tmp1Size / (double) (classInstances.size() + negatedClassInstances.size());

      //			System.out.println(description);

      //			System.out.println("I_C pos: " + icPos);
      //			System.out.println("I_C neg: " + icNeg);
      //			System.out.println("class instances: " + classInstances);
      //			System.out.println("negated class instances: " + negatedClassInstances);

      //			System.out.println(prec);
      //			System.out.println(rec);
      //			System.out.println(coverageFactor);

      // too weak: see F-measure above
      // => does not work for generalised F-measure, because even very general
      // concepts do not have a recall of 1
      //			if(((1+Math.sqrt(coverageFactor))*rec)/(Math.sqrt(coverageFactor)+1)<1-noise) {
      //				return -1;
      //			}
      // we only return too weak if there is no recall
      if (rec <= 0.0000001) {
        return -1;
      }

      return getFMeasure(rec, prec);
    }

    throw new Error("ClassLearningProblem error: not implemented");
  }
Example #6
  @Override
  public ClassScore computeScore(OWLClassExpression description, double noise) {

    // TODO: reuse code to ensure that we never return inconsistent results
    // between getAccuracy, getAccuracyOrTooWeak and computeScore
    Set<OWLIndividual> additionalInstances = new TreeSet<OWLIndividual>();
    Set<OWLIndividual> coveredInstances = new TreeSet<OWLIndividual>();

    int additionalInstancesCnt = 0;
    int coveredInstancesCnt = 0;

    if (reasoner instanceof SPARQLReasoner) {
      // R(C)
      String query =
          "SELECT (COUNT(DISTINCT(?s)) AS ?cnt) WHERE {"
              + "?s a ?sup . ?classToDescribe <http://www.w3.org/2000/01/rdf-schema#subClassOf> ?sup . "
              + converter.convert("?s", description)
              + "FILTER NOT EXISTS {?s a ?classToDescribe}}";
      ParameterizedSparqlString template = new ParameterizedSparqlString(query);
      // System.err.println(converter.convert("?s", description));
      // template.setIri("cls", description.asOWLClass().toStringID());
      template.setIri("classToDescribe", classToDescribe.toStringID());

      QueryExecution qe =
          ((SPARQLReasoner) reasoner)
              .getQueryExecutionFactory()
              .createQueryExecution(template.toString());
      additionalInstancesCnt = qe.execSelect().next().getLiteral("cnt").getInt();

      // R(A)
      OWLObjectIntersectionOf ce = df.getOWLObjectIntersectionOf(classToDescribe, description);
      coveredInstancesCnt = ((SPARQLReasoner) reasoner).getPopularityOf(ce);
    } else {
      // overhang
      for (OWLIndividual ind : superClassInstances) {
        if (getReasoner().hasType(description, ind)) {
          additionalInstances.add(ind);
        }
      }

      // coverage
      for (OWLIndividual ind : classInstances) {
        if (getReasoner().hasType(description, ind)) {
          coveredInstances.add(ind);
        }
      }
      additionalInstancesCnt = additionalInstances.size();
      coveredInstancesCnt = coveredInstances.size();
    }

    double recall = coveredInstancesCnt / (double) classInstances.size();
    double precision =
        (additionalInstancesCnt + coveredInstancesCnt == 0)
            ? 0
            : coveredInstancesCnt / (double) (coveredInstancesCnt + additionalInstancesCnt);
    // for each OWLClassExpression with less than 100% coverage, we check whether it
    // leads to an inconsistent knowledge base

    double acc = 0;
    if (heuristic.equals(HeuristicType.FMEASURE)) {
      acc = Heuristics.getFScore(recall, precision, coverageFactor);
    } else if (heuristic.equals(HeuristicType.AMEASURE)) {
      acc = Heuristics.getAScore(recall, precision, coverageFactor);
    } else {
      // TODO: some superfluous instance checks are required to compute accuracy =>
      // move accuracy computation here if possible
      acc = getAccuracyOrTooWeakExact(description, noise);
    }

    if (checkConsistency) {

      // we check whether the axiom already follows from the knowledge base
      //			boolean followsFromKB = reasoner.isSuperClassOf(description, classToDescribe);

      //			boolean followsFromKB = equivalence ? reasoner.isEquivalentClass(description,
      // classToDescribe) : reasoner.isSuperClassOf(description, classToDescribe);
      boolean followsFromKB = followsFromKB(description);

      // workaround due to a bug (see
      // http://sourceforge.net/tracker/?func=detail&aid=2866610&group_id=203619&atid=986319)
      //			boolean isConsistent = coverage >= 0.999999 || isConsistent(description);
      // (if the axiom follows, then the knowledge base remains consistent)
      boolean isConsistent = followsFromKB || isConsistent(description);

      //			double acc = useFMeasure ? getFMeasure(coverage, protusion) : getAccuracy(coverage,
      // protusion);
      return new ClassScore(
          coveredInstances,
          Helper.difference(classInstancesSet, coveredInstances),
          recall,
          additionalInstances,
          precision,
          acc,
          isConsistent,
          followsFromKB);

    } else {
      return new ClassScore(
          coveredInstances,
          Helper.difference(classInstancesSet, coveredInstances),
          recall,
          additionalInstances,
          precision,
          acc);
    }
  }
Example #7
  // note: the lower two bits of m_costAndState presumably carry state flags rather than cost,
  // so the accumulated cost is recovered with an unsigned right shift before adding the
  // heuristic estimate
  public int getEstimatedTotalCost(final T p_target, final Heuristics<T> p_heuristics) {
    return (m_costAndState >>> 2) + p_heuristics.estimateCost((T) this, p_target);
  }