/**
 * Rebuilds the local sample model for {@code entityToDescribe}.
 *
 * <p>A fresh default model is assigned to {@code sample}, and {@code qef} and {@code reasoner}
 * are re-created on top of it. The sample query is then executed page by page through
 * {@code ksQef}; each page is added to {@code sample} until either {@code isTimeout()} reports
 * true or a page comes back empty.
 */
private void generateSample() {
    logger.info("Generating sample...");
    sample = ModelFactory.createDefaultModel();

    // we have to set up a new query execution factory working on our local model
    qef = new QueryExecutionFactoryModel(sample);
    reasoner = new SPARQLReasoner(qef);

    // get the page size
    // TODO put to base class
    long pageSize = 10000; // PaginationUtils.adjustPageSize(globalQef, 10000);

    ParameterizedSparqlString sampleQueryTemplate = getSampleQuery();
    sampleQueryTemplate.setIri("p", entityToDescribe.toStringID());
    Query query = sampleQueryTemplate.asQuery();
    query.setLimit(pageSize);

    boolean isEmpty = false;
    int i = 0;
    while (!isTimeout() && !isEmpty) {
      // get next sample
      logger.debug("Extending sample...");
      query.setOffset(i++ * pageSize);
      QueryExecution qe = ksQef.createQueryExecution(query);
      try {
        Model tmp = qe.execConstruct();
        sample.add(tmp);

        // if last call returned empty model, we can leave loop
        isEmpty = tmp.isEmpty();
      } finally {
        // release the execution (and any underlying connection) after every page
        qe.close();
      }
    }
    logger.info("...done. Sample size: " + sample.size() + " triples");
  }
Пример #2
0
  /**
   * Builds a data cube model with one observation per class returned by the {@code INSTANCES}
   * query, holding the value bound to {@code count} by the {@code NUMBER_OF_PROPERTIES} query.
   *
   * <p>If the per-class query fails, an observation with the measure {@code -1} and the error
   * text as comment is recorded instead, and processing continues with the next class.
   *
   * @param inputModel model to query locally; when {@code null} the queries are sent to
   *     {@code endpoint} instead
   * @param endpoint SPARQL service URL, used only when {@code inputModel} is {@code null}
   * @return the populated cube model
   */
  private Model execute(Model inputModel, String endpoint) {
    Model cube = createModel();
    Calendar calendar = Calendar.getInstance(TimeZone.getDefault());
    Resource dataset =
        cube.createResource(
            GK.uri + "Properties_per_Class" + calendar.getTimeInMillis(), QB.Dataset);
    dataset.addLiteral(RDFS.comment, "Properties per class");
    dataset.addLiteral(DCTerms.date, cube.createTypedLiteral(calendar));
    dataset.addLiteral(DCTerms.publisher, "R & D, Unister GmbH, Geoknow");
    dataset.addProperty(QB.structure, cube.createResource(STRUCTURE));

    QueryExecution qExec;
    if (inputModel != null) {
      qExec = QueryExecutionFactory.create(INSTANCES, inputModel);
    } else {
      qExec =
          QueryExecutionFactory.sparqlService(endpoint, INSTANCES, defaultGraphs, defaultGraphs);
    }
    try {
      ResultSet result = qExec.execSelect();
      int i = 0;
      while (result.hasNext()) {
        Resource owlClass = result.next().getResource("class");
        NUMBER_OF_PROPERTIES.setIri("class", owlClass.getURI());
        QueryExecution propertiesQexec;
        if (inputModel != null) {
          propertiesQexec =
              QueryExecutionFactory.create(NUMBER_OF_PROPERTIES.asQuery(), inputModel);
        } else {
          propertiesQexec =
              QueryExecutionFactory.sparqlService(
                  endpoint, NUMBER_OF_PROPERTIES.asQuery(), defaultGraphs, defaultGraphs);
          System.out.println(NUMBER_OF_PROPERTIES.asQuery());
        }
        try {
          ResultSet propertiesResult = propertiesQexec.execSelect();
          if (propertiesResult.hasNext()) {
            System.out.println(i);
            Resource obs =
                cube.createResource(
                    "http://www.geoknow.eu/data-cube/metric2/observation" + i, QB.Observation);
            obs.addProperty(QB.dataset, dataset);
            obs.addProperty(GK.DIM.Class, owlClass);
            obs.addLiteral(GK.MEASURE.PropertyCount, propertiesResult.next().getLiteral("count"));

            i++;
          }
        } catch (Exception e) {
          // best effort: record the failure as an observation and carry on with the next class
          System.out.println(i);
          Resource obs =
              cube.createResource(
                  "http://www.geoknow.eu/data-cube/metric2/observation" + i, QB.Observation);
          obs.addProperty(QB.dataset, dataset);
          obs.addProperty(GK.DIM.Class, owlClass);
          obs.addLiteral(GK.MEASURE.PropertyCount, -1);
          // getMessage() may be null; never hand a null value to the model
          String message = e.getMessage() != null ? e.getMessage() : e.toString();
          obs.addLiteral(RDFS.comment, message);
          i++;
        } finally {
          // one execution is opened per class, so it has to be released per class
          propertiesQexec.close();
        }
      }
    } finally {
      qExec.close();
    }
    return cube;
  }
Пример #3
0
  /**
   * Builds a data cube model with one observation per class returned by {@code GET_CLASSES},
   * holding the average of the areas computed for the instances of that class.
   *
   * <p>For every instance the {@code POLYGON} query is tried first; if it yields no points, the
   * {@code MULTI_POLYGON} query is used and the areas of its member polygons are summed up.
   * Classes contained in {@code blacklist} are not queried but still get an observation with an
   * average of {@code 0}.
   *
   * @param inputModel model to query locally; when {@code null} the queries are sent to
   *     {@code endpoint} instead
   * @param endpoint SPARQL service URL, used when {@code inputModel} is {@code null}; if not
   *     {@code null} it is also recorded as the source of the dataset
   * @return the populated cube model
   */
  private Model execute(Model inputModel, String endpoint) {
    Model cube = createModel();

    Calendar calendar = Calendar.getInstance(TimeZone.getDefault());
    Resource dataset = cube.createResource(GK.uri + "Average_Surface", QB.Dataset);
    dataset.addLiteral(RDFS.comment, "Average Surface per class");
    dataset.addLiteral(DCTerms.date, cube.createTypedLiteral(calendar));
    dataset.addLiteral(DCTerms.publisher, "R & D, Unister GmbH, Geoknow");
    dataset.addProperty(QB.structure, cube.createResource(STRUCTURE));
    if (endpoint != null) {
      dataset.addProperty(DCTerms.source, endpoint);
    }

    QueryExecution qExec;
    if (inputModel != null) {
      qExec = QueryExecutionFactory.create(GET_CLASSES, inputModel);
    } else {
      qExec =
          QueryExecutionFactory.sparqlService(endpoint, GET_CLASSES, defaultGraphs, defaultGraphs);
    }
    try {
      ResultSet result = qExec.execSelect();
      int obsCount = 0;
      while (result.hasNext()) {

        // sum of the instance areas and number of instances of the current class
        double area = 0;
        int i = 0;
        Resource owlClass = result.next().get("class").asResource();

        if (!blacklist.contains(owlClass.toString())) {

          System.out.println(owlClass);
          GET_INSTANCES.setIri("class", owlClass.getURI());

          QueryExecution qexecInstances;
          if (inputModel != null) {
            qexecInstances = QueryExecutionFactory.create(GET_INSTANCES.asQuery(), inputModel);
          } else {
            qexecInstances =
                QueryExecutionFactory.sparqlService(
                    endpoint, GET_INSTANCES.asQuery(), defaultGraphs, defaultGraphs);
          }
          try {
            for (ResultSet instancesResult = qexecInstances.execSelect();
                instancesResult.hasNext(); ) {

              QuerySolution next = instancesResult.next();
              String instance = next.get("instance").asResource().getURI();
              if (instance == null) {
                continue;
              }

              // first attempt: the instance is described by a single polygon
              POLYGON.setIri("instance", instance);
              QueryExecution qexecMember;
              if (inputModel != null) {
                qexecMember = QueryExecutionFactory.create(POLYGON.asQuery(), inputModel);
              } else {
                qexecMember =
                    QueryExecutionFactory.sparqlService(
                        endpoint, POLYGON.asQuery(), defaultGraphs, defaultGraphs);
              }
              StringBuilder polygonBuilder = new StringBuilder();
              firstLat = null;
              firstLong = null;
              try {
                for (ResultSet latLong = qexecMember.execSelect(); latLong.hasNext(); ) {
                  processPoint(latLong.next(), polygonBuilder);
                }
              } finally {
                qexecMember.close();
              }

              if (polygonBuilder.length() > 0) {
                area += calculateArea(polygonBuilder);
              } else {
                // fallback: multi polygon. The area accumulated for earlier instances of this
                // class is kept; resetting it here would distort the average because the
                // instance counter keeps running.
                polygonBuilder.setLength(0);
                this.firstLat = null;
                this.firstLong = null;
                MULTI_POLYGON.setIri("instance", instance);
                QueryExecution qexecMultiPolygon;
                if (inputModel != null) {
                  qexecMultiPolygon =
                      QueryExecutionFactory.create(MULTI_POLYGON.asQuery(), inputModel);
                } else {
                  qexecMultiPolygon =
                      QueryExecutionFactory.sparqlService(
                          endpoint, MULTI_POLYGON.asQuery(), defaultGraphs, defaultGraphs);
                }
                String polygonName = "";
                try {
                  for (ResultSet latLong = qexecMultiPolygon.execSelect(); latLong.hasNext(); ) {
                    QuerySolution solution = latLong.next();
                    String currentName = solution.get("polygon").asNode().getBlankNodeLabel();
                    if (!polygonName.equals(currentName)) {
                      // a new member polygon starts: account for the one collected so far
                      if (polygonBuilder.length() > 0) {
                        area += calculateArea(polygonBuilder);
                      }
                      this.firstLat = null;
                      this.firstLong = null;
                      polygonBuilder.setLength(0);
                    }
                    polygonName = currentName;
                    processPoint(solution, polygonBuilder);
                  }
                } finally {
                  qexecMultiPolygon.close();
                }
                // the last member polygon is not followed by a name change, so add it here
                if (polygonBuilder.length() > 0) {
                  area += calculateArea(polygonBuilder);
                }
              }
              i++;
            }
          } finally {
            qexecInstances.close();
          }
        }
        Resource obs = cube.createResource(structureUri + "/obs/" + obsCount, QB.Observation);
        double average = i == 0 ? 0 : area / i;
        obs.addProperty(GK.MEASURE.Average, cube.createTypedLiteral(average));
        obs.addProperty(GK.DIM.Class, owlClass);
        obs.addProperty(QB.dataset, dataset);
        obsCount++;
      }
    } finally {
      qExec.close();
    }
    return cube;
  }
Пример #4
0
  /**
   * Exact accuracy computation for 5 heuristics; each one is adapted to super class learning
   * and takes the noise parameter into account.
   *
   * @param description the class expression to evaluate
   * @param noise the tolerated noise; a description whose best reachable value falls below
   *     {@code 1 - noise} is reported as too weak
   * @return the value of the configured heuristic, {@code -1} if the description is too weak,
   *     or {@code 0} if the termination time expired during the instance checks
   * @throws Error if the configured heuristic is not handled by this method
   */
  public double getAccuracyOrTooWeakExact(OWLClassExpression description, double noise) {
    nanoStartTime = System.nanoTime();

    if (heuristic.equals(HeuristicType.JACCARD)) {

      // computing R(A)
      TreeSet<OWLIndividual> coveredInstancesSet = new TreeSet<OWLIndividual>();
      for (OWLIndividual ind : classInstances) {
        if (getReasoner().hasType(description, ind)) {
          coveredInstancesSet.add(ind);
        }
        if (terminationTimeExpired()) {
          return 0;
        }
      }

      // if even the optimal case (no additional instances covered) is not sufficient,
      // the concept is too weak
      if (coveredInstancesSet.size() / (double) classInstances.size() <= 1 - noise) {
        return -1;
      }

      // computing R(C) restricted to relevant instances
      TreeSet<OWLIndividual> additionalInstancesSet = new TreeSet<OWLIndividual>();
      for (OWLIndividual ind : superClassInstances) {
        if (getReasoner().hasType(description, ind)) {
          additionalInstancesSet.add(ind);
        }
        if (terminationTimeExpired()) {
          return 0;
        }
      }

      Set<OWLIndividual> union = Helper.union(classInstancesSet, additionalInstancesSet);
      return Heuristics.getJaccardCoefficient(coveredInstancesSet.size(), union.size());

    } else if (heuristic.equals(HeuristicType.AMEASURE)
        || heuristic.equals(HeuristicType.FMEASURE)
        || heuristic.equals(HeuristicType.PRED_ACC)) {

      int additionalInstances = 0;
      int coveredInstances = 0;

      // instanceof is the check that actually guards the casts below;
      // Class.isAssignableFrom in the other direction would also accept supertypes
      if (reasoner instanceof SPARQLReasoner) {
        SPARQLReasoner sparqlReasoner = (SPARQLReasoner) reasoner;

        // R(C)
        String query =
            "SELECT (COUNT(DISTINCT(?s)) AS ?cnt) WHERE {"
                + "?s a ?sup . ?classToDescribe <http://www.w3.org/2000/01/rdf-schema#subClassOf> ?sup . "
                + converter.convert("?s", description)
                + "FILTER NOT EXISTS {?s a ?classToDescribe}}";
        ParameterizedSparqlString template = new ParameterizedSparqlString(query);
        template.setIri("classToDescribe", classToDescribe.toStringID());

        QueryExecution qe =
            sparqlReasoner.getQueryExecutionFactory().createQueryExecution(template.toString());
        try {
          additionalInstances = qe.execSelect().next().getLiteral("cnt").getInt();
        } finally {
          // always release the query execution, even if reading the count fails
          qe.close();
        }

        // R(A)
        OWLObjectIntersectionOf ce = df.getOWLObjectIntersectionOf(classToDescribe, description);
        coveredInstances = sparqlReasoner.getPopularityOf(ce);
      } else {
        // computing R(C) restricted to relevant instances
        if (useInstanceChecks) {
          for (OWLIndividual ind : superClassInstances) {
            if (getReasoner().hasType(description, ind)) {
              additionalInstances++;
            }
            if (terminationTimeExpired()) {
              return 0;
            }
          }
        } else {
          SortedSet<OWLIndividual> individuals = getReasoner().getIndividuals(description);
          individuals.retainAll(superClassInstances);
          additionalInstances = individuals.size();
        }

        // computing R(A)
        if (useInstanceChecks) {
          for (OWLIndividual ind : classInstances) {
            if (getReasoner().hasType(description, ind)) {
              coveredInstances++;
            }
            if (terminationTimeExpired()) {
              return 0;
            }
          }
        } else {
          SortedSet<OWLIndividual> individuals = getReasoner().getIndividuals(description);
          individuals.retainAll(classInstances);
          coveredInstances = individuals.size();
        }
      }

      double recall = coveredInstances / (double) classInstances.size();

      // no generic "recall < 1 - noise" check here: that noise computation is incorrect,
      // each heuristic below applies its own too-weak criterion instead

      double precision =
          (additionalInstances + coveredInstances == 0)
              ? 0
              : coveredInstances / (double) (coveredInstances + additionalInstances);

      if (heuristic.equals(HeuristicType.AMEASURE)) {
        // best reachable concept has same recall and precision 1:
        // 1/t+1 * (t*r + 1)
        if ((coverageFactor * recall + 1) / (coverageFactor + 1) < (1 - noise)) {
          return -1;
        } else {
          return Heuristics.getAScore(recall, precision, coverageFactor);
        }
      } else if (heuristic.equals(HeuristicType.FMEASURE)) {
        // best reachable concept has same recall and precision 1:
        if (((1 + Math.sqrt(coverageFactor)) * recall) / (Math.sqrt(coverageFactor) + 1)
            < 1 - noise) {
          return -1;
        } else {
          return Heuristics.getFScore(recall, precision, coverageFactor);
        }
      } else if (heuristic.equals(HeuristicType.PRED_ACC)) {
        if ((coverageFactor * coveredInstances + superClassInstances.size())
                / (coverageFactor * classInstances.size() + superClassInstances.size())
            < 1 - noise) {
          return -1;
        } else {
          // correctly classified divided by all examples
          return (coverageFactor * coveredInstances
                  + superClassInstances.size()
                  - additionalInstances)
              / (coverageFactor * classInstances.size() + superClassInstances.size());
        }
      }

    } else if (heuristic.equals(HeuristicType.GEN_FMEASURE)) {

      // implementation is based on:
      // http://sunsite.informatik.rwth-aachen.de/Publications/CEUR-WS/Vol-426/swap2008_submission_14.pdf
      // default negation should be turned off when using fast instance checker

      // compute I_C (negated and non-negated concepts separately)
      TreeSet<OWLIndividual> icPos = new TreeSet<OWLIndividual>();
      TreeSet<OWLIndividual> icNeg = new TreeSet<OWLIndividual>();
      OWLClassExpression descriptionNeg = df.getOWLObjectComplementOf(description);
      // loop through all relevant instances
      for (OWLIndividual ind : classAndSuperClassInstances) {
        if (getReasoner().hasType(description, ind)) {
          icPos.add(ind);
        } else if (getReasoner().hasType(descriptionNeg, ind)) {
          icNeg.add(ind);
        }
        if (terminationTimeExpired()) {
          return 0;
        }
      }

      // semantic precision
      // first compute I_C \cap Cn(DC)
      // it seems that in our setting, we can ignore Cn, because the examples (class instances)
      // are already part of the background knowledge
      Set<OWLIndividual> tmp1Pos = Helper.intersection(icPos, classInstancesSet);
      Set<OWLIndividual> tmp1Neg = Helper.intersection(icNeg, negatedClassInstances);
      int tmp1Size = tmp1Pos.size() + tmp1Neg.size();

      // Cn(I_C) \cap D_C is the same set if we ignore Cn ...

      int icSize = icPos.size() + icNeg.size();
      double prec = (icSize == 0) ? 0 : tmp1Size / (double) icSize;
      double rec = tmp1Size / (double) (classInstances.size() + negatedClassInstances.size());

      // the too-weak criterion of the F-measure above does not work for the generalised
      // F-measure, because even very general concepts do not have a recall of 1;
      // we only return too weak if there is no recall
      if (rec <= 0.0000001) {
        return -1;
      }

      return getFMeasure(rec, prec);
    }

    throw new Error("ClassLearningProblem error: not implemented");
  }
Пример #5
0
  /**
   * Computes the score of a class expression: recall, precision, the accuracy according to the
   * configured heuristic and, if {@code checkConsistency} is set, whether the resulting axiom
   * follows from the knowledge base and whether the knowledge base stays consistent.
   *
   * <p>NOTE: with a {@link SPARQLReasoner} only the instance counts are retrieved, so the
   * covered/additional instance sets handed to the {@code ClassScore} remain empty in that mode.
   *
   * @param description the class expression to evaluate
   * @param noise the noise value passed on to {@code getAccuracyOrTooWeakExact} for heuristics
   *     other than F-measure and A-measure
   * @return the score of the given description
   */
  @Override
  public ClassScore computeScore(OWLClassExpression description, double noise) {

    // TODO: reuse code to ensure that we never return inconsistent results
    // between getAccuracy, getAccuracyOrTooWeak and computeScore
    Set<OWLIndividual> additionalInstances = new TreeSet<OWLIndividual>();
    Set<OWLIndividual> coveredInstances = new TreeSet<OWLIndividual>();

    int additionalInstancesCnt = 0;
    int coveredInstancesCnt = 0;

    // instanceof is the check that actually guards the casts below;
    // Class.isAssignableFrom in the other direction would also accept supertypes
    if (reasoner instanceof SPARQLReasoner) {
      SPARQLReasoner sparqlReasoner = (SPARQLReasoner) reasoner;

      // R(C)
      String query =
          "SELECT (COUNT(DISTINCT(?s)) AS ?cnt) WHERE {"
              + "?s a ?sup . ?classToDescribe <http://www.w3.org/2000/01/rdf-schema#subClassOf> ?sup . "
              + converter.convert("?s", description)
              + "FILTER NOT EXISTS {?s a ?classToDescribe}}";
      ParameterizedSparqlString template = new ParameterizedSparqlString(query);
      template.setIri("classToDescribe", classToDescribe.toStringID());

      QueryExecution qe =
          sparqlReasoner.getQueryExecutionFactory().createQueryExecution(template.toString());
      try {
        additionalInstancesCnt = qe.execSelect().next().getLiteral("cnt").getInt();
      } finally {
        // always release the query execution, even if reading the count fails
        qe.close();
      }

      // R(A)
      OWLObjectIntersectionOf ce = df.getOWLObjectIntersectionOf(classToDescribe, description);
      coveredInstancesCnt = sparqlReasoner.getPopularityOf(ce);
    } else {
      // overhang
      for (OWLIndividual ind : superClassInstances) {
        if (getReasoner().hasType(description, ind)) {
          additionalInstances.add(ind);
        }
      }

      // coverage
      for (OWLIndividual ind : classInstances) {
        if (getReasoner().hasType(description, ind)) {
          coveredInstances.add(ind);
        }
      }
      additionalInstancesCnt = additionalInstances.size();
      coveredInstancesCnt = coveredInstances.size();
    }

    double recall = coveredInstancesCnt / (double) classInstances.size();
    double precision =
        (additionalInstancesCnt + coveredInstancesCnt == 0)
            ? 0
            : coveredInstancesCnt / (double) (coveredInstancesCnt + additionalInstancesCnt);

    double acc = 0;
    if (heuristic.equals(HeuristicType.FMEASURE)) {
      acc = Heuristics.getFScore(recall, precision, coverageFactor);
    } else if (heuristic.equals(HeuristicType.AMEASURE)) {
      acc = Heuristics.getAScore(recall, precision, coverageFactor);
    } else {
      // TODO: some superfluous instance checks are required to compute accuracy =>
      // move accuracy computation here if possible
      acc = getAccuracyOrTooWeakExact(description, noise);
    }

    if (checkConsistency) {

      // we check whether the axiom already follows from the knowledge base
      boolean followsFromKB = followsFromKB(description);

      // workaround due to a bug (see
      // http://sourceforge.net/tracker/?func=detail&aid=2866610&group_id=203619&atid=986319)
      // (if the axiom follows, then the knowledge base remains consistent)
      boolean isConsistent = followsFromKB || isConsistent(description);

      return new ClassScore(
          coveredInstances,
          Helper.difference(classInstancesSet, coveredInstances),
          recall,
          additionalInstances,
          precision,
          acc,
          isConsistent,
          followsFromKB);

    } else {
      return new ClassScore(
          coveredInstances,
          Helper.difference(classInstancesSet, coveredInstances),
          recall,
          additionalInstances,
          precision,
          acc);
    }
  }