コード例 #1
0
  /**
   * Retrieves a single triple matching {@code triplePattern} from {@code endpoint}, using {@code
   * sampleIndex} as the result offset of the query.
   *
   * <p>The query produced by {@code getConstructQuery(triplePattern)} is restricted to one result
   * (limit 1) starting at offset {@code sampleIndex}, and the first triple returned by the
   * endpoint is the sample.
   *
   * @param endpoint the endpoint to query
   * @param triplePattern the pattern the sampled triple must match
   * @param sampleIndex the offset of the desired triple within the query results
   * @return the triple found at the requested offset
   * @throws SADIException propagated from the underlying query helpers
   * @throws IOException if communication with the endpoint fails
   * @throws NoSampleAvailableException if the endpoint returns no triple at the requested offset,
   *     or if the returned triple is not attributed to {@code endpoint} by the registry
   */
  protected Triple getSampleFromEndpoint(
      SPARQLEndpoint endpoint, Triple triplePattern, long sampleIndex)
      throws SADIException, IOException, NoSampleAvailableException {
    log.trace(
        "retrieving triple #"
            + sampleIndex
            + " for "
            + triplePattern
            + " from "
            + endpoint.getURI());

    Query query = getConstructQuery(triplePattern);
    query.setOffset(sampleIndex);
    query.setLimit(1);

    // Serialize once; the same text is logged and sent to the endpoint.
    String queryString = query.serialize();
    log.trace(String.format("sample query: %s", queryString));

    Collection<Triple> triples = endpoint.constructQuery(queryString);

    // An empty result means no sample exists at this offset. Report it with the declared checked
    // exception (rather than an unchecked RuntimeException) so that callers which already handle
    // NoSampleAvailableException can recover and take another sample.
    if (triples.isEmpty()) {
      throw new NoSampleAvailableException(
          "triple #" + sampleIndex + " doesn't exists in " + endpoint);
    }

    Triple triple = triples.iterator().next();

    // Sanity check. If the index is incomplete or out of date, the sample triple may not satisfy
    // the predicate list / regular expressions for the endpoint.  The simplest thing to do in
    // this case is to fail and take another sample.

    if (getRegistry() != null
        && !getRegistry().findEndpointsByTriplePattern(triple).contains(endpoint)) {
      throw new NoSampleAvailableException(
          "sample triple does not match the regular expressions for "
              + endpoint.getURI()
              + " (from which it was sampled!)");
    }

    return triple;
  }
コード例 #2
0
  /**
   * Samples one triple matching {@code triplePattern} from a randomly chosen endpoint.
   *
   * <p>Candidate endpoints come from the registry. On each pass one candidate is picked at random;
   * candidates the registry reports as {@code DEAD} are dropped without counting as an attempt.
   * A candidate that fails with {@link NoSampleAvailableException} or {@link IOException} is
   * logged, dropped, and counted as one attempt.
   *
   * @param triplePattern the pattern the sampled triple must match
   * @return a sample triple obtained from one of the candidate endpoints
   * @throws SADIException propagated from the registry or the per-endpoint sampling call
   * @throws IOException declared for callers; I/O failures of the per-endpoint sampling call are
   *     caught and logged here
   * @throws NoSampleAvailableException if no candidate endpoints remain
   * @throws ExceededMaxAttemptsException if {@code MAX_ATTEMPTS} sampling attempts have failed
   */
  public Triple getSampleFromAnyEndpoint(Triple triplePattern)
      throws SADIException, IOException, NoSampleAvailableException, ExceededMaxAttemptsException {
    final List<SPARQLEndpoint> candidates =
        new ArrayList<SPARQLEndpoint>(getRegistry().findEndpointsByTriplePattern(triplePattern));
    final RandomDataImpl random = new RandomDataImpl();

    int failedAttempts = 0;
    while (failedAttempts < MAX_ATTEMPTS) {
      if (candidates.isEmpty()) {
        throw new NoSampleAvailableException(
            "there are no triples matching " + triplePattern + " in the data (without blank nodes)");
      }

      // A single remaining candidate is taken directly, without consulting the generator.
      final int pick;
      if (candidates.size() > 1) {
        pick = random.nextInt(0, candidates.size() - 1);
      } else {
        pick = 0;
      }
      final SPARQLEndpoint candidate = candidates.get(pick);

      final boolean dead =
          getRegistry().getServiceStatus(candidate.getURI()) == ServiceStatus.DEAD;

      if (!dead) {
        try {
          return getSampleFromEndpoint(candidate, triplePattern);
        } catch (NoSampleAvailableException e) {
          log.warn(String.format("failed to retrieve sample from %s", candidate), e);
        } catch (IOException e) {
          log.warn(String.format("failed to retrieve sample from %s", candidate), e);
        }
      }

      // Either way this candidate is not tried again; only real sampling failures use up an
      // attempt.
      candidates.remove(pick);
      if (!dead) {
        failedAttempts++;
      }
    }

    throw new ExceededMaxAttemptsException(
        "exceeded "
            + MAX_ATTEMPTS
            + " attempts when trying to retrieve triples matching "
            + triplePattern);
  }
コード例 #3
0
  /**
   * Returns the upper sample limit for {@code triplePattern} on {@code endpoint}, i.e. the number
   * of matching triples reported by the endpoint (or a lower bound of it).
   *
   * <p>Resolution order:
   *
   * <ol>
   *   <li>a value previously stored in {@code upperSampleLimitCache};
   *   <li>unless the registry reports the endpoint as {@code SLOW}, the result of a COUNT query
   *       over the where clause built by {@code getWhereClauseWithBlankNodeFilter};
   *   <li>otherwise, or if the COUNT query fails or yields no parseable number, the value of
   *       {@code endpoint.getResultsCountLowerBound} for a {@code SELECT *} over the same where
   *       clause.
   * </ol>
   *
   * Any freshly computed value is stored in the cache before it is returned.
   *
   * @param endpoint the endpoint to query
   * @param triplePattern the pattern whose matches are counted
   * @return the cached, counted, or lower-bound number of matching triples
   * @throws SADIException propagated from the registry or the endpoint
   * @throws IOException if the lower-bound fallback query fails
   */
  protected long getUpperSampleLimit(SPARQLEndpoint endpoint, Triple triplePattern)
      throws SADIException, IOException {
    String uri = endpoint.getURI();

    log.trace("determining number of triples matching " + triplePattern + " in " + uri);

    // Check the cache before anything else so that a hit needs no registry lookup.
    if (upperSampleLimitCache.contains(endpoint, triplePattern)) {
      log.trace("using previously cached value for upper sample limit");
      return upperSampleLimitCache.get(endpoint, triplePattern);
    }

    // Without a registry the endpoint is treated as OK, so the COUNT query is attempted.
    ServiceStatus status = ServiceStatus.OK;
    if (getRegistry() != null) {
      status = getRegistry().getServiceStatus(uri);
    }

    ElementGroup whereClause = getWhereClauseWithBlankNodeFilter(triplePattern);

    if (status != ServiceStatus.SLOW) {
      try {
        // issue a SELECT COUNT(*) query
        Query countStarQuery = new Query();
        countStarQuery.setQuerySelectType();
        countStarQuery.setQueryPattern(whereClause);
        countStarQuery.addResultVar(countStarQuery.allocAggregate(AggCount.get()));
        String countQueryString = countStarQuery.serialize();

        Long counted = parseCountResult(endpoint.selectQuery(countQueryString));
        if (counted != null) {
          long limit = counted.longValue();
          upperSampleLimitCache.put(endpoint, triplePattern, limit);
          log.trace(String.format("successful upper limit query: %s", countQueryString));
          log.trace(String.format("upper limit: %d", limit));
          return limit;
        }

        // An empty or non-numeric answer is handled like a failed query: fall through to the
        // lower-bound strategy instead of failing with an unchecked exception.
        log.warn(
            "COUNT query for triples matching "
                + triplePattern
                + " in "
                + uri
                + " returned no usable value, trying for a lower bound instead.");
      } catch (IOException e) {
        // Keep the cause in the log so the failure can be diagnosed.
        log.warn(
            "failed to COUNT number of triples matching "
                + triplePattern
                + " in "
                + uri
                + ", trying for a lower bound instead.",
            e);
      }
    }

    Query selectStarQuery = new Query();
    selectStarQuery.setQuerySelectType();
    selectStarQuery.setQueryPattern(whereClause);
    selectStarQuery.setQueryResultStar(true);
    String selectQueryString = selectStarQuery.serialize();

    long limit = endpoint.getResultsCountLowerBound(selectQueryString, 50000);
    upperSampleLimitCache.put(endpoint, triplePattern, limit);

    log.trace(String.format("successful upper limit query: %s", selectQueryString));
    log.trace(String.format("upper limit: %d", limit));

    return limit;
  }

  /**
   * Extracts the count from the result of a COUNT query: the first column of the first row,
   * parsed as a {@code long}.
   *
   * @param results the rows returned by the endpoint
   * @return the parsed count, or {@code null} if there is no row, no column, or the value is not
   *     a valid {@code long}
   */
  private Long parseCountResult(List<Map<String, String>> results) {
    if (results == null || results.isEmpty()) {
      return null;
    }
    Map<String, String> firstRow = results.get(0);
    if (firstRow == null || firstRow.isEmpty()) {
      return null;
    }
    String firstColumn = firstRow.keySet().iterator().next();
    String value = firstRow.get(firstColumn);
    if (value == null) {
      return null;
    }
    try {
      return Long.valueOf(value);
    } catch (NumberFormatException e) {
      return null;
    }
  }