protected Triple getSampleFromEndpoint( SPARQLEndpoint endpoint, Triple triplePattern, long sampleIndex) throws SADIException, IOException, NoSampleAvailableException { log.trace( "retrieving triple #" + sampleIndex + " for " + triplePattern + " from " + endpoint.getURI()); Query query = getConstructQuery( triplePattern); // SPARQLStringUtils.getConstructQuery(Collections.singletonList(triplePattern), Collections.singletonList(triplePattern)); query.setOffset(sampleIndex); query.setLimit(1); log.trace(String.format("sample query: %s", query.serialize())); Collection<Triple> triples = endpoint.constructQuery(query.serialize()); if (triples.size() == 0) { throw new RuntimeException("triple #" + sampleIndex + " doesn't exists in " + endpoint); } Triple triple = triples.iterator().next(); // Sanity check. If the index is incomplete or out of date, the sample triple may not satisfy // the predicate list / regular expressions for the endpoint. The simplest thing to do in // this case is to fail and take another sample. if (getRegistry() != null && !getRegistry().findEndpointsByTriplePattern(triple).contains(endpoint)) { throw new NoSampleAvailableException( "sample triple does not match the regular expressions for " + endpoint.getURI() + " (from which it was sampled!)"); } return triple; }
/**
 * Samples a triple matching {@code triplePattern} from a randomly chosen endpoint among
 * those the registry reports for the pattern.
 *
 * <p>Endpoints whose registry status is {@code DEAD} are discarded without counting as an
 * attempt. An endpoint that fails with {@link NoSampleAvailableException} or
 * {@link IOException} is logged, discarded, and counted as one attempt.
 *
 * @param triplePattern the pattern the sample must match
 * @return a sample triple from the first endpoint that yields one
 * @throws SADIException propagated from the underlying calls
 * @throws IOException declared by the signature; IOExceptions raised while sampling an
 *         endpoint are caught and logged here
 * @throws NoSampleAvailableException if no candidate endpoints remain
 * @throws ExceededMaxAttemptsException if {@code MAX_ATTEMPTS} endpoints were tried
 *         without obtaining a sample
 */
public Triple getSampleFromAnyEndpoint(Triple triplePattern)
        throws SADIException, IOException, NoSampleAvailableException, ExceededMaxAttemptsException {
    List<SPARQLEndpoint> candidates =
            new ArrayList<SPARQLEndpoint>(getRegistry().findEndpointsByTriplePattern(triplePattern));
    RandomDataImpl random = new RandomDataImpl();

    for (int failures = 0; failures < MAX_ATTEMPTS; ) {
        if (candidates.isEmpty()) {
            throw new NoSampleAvailableException(
                    "there are no triples matching " + triplePattern + " in the data (without blank nodes)");
        }

        // The random generator is only consulted when there is an actual choice to make.
        int pick = 0;
        if (candidates.size() > 1) {
            pick = random.nextInt(0, candidates.size() - 1);
        }
        SPARQLEndpoint candidate = candidates.get(pick);

        boolean dead = getRegistry().getServiceStatus(candidate.getURI()) == ServiceStatus.DEAD;
        if (!dead) {
            try {
                return getSampleFromEndpoint(candidate, triplePattern);
            } catch (NoSampleAvailableException e) {
                log.warn(String.format("failed to retrieve sample from %s", candidate), e);
            } catch (IOException e) {
                log.warn(String.format("failed to retrieve sample from %s", candidate), e);
            }
            // Only endpoints that were actually queried count against the attempt budget.
            failures++;
        }

        // Dead or failed: either way this endpoint is not offered again.
        candidates.remove(pick);
    }

    throw new ExceededMaxAttemptsException(
            "exceeded " + MAX_ATTEMPTS + " attempts when trying to retrieve triples matching " + triplePattern);
}
protected long getUpperSampleLimit(SPARQLEndpoint endpoint, Triple triplePattern) throws SADIException, IOException { String uri = endpoint.getURI(); log.trace("determining number of triples matching " + triplePattern + " in " + uri); ServiceStatus status = ServiceStatus.OK; if (getRegistry() != null) { status = getRegistry().getServiceStatus(endpoint.getURI()); } // check for a cached value first if (upperSampleLimitCache.contains(endpoint, triplePattern)) { log.trace("using previously cached value for upper sample limit"); return upperSampleLimitCache.get(endpoint, triplePattern); } ElementGroup whereClause = getWhereClauseWithBlankNodeFilter(triplePattern); /* Node s = triplePattern.getSubject(); Node o = triplePattern.getObject(); // Build a Jena representation of the WHERE clause ElementGroup whereClause = new ElementGroup(); whereClause.addTriplePattern(triplePattern); if(s.isVariable()) { whereClause.addElementFilter(new ElementFilter(new E_LogicalNot(new E_IsBlank(new ExprVar(s))))); } if(o.isVariable()) { whereClause.addElementFilter(new ElementFilter(new E_LogicalNot(new E_IsBlank(new ExprVar(o))))); } */ if (status != ServiceStatus.SLOW) { try { // issue a SELECT COUNT(*) query Query countStarQuery = new Query(); countStarQuery.setQuerySelectType(); countStarQuery.setQueryPattern(whereClause); countStarQuery.addResultVar(countStarQuery.allocAggregate(AggCount.get())); List<Map<String, String>> results = endpoint.selectQuery(countStarQuery.serialize()); Map<String, String> firstRow = results.iterator().next(); String firstColumn = firstRow.keySet().iterator().next(); long limit = Long.parseLong(firstRow.get(firstColumn)); upperSampleLimitCache.put(endpoint, triplePattern, limit); log.trace(String.format("successful upper limit query: %s", countStarQuery.serialize())); log.trace(String.format("upper limit: %d", limit)); return limit; } catch (IOException e) { log.warn( "failed to COUNT number of triples matching " + triplePattern + " in " + uri + ", trying 
for a lower bound instead."); } } Query selectStarQuery = new Query(); selectStarQuery.setQuerySelectType(); selectStarQuery.setQueryPattern(whereClause); selectStarQuery.setQueryResultStar(true); long limit = endpoint.getResultsCountLowerBound(selectStarQuery.serialize(), 50000); upperSampleLimitCache.put(endpoint, triplePattern, limit); log.trace(String.format("successful upper limit query: %s", selectStarQuery.serialize())); log.trace(String.format("upper limit: %d", limit)); return limit; }