/** * Get a set of unique queryObjName returned from a select query * * <p>Used to retrieve sets of modified objects used in sync * * @param rdfQuery query to execute * @param queryObjName name of the object returned * @return set of values for queryObjectName in the rdfQuery result */ HashSet<String> executeSyncQuery(String rdfQuery, String queryObjName) { HashSet<String> rdfUrls = new HashSet<String>(); Query query; try { query = QueryFactory.create(rdfQuery); } catch (QueryParseException qpe) { logger.warn( "Could not parse [{}]. Please provide a relevant query. {}", rdfQuery, qpe.getLocalizedMessage()); return null; } QueryExecution qExec = QueryExecutionFactory.sparqlService(rdfEndpoint, query); try { ResultSet results = qExec.execSelect(); while (results.hasNext()) { QuerySolution sol = results.nextSolution(); try { String value = sol.getResource(queryObjName).toString(); rdfUrls.add(value); } catch (NoSuchElementException e) { logger.error("Encountered a NoSuchElementException: " + e.getLocalizedMessage()); return null; } } } catch (Exception e) { logger.error( "Encountered a [{}] while querying the endpoint for sync", e.getLocalizedMessage()); return null; } finally { qExec.close(); } return rdfUrls; }
/** * Starts a harvester with predefined queries to synchronize with the changes from the SPARQL * endpoint */ public boolean sync() { logger.info("Sync resources newer than {}", startTime); String rdfQueryTemplate = "PREFIX xsd:<http://www.w3.org/2001/XMLSchema#> " + "SELECT DISTINCT ?resource WHERE { " + " GRAPH ?graph { %s }" + " ?graph <%s> ?time . %s " + " FILTER (?time > xsd:dateTime(\"%s\")) }"; String queryStr = String.format( rdfQueryTemplate, syncConditions, syncTimeProp, graphSyncConditions, startTime); Set<String> syncUris = executeSyncQuery(queryStr, "resource"); if (syncUris == null) { logger.error("Errors occurred during sync procedure. Aborting!"); return false; } /** * If desired, query for old data that has the sync conditions modified * * <p>This option is useful in the case in which the application indexes resources that match * some conditions. In this case, if they are modified and no longer match the initial * conditions, they will not be synchronized. When syncOldData is True, the modified resources * that no longer match the conditions are deleted. */ int deleted = 0; int count = 0; if (this.syncOldData) { SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss"); queryStr = String.format( rdfQueryTemplate, syncConditions, syncTimeProp, graphSyncConditions, sdf.format(new Date(0))); HashSet<String> allIndexURIs = executeSyncQuery(queryStr, "resource"); if (allIndexURIs == null) { logger.error("Errors occurred during modified content sync query. Aborting!"); return false; } deleted = removeMissingUris(allIndexURIs); } /* Prepare a series of bulk uris to be described so we can make * a smaller number of calls to the SPARQL endpoint. */ ArrayList<ArrayList<String>> bulks = new ArrayList<ArrayList<String>>(); ArrayList<String> currentBulk = new ArrayList<String>(); for (String uri : syncUris) { currentBulk.add(uri); if (currentBulk.size() == EEASettings.DEFAULT_BULK_SIZE) { bulks.add(currentBulk); currentBulk = new ArrayList<String>(); } } if (currentBulk.size() > 0) { bulks.add(currentBulk); } /* Execute RDF queries for the resources in each bulk */ for (ArrayList<String> bulk : bulks) { String syncQuery = getSyncQueryStr(bulk); try { Query query = QueryFactory.create(syncQuery); QueryExecution qExec = QueryExecutionFactory.sparqlService(rdfEndpoint, query); try { Model constructModel = ModelFactory.createDefaultModel(); qExec.execConstruct(constructModel); BulkRequestBuilder bulkRequest = client.prepareBulk(); /** * When adding the model to ES do not use toDescribeURIs as the query already returned the * correct labels. */ addModelToES(constructModel, bulkRequest, false); count += bulk.size(); } catch (Exception e) { logger.error("Error while querying for modified content. {}", e.getLocalizedMessage()); return false; } finally { qExec.close(); } } catch (QueryParseException qpe) { logger.warn( "Could not parse Sync query. Please provide a relevant query. {}", qpe.getLocalizedMessage()); return false; } } logger.info( "Finished synchronisation: Deleted {}, Updated {}/{}", deleted, count, syncUris.size()); return true; }