/** * Converts a map of results to a String JSON representation for it * * @param map a map that matches properties with an ArrayList of values * @return the JSON representation for the map, as a String */ private String mapToString(Map<String, ArrayList<String>> map) { StringBuilder result = new StringBuilder("{"); for (Map.Entry<String, ArrayList<String>> entry : map.entrySet()) { ArrayList<String> value = entry.getValue(); if (value.size() == 1) result.append(String.format("\"%s\" : %s,\n", entry.getKey(), value.get(0))); else result.append(String.format("\"%s\" : %s,\n", entry.getKey(), value.toString())); } result.setCharAt(result.length() - 2, '}'); return result.toString(); }
/** * Starts a harvester with predefined queries to synchronize with the changes from the SPARQL * endpoint */ public boolean sync() { logger.info("Sync resources newer than {}", startTime); String rdfQueryTemplate = "PREFIX xsd:<http://www.w3.org/2001/XMLSchema#> " + "SELECT DISTINCT ?resource WHERE { " + " GRAPH ?graph { %s }" + " ?graph <%s> ?time . %s " + " FILTER (?time > xsd:dateTime(\"%s\")) }"; String queryStr = String.format( rdfQueryTemplate, syncConditions, syncTimeProp, graphSyncConditions, startTime); Set<String> syncUris = executeSyncQuery(queryStr, "resource"); if (syncUris == null) { logger.error("Errors occurred during sync procedure. Aborting!"); return false; } /** * If desired, query for old data that has the sync conditions modified * * <p>This option is useful in the case in which the application indexes resources that match * some conditions. In this case, if they are modified and no longer match the initial * conditions, they will not be synchronized. When syncOldData is True, the modified resources * that no longer match the conditions are deleted. */ int deleted = 0; int count = 0; if (this.syncOldData) { SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss"); queryStr = String.format( rdfQueryTemplate, syncConditions, syncTimeProp, graphSyncConditions, sdf.format(new Date(0))); HashSet<String> allIndexURIs = executeSyncQuery(queryStr, "resource"); if (allIndexURIs == null) { logger.error("Errors occurred during modified content sync query. Aborting!"); return false; } deleted = removeMissingUris(allIndexURIs); } /* Prepare a series of bulk uris to be described so we can make * a smaller number of calls to the SPARQL endpoint. */ ArrayList<ArrayList<String>> bulks = new ArrayList<ArrayList<String>>(); ArrayList<String> currentBulk = new ArrayList<String>(); for (String uri : syncUris) { currentBulk.add(uri); if (currentBulk.size() == EEASettings.DEFAULT_BULK_SIZE) { bulks.add(currentBulk); currentBulk = new ArrayList<String>(); } } if (currentBulk.size() > 0) { bulks.add(currentBulk); } /* Execute RDF queries for the resources in each bulk */ for (ArrayList<String> bulk : bulks) { String syncQuery = getSyncQueryStr(bulk); try { Query query = QueryFactory.create(syncQuery); QueryExecution qExec = QueryExecutionFactory.sparqlService(rdfEndpoint, query); try { Model constructModel = ModelFactory.createDefaultModel(); qExec.execConstruct(constructModel); BulkRequestBuilder bulkRequest = client.prepareBulk(); /** * When adding the model to ES do not use toDescribeURIs as the query already returned the * correct labels. */ addModelToES(constructModel, bulkRequest, false); count += bulk.size(); } catch (Exception e) { logger.error("Error while querying for modified content. {}", e.getLocalizedMessage()); return false; } finally { qExec.close(); } } catch (QueryParseException qpe) { logger.warn( "Could not parse Sync query. Please provide a relevant query. {}", qpe.getLocalizedMessage()); return false; } } logger.info( "Finished synchronisation: Deleted {}, Updated {}/{}", deleted, count, syncUris.size()); return true; }