/* *get collaboration information based on organisation - returns a collaborationList. */ public HashMap<Integer, CollaborationList> getCollaborations_org( String id, TreeMap<Integer, Collaborator> network) { HashMap<Integer, CollaborationList> collaborations = new java.util.HashMap<Integer, CollaborationList>(); // reset the iterator Set<Integer> networkKeys = network.keySet(); Iterator<Integer> networkKeyIterator = networkKeys.iterator(); Integer networkKey = null; CollaborationList collaborationList; // loop through the list of keys while (networkKeyIterator.hasNext()) { // get the key networkKey = (Integer) networkKeyIterator.next(); // get the list of collaborations for this user collaborationList = getCollaborationList_org(id, networkKey.toString()); // add the collaborationList object to the list collaborations.put(networkKey, collaborationList); } return collaborations; }
private static ResultSetRewindable unique(ResultSetRewindable results) { // VERY crude. Utilises the fact that bindings have value equality. List<Binding> x = new ArrayList<Binding>(); Set<Binding> seen = new HashSet<Binding>(); for (; results.hasNext(); ) { Binding b = results.nextBinding(); if (seen.contains(b)) continue; seen.add(b); x.add(b); } QueryIterator qIter = new QueryIterPlainWrapper(x.iterator()); ResultSet rs = new ResultSetStream(results.getResultVars(), ModelFactory.createDefaultModel(), qIter); return ResultSetFactory.makeRewindable(rs); }
/**
 * Removes from ElasticSearch the documents whose id is not present in {@code uris}.
 *
 * <p>All documents of {@code indexName}/{@code typeName} are visited with a scrolled match-all
 * search; every hit whose id is missing from {@code uris} is deleted individually.
 *
 * @param uris ids that should remain in the index
 * @return the number of delete requests whose response reported the document as found
 */
private int removeMissingUris(Set<String> uris) {
  // Keep-alive handed to every scroll request.
  int scrollKeepAlive = 60000;
  int removed = 0;

  SearchResponse page =
      client
          .prepareSearch()
          .setIndices(indexName)
          .setTypes(typeName)
          .setScroll(new TimeValue(scrollKeepAlive))
          .setQuery(QueryBuilders.matchAllQuery())
          .execute()
          .actionGet();

  // An empty page signals that the scroll is exhausted.
  while (page.getHits().getHits().length != 0) {
    for (SearchHit hit : page.getHits()) {
      if (!uris.contains(hit.getId())) {
        DeleteResponse outcome =
            client.prepareDelete(indexName, typeName, hit.getId()).execute().actionGet();
        if (outcome.isFound()) {
          removed++;
        }
      }
    }

    page =
        client
            .prepareSearchScroll(page.getScrollId())
            .setScroll(new TimeValue(scrollKeepAlive))
            .execute()
            .actionGet();
  }

  return removed;
}
/** * Get JSON map for a given resource by applying the river settings * * @param rs resource being processed * @param properties properties to be indexed * @param model model returned by the indexing query * @param getPropLabel if set to true all URI property values will be indexed as their label. The * label is taken as the value of one of the properties set in {@link #uriDescriptionList}. * @return map of properties to be indexed for res */ private Map<String, ArrayList<String>> getJsonMap( Resource rs, Set<Property> properties, Model model, boolean getPropLabel) { Map<String, ArrayList<String>> jsonMap = new HashMap<String, ArrayList<String>>(); ArrayList<String> results = new ArrayList<String>(); if (addUriForResource) { results.add("\"" + rs.toString() + "\""); jsonMap.put("http://www.w3.org/1999/02/22-rdf-syntax-ns#about", results); } Set<String> rdfLanguages = new HashSet<String>(); for (Property prop : properties) { NodeIterator niter = model.listObjectsOfProperty(rs, prop); String property = prop.toString(); results = new ArrayList<String>(); String lang; String currValue; while (niter.hasNext()) { RDFNode node = niter.next(); currValue = getStringForResult(node, getPropLabel); if (addLanguage) { if (node.isLiteral()) { lang = node.asLiteral().getLanguage(); if (!lang.isEmpty()) { rdfLanguages.add("\"" + lang + "\""); } } } String shortValue = currValue; int currLen = currValue.length(); // Unquote string if (currLen > 1) shortValue = currValue.substring(1, currLen - 1); // If either whiteMap does contains shortValue // or blackMap contains the value // skip adding it to the index boolean whiteMapCond = whiteMap.containsKey(property) && !whiteMap.get(property).contains(shortValue); boolean blackMapCond = blackMap.containsKey(property) && blackMap.get(property).contains(shortValue); if (whiteMapCond || blackMapCond) { continue; } if (normalizeObj.containsKey(shortValue)) { results.add("\"" + normalizeObj.get(shortValue) + "\""); } else { 
results.add(currValue); } } // Do not index empty properties if (results.isEmpty()) continue; if (normalizeProp.containsKey(property)) { property = normalizeProp.get(property); if (jsonMap.containsKey(property)) { jsonMap.get(property).addAll(results); } else { jsonMap.put(property, results); } } else { jsonMap.put(property, results); } } if (addLanguage) { if (rdfLanguages.isEmpty() && !language.isEmpty()) rdfLanguages.add(language); if (!rdfLanguages.isEmpty()) jsonMap.put("language", new ArrayList<String>(rdfLanguages)); } for (Map.Entry<String, String> it : normalizeMissing.entrySet()) { if (!jsonMap.containsKey(it.getKey())) { ArrayList<String> res = new ArrayList<String>(); res.add("\"" + it.getValue() + "\""); jsonMap.put(it.getKey(), res); } } return jsonMap; }
/**
 * Starts a harvester with predefined queries to synchronize with the changes from the SPARQL
 * endpoint.
 *
 * <p>Resources whose sync time is newer than {@code startTime} are queried, optionally stale
 * documents are removed from the index (when {@code syncOldData} is set), and the changed
 * resources are then re-described in bulks and added to ElasticSearch.
 *
 * @return {@code true} when every step completed; {@code false} when either sync query returned
 *     {@code null}, a bulk query failed, or a sync query could not be parsed
 */
public boolean sync() {
  logger.info("Sync resources newer than {}", startTime);

  String rdfQueryTemplate =
      "PREFIX xsd:<http://www.w3.org/2001/XMLSchema#> "
          + "SELECT DISTINCT ?resource WHERE { "
          + " GRAPH ?graph { %s }"
          + " ?graph <%s> ?time . %s "
          + " FILTER (?time > xsd:dateTime(\"%s\")) }";

  String changedQuery =
      String.format(
          rdfQueryTemplate, syncConditions, syncTimeProp, graphSyncConditions, startTime);
  Set<String> syncUris = executeSyncQuery(changedQuery, "resource");
  if (syncUris == null) {
    logger.error("Errors occurred during sync procedure. Aborting!");
    return false;
  }

  // If desired, query for old data that has the sync conditions modified.
  //
  // This is useful when the application indexes resources that match some conditions: if
  // they are modified and no longer match the initial conditions, they would not be
  // synchronized. When syncOldData is true, the same query is run from the epoch onwards and
  // every indexed document missing from its result is deleted.
  int deleted = 0;
  int count = 0;
  if (this.syncOldData) {
    SimpleDateFormat epochFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss");
    String allQuery =
        String.format(
            rdfQueryTemplate,
            syncConditions,
            syncTimeProp,
            graphSyncConditions,
            epochFormat.format(new Date(0)));
    HashSet<String> allIndexURIs = executeSyncQuery(allQuery, "resource");
    if (allIndexURIs == null) {
      logger.error("Errors occurred during modified content sync query. Aborting!");
      return false;
    }
    deleted = removeMissingUris(allIndexURIs);
  }

  // Group the uris into bulks so that fewer calls are made to the SPARQL endpoint.
  ArrayList<ArrayList<String>> bulks = new ArrayList<ArrayList<String>>();
  ArrayList<String> pending = new ArrayList<String>();
  for (String uri : syncUris) {
    pending.add(uri);
    if (pending.size() == EEASettings.DEFAULT_BULK_SIZE) {
      bulks.add(pending);
      pending = new ArrayList<String>();
    }
  }
  if (!pending.isEmpty()) {
    bulks.add(pending);
  }

  // Execute the RDF query for the resources of each bulk.
  for (ArrayList<String> bulk : bulks) {
    String syncQuery = getSyncQueryStr(bulk);

    try {
      Query parsedQuery = QueryFactory.create(syncQuery);
      QueryExecution execution = QueryExecutionFactory.sparqlService(rdfEndpoint, parsedQuery);
      try {
        Model described = ModelFactory.createDefaultModel();
        execution.execConstruct(described);

        BulkRequestBuilder bulkRequest = client.prepareBulk();

        // When adding the model to ES do not use toDescribeURIs, as the query already
        // returned the correct labels.
        addModelToES(described, bulkRequest, false);
        count += bulk.size();
      } catch (Exception e) {
        logger.error("Error while querying for modified content. {}", e.getLocalizedMessage());
        return false;
      } finally {
        execution.close();
      }
    } catch (QueryParseException qpe) {
      logger.warn(
          "Could not parse Sync query. Please provide a relevant query. {}",
          qpe.getLocalizedMessage());
      return false;
    }
  }

  logger.info(
      "Finished synchronisation: Deleted {}, Updated {}/{}", deleted, count, syncUris.size());
  return true;
}