/** * Index all the resources in a Jena Model to ES * * @param model the model to index * @param bulkRequest a BulkRequestBuilder * @param getPropLabel if set to true all URI property values will be indexed as their label. The * label is taken as the value of one of the properties set in {@link #uriDescriptionList}. */ private void addModelToES(Model model, BulkRequestBuilder bulkRequest, boolean getPropLabel) { long startTime = System.currentTimeMillis(); long bulkLength = 0; HashSet<Property> properties = new HashSet<Property>(); StmtIterator it = model.listStatements(); while (it.hasNext()) { Statement st = it.nextStatement(); Property prop = st.getPredicate(); String property = prop.toString(); if (rdfPropList.isEmpty() || (isWhitePropList && rdfPropList.contains(property)) || (!isWhitePropList && !rdfPropList.contains(property)) || (normalizeProp.containsKey(property))) { properties.add(prop); } } ResIterator resIt = model.listSubjects(); while (resIt.hasNext()) { Resource rs = resIt.nextResource(); Map<String, ArrayList<String>> jsonMap = getJsonMap(rs, properties, model, getPropLabel); bulkRequest.add( client.prepareIndex(indexName, typeName, rs.toString()).setSource(mapToString(jsonMap))); bulkLength++; // We want to execute the bulk for every DEFAULT_BULK_SIZE requests if (bulkLength % EEASettings.DEFAULT_BULK_SIZE == 0) { BulkResponse bulkResponse = bulkRequest.execute().actionGet(); // After executing, flush the BulkRequestBuilder. bulkRequest = client.prepareBulk(); if (bulkResponse.hasFailures()) { processBulkResponseFailure(bulkResponse); } } } // Execute remaining requests if (bulkRequest.numberOfActions() > 0) { BulkResponse response = bulkRequest.execute().actionGet(); // Handle failure by iterating through each bulk response item if (response.hasFailures()) { processBulkResponseFailure(response); } } // Show time taken to index the documents logger.info( "Indexed {} documents on {}/{} in {} seconds", bulkLength, indexName, typeName, (System.currentTimeMillis() - startTime) / 1000.0); }
/** * Get a set of unique queryObjName returned from a select query * * <p>Used to retrieve sets of modified objects used in sync * * @param rdfQuery query to execute * @param queryObjName name of the object returned * @return set of values for queryObjectName in the rdfQuery result */ HashSet<String> executeSyncQuery(String rdfQuery, String queryObjName) { HashSet<String> rdfUrls = new HashSet<String>(); Query query; try { query = QueryFactory.create(rdfQuery); } catch (QueryParseException qpe) { logger.warn( "Could not parse [{}]. Please provide a relevant query. {}", rdfQuery, qpe.getLocalizedMessage()); return null; } QueryExecution qExec = QueryExecutionFactory.sparqlService(rdfEndpoint, query); try { ResultSet results = qExec.execSelect(); while (results.hasNext()) { QuerySolution sol = results.nextSolution(); try { String value = sol.getResource(queryObjName).toString(); rdfUrls.add(value); } catch (NoSuchElementException e) { logger.error("Encountered a NoSuchElementException: " + e.getLocalizedMessage()); return null; } } } catch (Exception e) { logger.error( "Encountered a [{}] while querying the endpoint for sync", e.getLocalizedMessage()); return null; } finally { qExec.close(); } return rdfUrls; }