/**
 * Indexes every subject resource of a Jena model into Elasticsearch through bulk requests.
 *
 * <p>The predicates to index are collected first: a predicate is kept when {@code rdfPropList}
 * is empty, when its membership in {@code rdfPropList} agrees with {@code isWhitePropList}, or
 * when {@code normalizeProp} has a key for it. Each subject is then turned into a JSON map and
 * queued as an index request; the queue is executed every {@code EEASettings.DEFAULT_BULK_SIZE}
 * requests and once more at the end for any remainder.
 *
 * @param model the model to index
 * @param bulkRequest a BulkRequestBuilder; it receives the first batch, later batches go to
 *     fresh builders obtained from {@code client.prepareBulk()}
 * @param getPropLabel if set to true all URI property values will be indexed as their label. The
 *     label is taken as the value of one of the properties set in {@link #uriDescriptionList}.
 */
private void addModelToES(Model model, BulkRequestBuilder bulkRequest, boolean getPropLabel) {
  final long startTime = System.currentTimeMillis();

  // Pass 1: gather the predicates that are allowed to be indexed.
  HashSet<Property> properties = new HashSet<Property>();
  for (StmtIterator statements = model.listStatements(); statements.hasNext(); ) {
    Property predicate = statements.nextStatement().getPredicate();
    String predicateName = predicate.toString();

    // White list: the predicate must be listed; black list: it must not be listed.
    boolean accepted =
        rdfPropList.isEmpty()
            || isWhitePropList == rdfPropList.contains(predicateName)
            || normalizeProp.containsKey(predicateName);
    if (accepted) {
      properties.add(predicate);
    }
  }

  // Pass 2: queue one index request per subject, flushing in fixed-size batches.
  long bulkLength = 0;
  for (ResIterator subjects = model.listSubjects(); subjects.hasNext(); ) {
    Resource subject = subjects.nextResource();
    Map<String, ArrayList<String>> jsonMap =
        getJsonMap(subject, properties, model, getPropLabel);

    bulkRequest.add(
        client
            .prepareIndex(indexName, typeName, subject.toString())
            .setSource(mapToString(jsonMap)));
    bulkLength++;

    // Only a full batch triggers an intermediate execution.
    if (bulkLength % EEASettings.DEFAULT_BULK_SIZE != 0) {
      continue;
    }

    BulkResponse batchResponse = bulkRequest.execute().actionGet();
    // Start a fresh builder so already-sent requests are not queued again.
    bulkRequest = client.prepareBulk();
    if (batchResponse.hasFailures()) {
      processBulkResponseFailure(batchResponse);
    }
  }

  // Send whatever is left over from the last, partially filled batch.
  if (bulkRequest.numberOfActions() > 0) {
    BulkResponse finalResponse = bulkRequest.execute().actionGet();
    if (finalResponse.hasFailures()) {
      processBulkResponseFailure(finalResponse);
    }
  }

  // Report how many documents were queued and how long the whole run took.
  double elapsedSeconds = (System.currentTimeMillis() - startTime) / 1000.0;
  logger.info(
      "Indexed {} documents on {}/{} in {} seconds",
      bulkLength,
      indexName,
      typeName,
      elapsedSeconds);
}
/** * Partition the members of an iterator into two lists, according to whether they are named or * anonymous classes * * @param i An iterator to partition * @param named A list of named classes * @param anon A list of anonymous classes */ protected static void partitionByNamed( Iterator<? extends OntClass> i, List<OntClass> named, List<OntClass> anon) { while (i.hasNext()) { OntClass c = i.next(); boolean ignore = false; // duplicate check: we ignore this class if we've already got it if (named.contains(c)) { ignore = true; } // subsumption check: c must have only anon classes or Thing // as super-classes to still qualify as a root class Resource thing = c.getProfile().THING(); for (Iterator<OntClass> j = c.listSuperClasses(); !ignore && j.hasNext(); ) { OntClass sup = j.next(); if (!((thing != null && sup.equals(thing)) || sup.isAnon() || sup.equals(c))) { ignore = true; } } if (!ignore) { // place the class in the appropriate partition (c.isAnon() ? anon : named).add(c); } } }