/**
 * Collects the given type together with every subject of an {@code owl:equivalentClass}
 * statement, in the type's own model, whose object is that type.
 *
 * @param type the class resource to start from; the statements are looked up in its model
 * @return a new list holding {@code type} first, followed by the subjects found, in the order
 *     the statement iterator yields them
 */
private static List<Resource> equivalentTypes(Resource type) {
  List<Resource> result = new ArrayList<Resource>();
  result.add(type);
  StmtIterator equivalences = type.getModel().listStatements(ANY, OWL.equivalentClass, type);
  while (equivalences.hasNext()) {
    result.add(equivalences.nextStatement().getSubject());
  }
  return result;
}
/**
 * Adds an {@code rdf:type} property to every candidate, once for the given type and once for
 * each additional type returned by {@link #equivalentTypes(Resource)}.
 *
 * @param type the type to assert on the candidates
 * @param candidates the resources that receive the type statements
 */
private static void addTypeToAll(Resource type, Set<Resource> candidates) {
  List<Resource> allTypes = equivalentTypes(type);
  for (Resource candidate : candidates) {
    for (Resource oneType : allTypes) {
      candidate.addProperty(RDF.type, oneType);
    }
  }
}
/**
 * Types every resource in {@code result} that carries all of the types listed by the statement
 * {@code s} with the subject of {@code s} (and the subject's equivalent types).
 *
 * <p>The list of required types is read from the resource that {@code AssemblerHelp.getResource}
 * yields for {@code s} (presumably the members of an intersection list; confirm against the
 * caller). Candidates start as the subjects in {@code result} that have the first list member as
 * {@code rdf:type}; {@code removeElementsWithoutType} is then applied for each remaining member.
 *
 * @param result the model whose resources are examined and updated
 * @param schema not used by this method
 * @param s the statement whose subject is the type to add and whose object describes the list
 */
private static void addIntersections(Model result, Model schema, Statement s) {
  Resource intersectionType = s.getSubject();
  List<RDFNode> members = asJavaList(AssemblerHelp.getResource(s));

  // Seed with everything that has the first member type, then narrow by the others.
  Set<Resource> matching = subjectSet(result, ANY, RDF.type, members.get(0));
  for (RDFNode member : members.subList(1, members.size())) {
    removeElementsWithoutType(matching, (Resource) member);
  }

  addTypeToAll(intersectionType, matching);
}
/**
 * Indexes every subject resource of a Jena model into Elasticsearch through bulk requests.
 *
 * <p>The method first collects the predicates to index: all of them when {@link #rdfPropList} is
 * empty, otherwise those selected by {@link #rdfPropList} together with {@code isWhitePropList},
 * plus any predicate that has an entry in {@code normalizeProp}. It then builds one index request
 * per subject and executes the bulk every {@code EEASettings.DEFAULT_BULK_SIZE} requests, followed
 * by a final execution for whatever is left.
 *
 * @param model the model to index
 * @param bulkRequest the bulk builder that receives the first batch of index requests; later
 *     batches use fresh builders obtained from the client
 * @param getPropLabel if set to true all URI property values will be indexed as their label. The
 *     label is taken as the value of one of the properties set in {@link #uriDescriptionList}.
 */
private void addModelToES(Model model, BulkRequestBuilder bulkRequest, boolean getPropLabel) {
  final long startedAt = System.currentTimeMillis();

  // Pass 1: decide which predicates take part in the indexed documents.
  HashSet<Property> selectedProps = new HashSet<Property>();
  for (StmtIterator statements = model.listStatements(); statements.hasNext(); ) {
    Property predicate = statements.nextStatement().getPredicate();
    String predicateName = predicate.toString();
    boolean selected =
        rdfPropList.isEmpty()
            || (isWhitePropList && rdfPropList.contains(predicateName))
            || (!isWhitePropList && !rdfPropList.contains(predicateName))
            || (normalizeProp.containsKey(predicateName));
    if (selected) {
      selectedProps.add(predicate);
    }
  }

  // Pass 2: one index request per subject, flushed in fixed-size batches.
  long indexedDocs = 0;
  for (ResIterator subjects = model.listSubjects(); subjects.hasNext(); ) {
    Resource subject = subjects.nextResource();
    Map<String, ArrayList<String>> jsonMap =
        getJsonMap(subject, selectedProps, model, getPropLabel);
    bulkRequest.add(
        client
            .prepareIndex(indexName, typeName, subject.toString())
            .setSource(mapToString(jsonMap)));
    indexedDocs++;

    if (indexedDocs % EEASettings.DEFAULT_BULK_SIZE != 0) {
      continue;
    }

    BulkResponse batchResponse = bulkRequest.execute().actionGet();
    // Start the next batch on a fresh builder before looking at the failures.
    bulkRequest = client.prepareBulk();
    if (batchResponse.hasFailures()) {
      processBulkResponseFailure(batchResponse);
    }
  }

  // Send whatever did not fill a complete batch.
  if (bulkRequest.numberOfActions() > 0) {
    BulkResponse lastResponse = bulkRequest.execute().actionGet();
    if (lastResponse.hasFailures()) {
      processBulkResponseFailure(lastResponse);
    }
  }

  logger.info(
      "Indexed {} documents on {}/{} in {} seconds",
      indexedDocs,
      indexName,
      typeName,
      (System.currentTimeMillis() - startedAt) / 1000.0);
}
/** * Answer a list of the named hierarchy roots of a given {@link OntModel}. This will be similar to * the results of {@link OntModel#listHierarchyRootClasses()}, with the added constraint that * every member of the returned iterator will be a named class, not an anonymous class expression. * The named root classes are calculated from the root classes, by recursively replacing every * anonymous class with its direct sub-classes. Thus it can be seen that the values in the list * consists of the shallowest fringe of named classes in the hierarchy. * * @param m An ontology model * @return A list of classes whose members are the named root classes of the class hierarchy in * <code>m</code> */ public static List<OntClass> namedHierarchyRoots(OntModel m) { List<OntClass> nhr = new ArrayList<OntClass>(); // named roots List<OntClass> ahr = new ArrayList<OntClass>(); // anon roots // do the initial partition of the root classes partitionByNamed(m.listHierarchyRootClasses(), nhr, ahr); // now push the fringe down until we have only named classes while (!ahr.isEmpty()) { OntClass c = ahr.remove(0); partitionByNamed(c.listSubClasses(true), nhr, ahr); } return nhr; }
/**
 * Lists the facet URIs declared in the stored content of a need.
 *
 * <p>The need is loaded from the need repository and its content model from the RDF storage
 * service. The facets are the URI-resource objects of the {@code WON.HAS_FACET} properties of the
 * resource named by the content model's default ({@code ""}) namespace prefix.
 *
 * @param needUri identifies the need to inspect
 * @return the facet URIs found; empty when the need has no content model or declares no facets
 * @throws NoSuchNeedException declared by this method; presumably raised when the need cannot
 *     be loaded (confirm against {@code DataAccessUtils.loadNeed})
 */
public Collection<URI> getSupportedFacets(URI needUri) throws NoSuchNeedException {
  List<URI> facets = new LinkedList<URI>();
  Need need = DataAccessUtils.loadNeed(needRepository, needUri);
  Model content = rdfStorageService.loadContent(need);
  if (content != null) {
    Resource base = content.getResource(content.getNsPrefixURI(""));
    for (StmtIterator facetStmts = base.listProperties(WON.HAS_FACET); facetStmts.hasNext(); ) {
      RDFNode facet = facetStmts.nextStatement().getObject();
      if (!facet.isURIResource()) {
        continue; // only URI resources can be turned into a java.net.URI
      }
      facets.add(URI.create(facet.toString()));
    }
  }
  return facets;
}
/** * Partition the members of an iterator into two lists, according to whether they are named or * anonymous classes * * @param i An iterator to partition * @param named A list of named classes * @param anon A list of anonymous classes */ protected static void partitionByNamed( Iterator<? extends OntClass> i, List<OntClass> named, List<OntClass> anon) { while (i.hasNext()) { OntClass c = i.next(); boolean ignore = false; // duplicate check: we ignore this class if we've already got it if (named.contains(c)) { ignore = true; } // subsumption check: c must have only anon classes or Thing // as super-classes to still qualify as a root class Resource thing = c.getProfile().THING(); for (Iterator<OntClass> j = c.listSuperClasses(); !ignore && j.hasNext(); ) { OntClass sup = j.next(); if (!((thing != null && sup.equals(thing)) || sup.isAnon() || sup.equals(c))) { ignore = true; } } if (!ignore) { // place the class in the appropriate partition (c.isAnon() ? anon : named).add(c); } } }
private static ResultSetRewindable unique(ResultSetRewindable results) { // VERY crude. Utilises the fact that bindings have value equality. List<Binding> x = new ArrayList<Binding>(); Set<Binding> seen = new HashSet<Binding>(); for (; results.hasNext(); ) { Binding b = results.nextBinding(); if (seen.contains(b)) continue; seen.add(b); x.add(b); } QueryIterator qIter = new QueryIterPlainWrapper(x.iterator()); ResultSet rs = new ResultSetStream(results.getResultVars(), ModelFactory.createDefaultModel(), qIter); return ResultSetFactory.makeRewindable(rs); }
/**
 * Runs the harvester for the configured queries and/or URLs.
 *
 * <p>Unless the harvester is already closed, it harvests from the SPARQL endpoint (only when
 * queries are configured), then from the RDF dumps, and marks itself closed. The end of the
 * harvest is logged in either case.
 *
 * @return always {@code true}
 */
public boolean runIndexAll() {
  logger.info(
      "Starting RDF harvester: endpoint [{}], queries [{}],"
          + "URIs [{}], index name [{}], typeName [{}]",
      rdfEndpoint,
      rdfQueries,
      rdfUris,
      indexName,
      typeName);

  while (!this.closed) {
    // Harvest from a SPARQL endpoint
    if (!rdfQueries.isEmpty()) {
      harvestFromEndpoint();
    }

    // Harvest from RDF dumps
    harvestFromDumps();

    closed = true;
  }

  logger.info(
      "Ended harvest for endpoint [{}], queries [{}],"
          + "URIs [{}], index name {}, type name {}",
      rdfEndpoint,
      rdfQueries,
      rdfUris,
      indexName,
      typeName);
  return true;
}
/** * Answer the shortest path from the <code>start</code> resource to the <code>end</code> RDF node, * such that every step on the path is accepted by the given filter. A path is a {@link List} of * RDF {@link Statement}s. The subject of the first statement in the list is <code>start</code>, * and the object of the last statement in the list is <code>end</code>. * * <p>The <code>onPath</code> argument is a {@link Filter}, which accepts a statement and returns * true if the statement should be considered to be on the path. To search for an unconstrained * path, pass {@link Filter#any} as an argument. To search for a path whose predicates match a * fixed restricted set of property names, pass an instance of {@link PredicatesFilter}. * * <p>If there is more than one path of minimal length from <code>start</code> to <code>end</code> * , this method returns an arbitrary one. The algorithm is blind breadth-first search, with loop * detection. * * @param m The model in which we are seeking a path * @param start The starting resource * @param end The end, or goal, node * @param onPath A filter which determines whether a given statement can be considered part of the * path * @return A path, consisting of a list of statements whose first subject is <code>start</code>, * and whose last object is <code>end</code>, or null if no such path exists. 
*/ public static Path findShortestPath( Model m, Resource start, RDFNode end, Filter<Statement> onPath) { List<Path> bfs = new LinkedList<Path>(); Set<Resource> seen = new HashSet<Resource>(); // initialise the paths for (Iterator<Statement> i = m.listStatements(start, null, (RDFNode) null).filterKeep(onPath); i.hasNext(); ) { bfs.add(new Path().append(i.next())); } // search Path solution = null; while (solution == null && !bfs.isEmpty()) { Path candidate = bfs.remove(0); if (candidate.hasTerminus(end)) { solution = candidate; } else { Resource terminus = candidate.getTerminalResource(); if (terminus != null) { seen.add(terminus); // breadth-first expansion for (Iterator<Statement> i = terminus.listProperties().filterKeep(onPath); i.hasNext(); ) { Statement link = i.next(); // no looping allowed, so we skip this link if it takes us to a node we've seen if (!seen.contains(link.getObject())) { bfs.add(candidate.append(link)); } } } } } return solution; }
/**
 * Produces a rewindable copy of a result set in which every value is a plain string literal.
 *
 * <p>For each result variable of each row: an unbound variable becomes the empty string, a blank
 * node becomes {@code "_:"} followed by its label, and anything else becomes the value of
 * {@code NodeFunctions.str}. The input result set is consumed from its current position.
 *
 * @param resultsActual the result set to convert
 * @return a new rewindable result set with the same result variables and string-literal values
 */
private ResultSetRewindable convertToStrings(ResultSetRewindable resultsActual) {
  List<Binding> stringRows = new ArrayList<Binding>();
  while (resultsActual.hasNext()) {
    Binding sourceRow = resultsActual.nextBinding();
    BindingMap stringRow = BindingFactory.create();
    for (String varName : resultsActual.getResultVars()) {
      Var var = Var.alloc(varName);
      Node value = sourceRow.get(var);
      String text;
      if (value != null) {
        text = value.isBlank() ? "_:" + value.getBlankNodeLabel() : NodeFunctions.str(value);
      } else {
        text = "";
      }
      stringRow.add(var, NodeFactory.createLiteral(text));
    }
    stringRows.add(stringRow);
  }

  ResultSet converted =
      new ResultSetStream(
          resultsActual.getResultVars(),
          null,
          new QueryIterPlainWrapper(stringRows.iterator()));
  return ResultSetFactory.makeRewindable(converted);
}
/**
 * Return all values for the given option as Strings. The values held locally win; the parent
 * options object is consulted only when there are no local values and a parent exists.
 */
@Override
protected List<String> getAllValues(OPT option) {
  List<String> localValues = super.getAllValues(option);
  if (localValues.isEmpty() && hasParent()) {
    return getParent().getAllValues(option);
  }
  return localValues;
}
/**
 * Asserts that none of the given statements is contained in the model.
 *
 * @param m the model to check
 * @param statements the statements that must all be absent from {@code m}
 */
public void testOmits(Model m, List<Statement> statements) {
  for (Statement unexpected : statements) {
    assertFalse("it should not be here", m.contains(unexpected));
  }
}
/**
 * Build a CONSTRUCT query returning all triples whose subject is a member of {@code uris}.
 *
 * <p>When {@link #uriDescriptionList} is empty the query is just that subject filter. Otherwise
 * the base pattern additionally drops triples whose object has a value for any of the listed
 * properties, and one UNION branch per listed property adds those triples back with the object
 * replaced by {@code str(?label)}, so the labels are retrieved directly from the SPARQL endpoint.
 *
 * @param uris URIs for queried resources
 * @return a CONSTRUCT query string
 */
private String getSyncQueryStr(Iterable<String> uris) {
  // Render the URIs as a SPARQL expression list: (<u1>, <u2>, ...)
  StringBuilder uriList = new StringBuilder();
  uriList.append("(");
  boolean firstUri = true;
  for (String uri : uris) {
    if (!firstUri) {
      uriList.append(", ");
    }
    uriList.append(String.format("<%s>", uri));
    firstUri = false;
  }
  uriList.append(")");
  String uriSet = uriList.toString();

  // Base triples having any element of uris as subject
  StringBuilder query = new StringBuilder();
  query.append("CONSTRUCT { ?s ?p ?o } WHERE {");
  query.append("{?s ?p ?o");
  query.append(String.format(" . FILTER (?s in %s )", uriSet));

  // Without description properties there is no label resolution to add
  if (uriDescriptionList.isEmpty()) {
    return query.append("}}").toString();
  }

  // Filter out the triples whose object has a label; ?o1, ?o2, ... are numbered from 1
  String filterTemplate = " . OPTIONAL { ?o <%s> ?o%d } " + " . FILTER(!BOUND(?o%d))";
  int position = 1;
  for (String prop : uriDescriptionList) {
    query.append(String.format(filterTemplate, prop, position, position));
    position++;
  }
  query.append("}");

  // UNION queries can't handle sub-selects without a prior clause, so each branch starts
  // with this redundant clause that is always true.
  String redundantClause =
      "<http://www.w3.org/2000/01/rdf-schema#Class> "
          + "a <http://www.w3.org/2000/01/rdf-schema#Class>";

  // One branch per description property, resolving ?o as str(?label) where
  // ?res <prop> ?label. Taking str(?label) drops the language tag of the label, so the
  // document is indexed with a language present only in its top-level properties.
  // A sub-select is used instead of BIND because some Virtuoso versions do not allow BIND.
  String partQueryTemplate =
      " UNION "
          + "{ "
          + redundantClause
          + " . "
          + "{ SELECT ?s ?p (str(?label) as ?o) { "
          + " ?s ?p ?res"
          + " . FILTER (?s in %s)"
          + " . ?res <%s> ?label }}}";
  for (String prop : uriDescriptionList) {
    query.append(String.format(partQueryTemplate, uriSet, prop));
  }

  return query.append("}").toString();
}
/**
 * Sets the {@link Harvester}'s {@link #rdfPropList} parameter to a copy of the given list. An
 * empty list is ignored and leaves the current value untouched.
 *
 * @param list - a list of properties names that are either required in the object description, or
 *     undesired, depending on its {@link #isWhitePropList}
 * @return the same {@link Harvester} with the {@link #rdfPropList} parameter set
 */
public Harvester rdfPropList(List<String> list) {
  if (list.isEmpty()) {
    return this;
  }
  rdfPropList = new ArrayList<String>(list);
  return this;
}