/** * Describe the vocabularies which are in use in the KB based on the predicate partition * statistics. * * @param predicateParitionCounts The predicate partition statistics. */ protected void describeVocabularies(final IVCount[] predicatePartitionCounts) { // Find the distinct vocabularies in use. final Set<String> namespaces = new LinkedHashSet<String>(); { // property partitions. for (IVCount tmp : predicatePartitionCounts) { final URI p = (URI) tmp.getValue(); String namespace = p.getNamespace(); if (namespace.endsWith("#")) { // Strip trailing '#' per VoID specification. namespace = namespace.substring(0, namespace.length() - 1); } namespaces.add(namespace); } } // Sort into dictionary order. final String[] a = namespaces.toArray(new String[namespaces.size()]); Arrays.sort(a); for (String namespace : a) { g.add(aDataset, VoidVocabularyDecl.vocabulary, f.createURI(namespace)); } }
/** * Describe a named or default graph. * * @param graph The named graph. * @param predicatePartitionCounts The predicate partition statistics for that graph. * @param classPartitionCounts The class partition statistics for that graph. */ protected void describeGraph( final Resource graph, final IVCount[] predicatePartitionCounts, final IVCount[] classPartitionCounts) { // The graph is a Graph. g.add(graph, RDF.TYPE, SD.Graph); // #of triples in the default graph g.add(graph, VoidVocabularyDecl.triples, f.createLiteral(tripleStore.getStatementCount())); // #of entities in the default graph. g.add(graph, VoidVocabularyDecl.entities, f.createLiteral(tripleStore.getURICount())); // #of distinct predicates in the default graph. g.add(graph, VoidVocabularyDecl.properties, f.createLiteral(predicatePartitionCounts.length)); // #of distinct classes in the default graph. g.add(graph, VoidVocabularyDecl.classes, f.createLiteral(classPartitionCounts.length)); // property partition statistics. for (IVCount tmp : predicatePartitionCounts) { final BNode propertyPartition = f.createBNode(); final URI p = (URI) tmp.getValue(); g.add(graph, VoidVocabularyDecl.propertyPartition, propertyPartition); g.add(propertyPartition, VoidVocabularyDecl.property, p); g.add(propertyPartition, VoidVocabularyDecl.triples, f.createLiteral(tmp.count)); } // class partition statistics. { // per class partition statistics. for (IVCount tmp : classPartitionCounts) { final BNode classPartition = f.createBNode(); final BigdataValue cls = tmp.getValue(); g.add(graph, VoidVocabularyDecl.classPartition, classPartition); g.add(classPartition, VoidVocabularyDecl.class_, cls); g.add(classPartition, VoidVocabularyDecl.triples, f.createLiteral(tmp.count)); } } // end class partition statistics. }
/** * Describe the default data set (the one identified by the namespace associated with the {@link * AbstractTripleStore}. * * @param describeStatistics When <code>true</code>, the VoID description will include the {@link * VoidVocabularyDecl#vocabulary} declarations, the property partition statistics, and the * class partition statistics. * @param describeNamedGraphs When <code>true</code>, each named graph will also be described in * in the same level of detail as the default graph. Otherwise only the default graph will be * described. */ public void describeDataSet(final boolean describeStatistics, final boolean describeNamedGraphs) { final String namespace = tripleStore.getNamespace(); // This is a VoID data set. g.add(aDataset, RDF.TYPE, VoidVocabularyDecl.Dataset); // The namespace is used as a title for the data set. g.add(aDataset, DCTermsVocabularyDecl.title, f.createLiteral(namespace)); // Also present the namespace in an unambiguous manner. g.add(aDataset, SD.KB_NAMESPACE, f.createLiteral(namespace)); /** * Service end point for this namespace. * * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/689" > Missing URL encoding in * RemoteRepositoryManager </a> */ for (String uri : serviceURI) { g.add( aDataset, VoidVocabularyDecl.sparqlEndpoint, f.createURI(uri + "/" + ConnectOptions.urlEncode(namespace) + "/sparql")); } // any URI is considered to be an entity. g.add(aDataset, VoidVocabularyDecl.uriRegexPattern, f.createLiteral("^.*")); if (!describeStatistics) { // No statistics. return; } // Frequency count of the predicates in the default graph. final IVCount[] predicatePartitionCounts = predicateUsage(tripleStore); // Frequency count of the classes in the default graph. final IVCount[] classPartitionCounts = classUsage(tripleStore); // Describe vocabularies based on the predicate partitions. describeVocabularies(predicatePartitionCounts); // defaultGraph description. { // Default graph in the default data set. g.add(aDataset, SD.defaultGraph, aDefaultGraph); // Describe the default graph using statistics. describeGraph(aDefaultGraph, predicatePartitionCounts, classPartitionCounts); } // end defaultGraph // sb.append("termCount\t = " + tripleStore.getTermCount() + "\n"); // // sb.append("uriCount\t = " + tripleStore.getURICount() + "\n"); // // sb.append("literalCount\t = " + tripleStore.getLiteralCount() + // "\n"); // // /* // * Note: The blank node count is only available when using the told // * bnodes mode. // */ // sb // .append("bnodeCount\t = " // + (tripleStore.getLexiconRelation() // .isStoreBlankNodes() ? "" // + tripleStore.getBNodeCount() : "N/A") // + "\n"); /* * Report for each named graph. */ if (describeNamedGraphs && tripleStore.isQuads()) { final SPORelation r = tripleStore.getSPORelation(); // the index to use for distinct term scan. final SPOKeyOrder keyOrder = SPOKeyOrder.CSPO; // visit distinct IVs for context position on that index. @SuppressWarnings("rawtypes") final IChunkedIterator<IV> itr = r.distinctTermScan(keyOrder); // resolve IVs to terms efficiently during iteration. final BigdataValueIterator itr2 = new BigdataValueIteratorImpl(tripleStore /* resolveTerms */, itr); try { while (itr2.hasNext()) { /* * Describe this named graph. * * Note: This is using the predicate and class partition * statistics from the default graph (RDF merge) to identify * the set of all possible predicates and classes within * each named graph. It then tests each predicate and class * partition against the named graph and ignores those which * are not present in a given named graph. This is being * done because we do not have a CPxx index. */ final BigdataResource graph = (BigdataResource) itr2.next(); final IVCount[] predicatePartitionCounts2 = predicateUsage(tripleStore, graph.getIV(), predicatePartitionCounts); final IVCount[] classPartitionCounts2 = classUsage(tripleStore, graph.getIV(), classPartitionCounts); final BNode aNamedGraph = f.createBNode(); // Named graph in the default data set. g.add(aDataset, SD.namedGraph, aNamedGraph); // The name of that named graph. g.add(aNamedGraph, SD.name, graph); // Describe the named graph. describeGraph(aNamedGraph, predicatePartitionCounts2, classPartitionCounts2); } } finally { itr2.close(); } } }
/** * Implementation using the json.org API. * * @param json The RDF/JSON string to be parsed and converted into a Sesame Graph. * @return A Sesame Graph if successful, otherwise null. */ public static Graph rdfJsonToGraph(String json) { Graph result = new GraphImpl(); ValueFactory vf = result.getValueFactory(); try { JSONObject input = new JSONObject(json); Iterator<String> subjects = input.keys(); while (subjects.hasNext()) { String subjStr = subjects.next(); Resource subject = null; subject = subjStr.startsWith("_:") ? vf.createBNode(subjStr.substring(2)) : vf.createURI(subjStr); JSONObject pObj = input.getJSONObject(subjStr); Iterator<String> predicates = pObj.keys(); while (predicates.hasNext()) { String predStr = predicates.next(); URI predicate = vf.createURI(predStr); JSONArray predArr = pObj.getJSONArray(predStr); for (int i = 0; i < predArr.length(); i++) { Value object = null; JSONObject obj = predArr.getJSONObject(i); if (!obj.has("value")) { continue; } String value = obj.getString("value"); if (!obj.has("type")) { continue; } String type = obj.getString("type"); String lang = null; if (obj.has("lang")) { lang = obj.getString("lang"); } String datatype = null; if (obj.has("datatype")) { datatype = obj.getString("datatype"); } if ("literal".equals(type)) { if (lang != null) { object = vf.createLiteral(value, lang); } else if (datatype != null) { object = vf.createLiteral(value, vf.createURI(datatype)); } else { object = vf.createLiteral(value); } } else if ("bnode".equals(type)) { object = vf.createBNode(value.substring(2)); } else if ("uri".equals(type)) { object = vf.createURI(value); } if (obj.has("graphs")) { JSONArray a = obj.getJSONArray("graphs"); // System.out.println("a.length() = " + a.length()); for (int j = 0; j < a.length(); j++) { // Note: any nulls here will result in statements in the default context. String s = a.getString(j); Resource context = s.equals("null") ? null : vf.createURI(s); // System.out.println("context = " + context); result.add(subject, predicate, object, context); } } else { result.add(subject, predicate, object); } } } } } catch (JSONException e) { log.error(e.getMessage(), e); return null; } return result; }