/** * Describe the vocabularies which are in use in the KB based on the predicate partition * statistics. * * @param predicateParitionCounts The predicate partition statistics. */ protected void describeVocabularies(final IVCount[] predicatePartitionCounts) { // Find the distinct vocabularies in use. final Set<String> namespaces = new LinkedHashSet<String>(); { // property partitions. for (IVCount tmp : predicatePartitionCounts) { final URI p = (URI) tmp.getValue(); String namespace = p.getNamespace(); if (namespace.endsWith("#")) { // Strip trailing '#' per VoID specification. namespace = namespace.substring(0, namespace.length() - 1); } namespaces.add(namespace); } } // Sort into dictionary order. final String[] a = namespaces.toArray(new String[namespaces.size()]); Arrays.sort(a); for (String namespace : a) { g.add(aDataset, VoidVocabularyDecl.vocabulary, f.createURI(namespace)); } }
/** * Describe a named or default graph. * * @param graph The named graph. * @param predicatePartitionCounts The predicate partition statistics for that graph. * @param classPartitionCounts The class partition statistics for that graph. */ protected void describeGraph( final Resource graph, final IVCount[] predicatePartitionCounts, final IVCount[] classPartitionCounts) { // The graph is a Graph. g.add(graph, RDF.TYPE, SD.Graph); // #of triples in the default graph g.add(graph, VoidVocabularyDecl.triples, f.createLiteral(tripleStore.getStatementCount())); // #of entities in the default graph. g.add(graph, VoidVocabularyDecl.entities, f.createLiteral(tripleStore.getURICount())); // #of distinct predicates in the default graph. g.add(graph, VoidVocabularyDecl.properties, f.createLiteral(predicatePartitionCounts.length)); // #of distinct classes in the default graph. g.add(graph, VoidVocabularyDecl.classes, f.createLiteral(classPartitionCounts.length)); // property partition statistics. for (IVCount tmp : predicatePartitionCounts) { final BNode propertyPartition = f.createBNode(); final URI p = (URI) tmp.getValue(); g.add(graph, VoidVocabularyDecl.propertyPartition, propertyPartition); g.add(propertyPartition, VoidVocabularyDecl.property, p); g.add(propertyPartition, VoidVocabularyDecl.triples, f.createLiteral(tmp.count)); } // class partition statistics. { // per class partition statistics. for (IVCount tmp : classPartitionCounts) { final BNode classPartition = f.createBNode(); final BigdataValue cls = tmp.getValue(); g.add(graph, VoidVocabularyDecl.classPartition, classPartition); g.add(classPartition, VoidVocabularyDecl.class_, cls); g.add(classPartition, VoidVocabularyDecl.triples, f.createLiteral(tmp.count)); } } // end class partition statistics. }
/** * Return the class partition statistics for the named graph. * * @param kb The KB instance. * @param civ The {@link IV} of a named graph (required). * @return The class partition statistics for that named graph. Only class partitions which are * non-empty are returned. */ protected static IVCount[] classUsage( final AbstractTripleStore kb, final IV<?, ?> civ, final IVCount[] classPartitionCounts) { final SPORelation r = kb.getSPORelation(); final boolean quads = kb.isQuads(); if (!quads) { // Named graph only valid in quads mode. throw new IllegalArgumentException(); } // Resolve IV for rdf:type final BigdataURI rdfType = kb.getValueFactory().asValue(RDF.TYPE); kb.getLexiconRelation() .addTerms(new BigdataValue[] {rdfType}, 1 /* numTerms */, true /* readOnly */); if (rdfType.getIV() == null) { // No rdf:type assertions since rdf:type is unknown term. return new IVCount[0]; } // The non-zero counts. final List<IVCount> counts = new LinkedList<IVCount>(); // Check the known non-empty predicate partitions. for (IVCount in : classPartitionCounts) { final long n = r.getAccessPath(null, rdfType.getIV() /* p */, in.iv /* o */, civ) .rangeCount(false /* exact */); if (n == 0) continue; final IVCount out = new IVCount(in.iv, n); out.setValue(in.getValue()); counts.add(out); } final IVCount[] a = counts.toArray(new IVCount[counts.size()]); // Order by descending count. Arrays.sort(a); return a; }
/** * Return the predicate partition statistics for the named graph. * * @param kb The KB instance. * @param civ The {@link IV} of a named graph (required). * @return The predicate partition statistics for that named graph. Only predicate partitions * which are non-empty are returned. */ protected static IVCount[] predicateUsage( final AbstractTripleStore kb, final IV<?, ?> civ, final IVCount[] predicatePartitionCounts) { final SPORelation r = kb.getSPORelation(); final boolean quads = kb.isQuads(); if (!quads) { // Named graph only valid in quads mode. throw new IllegalArgumentException(); } // The non-zero counts. final List<IVCount> counts = new LinkedList<IVCount>(); // Check the known non-empty predicate partitions. for (IVCount in : predicatePartitionCounts) { final long n = r.getAccessPath(null, in.iv, null, civ).rangeCount(false /* exact */); if (n == 0) continue; final IVCount out = new IVCount(in.iv, n); out.setValue(in.getValue()); counts.add(out); } final IVCount[] a = counts.toArray(new IVCount[counts.size()]); // Order by descending count. Arrays.sort(a); return a; }
/** * Return an efficient statistical summary for the class partitions. The SPARQL query for this is * * <pre> * SELECT ?class (COUNT(?s) AS ?count ) { ?s a ?class } GROUP BY ?class ORDER BY ?count * </pre> * * However, it is much efficient to scan POS for * * <pre> * rdf:type ?o ?s * </pre> * * and report the range count of * * <pre> * rdf:type ?o ?s * </pre> * * for each distinct value of <code>?o</code>. * * @param kb The KB instance. * @return The class usage statistics. */ protected static IVCount[] classUsage(final AbstractTripleStore kb) { final SPORelation r = kb.getSPORelation(); if (r.oneAccessPath) { // The necessary index (POS or POCS) does not exist. throw new UnsupportedOperationException(); } final boolean quads = kb.isQuads(); final SPOKeyOrder keyOrder = quads ? SPOKeyOrder.POCS : SPOKeyOrder.POS; // Resolve IV for rdf:type final BigdataURI rdfType = kb.getValueFactory().asValue(RDF.TYPE); kb.getLexiconRelation() .addTerms(new BigdataValue[] {rdfType}, 1 /* numTerms */, true /* readOnly */); if (rdfType.getIV() == null) { // No rdf:type assertions since rdf:type is unknown term. return new IVCount[0]; } // visit distinct term identifiers for the rdf:type predicate. @SuppressWarnings("rawtypes") final IChunkedIterator<IV> itr = r.distinctMultiTermScan(keyOrder, new IV[] {rdfType.getIV()} /* knownTerms */); // resolve term identifiers to terms efficiently during iteration. final BigdataValueIterator itr2 = new BigdataValueIteratorImpl(kb /* resolveTerms */, itr); try { final Set<IV<?, ?>> ivs = new LinkedHashSet<IV<?, ?>>(); final Map<IV<?, ?>, IVCount> counts = new LinkedHashMap<IV<?, ?>, IVCount>(); while (itr2.hasNext()) { final BigdataValue term = itr2.next(); final IV<?, ?> iv = term.getIV(); final long n = r.getAccessPath(null, rdfType.getIV() /* p */, iv /* o */, null) .rangeCount(false /* exact */); ivs.add(iv); counts.put(iv, new IVCount(iv, n)); } // Batch resolve IVs to Values final Map<IV<?, ?>, BigdataValue> x = kb.getLexiconRelation().getTerms(ivs); for (Map.Entry<IV<?, ?>, BigdataValue> e : x.entrySet()) { final IVCount count = counts.get(e.getKey()); count.setValue(e.getValue()); } final IVCount[] a = counts.values().toArray(new IVCount[counts.size()]); // Order by descending count. Arrays.sort(a); return a; } finally { itr2.close(); } }
/** * Return an array of the distinct predicates in the KB ordered by their descending frequency of * use. The {@link IV}s in the returned array will have been resolved to the corresponding {@link * BigdataURI}s which can be accessed using {@link IV#getValue()}. * * @param kb The KB instance. */ protected static IVCount[] predicateUsage(final AbstractTripleStore kb) { final SPORelation r = kb.getSPORelation(); if (r.oneAccessPath) { // The necessary index (POS or POCS) does not exist. throw new UnsupportedOperationException(); } final boolean quads = kb.isQuads(); // the index to use for distinct predicate scan. final SPOKeyOrder keyOrder = quads ? SPOKeyOrder.POCS : SPOKeyOrder.POS; // visit distinct term identifiers for predicate position on that index. @SuppressWarnings("rawtypes") final IChunkedIterator<IV> itr = r.distinctTermScan(keyOrder); // resolve term identifiers to terms efficiently during iteration. final BigdataValueIterator itr2 = new BigdataValueIteratorImpl(kb /* resolveTerms */, itr); try { final Set<IV<?, ?>> ivs = new LinkedHashSet<IV<?, ?>>(); final Map<IV<?, ?>, IVCount> counts = new LinkedHashMap<IV<?, ?>, IVCount>(); while (itr2.hasNext()) { final BigdataValue term = itr2.next(); final IV<?, ?> iv = term.getIV(); final long n = r.getAccessPath(null, iv, null, null).rangeCount(false /* exact */); ivs.add(iv); counts.put(iv, new IVCount(iv, n)); } // Batch resolve IVs to Values final Map<IV<?, ?>, BigdataValue> x = kb.getLexiconRelation().getTerms(ivs); for (Map.Entry<IV<?, ?>, BigdataValue> e : x.entrySet()) { final IVCount count = counts.get(e.getKey()); count.setValue(e.getValue()); } final IVCount[] a = counts.values().toArray(new IVCount[counts.size()]); // Order by descending count. Arrays.sort(a); return a; } finally { itr2.close(); } }