/** * Test that can be used to verify that we are doing an efficient scan for the distinct predicates * (distinct key prefix scan). */ public void test_rdf01_distinctPrefixScan() throws Exception { final Properties properties = super.getProperties(); // override the default axiom model. properties.setProperty( com.bigdata.rdf.store.AbstractTripleStore.Options.AXIOMS_CLASS, NoAxioms.class.getName()); final AbstractTripleStore store = getStore(properties); try { final BigdataValueFactory f = store.getValueFactory(); final URI A = f.createURI("http://www.foo.org/A"); final URI B = f.createURI("http://www.foo.org/B"); final URI C = f.createURI("http://www.foo.org/C"); final URI D = f.createURI("http://www.foo.org/D"); final URI E = f.createURI("http://www.foo.org/E"); final URI rdfType = RDF.TYPE; final URI rdfProperty = RDF.PROPERTY; /* * Three statements that will trigger the rule, but two statements * share the same predicate. When it does the minimum amount of * work, the rule will fire for each distinct predicate in the KB -- * for this KB that is only twice. */ store.addStatement(A, B, C); store.addStatement(C, B, D); store.addStatement(A, E, C); assertTrue(store.hasStatement(A, B, C)); assertTrue(store.hasStatement(C, B, D)); assertTrue(store.hasStatement(A, E, C)); assertFalse(store.hasStatement(B, rdfType, rdfProperty)); assertFalse(store.hasStatement(E, rdfType, rdfProperty)); assertEquals(3, store.getStatementCount()); final Rule r = new RuleRdf01(store.getSPORelation().getNamespace(), store.getVocabulary()); applyRule(store, r, 2 /* solutionCount */, 2 /* mutationCount */); /* * validate the state of the primary store. */ assertTrue(store.hasStatement(A, B, C)); assertTrue(store.hasStatement(C, B, D)); assertTrue(store.hasStatement(A, E, C)); assertTrue(store.hasStatement(B, rdfType, rdfProperty)); assertTrue(store.hasStatement(E, rdfType, rdfProperty)); assertEquals(5, store.getStatementCount()); } finally { store.__tearDownUnitTest(); } }
/** * Return the class partition statistics for the named graph. * * @param kb The KB instance. * @param civ The {@link IV} of a named graph (required). * @return The class partition statistics for that named graph. Only class partitions which are * non-empty are returned. */ protected static IVCount[] classUsage( final AbstractTripleStore kb, final IV<?, ?> civ, final IVCount[] classPartitionCounts) { final SPORelation r = kb.getSPORelation(); final boolean quads = kb.isQuads(); if (!quads) { // Named graph only valid in quads mode. throw new IllegalArgumentException(); } // Resolve IV for rdf:type final BigdataURI rdfType = kb.getValueFactory().asValue(RDF.TYPE); kb.getLexiconRelation() .addTerms(new BigdataValue[] {rdfType}, 1 /* numTerms */, true /* readOnly */); if (rdfType.getIV() == null) { // No rdf:type assertions since rdf:type is unknown term. return new IVCount[0]; } // The non-zero counts. final List<IVCount> counts = new LinkedList<IVCount>(); // Check the known non-empty predicate partitions. for (IVCount in : classPartitionCounts) { final long n = r.getAccessPath(null, rdfType.getIV() /* p */, in.iv /* o */, civ) .rangeCount(false /* exact */); if (n == 0) continue; final IVCount out = new IVCount(in.iv, n); out.setValue(in.getValue()); counts.add(out); } final IVCount[] a = counts.toArray(new IVCount[counts.size()]); // Order by descending count. Arrays.sort(a); return a; }
/** Basic test of rule semantics. */ public void test_rdf01() throws Exception { final Properties properties = super.getProperties(); // override the default axiom model. properties.setProperty( com.bigdata.rdf.store.AbstractTripleStore.Options.AXIOMS_CLASS, NoAxioms.class.getName()); final AbstractTripleStore store = getStore(properties); try { final BigdataValueFactory f = store.getValueFactory(); final URI A = f.createURI("http://www.foo.org/A"); final URI B = f.createURI("http://www.foo.org/B"); final URI C = f.createURI("http://www.foo.org/C"); final URI rdfType = RDF.TYPE; final URI rdfProperty = RDF.PROPERTY; store.addStatement(A, B, C); assertTrue(store.hasStatement(A, B, C)); assertFalse(store.hasStatement(B, rdfType, rdfProperty)); assertEquals(1, store.getStatementCount()); final Rule r = new RuleRdf01(store.getSPORelation().getNamespace(), store.getVocabulary()); applyRule(store, r, 1 /* solutionCount*/, 1 /*mutationCount*/); /* * validate the state of the primary store. */ assertTrue(store.hasStatement(A, B, C)); assertTrue(store.hasStatement(B, rdfType, rdfProperty)); assertEquals(2, store.getStatementCount()); } finally { store.__tearDownUnitTest(); } }
/** * Return an efficient statistical summary for the class partitions. The SPARQL query for this is * * <pre> * SELECT ?class (COUNT(?s) AS ?count ) { ?s a ?class } GROUP BY ?class ORDER BY ?count * </pre> * * However, it is much efficient to scan POS for * * <pre> * rdf:type ?o ?s * </pre> * * and report the range count of * * <pre> * rdf:type ?o ?s * </pre> * * for each distinct value of <code>?o</code>. * * @param kb The KB instance. * @return The class usage statistics. */ protected static IVCount[] classUsage(final AbstractTripleStore kb) { final SPORelation r = kb.getSPORelation(); if (r.oneAccessPath) { // The necessary index (POS or POCS) does not exist. throw new UnsupportedOperationException(); } final boolean quads = kb.isQuads(); final SPOKeyOrder keyOrder = quads ? SPOKeyOrder.POCS : SPOKeyOrder.POS; // Resolve IV for rdf:type final BigdataURI rdfType = kb.getValueFactory().asValue(RDF.TYPE); kb.getLexiconRelation() .addTerms(new BigdataValue[] {rdfType}, 1 /* numTerms */, true /* readOnly */); if (rdfType.getIV() == null) { // No rdf:type assertions since rdf:type is unknown term. return new IVCount[0]; } // visit distinct term identifiers for the rdf:type predicate. @SuppressWarnings("rawtypes") final IChunkedIterator<IV> itr = r.distinctMultiTermScan(keyOrder, new IV[] {rdfType.getIV()} /* knownTerms */); // resolve term identifiers to terms efficiently during iteration. final BigdataValueIterator itr2 = new BigdataValueIteratorImpl(kb /* resolveTerms */, itr); try { final Set<IV<?, ?>> ivs = new LinkedHashSet<IV<?, ?>>(); final Map<IV<?, ?>, IVCount> counts = new LinkedHashMap<IV<?, ?>, IVCount>(); while (itr2.hasNext()) { final BigdataValue term = itr2.next(); final IV<?, ?> iv = term.getIV(); final long n = r.getAccessPath(null, rdfType.getIV() /* p */, iv /* o */, null) .rangeCount(false /* exact */); ivs.add(iv); counts.put(iv, new IVCount(iv, n)); } // Batch resolve IVs to Values final Map<IV<?, ?>, BigdataValue> x = kb.getLexiconRelation().getTerms(ivs); for (Map.Entry<IV<?, ?>, BigdataValue> e : x.entrySet()) { final IVCount count = counts.get(e.getKey()); count.setValue(e.getValue()); } final IVCount[] a = counts.values().toArray(new IVCount[counts.size()]); // Order by descending count. Arrays.sort(a); return a; } finally { itr2.close(); } }