/** * Test that can be used to verify that we are doing an efficient scan for the distinct predicates * (distinct key prefix scan). */ public void test_rdf01_distinctPrefixScan() throws Exception { final Properties properties = super.getProperties(); // override the default axiom model. properties.setProperty( com.bigdata.rdf.store.AbstractTripleStore.Options.AXIOMS_CLASS, NoAxioms.class.getName()); final AbstractTripleStore store = getStore(properties); try { final BigdataValueFactory f = store.getValueFactory(); final URI A = f.createURI("http://www.foo.org/A"); final URI B = f.createURI("http://www.foo.org/B"); final URI C = f.createURI("http://www.foo.org/C"); final URI D = f.createURI("http://www.foo.org/D"); final URI E = f.createURI("http://www.foo.org/E"); final URI rdfType = RDF.TYPE; final URI rdfProperty = RDF.PROPERTY; /* * Three statements that will trigger the rule, but two statements * share the same predicate. When it does the minimum amount of * work, the rule will fire for each distinct predicate in the KB -- * for this KB that is only twice. */ store.addStatement(A, B, C); store.addStatement(C, B, D); store.addStatement(A, E, C); assertTrue(store.hasStatement(A, B, C)); assertTrue(store.hasStatement(C, B, D)); assertTrue(store.hasStatement(A, E, C)); assertFalse(store.hasStatement(B, rdfType, rdfProperty)); assertFalse(store.hasStatement(E, rdfType, rdfProperty)); assertEquals(3, store.getStatementCount()); final Rule r = new RuleRdf01(store.getSPORelation().getNamespace(), store.getVocabulary()); applyRule(store, r, 2 /* solutionCount */, 2 /* mutationCount */); /* * validate the state of the primary store. */ assertTrue(store.hasStatement(A, B, C)); assertTrue(store.hasStatement(C, B, D)); assertTrue(store.hasStatement(A, E, C)); assertTrue(store.hasStatement(B, rdfType, rdfProperty)); assertTrue(store.hasStatement(E, rdfType, rdfProperty)); assertEquals(5, store.getStatementCount()); } finally { store.__tearDownUnitTest(); } }
/** * Return the class partition statistics for the named graph. * * @param kb The KB instance. * @param civ The {@link IV} of a named graph (required). * @return The class partition statistics for that named graph. Only class partitions which are * non-empty are returned. */ protected static IVCount[] classUsage( final AbstractTripleStore kb, final IV<?, ?> civ, final IVCount[] classPartitionCounts) { final SPORelation r = kb.getSPORelation(); final boolean quads = kb.isQuads(); if (!quads) { // Named graph only valid in quads mode. throw new IllegalArgumentException(); } // Resolve IV for rdf:type final BigdataURI rdfType = kb.getValueFactory().asValue(RDF.TYPE); kb.getLexiconRelation() .addTerms(new BigdataValue[] {rdfType}, 1 /* numTerms */, true /* readOnly */); if (rdfType.getIV() == null) { // No rdf:type assertions since rdf:type is unknown term. return new IVCount[0]; } // The non-zero counts. final List<IVCount> counts = new LinkedList<IVCount>(); // Check the known non-empty predicate partitions. for (IVCount in : classPartitionCounts) { final long n = r.getAccessPath(null, rdfType.getIV() /* p */, in.iv /* o */, civ) .rangeCount(false /* exact */); if (n == 0) continue; final IVCount out = new IVCount(in.iv, n); out.setValue(in.getValue()); counts.add(out); } final IVCount[] a = counts.toArray(new IVCount[counts.size()]); // Order by descending count. Arrays.sort(a); return a; }
/** Basic test of rule semantics. */ public void test_rdf01() throws Exception { final Properties properties = super.getProperties(); // override the default axiom model. properties.setProperty( com.bigdata.rdf.store.AbstractTripleStore.Options.AXIOMS_CLASS, NoAxioms.class.getName()); final AbstractTripleStore store = getStore(properties); try { final BigdataValueFactory f = store.getValueFactory(); final URI A = f.createURI("http://www.foo.org/A"); final URI B = f.createURI("http://www.foo.org/B"); final URI C = f.createURI("http://www.foo.org/C"); final URI rdfType = RDF.TYPE; final URI rdfProperty = RDF.PROPERTY; store.addStatement(A, B, C); assertTrue(store.hasStatement(A, B, C)); assertFalse(store.hasStatement(B, rdfType, rdfProperty)); assertEquals(1, store.getStatementCount()); final Rule r = new RuleRdf01(store.getSPORelation().getNamespace(), store.getVocabulary()); applyRule(store, r, 1 /* solutionCount*/, 1 /*mutationCount*/); /* * validate the state of the primary store. */ assertTrue(store.hasStatement(A, B, C)); assertTrue(store.hasStatement(B, rdfType, rdfProperty)); assertEquals(2, store.getStatementCount()); } finally { store.__tearDownUnitTest(); } }
/** * Return the predicate partition statistics for the named graph. * * @param kb The KB instance. * @param civ The {@link IV} of a named graph (required). * @return The predicate partition statistics for that named graph. Only predicate partitions * which are non-empty are returned. */ protected static IVCount[] predicateUsage( final AbstractTripleStore kb, final IV<?, ?> civ, final IVCount[] predicatePartitionCounts) { final SPORelation r = kb.getSPORelation(); final boolean quads = kb.isQuads(); if (!quads) { // Named graph only valid in quads mode. throw new IllegalArgumentException(); } // The non-zero counts. final List<IVCount> counts = new LinkedList<IVCount>(); // Check the known non-empty predicate partitions. for (IVCount in : predicatePartitionCounts) { final long n = r.getAccessPath(null, in.iv, null, civ).rangeCount(false /* exact */); if (n == 0) continue; final IVCount out = new IVCount(in.iv, n); out.setValue(in.getValue()); counts.add(out); } final IVCount[] a = counts.toArray(new IVCount[counts.size()]); // Order by descending count. Arrays.sort(a); return a; }
/** * Creates a {@link Justification}, writes it on the store using {@link * RDFJoinNexus#newInsertBuffer(IMutableRelation)}, verifies that we can read it back from the * store, and then retracts the justified statement and verifies that the justification was also * retracted. */ public void test_writeReadRetract() { final Properties properties = super.getProperties(); // override the default axiom model. properties.setProperty( com.bigdata.rdf.store.AbstractTripleStore.Options.AXIOMS_CLASS, NoAxioms.class.getName()); final AbstractTripleStore store = getStore(properties); try { if (!store.isJustify()) { log.warn("Test skipped - justifications not enabled"); } /* * the explicit statement that is the support for the rule. */ final IV U = store.addTerm(new URIImpl("http://www.bigdata.com/U")); final IV A = store.addTerm(new URIImpl("http://www.bigdata.com/A")); final IV Y = store.addTerm(new URIImpl("http://www.bigdata.com/Y")); store.addStatements( new SPO[] { // new SPO(U, A, Y, StatementEnum.Explicit) // }, // 1); assertTrue(store.hasStatement(U, A, Y)); assertEquals(1, store.getStatementCount()); final InferenceEngine inf = store.getInferenceEngine(); final Vocabulary vocab = store.getVocabulary(); // the rule. final Rule rule = new RuleRdf01(store.getSPORelation().getNamespace(), vocab); final IJoinNexus joinNexus = store .newJoinNexusFactory( RuleContextEnum.DatabaseAtOnceClosure, ActionEnum.Insert, IJoinNexus.ALL, null /* filter */) .newInstance(store.getIndexManager()); /* * The buffer that accepts solutions and causes them to be written * onto the statement indices and the justifications index. */ final IBuffer<ISolution[]> insertBuffer = joinNexus.newInsertBuffer(store.getSPORelation()); // the expected justification (setup and verified below). final Justification jst; // the expected entailment. final SPO expectedEntailment = new SPO( // A, vocab.get(RDF.TYPE), vocab.get(RDF.PROPERTY), StatementEnum.Inferred); { final IBindingSet bindingSet = joinNexus.newBindingSet(rule); /* * Note: rdfs1 is implemented using a distinct term scan. This * has the effect of leaving the variables that do not appear in * the head of the rule unbound. Therefore we DO NOT bind those * variables here in the test case and they will be represented * as ZERO (0L) in the justifications index and interpreted as * wildcards. */ // bindingSet.set(Var.var("u"), new Constant<IV>(U)); bindingSet.set(Var.var("a"), new Constant<IV>(A)); // bindingSet.set(Var.var("y"), new Constant<IV>(Y)); final ISolution solution = new Solution(joinNexus, rule, bindingSet); /* * Verify the justification that will be built from that * solution. */ { jst = new Justification(solution); /* * Verify the bindings on the head of the rule as * represented by the justification. */ assertEquals(expectedEntailment, jst.getHead()); /* * Verify the bindings on the tail of the rule as * represented by the justification. Again, note that the * variables that do not appear in the head of the rule are * left unbound for rdfs1 as a side-effect of evaluation * using a distinct term scan. */ final SPO[] expectedTail = new SPO[] { // new SPO(NULL, A, NULL, StatementEnum.Inferred) // }; if (!Arrays.equals(expectedTail, jst.getTail())) { fail("Expected: " + Arrays.toString(expectedTail) + ", but actual: " + jst); } } // insert solution into the buffer. insertBuffer.add(new ISolution[] {solution}); } // SPOAssertionBuffer buf = new SPOAssertionBuffer(store, store, // null/* filter */, 100/* capacity */, true/* justified */); // // assertTrue(buf.add(head, jst)); // no justifications before hand. assertEquals(0L, store.getSPORelation().getJustificationIndex().rangeCount()); // flush the buffer. assertEquals(1L, insertBuffer.flush()); // one justification afterwards. assertEquals(1L, store.getSPORelation().getJustificationIndex().rangeCount()); /* * verify read back from the index. */ { final ITupleIterator<Justification> itr = store.getSPORelation().getJustificationIndex().rangeIterator(); while (itr.hasNext()) { final ITuple<Justification> tuple = itr.next(); // de-serialize the justification from the key. final Justification tmp = tuple.getObject(); // verify the same. assertEquals(jst, tmp); // no more justifications in the index. assertFalse(itr.hasNext()); } } /* * test iterator with a single justification. */ { final FullyBufferedJustificationIterator itr = new FullyBufferedJustificationIterator(store, expectedEntailment); assertTrue(itr.hasNext()); final Justification tmp = itr.next(); assertEquals(jst, tmp); } // an empty focusStore. final TempTripleStore focusStore = new TempTripleStore(store.getIndexManager().getTempStore(), store.getProperties(), store); try { /* * The inference (A rdf:type rdf:property) is grounded by the * explicit statement (U A Y). */ assertTrue( Justification.isGrounded( inf, focusStore, store, expectedEntailment, false /* testHead */, true /* testFocusStore */, new VisitedSPOSet(focusStore.getIndexManager()))); // add the statement (U A Y) to the focusStore. focusStore.addStatements( new SPO[] { // new SPO(U, A, Y, StatementEnum.Explicit) // }, // 1); /* * The inference is no longer grounded since we have declared * that we are also retracting its grounds. */ assertFalse( Justification.isGrounded( inf, focusStore, store, expectedEntailment, false /* testHead */, true /* testFocusStore */, new VisitedSPOSet(focusStore.getIndexManager()))); } finally { /* * Destroy the temp kb, but not the backing TemporaryStore. That * will be destroyed when we destroy the IndexManager associated * with the main store (below). */ focusStore.destroy(); } /* * remove the justified statements. */ assertEquals( 1L, store .getAccessPath(expectedEntailment.s, expectedEntailment.p, expectedEntailment.o) .removeAll()); /* * verify that the justification for that statement is gone. */ { final ITupleIterator<?> itr = store.getSPORelation().getJustificationIndex().rangeIterator(); assertFalse(itr.hasNext()); } } finally { store.__tearDownUnitTest(); } }
/** * Return an efficient statistical summary for the class partitions. The SPARQL query for this is * * <pre> * SELECT ?class (COUNT(?s) AS ?count ) { ?s a ?class } GROUP BY ?class ORDER BY ?count * </pre> * * However, it is much efficient to scan POS for * * <pre> * rdf:type ?o ?s * </pre> * * and report the range count of * * <pre> * rdf:type ?o ?s * </pre> * * for each distinct value of <code>?o</code>. * * @param kb The KB instance. * @return The class usage statistics. */ protected static IVCount[] classUsage(final AbstractTripleStore kb) { final SPORelation r = kb.getSPORelation(); if (r.oneAccessPath) { // The necessary index (POS or POCS) does not exist. throw new UnsupportedOperationException(); } final boolean quads = kb.isQuads(); final SPOKeyOrder keyOrder = quads ? SPOKeyOrder.POCS : SPOKeyOrder.POS; // Resolve IV for rdf:type final BigdataURI rdfType = kb.getValueFactory().asValue(RDF.TYPE); kb.getLexiconRelation() .addTerms(new BigdataValue[] {rdfType}, 1 /* numTerms */, true /* readOnly */); if (rdfType.getIV() == null) { // No rdf:type assertions since rdf:type is unknown term. return new IVCount[0]; } // visit distinct term identifiers for the rdf:type predicate. @SuppressWarnings("rawtypes") final IChunkedIterator<IV> itr = r.distinctMultiTermScan(keyOrder, new IV[] {rdfType.getIV()} /* knownTerms */); // resolve term identifiers to terms efficiently during iteration. final BigdataValueIterator itr2 = new BigdataValueIteratorImpl(kb /* resolveTerms */, itr); try { final Set<IV<?, ?>> ivs = new LinkedHashSet<IV<?, ?>>(); final Map<IV<?, ?>, IVCount> counts = new LinkedHashMap<IV<?, ?>, IVCount>(); while (itr2.hasNext()) { final BigdataValue term = itr2.next(); final IV<?, ?> iv = term.getIV(); final long n = r.getAccessPath(null, rdfType.getIV() /* p */, iv /* o */, null) .rangeCount(false /* exact */); ivs.add(iv); counts.put(iv, new IVCount(iv, n)); } // Batch resolve IVs to Values final Map<IV<?, ?>, BigdataValue> x = kb.getLexiconRelation().getTerms(ivs); for (Map.Entry<IV<?, ?>, BigdataValue> e : x.entrySet()) { final IVCount count = counts.get(e.getKey()); count.setValue(e.getValue()); } final IVCount[] a = counts.values().toArray(new IVCount[counts.size()]); // Order by descending count. Arrays.sort(a); return a; } finally { itr2.close(); } }
/** * Return an array of the distinct predicates in the KB ordered by their descending frequency of * use. The {@link IV}s in the returned array will have been resolved to the corresponding {@link * BigdataURI}s which can be accessed using {@link IV#getValue()}. * * @param kb The KB instance. */ protected static IVCount[] predicateUsage(final AbstractTripleStore kb) { final SPORelation r = kb.getSPORelation(); if (r.oneAccessPath) { // The necessary index (POS or POCS) does not exist. throw new UnsupportedOperationException(); } final boolean quads = kb.isQuads(); // the index to use for distinct predicate scan. final SPOKeyOrder keyOrder = quads ? SPOKeyOrder.POCS : SPOKeyOrder.POS; // visit distinct term identifiers for predicate position on that index. @SuppressWarnings("rawtypes") final IChunkedIterator<IV> itr = r.distinctTermScan(keyOrder); // resolve term identifiers to terms efficiently during iteration. final BigdataValueIterator itr2 = new BigdataValueIteratorImpl(kb /* resolveTerms */, itr); try { final Set<IV<?, ?>> ivs = new LinkedHashSet<IV<?, ?>>(); final Map<IV<?, ?>, IVCount> counts = new LinkedHashMap<IV<?, ?>, IVCount>(); while (itr2.hasNext()) { final BigdataValue term = itr2.next(); final IV<?, ?> iv = term.getIV(); final long n = r.getAccessPath(null, iv, null, null).rangeCount(false /* exact */); ivs.add(iv); counts.put(iv, new IVCount(iv, n)); } // Batch resolve IVs to Values final Map<IV<?, ?>, BigdataValue> x = kb.getLexiconRelation().getTerms(ivs); for (Map.Entry<IV<?, ?>, BigdataValue> e : x.entrySet()) { final IVCount count = counts.get(e.getKey()); count.setValue(e.getValue()); } final IVCount[] a = counts.values().toArray(new IVCount[counts.size()]); // Order by descending count. Arrays.sort(a); return a; } finally { itr2.close(); } }
/** * Describe the default data set (the one identified by the namespace associated with the {@link * AbstractTripleStore}. * * @param describeStatistics When <code>true</code>, the VoID description will include the {@link * VoidVocabularyDecl#vocabulary} declarations, the property partition statistics, and the * class partition statistics. * @param describeNamedGraphs When <code>true</code>, each named graph will also be described in * in the same level of detail as the default graph. Otherwise only the default graph will be * described. */ public void describeDataSet(final boolean describeStatistics, final boolean describeNamedGraphs) { final String namespace = tripleStore.getNamespace(); // This is a VoID data set. g.add(aDataset, RDF.TYPE, VoidVocabularyDecl.Dataset); // The namespace is used as a title for the data set. g.add(aDataset, DCTermsVocabularyDecl.title, f.createLiteral(namespace)); // Also present the namespace in an unambiguous manner. g.add(aDataset, SD.KB_NAMESPACE, f.createLiteral(namespace)); /** * Service end point for this namespace. * * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/689" > Missing URL encoding in * RemoteRepositoryManager </a> */ for (String uri : serviceURI) { g.add( aDataset, VoidVocabularyDecl.sparqlEndpoint, f.createURI(uri + "/" + ConnectOptions.urlEncode(namespace) + "/sparql")); } // any URI is considered to be an entity. g.add(aDataset, VoidVocabularyDecl.uriRegexPattern, f.createLiteral("^.*")); if (!describeStatistics) { // No statistics. return; } // Frequency count of the predicates in the default graph. final IVCount[] predicatePartitionCounts = predicateUsage(tripleStore); // Frequency count of the classes in the default graph. final IVCount[] classPartitionCounts = classUsage(tripleStore); // Describe vocabularies based on the predicate partitions. describeVocabularies(predicatePartitionCounts); // defaultGraph description. { // Default graph in the default data set. g.add(aDataset, SD.defaultGraph, aDefaultGraph); // Describe the default graph using statistics. describeGraph(aDefaultGraph, predicatePartitionCounts, classPartitionCounts); } // end defaultGraph // sb.append("termCount\t = " + tripleStore.getTermCount() + "\n"); // // sb.append("uriCount\t = " + tripleStore.getURICount() + "\n"); // // sb.append("literalCount\t = " + tripleStore.getLiteralCount() + // "\n"); // // /* // * Note: The blank node count is only available when using the told // * bnodes mode. // */ // sb // .append("bnodeCount\t = " // + (tripleStore.getLexiconRelation() // .isStoreBlankNodes() ? "" // + tripleStore.getBNodeCount() : "N/A") // + "\n"); /* * Report for each named graph. */ if (describeNamedGraphs && tripleStore.isQuads()) { final SPORelation r = tripleStore.getSPORelation(); // the index to use for distinct term scan. final SPOKeyOrder keyOrder = SPOKeyOrder.CSPO; // visit distinct IVs for context position on that index. @SuppressWarnings("rawtypes") final IChunkedIterator<IV> itr = r.distinctTermScan(keyOrder); // resolve IVs to terms efficiently during iteration. final BigdataValueIterator itr2 = new BigdataValueIteratorImpl(tripleStore /* resolveTerms */, itr); try { while (itr2.hasNext()) { /* * Describe this named graph. * * Note: This is using the predicate and class partition * statistics from the default graph (RDF merge) to identify * the set of all possible predicates and classes within * each named graph. It then tests each predicate and class * partition against the named graph and ignores those which * are not present in a given named graph. This is being * done because we do not have a CPxx index. */ final BigdataResource graph = (BigdataResource) itr2.next(); final IVCount[] predicatePartitionCounts2 = predicateUsage(tripleStore, graph.getIV(), predicatePartitionCounts); final IVCount[] classPartitionCounts2 = classUsage(tripleStore, graph.getIV(), classPartitionCounts); final BNode aNamedGraph = f.createBNode(); // Named graph in the default data set. g.add(aDataset, SD.namedGraph, aNamedGraph); // The name of that named graph. g.add(aNamedGraph, SD.name, graph); // Describe the named graph. describeGraph(aNamedGraph, predicatePartitionCounts2, classPartitionCounts2); } } finally { itr2.close(); } } }