Example #1
  /**
   * Test verifying that we perform an efficient scan for the distinct predicates (a distinct key
   * prefix scan).
   */
  public void test_rdf01_distinctPrefixScan() throws Exception {

    final Properties properties = super.getProperties();

    // override the default axiom model.
    properties.setProperty(
        com.bigdata.rdf.store.AbstractTripleStore.Options.AXIOMS_CLASS, NoAxioms.class.getName());

    final AbstractTripleStore store = getStore(properties);

    try {

      final BigdataValueFactory f = store.getValueFactory();

      final URI A = f.createURI("http://www.foo.org/A");
      final URI B = f.createURI("http://www.foo.org/B");
      final URI C = f.createURI("http://www.foo.org/C");
      final URI D = f.createURI("http://www.foo.org/D");
      final URI E = f.createURI("http://www.foo.org/E");

      final URI rdfType = RDF.TYPE;
      final URI rdfProperty = RDF.PROPERTY;

      /*
       * Three statements will trigger the rule, but two of them share the
       * same predicate. When the rule does the minimum amount of work, it
       * fires once for each distinct predicate in the KB -- for this KB,
       * that is only twice.
       */
      store.addStatement(A, B, C);
      store.addStatement(C, B, D);
      store.addStatement(A, E, C);

      assertTrue(store.hasStatement(A, B, C));
      assertTrue(store.hasStatement(C, B, D));
      assertTrue(store.hasStatement(A, E, C));
      assertFalse(store.hasStatement(B, rdfType, rdfProperty));
      assertFalse(store.hasStatement(E, rdfType, rdfProperty));
      assertEquals(3, store.getStatementCount());

      final Rule r = new RuleRdf01(store.getSPORelation().getNamespace(), store.getVocabulary());

      applyRule(store, r, 2 /* solutionCount */, 2 /* mutationCount */);

      /*
       * validate the state of the primary store.
       */
      assertTrue(store.hasStatement(A, B, C));
      assertTrue(store.hasStatement(C, B, D));
      assertTrue(store.hasStatement(A, E, C));
      assertTrue(store.hasStatement(B, rdfType, rdfProperty));
      assertTrue(store.hasStatement(E, rdfType, rdfProperty));
      assertEquals(5, store.getStatementCount());

    } finally {

      store.__tearDownUnitTest();
    }
  }
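
The "distinct key prefix scan" exercised by this test can be illustrated outside of bigdata. Below is a minimal, self-contained sketch in plain Java; a sorted set stands in for the POS index, and the key layout with a '|' separator is illustrative, not the bigdata key encoding. The point is that after visiting one key, the scan seeks directly past every key sharing the current predicate prefix instead of iterating over them.

import java.util.ArrayList;
import java.util.List;
import java.util.NavigableSet;
import java.util.TreeSet;

public class DistinctPrefixScanSketch {

  /**
   * Visit each distinct prefix in a sorted set of keys without scanning
   * every key: after reading one key, seek to the first key greater than
   * prefix + '\uffff' (the successor of the prefix), skipping all keys
   * that share it.
   */
  static List<String> distinctPrefixes(final NavigableSet<String> index, final int prefixLen) {

    final List<String> prefixes = new ArrayList<>();

    String key = index.isEmpty() ? null : index.first();

    while (key != null) {

      final String prefix = key.substring(0, Math.min(prefixLen, key.length()));

      prefixes.add(prefix);

      // One seek per distinct prefix rather than one visit per key.
      key = index.higher(prefix + '\uffff');
    }

    return prefixes;
  }

  public static void main(final String[] args) {

    final NavigableSet<String> pos = new TreeSet<>();

    // POS keys (p|o|s) for the three statements asserted by the test.
    pos.add("B|C|A");
    pos.add("B|D|C");
    pos.add("E|C|A");

    // Two distinct predicates, so the rule fires exactly twice.
    System.out.println(distinctPrefixes(pos, 1)); // [B, E]
  }
}

On the test's three statements this yields exactly the two distinct predicates, which matches the solutionCount of 2 passed to applyRule.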
Example #2
File: VoID.java Project: kietly/database
  /**
   * Return the class partition statistics for the named graph.
   *
   * @param kb The KB instance.
   * @param civ The {@link IV} of a named graph (required).
   * @param classPartitionCounts The non-empty class partitions of the default graph (the RDF
   *     merge), which are probed against the named graph.
   * @return The class partition statistics for that named graph. Only class partitions which are
   *     non-empty are returned.
   */
  protected static IVCount[] classUsage(
      final AbstractTripleStore kb, final IV<?, ?> civ, final IVCount[] classPartitionCounts) {

    final SPORelation r = kb.getSPORelation();

    final boolean quads = kb.isQuads();

    if (!quads) {

      // Named graph only valid in quads mode.
      throw new IllegalArgumentException();
    }

    // Resolve IV for rdf:type
    final BigdataURI rdfType = kb.getValueFactory().asValue(RDF.TYPE);

    kb.getLexiconRelation()
        .addTerms(new BigdataValue[] {rdfType}, 1 /* numTerms */, true /* readOnly */);

    if (rdfType.getIV() == null) {

      // No rdf:type assertions since rdf:type is an unknown term.
      return new IVCount[0];
    }

    // The non-zero counts.
    final List<IVCount> counts = new LinkedList<IVCount>();

    // Check the known non-empty class partitions.
    for (IVCount in : classPartitionCounts) {

      final long n =
          r.getAccessPath(null, rdfType.getIV() /* p */, in.iv /* o */, civ)
              .rangeCount(false /* exact */);

      if (n == 0) continue;

      final IVCount out = new IVCount(in.iv, n);

      out.setValue(in.getValue());

      counts.add(out);
    }

    final IVCount[] a = counts.toArray(new IVCount[counts.size()]);

    // Order by descending count.
    Arrays.sort(a);

    return a;
  }
Example #3
  /** Basic test of rule semantics. */
  public void test_rdf01() throws Exception {

    final Properties properties = super.getProperties();

    // override the default axiom model.
    properties.setProperty(
        com.bigdata.rdf.store.AbstractTripleStore.Options.AXIOMS_CLASS, NoAxioms.class.getName());

    final AbstractTripleStore store = getStore(properties);

    try {

      final BigdataValueFactory f = store.getValueFactory();

      final URI A = f.createURI("http://www.foo.org/A");
      final URI B = f.createURI("http://www.foo.org/B");
      final URI C = f.createURI("http://www.foo.org/C");

      final URI rdfType = RDF.TYPE;
      final URI rdfProperty = RDF.PROPERTY;

      store.addStatement(A, B, C);

      assertTrue(store.hasStatement(A, B, C));
      assertFalse(store.hasStatement(B, rdfType, rdfProperty));
      assertEquals(1, store.getStatementCount());

      final Rule r = new RuleRdf01(store.getSPORelation().getNamespace(), store.getVocabulary());

      applyRule(store, r, 1 /* solutionCount */, 1 /* mutationCount */);

      /*
       * validate the state of the primary store.
       */
      assertTrue(store.hasStatement(A, B, C));
      assertTrue(store.hasStatement(B, rdfType, rdfProperty));
      assertEquals(2, store.getStatementCount());

    } finally {

      store.__tearDownUnitTest();
    }
  }
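
For reference, RuleRdf01 implements the RDF entailment rule rdf1: from any triple (?u ?a ?y) it infers (?a rdf:type rdf:Property). That is why asserting the single statement (A B C) above yields exactly one new entailment, (B rdf:type rdf:Property).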
Example #4
File: VoID.java Project: kietly/database
  /**
   * Return the predicate partition statistics for the named graph.
   *
   * @param kb The KB instance.
   * @param civ The {@link IV} of a named graph (required).
   * @param predicatePartitionCounts The non-empty predicate partitions of the default graph (the
   *     RDF merge), which are probed against the named graph.
   * @return The predicate partition statistics for that named graph. Only predicate partitions
   *     which are non-empty are returned.
   */
  protected static IVCount[] predicateUsage(
      final AbstractTripleStore kb, final IV<?, ?> civ, final IVCount[] predicatePartitionCounts) {

    final SPORelation r = kb.getSPORelation();

    final boolean quads = kb.isQuads();

    if (!quads) {

      // Named graph only valid in quads mode.
      throw new IllegalArgumentException();
    }

    // The non-zero counts.
    final List<IVCount> counts = new LinkedList<IVCount>();

    // Check the known non-empty predicate partitions.
    for (IVCount in : predicatePartitionCounts) {

      final long n = r.getAccessPath(null, in.iv, null, civ).rangeCount(false /* exact */);

      if (n == 0) continue;

      final IVCount out = new IVCount(in.iv, n);

      out.setValue(in.getValue());

      counts.add(out);
    }

    final IVCount[] a = counts.toArray(new IVCount[counts.size()]);

    // Order by descending count.
    Arrays.sort(a);

    return a;
  }
Example #5
  /**
   * Creates a {@link Justification}, writes it on the store using {@link
   * RDFJoinNexus#newInsertBuffer(IMutableRelation)}, verifies that we can read it back from the
   * store, and then retracts the justified statement and verifies that the justification was also
   * retracted.
   */
  public void test_writeReadRetract() {

    final Properties properties = super.getProperties();

    // override the default axiom model.
    properties.setProperty(
        com.bigdata.rdf.store.AbstractTripleStore.Options.AXIOMS_CLASS, NoAxioms.class.getName());

    final AbstractTripleStore store = getStore(properties);

    try {

      if (!store.isJustify()) {

        log.warn("Test skipped - justifications not enabled");

        return;
      }

      /*
       * the explicit statement that is the support for the rule.
       */

      final IV U = store.addTerm(new URIImpl("http://www.bigdata.com/U"));
      final IV A = store.addTerm(new URIImpl("http://www.bigdata.com/A"));
      final IV Y = store.addTerm(new URIImpl("http://www.bigdata.com/Y"));

      store.addStatements(
          new SPO[] { //
            new SPO(U, A, Y, StatementEnum.Explicit) //
          }, //
          1);

      assertTrue(store.hasStatement(U, A, Y));
      assertEquals(1, store.getStatementCount());

      final InferenceEngine inf = store.getInferenceEngine();

      final Vocabulary vocab = store.getVocabulary();

      // the rule.
      final Rule rule = new RuleRdf01(store.getSPORelation().getNamespace(), vocab);

      final IJoinNexus joinNexus =
          store
              .newJoinNexusFactory(
                  RuleContextEnum.DatabaseAtOnceClosure,
                  ActionEnum.Insert,
                  IJoinNexus.ALL,
                  null /* filter */)
              .newInstance(store.getIndexManager());

      /*
       * The buffer that accepts solutions and causes them to be written
       * onto the statement indices and the justifications index.
       */
      final IBuffer<ISolution[]> insertBuffer = joinNexus.newInsertBuffer(store.getSPORelation());

      // the expected justification (setup and verified below).
      final Justification jst;

      // the expected entailment.
      final SPO expectedEntailment =
          new SPO( //
              A, vocab.get(RDF.TYPE), vocab.get(RDF.PROPERTY), StatementEnum.Inferred);

      {
        final IBindingSet bindingSet = joinNexus.newBindingSet(rule);

        /*
         * Note: rdf01 is implemented using a distinct term scan. This
         * has the effect of leaving the variables that do not appear in
         * the head of the rule unbound. Therefore we DO NOT bind those
         * variables here in the test case and they will be represented
         * as ZERO (0L) in the justifications index and interpreted as
         * wildcards.
         */
        //                bindingSet.set(Var.var("u"), new Constant<IV>(U));
        bindingSet.set(Var.var("a"), new Constant<IV>(A));
        //                bindingSet.set(Var.var("y"), new Constant<IV>(Y));

        final ISolution solution = new Solution(joinNexus, rule, bindingSet);

        /*
         * Verify the justification that will be built from that
         * solution.
         */
        {
          jst = new Justification(solution);

          /*
           * Verify the bindings on the head of the rule as
           * represented by the justification.
           */
          assertEquals(expectedEntailment, jst.getHead());

          /*
           * Verify the bindings on the tail of the rule as
           * represented by the justification. Again, note that the
           * variables that do not appear in the head of the rule are
           * left unbound for rdf01 as a side-effect of evaluation
           * using a distinct term scan.
           */
          final SPO[] expectedTail =
              new SPO[] { //
                new SPO(NULL, A, NULL, StatementEnum.Inferred) //
              };

          if (!Arrays.equals(expectedTail, jst.getTail())) {

            fail("Expected: " + Arrays.toString(expectedTail) + ", but actual: " + jst);
          }
        }

        // insert solution into the buffer.
        insertBuffer.add(new ISolution[] {solution});
      }

      //            SPOAssertionBuffer buf = new SPOAssertionBuffer(store, store,
      //                    null/* filter */, 100/* capacity */, true/* justified */);
      //
      //            assertTrue(buf.add(head, jst));

      // no justifications beforehand.
      assertEquals(0L, store.getSPORelation().getJustificationIndex().rangeCount());

      // flush the buffer.
      assertEquals(1L, insertBuffer.flush());

      // one justification afterwards.
      assertEquals(1L, store.getSPORelation().getJustificationIndex().rangeCount());

      /*
       * verify read back from the index.
       */
      {
        final ITupleIterator<Justification> itr =
            store.getSPORelation().getJustificationIndex().rangeIterator();

        while (itr.hasNext()) {

          final ITuple<Justification> tuple = itr.next();

          // de-serialize the justification from the key.
          final Justification tmp = tuple.getObject();

          // verify the same.
          assertEquals(jst, tmp);

          // no more justifications in the index.
          assertFalse(itr.hasNext());
        }
      }

      /*
       * test iterator with a single justification.
       */
      {
        final FullyBufferedJustificationIterator itr =
            new FullyBufferedJustificationIterator(store, expectedEntailment);

        assertTrue(itr.hasNext());

        final Justification tmp = itr.next();

        assertEquals(jst, tmp);
      }

      // an empty focusStore.
      final TempTripleStore focusStore =
          new TempTripleStore(store.getIndexManager().getTempStore(), store.getProperties(), store);

      try {

        /*
         * The inference (A rdf:type rdf:Property) is grounded by the
         * explicit statement (U A Y).
         */

        assertTrue(
            Justification.isGrounded(
                inf,
                focusStore,
                store,
                expectedEntailment,
                false /* testHead */,
                true /* testFocusStore */,
                new VisitedSPOSet(focusStore.getIndexManager())));

        // add the statement (U A Y) to the focusStore.
        focusStore.addStatements(
            new SPO[] { //
              new SPO(U, A, Y, StatementEnum.Explicit) //
            }, //
            1);

        /*
         * The inference is no longer grounded since we have declared
         * that we are also retracting its grounds.
         */
        assertFalse(
            Justification.isGrounded(
                inf,
                focusStore,
                store,
                expectedEntailment,
                false /* testHead */,
                true /* testFocusStore */,
                new VisitedSPOSet(focusStore.getIndexManager())));

      } finally {

        /*
         * Destroy the temp kb, but not the backing TemporaryStore. That
         * will be destroyed when we destroy the IndexManager associated
         * with the main store (below).
         */
        focusStore.destroy();
      }

      /*
       * remove the justified statements.
       */

      assertEquals(
          1L,
          store
              .getAccessPath(expectedEntailment.s, expectedEntailment.p, expectedEntailment.o)
              .removeAll());

      /*
       * verify that the justification for that statement is gone.
       */
      {
        final ITupleIterator<?> itr =
            store.getSPORelation().getJustificationIndex().rangeIterator();

        assertFalse(itr.hasNext());
      }

    } finally {

      store.__tearDownUnitTest();
    }
  }
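
The write/read/retract cycle above is easier to follow with the bookkeeping reduced to its essentials. The following is a deliberately tiny sketch in plain Java, with strings standing in for SPOs; it is not the bigdata Justification API. An inference stays grounded while at least one recorded justification is still backed by an explicit statement, and retracting the last ground retracts the inference, which is what the test verifies against the justifications index.

import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

public class JustificationSketch {

  // Inferred statement -> the explicit statements recorded as its grounds.
  private final Map<String, Set<String>> justifications = new HashMap<>();

  // The explicit statements currently in the store.
  private final Set<String> explicit = new HashSet<>();

  void assertExplicit(final String spo) {
    explicit.add(spo);
  }

  void assertInferred(final String head, final String ground) {
    justifications.computeIfAbsent(head, k -> new HashSet<>()).add(ground);
  }

  /** An inference is grounded while at least one of its justifications is
   * still backed by an explicit statement. */
  boolean isGrounded(final String head) {
    for (String g : justifications.getOrDefault(head, Set.of())) {
      if (explicit.contains(g)) return true;
    }
    return false;
  }

  /** Truth maintenance: retract a ground, then drop any inference that is
   * no longer grounded. */
  void retractExplicit(final String spo) {
    explicit.remove(spo);
    justifications.keySet().removeIf(head -> !isGrounded(head));
  }

  public static void main(final String[] args) {
    final JustificationSketch kb = new JustificationSketch();
    kb.assertExplicit("U A Y");
    // rdf01: (U A Y) justifies (A rdf:type rdf:Property).
    kb.assertInferred("A rdf:type rdf:Property", "U A Y");
    System.out.println(kb.isGrounded("A rdf:type rdf:Property")); // true
    kb.retractExplicit("U A Y");
    System.out.println(kb.isGrounded("A rdf:type rdf:Property")); // false
  }
}

Loosely speaking, Justification.isGrounded plays the role of this check in the test, with the focusStore enumerating the statements that are about to be retracted.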
Example #6
File: VoID.java Project: kietly/database
  /**
   * Return an efficient statistical summary for the class partitions. The SPARQL query for this is
   *
   * <pre>
   * SELECT ?class (COUNT(?s) AS ?count) { ?s a ?class } GROUP BY ?class ORDER BY ?count
   * </pre>
   *
   * However, it is much more efficient to scan POS for
   *
   * <pre>
   * rdf:type ?o ?s
   * </pre>
   *
   * and report the range count of
   *
   * <pre>
   * rdf:type ?o ?s
   * </pre>
   *
   * for each distinct value of <code>?o</code>.
   *
   * @param kb The KB instance.
   * @return The class usage statistics.
   */
  protected static IVCount[] classUsage(final AbstractTripleStore kb) {

    final SPORelation r = kb.getSPORelation();

    if (r.oneAccessPath) {

      // The necessary index (POS or POCS) does not exist.
      throw new UnsupportedOperationException();
    }

    final boolean quads = kb.isQuads();

    final SPOKeyOrder keyOrder = quads ? SPOKeyOrder.POCS : SPOKeyOrder.POS;

    // Resolve IV for rdf:type
    final BigdataURI rdfType = kb.getValueFactory().asValue(RDF.TYPE);

    kb.getLexiconRelation()
        .addTerms(new BigdataValue[] {rdfType}, 1 /* numTerms */, true /* readOnly */);

    if (rdfType.getIV() == null) {

      // No rdf:type assertions since rdf:type is an unknown term.
      return new IVCount[0];
    }

    // visit distinct term identifiers for the rdf:type predicate.
    @SuppressWarnings("rawtypes")
    final IChunkedIterator<IV> itr =
        r.distinctMultiTermScan(keyOrder, new IV[] {rdfType.getIV()} /* knownTerms */);

    // resolve term identifiers to terms efficiently during iteration.
    final BigdataValueIterator itr2 = new BigdataValueIteratorImpl(kb /* resolveTerms */, itr);

    try {

      final Set<IV<?, ?>> ivs = new LinkedHashSet<IV<?, ?>>();

      final Map<IV<?, ?>, IVCount> counts = new LinkedHashMap<IV<?, ?>, IVCount>();

      while (itr2.hasNext()) {

        final BigdataValue term = itr2.next();

        final IV<?, ?> iv = term.getIV();

        final long n =
            r.getAccessPath(null, rdfType.getIV() /* p */, iv /* o */, null)
                .rangeCount(false /* exact */);

        ivs.add(iv);

        counts.put(iv, new IVCount(iv, n));
      }

      // Batch resolve IVs to Values
      final Map<IV<?, ?>, BigdataValue> x = kb.getLexiconRelation().getTerms(ivs);

      for (Map.Entry<IV<?, ?>, BigdataValue> e : x.entrySet()) {

        final IVCount count = counts.get(e.getKey());

        count.setValue(e.getValue());
      }

      final IVCount[] a = counts.values().toArray(new IVCount[counts.size()]);

      // Order by descending count.
      Arrays.sort(a);

      return a;

    } finally {

      itr2.close();
    }
  }
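
To make the rangeCount step concrete, here is a plain-Java analogue; again a sorted set stands in for the POS index and the key layout is illustrative. Because every key with a fixed (rdf:type, class) prefix is contiguous in POS order, counting a class partition is a sub-range operation rather than a scan of the whole index, which is why this approach beats the GROUP BY query above.

import java.util.NavigableSet;
import java.util.TreeSet;

public class RangeCountSketch {

  public static void main(final String[] args) {

    final NavigableSet<String> pos = new TreeSet<>();

    // POS keys (p|o|s); only the first three are rdf:type assertions.
    pos.add("rdf:type|Person|alice");
    pos.add("rdf:type|Person|bob");
    pos.add("rdf:type|Place|rome");
    pos.add("knows|alice|bob");

    // Range count of (rdf:type, Person, ?s): one contiguous key range.
    final int n = pos.subSet("rdf:type|Person|", "rdf:type|Person|\uffff").size();

    System.out.println(n); // 2
  }
}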
Example #7
File: VoID.java Project: kietly/database
  /**
   * Return an array of the distinct predicates in the KB ordered by their descending frequency of
   * use. The {@link IV}s in the returned array will have been resolved to the corresponding {@link
   * BigdataURI}s which can be accessed using {@link IV#getValue()}.
   *
   * @param kb The KB instance.
   */
  protected static IVCount[] predicateUsage(final AbstractTripleStore kb) {

    final SPORelation r = kb.getSPORelation();

    if (r.oneAccessPath) {

      // The necessary index (POS or POCS) does not exist.
      throw new UnsupportedOperationException();
    }

    final boolean quads = kb.isQuads();

    // the index to use for distinct predicate scan.
    final SPOKeyOrder keyOrder = quads ? SPOKeyOrder.POCS : SPOKeyOrder.POS;

    // visit distinct term identifiers for the predicate position of that index.
    @SuppressWarnings("rawtypes")
    final IChunkedIterator<IV> itr = r.distinctTermScan(keyOrder);

    // resolve term identifiers to terms efficiently during iteration.
    final BigdataValueIterator itr2 = new BigdataValueIteratorImpl(kb /* resolveTerms */, itr);

    try {

      final Set<IV<?, ?>> ivs = new LinkedHashSet<IV<?, ?>>();

      final Map<IV<?, ?>, IVCount> counts = new LinkedHashMap<IV<?, ?>, IVCount>();

      while (itr2.hasNext()) {

        final BigdataValue term = itr2.next();

        final IV<?, ?> iv = term.getIV();

        final long n = r.getAccessPath(null, iv, null, null).rangeCount(false /* exact */);

        ivs.add(iv);

        counts.put(iv, new IVCount(iv, n));
      }

      // Batch resolve IVs to Values
      final Map<IV<?, ?>, BigdataValue> x = kb.getLexiconRelation().getTerms(ivs);

      for (Map.Entry<IV<?, ?>, BigdataValue> e : x.entrySet()) {

        final IVCount count = counts.get(e.getKey());

        count.setValue(e.getValue());
      }

      final IVCount[] a = counts.values().toArray(new IVCount[counts.size()]);

      // Order by descending count.
      Arrays.sort(a);

      return a;

    } finally {

      itr2.close();
    }
  }
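
A hedged sketch tying the two predicateUsage variants together, written as if it were another helper in VoID.java; the helper itself and its kb and graphIV arguments are hypothetical, not part of the class. It computes the default-graph statistics once and then restricts them to a single named graph, the same pattern describeDataSet uses below.

  // Hypothetical helper: print the predicates used within one named graph,
  // reusing the partition counts computed for the default graph.
  protected static void printPredicateUsage(final AbstractTripleStore kb, final IV<?, ?> graphIV) {

    // Frequency counts for the default graph (the RDF merge).
    final IVCount[] all = predicateUsage(kb);

    // Restricted to the given named graph (quads mode only).
    final IVCount[] inGraph = predicateUsage(kb, graphIV, all);

    for (IVCount c : inGraph) {

      // Both arrays are sorted by descending count; getValue() returns the
      // BigdataURI resolved during predicateUsage(kb).
      System.out.println(c.getValue());
    }
  }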
Example #8
File: VoID.java Project: kietly/database
  /**
   * Describe the default data set (the one identified by the namespace associated with the {@link
   * AbstractTripleStore}).
   *
   * @param describeStatistics When <code>true</code>, the VoID description will include the {@link
   *     VoidVocabularyDecl#vocabulary} declarations, the property partition statistics, and the
   *     class partition statistics.
   * @param describeNamedGraphs When <code>true</code>, each named graph will also be described at
   *     the same level of detail as the default graph. Otherwise only the default graph will be
   *     described.
   */
  public void describeDataSet(final boolean describeStatistics, final boolean describeNamedGraphs) {

    final String namespace = tripleStore.getNamespace();

    // This is a VoID data set.
    g.add(aDataset, RDF.TYPE, VoidVocabularyDecl.Dataset);

    // The namespace is used as a title for the data set.
    g.add(aDataset, DCTermsVocabularyDecl.title, f.createLiteral(namespace));

    // Also present the namespace in an unambiguous manner.
    g.add(aDataset, SD.KB_NAMESPACE, f.createLiteral(namespace));

    /**
     * Service end point for this namespace.
     *
     * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/689" > Missing URL encoding in
     *     RemoteRepositoryManager </a>
     */
    for (String uri : serviceURI) {
      g.add(
          aDataset,
          VoidVocabularyDecl.sparqlEndpoint,
          f.createURI(uri + "/" + ConnectOptions.urlEncode(namespace) + "/sparql"));
    }

    // any URI is considered to be an entity.
    g.add(aDataset, VoidVocabularyDecl.uriRegexPattern, f.createLiteral("^.*"));

    if (!describeStatistics) {

      // No statistics.
      return;
    }

    // Frequency count of the predicates in the default graph.
    final IVCount[] predicatePartitionCounts = predicateUsage(tripleStore);

    // Frequency count of the classes in the default graph.
    final IVCount[] classPartitionCounts = classUsage(tripleStore);

    // Describe vocabularies based on the predicate partitions.
    describeVocabularies(predicatePartitionCounts);

    // defaultGraph description.
    {

      // Default graph in the default data set.
      g.add(aDataset, SD.defaultGraph, aDefaultGraph);

      // Describe the default graph using statistics.
      describeGraph(aDefaultGraph, predicatePartitionCounts, classPartitionCounts);
    } // end defaultGraph

    // sb.append("termCount\t = " + tripleStore.getTermCount() + "\n");
    //
    // sb.append("uriCount\t = " + tripleStore.getURICount() + "\n");
    //
    // sb.append("literalCount\t = " + tripleStore.getLiteralCount() +
    // "\n");
    //
    // /*
    // * Note: The blank node count is only available when using the told
    // * bnodes mode.
    // */
    // sb
    // .append("bnodeCount\t = "
    // + (tripleStore.getLexiconRelation()
    // .isStoreBlankNodes() ? ""
    // + tripleStore.getBNodeCount() : "N/A")
    // + "\n");

    /*
     * Report for each named graph.
     */
    if (describeNamedGraphs && tripleStore.isQuads()) {

      final SPORelation r = tripleStore.getSPORelation();

      // the index to use for distinct term scan.
      final SPOKeyOrder keyOrder = SPOKeyOrder.CSPO;

      // visit distinct IVs for context position on that index.
      @SuppressWarnings("rawtypes")
      final IChunkedIterator<IV> itr = r.distinctTermScan(keyOrder);

      // resolve IVs to terms efficiently during iteration.
      final BigdataValueIterator itr2 =
          new BigdataValueIteratorImpl(tripleStore /* resolveTerms */, itr);

      try {

        while (itr2.hasNext()) {

          /*
           * Describe this named graph.
           *
           * Note: This is using the predicate and class partition
           * statistics from the default graph (RDF merge) to identify
           * the set of all possible predicates and classes within
           * each named graph. It then tests each predicate and class
           * partition against the named graph and ignores those which
           * are not present in a given named graph. This is being
           * done because we do not have a CPxx index.
           */

          final BigdataResource graph = (BigdataResource) itr2.next();

          final IVCount[] predicatePartitionCounts2 =
              predicateUsage(tripleStore, graph.getIV(), predicatePartitionCounts);

          final IVCount[] classPartitionCounts2 =
              classUsage(tripleStore, graph.getIV(), classPartitionCounts);

          final BNode aNamedGraph = f.createBNode();

          // Named graph in the default data set.
          g.add(aDataset, SD.namedGraph, aNamedGraph);

          // The name of that named graph.
          g.add(aNamedGraph, SD.name, graph);

          // Describe the named graph.
          describeGraph(aNamedGraph, predicatePartitionCounts2, classPartitionCounts2);
        }

      } finally {

        itr2.close();
      }
    }
  }
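
For orientation, here is a sketch of the kind of description this method emits, rendered as Turtle. The blank node labels, the "kb" namespace, the endpoint host, and the graph name are all illustrative; the exact predicate URIs come from the vocabulary declaration classes referenced above.

_:dataset a void:Dataset ;
    dcterms:title "kb" ;
    void:sparqlEndpoint <http://localhost:9999/bigdata/namespace/kb/sparql> ;
    void:uriRegexPattern "^.*" ;
    sd:defaultGraph _:defaultGraph ;
    sd:namedGraph _:g1 .

_:g1 sd:name <http://example.org/graph1> .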