Esempio n. 1
0
  /**
   * Record the vocabularies which are in use in the KB, as derived from the predicate partition
   * statistics.
   *
   * <p>The namespace of each predicate is collected once, with a trailing <code>'#'</code>
   * removed, and one <code>VoidVocabularyDecl.vocabulary</code> statement is added for the dataset
   * per distinct namespace. The statements are added in sorted namespace order.
   *
   * @param predicatePartitionCounts The predicate partition statistics. The value of each entry is
   *     cast to a {@link URI}.
   */
  protected void describeVocabularies(final IVCount[] predicatePartitionCounts) {

    // Gather each namespace exactly once.
    final Set<String> distinct = new LinkedHashSet<String>();

    for (int i = 0; i < predicatePartitionCounts.length; i++) {

      final URI predicate = (URI) predicatePartitionCounts[i].getValue();

      final String ns = predicate.getNamespace();

      // Strip trailing '#' per VoID specification.
      distinct.add(ns.endsWith("#") ? ns.substring(0, ns.length() - 1) : ns);
    }

    // Put the namespaces into dictionary order before emitting them.
    final String[] sorted = distinct.toArray(new String[distinct.size()]);

    Arrays.sort(sorted);

    for (int i = 0; i < sorted.length; i++) {

      g.add(aDataset, VoidVocabularyDecl.vocabulary, f.createURI(sorted[i]));
    }
  }
Esempio n. 2
0
  /**
   * Add the description of a named or default graph to the output graph.
   *
   * <p>The graph is typed as an <code>SD.Graph</code> and given its summary counts (triples,
   * entities, distinct properties, distinct classes). One blank node is then added per predicate
   * partition and per class partition, each carrying the partition's property or class together
   * with its triple count.
   *
   * @param graph The named graph.
   * @param predicatePartitionCounts The predicate partition statistics for that graph. The value
   *     of each entry is cast to a {@link URI}.
   * @param classPartitionCounts The class partition statistics for that graph.
   */
  protected void describeGraph(
      final Resource graph,
      final IVCount[] predicatePartitionCounts,
      final IVCount[] classPartitionCounts) {

    // Type declaration: the graph is a Graph.
    g.add(graph, RDF.TYPE, SD.Graph);

    /*
     * Summary counts: #of triples, #of entities, #of distinct predicates and #of distinct classes.
     *
     * NOTE(review): the triple and entity counts are read from the triple store without reference
     * to the graph argument -- confirm this is the intended value when describing a named graph.
     */
    g.add(graph, VoidVocabularyDecl.triples, f.createLiteral(tripleStore.getStatementCount()));
    g.add(graph, VoidVocabularyDecl.entities, f.createLiteral(tripleStore.getURICount()));
    g.add(graph, VoidVocabularyDecl.properties, f.createLiteral(predicatePartitionCounts.length));
    g.add(graph, VoidVocabularyDecl.classes, f.createLiteral(classPartitionCounts.length));

    // One blank node per property partition.
    for (int i = 0; i < predicatePartitionCounts.length; i++) {

      final IVCount stats = predicatePartitionCounts[i];

      final BNode partitionNode = f.createBNode();

      final URI property = (URI) stats.getValue();

      g.add(graph, VoidVocabularyDecl.propertyPartition, partitionNode);
      g.add(partitionNode, VoidVocabularyDecl.property, property);
      g.add(partitionNode, VoidVocabularyDecl.triples, f.createLiteral(stats.count));
    }

    // One blank node per class partition.
    for (int i = 0; i < classPartitionCounts.length; i++) {

      final IVCount stats = classPartitionCounts[i];

      final BNode partitionNode = f.createBNode();

      final BigdataValue type = stats.getValue();

      g.add(graph, VoidVocabularyDecl.classPartition, partitionNode);
      g.add(partitionNode, VoidVocabularyDecl.class_, type);
      g.add(partitionNode, VoidVocabularyDecl.triples, f.createLiteral(stats.count));
    }
  }
Esempio n. 3
0
  /**
   * Return the class partition statistics for the named graph.
   *
   * <p>Each entry of <code>classPartitionCounts</code> is probed with a non-exact range count of
   *
   * <pre>
   * ?s rdf:type ?class
   * </pre>
   *
   * restricted to the named graph, and entries whose range count is zero are dropped.
   *
   * @param kb The KB instance.
   * @param civ The {@link IV} of a named graph (required).
   * @param classPartitionCounts The class partitions to probe. The {@link IV} and the resolved
   *     value of each retained entry are copied into the corresponding entry of the result.
   * @return The class partition statistics for that named graph, sorted by the natural ordering of
   *     {@link IVCount}. Only class partitions which are non-empty are returned. The array is empty
   *     when <code>rdf:type</code> has no {@link IV} after the lexicon lookup.
   * @throws IllegalArgumentException if the KB is not in quads mode.
   */
  protected static IVCount[] classUsage(
      final AbstractTripleStore kb, final IV<?, ?> civ, final IVCount[] classPartitionCounts) {

    final SPORelation r = kb.getSPORelation();

    if (!kb.isQuads()) {

      // Named graph only valid in quads mode.
      throw new IllegalArgumentException(
          "Named graph statistics are only available for a KB in quads mode.");
    }

    // Resolve IV for rdf:type
    final BigdataURI rdfType = kb.getValueFactory().asValue(RDF.TYPE);

    kb.getLexiconRelation()
        .addTerms(new BigdataValue[] {rdfType}, 1 /* numTerms */, true /* readOnly */);

    // Loop invariant: read the rdf:type IV once rather than once per class partition.
    final IV<?, ?> rdfTypeIV = rdfType.getIV();

    if (rdfTypeIV == null) {

      // No rdf:type assertions since rdf:type is unknown term.
      return new IVCount[0];
    }

    // The non-zero counts.
    final List<IVCount> counts = new LinkedList<IVCount>();

    // Check the known non-empty class partitions.
    for (IVCount in : classPartitionCounts) {

      final long n =
          r.getAccessPath(null, rdfTypeIV /* p */, in.iv /* o */, civ)
              .rangeCount(false /* exact */);

      if (n == 0) {
        // This class has no instances in the named graph.
        continue;
      }

      final IVCount out = new IVCount(in.iv, n);

      // Carry over the already resolved value.
      out.setValue(in.getValue());

      counts.add(out);
    }

    final IVCount[] a = counts.toArray(new IVCount[counts.size()]);

    // Order by descending count (relies on the natural ordering of IVCount).
    Arrays.sort(a);

    return a;
  }
Esempio n. 4
0
  /**
   * Return the predicate partition statistics for the named graph.
   *
   * <p>Each entry of <code>predicatePartitionCounts</code> is probed with a non-exact range count
   * of the statements using that predicate in the named graph, and entries whose range count is
   * zero are dropped.
   *
   * @param kb The KB instance.
   * @param civ The {@link IV} of a named graph (required).
   * @param predicatePartitionCounts The predicate partitions to probe. The {@link IV} and the
   *     resolved value of each retained entry are copied into the corresponding entry of the
   *     result.
   * @return The predicate partition statistics for that named graph, sorted by the natural
   *     ordering of {@link IVCount}. Only predicate partitions which are non-empty are returned.
   * @throws IllegalArgumentException if the KB is not in quads mode.
   */
  protected static IVCount[] predicateUsage(
      final AbstractTripleStore kb, final IV<?, ?> civ, final IVCount[] predicatePartitionCounts) {

    final SPORelation r = kb.getSPORelation();

    if (!kb.isQuads()) {

      // Named graph only valid in quads mode.
      throw new IllegalArgumentException(
          "Named graph statistics are only available for a KB in quads mode.");
    }

    // The non-zero counts.
    final List<IVCount> counts = new LinkedList<IVCount>();

    // Check the known non-empty predicate partitions.
    for (IVCount in : predicatePartitionCounts) {

      final long n = r.getAccessPath(null, in.iv, null, civ).rangeCount(false /* exact */);

      if (n == 0) {
        // This predicate is not used in the named graph.
        continue;
      }

      final IVCount out = new IVCount(in.iv, n);

      // Carry over the already resolved value.
      out.setValue(in.getValue());

      counts.add(out);
    }

    final IVCount[] a = counts.toArray(new IVCount[counts.size()]);

    // Order by descending count (relies on the natural ordering of IVCount).
    Arrays.sort(a);

    return a;
  }
Esempio n. 5
0
  /**
   * Return an efficient statistical summary for the class partitions. The SPARQL query for this is
   *
   * <pre>
   * SELECT  ?class (COUNT(?s) AS ?count ) { ?s a ?class } GROUP BY ?class ORDER BY ?count
   * </pre>
   *
   * However, it is much more efficient to scan POS (POCS in quads mode) for the distinct values of
   * <code>?o</code> in
   *
   * <pre>
   * rdf:type ?o ?s
   * </pre>
   *
   * and then report, for each such <code>?o</code>, the non-exact range count of
   *
   * <pre>
   * rdf:type ?o ?s
   * </pre>
   *
   * @param kb The KB instance.
   * @return The class usage statistics, sorted by the natural ordering of {@link IVCount}. The
   *     array is empty when <code>rdf:type</code> has no {@link IV} after the lexicon lookup.
   * @throws UnsupportedOperationException if the statement relation has only one access path.
   */
  protected static IVCount[] classUsage(final AbstractTripleStore kb) {

    final SPORelation spo = kb.getSPORelation();

    if (spo.oneAccessPath) {

      // The necessary index (POS or POCS) does not exist.
      throw new UnsupportedOperationException();
    }

    // POCS for a quads mode KB, POS otherwise.
    final SPOKeyOrder index;

    if (kb.isQuads()) {
      index = SPOKeyOrder.POCS;
    } else {
      index = SPOKeyOrder.POS;
    }

    // Look up the IV for rdf:type.
    final BigdataURI typeTerm = kb.getValueFactory().asValue(RDF.TYPE);

    final BigdataValue[] lookup = new BigdataValue[] {typeTerm};

    kb.getLexiconRelation().addTerms(lookup, 1 /* numTerms */, true /* readOnly */);

    if (typeTerm.getIV() == null) {

      // rdf:type is an unknown term, hence there are no rdf:type assertions.
      return new IVCount[0];
    }

    // Distinct term identifiers following the rdf:type predicate on that index.
    @SuppressWarnings("rawtypes")
    final IChunkedIterator<IV> classIVs =
        spo.distinctMultiTermScan(index, new IV[] {typeTerm.getIV()} /* knownTerms */);

    // Resolve term identifiers to terms efficiently during iteration.
    final BigdataValueIterator classTerms =
        new BigdataValueIteratorImpl(kb /* resolveTerms */, classIVs);

    try {

      final Set<IV<?, ?>> seen = new LinkedHashSet<IV<?, ?>>();

      final Map<IV<?, ?>, IVCount> byClass = new LinkedHashMap<IV<?, ?>, IVCount>();

      while (classTerms.hasNext()) {

        final IV<?, ?> classIV = classTerms.next().getIV();

        final long instances =
            spo.getAccessPath(null, typeTerm.getIV() /* p */, classIV /* o */, null)
                .rangeCount(false /* exact */);

        seen.add(classIV);

        byClass.put(classIV, new IVCount(classIV, instances));
      }

      // Batch resolve IVs to Values and attach each Value to its count.
      final Map<IV<?, ?>, BigdataValue> resolved = kb.getLexiconRelation().getTerms(seen);

      for (Map.Entry<IV<?, ?>, BigdataValue> entry : resolved.entrySet()) {

        byClass.get(entry.getKey()).setValue(entry.getValue());
      }

      final IVCount[] result = byClass.values().toArray(new IVCount[byClass.size()]);

      // Order by descending count (relies on the natural ordering of IVCount).
      Arrays.sort(result);

      return result;

    } finally {

      classTerms.close();
    }
  }
Esempio n. 6
0
  /**
   * Return an array of the distinct predicates in the KB ordered by their descending frequency of
   * use. The {@link IV}s in the returned array will have been resolved to the corresponding {@link
   * BigdataURI}s which can be accessed using {@link IV#getValue()}.
   *
   * <p>The distinct predicates are read from the POS index (POCS in quads mode) and the frequency
   * reported for each predicate is its non-exact range count.
   *
   * @param kb The KB instance.
   * @return The predicate usage statistics, sorted by the natural ordering of {@link IVCount}.
   * @throws UnsupportedOperationException if the statement relation has only one access path.
   */
  protected static IVCount[] predicateUsage(final AbstractTripleStore kb) {

    final SPORelation spo = kb.getSPORelation();

    if (spo.oneAccessPath) {

      // The necessary index (POS or POCS) does not exist.
      throw new UnsupportedOperationException();
    }

    // The index to use for the distinct predicate scan: POCS for quads, POS otherwise.
    final SPOKeyOrder index;

    if (kb.isQuads()) {
      index = SPOKeyOrder.POCS;
    } else {
      index = SPOKeyOrder.POS;
    }

    // Distinct term identifiers for the predicate position on that index.
    @SuppressWarnings("rawtypes")
    final IChunkedIterator<IV> predicateIVs = spo.distinctTermScan(index);

    // Resolve term identifiers to terms efficiently during iteration.
    final BigdataValueIterator predicateTerms =
        new BigdataValueIteratorImpl(kb /* resolveTerms */, predicateIVs);

    try {

      final Set<IV<?, ?>> seen = new LinkedHashSet<IV<?, ?>>();

      final Map<IV<?, ?>, IVCount> byPredicate = new LinkedHashMap<IV<?, ?>, IVCount>();

      while (predicateTerms.hasNext()) {

        final IV<?, ?> predicateIV = predicateTerms.next().getIV();

        final long uses =
            spo.getAccessPath(null, predicateIV, null, null).rangeCount(false /* exact */);

        seen.add(predicateIV);

        byPredicate.put(predicateIV, new IVCount(predicateIV, uses));
      }

      // Batch resolve IVs to Values and attach each Value to its count.
      final Map<IV<?, ?>, BigdataValue> resolved = kb.getLexiconRelation().getTerms(seen);

      for (Map.Entry<IV<?, ?>, BigdataValue> entry : resolved.entrySet()) {

        byPredicate.get(entry.getKey()).setValue(entry.getValue());
      }

      final IVCount[] result = byPredicate.values().toArray(new IVCount[byPredicate.size()]);

      // Order by descending count (relies on the natural ordering of IVCount).
      Arrays.sort(result);

      return result;

    } finally {

      predicateTerms.close();
    }
  }