Esempio n. 1
0
  /**
   * Return the class partition statistics for a single named graph.
   *
   * <p>Each entry of <code>classPartitionCounts</code> is probed with the access path <code>
   * (?s, rdf:type, ?class, civ)</code>. Entries whose range count in that graph is zero are
   * dropped. Range counts are requested with <code>exact == false</code>.
   *
   * @param kb The KB instance. It must be in quads mode.
   * @param civ The {@link IV} of a named graph (required).
   * @param classPartitionCounts The candidate class partitions to probe. Each entry supplies the
   *     class {@link IV} and the value which is copied onto the matching result entry.
   * @return The class partition statistics for that named graph, sorted by the natural ordering
   *     of {@link IVCount} (intended to be descending count). Only class partitions which are
   *     non-empty are returned. An empty array is returned when <code>rdf:type</code> has no
   *     {@link IV} after the read-only lexicon lookup.
   * @throws IllegalArgumentException if the KB is not in quads mode or <code>civ</code> is <code>
   *     null</code>.
   */
  protected static IVCount[] classUsage(
      final AbstractTripleStore kb, final IV<?, ?> civ, final IVCount[] classPartitionCounts) {

    final SPORelation r = kb.getSPORelation();

    if (!kb.isQuads()) {

      // Named graph only valid in quads mode.
      throw new IllegalArgumentException("Named graph statistics require a quads-mode KB");
    }

    if (civ == null) {

      // A null context would not restrict the access path to a single graph.
      throw new IllegalArgumentException("civ (the named graph IV) is required");
    }

    // Resolve IV for rdf:type (read-only lookup).
    final BigdataURI rdfType = kb.getValueFactory().asValue(RDF.TYPE);

    kb.getLexiconRelation()
        .addTerms(new BigdataValue[] {rdfType}, 1 /* numTerms */, true /* readOnly */);

    // Loop invariant: read the IV once rather than on every probe.
    final IV<?, ?> rdfTypeIV = rdfType.getIV();

    if (rdfTypeIV == null) {

      // No rdf:type assertions since rdf:type is unknown term.
      return new IVCount[0];
    }

    // At most one result per candidate, so collect into a pre-sized array.
    final IVCount[] nonEmpty = new IVCount[classPartitionCounts.length];

    int size = 0;

    // Probe each candidate class partition within the named graph.
    for (IVCount in : classPartitionCounts) {

      final long n =
          r.getAccessPath(null, rdfTypeIV /* p */, in.iv /* o */, civ)
              .rangeCount(false /* exact */);

      if (n == 0) {
        continue;
      }

      final IVCount out = new IVCount(in.iv, n);

      out.setValue(in.getValue());

      nonEmpty[size++] = out;
    }

    // Trim to the entries actually populated.
    final IVCount[] a = Arrays.copyOf(nonEmpty, size);

    // Natural ordering of IVCount (intended to be descending count).
    Arrays.sort(a);

    return a;
  }
Esempio n. 2
0
  /**
   * Return the predicate partition statistics for a single named graph.
   *
   * <p>Each entry of <code>predicatePartitionCounts</code> is probed with the access path <code>
   * (?s, ?predicate, ?o, civ)</code>. Entries whose range count in that graph is zero are
   * dropped. Range counts are requested with <code>exact == false</code>.
   *
   * @param kb The KB instance. It must be in quads mode.
   * @param civ The {@link IV} of a named graph (required).
   * @param predicatePartitionCounts The candidate predicate partitions to probe. Each entry
   *     supplies the predicate {@link IV} and the value which is copied onto the matching result
   *     entry.
   * @return The predicate partition statistics for that named graph, sorted by the natural
   *     ordering of {@link IVCount} (intended to be descending count). Only predicate partitions
   *     which are non-empty are returned.
   * @throws IllegalArgumentException if the KB is not in quads mode or <code>civ</code> is <code>
   *     null</code>.
   */
  protected static IVCount[] predicateUsage(
      final AbstractTripleStore kb, final IV<?, ?> civ, final IVCount[] predicatePartitionCounts) {

    final SPORelation r = kb.getSPORelation();

    if (!kb.isQuads()) {

      // Named graph only valid in quads mode.
      throw new IllegalArgumentException("Named graph statistics require a quads-mode KB");
    }

    if (civ == null) {

      // A null context would not restrict the access path to a single graph.
      throw new IllegalArgumentException("civ (the named graph IV) is required");
    }

    // At most one result per candidate, so collect into a pre-sized array.
    final IVCount[] nonEmpty = new IVCount[predicatePartitionCounts.length];

    int size = 0;

    // Probe each candidate predicate partition within the named graph.
    for (IVCount in : predicatePartitionCounts) {

      final long n = r.getAccessPath(null, in.iv, null, civ).rangeCount(false /* exact */);

      if (n == 0) {
        continue;
      }

      final IVCount out = new IVCount(in.iv, n);

      out.setValue(in.getValue());

      nonEmpty[size++] = out;
    }

    // Trim to the entries actually populated.
    final IVCount[] a = Arrays.copyOf(nonEmpty, size);

    // Natural ordering of IVCount (intended to be descending count).
    Arrays.sort(a);

    return a;
  }
Esempio n. 3
0
  /**
   * Return an efficient statistical summary for the class partitions. The SPARQL query for this is
   *
   * <pre>
   * SELECT  ?class (COUNT(?s) AS ?count ) { ?s a ?class } GROUP BY ?class ORDER BY ?count
   * </pre>
   *
   * However, it is much more efficient to scan POS (POCS in quads mode) for the distinct values
   * of <code>?o</code> in
   *
   * <pre>
   * rdf:type ?o ?s
   * </pre>
   *
   * and then report, for each distinct <code>?o</code>, the range count of that same pattern with
   * <code>?o</code> bound. Range counts are requested with <code>exact == false</code>.
   *
   * @param kb The KB instance.
   * @return The class usage statistics, sorted by the natural ordering of {@link IVCount}
   *     (intended to be descending count). An empty array is returned when <code>rdf:type</code>
   *     has no {@link IV} after the read-only lexicon lookup.
   * @throws UnsupportedOperationException if the statement relation only has one access path, in
   *     which case the POS / POCS index needed for the scan does not exist.
   */
  protected static IVCount[] classUsage(final AbstractTripleStore kb) {

    final SPORelation r = kb.getSPORelation();

    if (r.oneAccessPath) {

      throw new UnsupportedOperationException(
          "The necessary index (POS or POCS) does not exist (oneAccessPath)");
    }

    // The index to use for the distinct class scan.
    final SPOKeyOrder keyOrder = kb.isQuads() ? SPOKeyOrder.POCS : SPOKeyOrder.POS;

    // Resolve IV for rdf:type (read-only lookup).
    final BigdataURI rdfType = kb.getValueFactory().asValue(RDF.TYPE);

    kb.getLexiconRelation()
        .addTerms(new BigdataValue[] {rdfType}, 1 /* numTerms */, true /* readOnly */);

    // Loop invariant: read the IV once rather than on every range count.
    final IV<?, ?> rdfTypeIV = rdfType.getIV();

    if (rdfTypeIV == null) {

      // No rdf:type assertions since rdf:type is unknown term.
      return new IVCount[0];
    }

    // Visit the distinct term identifiers which follow the rdf:type predicate in the index.
    @SuppressWarnings("rawtypes")
    final IChunkedIterator<IV> itr =
        r.distinctMultiTermScan(keyOrder, new IV[] {rdfTypeIV} /* knownTerms */);

    // Resolve term identifiers to terms efficiently during iteration.
    final BigdataValueIterator itr2 = new BigdataValueIteratorImpl(kb /* resolveTerms */, itr);

    try {

      // Insertion-ordered so the batch lookup sees the IVs in scan order.
      final Set<IV<?, ?>> ivs = new LinkedHashSet<IV<?, ?>>();

      final Map<IV<?, ?>, IVCount> counts = new LinkedHashMap<IV<?, ?>, IVCount>();

      while (itr2.hasNext()) {

        final IV<?, ?> classIV = itr2.next().getIV();

        final long n =
            r.getAccessPath(null, rdfTypeIV /* p */, classIV /* o */, null)
                .rangeCount(false /* exact */);

        ivs.add(classIV);

        counts.put(classIV, new IVCount(classIV, n));
      }

      // Batch resolve IVs to Values.
      // NOTE(review): itr2 is described as already resolving terms during the scan; confirm
      // whether this second lookup is still required before removing either one.
      final Map<IV<?, ?>, BigdataValue> resolved = kb.getLexiconRelation().getTerms(ivs);

      for (Map.Entry<IV<?, ?>, BigdataValue> e : resolved.entrySet()) {

        counts.get(e.getKey()).setValue(e.getValue());
      }

      final IVCount[] a = counts.values().toArray(new IVCount[counts.size()]);

      // Natural ordering of IVCount (intended to be descending count).
      Arrays.sort(a);

      return a;

    } finally {

      itr2.close();
    }
  }
Esempio n. 4
0
  /**
   * Return an array of the distinct predicates in the KB ordered by their descending frequency of
   * use. The {@link IV}s in the returned array will have been resolved to the corresponding {@link
   * BigdataURI}s which can be accessed using {@link IV#getValue()}.
   *
   * <p>The distinct predicates are found with a distinct term scan on POS (POCS in quads mode).
   * For each one the range count of <code>(?s, predicate, ?o)</code> is requested with <code>
   * exact == false</code>.
   *
   * @param kb The KB instance.
   * @return The predicate usage statistics, sorted by the natural ordering of {@link IVCount}
   *     (intended to be descending count).
   * @throws UnsupportedOperationException if the statement relation only has one access path, in
   *     which case the POS / POCS index needed for the scan does not exist.
   */
  protected static IVCount[] predicateUsage(final AbstractTripleStore kb) {

    final SPORelation r = kb.getSPORelation();

    if (r.oneAccessPath) {

      throw new UnsupportedOperationException(
          "The necessary index (POS or POCS) does not exist (oneAccessPath)");
    }

    // The index to use for the distinct predicate scan.
    final SPOKeyOrder keyOrder = kb.isQuads() ? SPOKeyOrder.POCS : SPOKeyOrder.POS;

    // Visit distinct term identifiers for the predicate position on that index.
    @SuppressWarnings("rawtypes")
    final IChunkedIterator<IV> itr = r.distinctTermScan(keyOrder);

    // Resolve term identifiers to terms efficiently during iteration.
    final BigdataValueIterator itr2 = new BigdataValueIteratorImpl(kb /* resolveTerms */, itr);

    try {

      // Insertion-ordered so the batch lookup sees the IVs in scan order.
      final Set<IV<?, ?>> ivs = new LinkedHashSet<IV<?, ?>>();

      final Map<IV<?, ?>, IVCount> counts = new LinkedHashMap<IV<?, ?>, IVCount>();

      while (itr2.hasNext()) {

        final IV<?, ?> predicateIV = itr2.next().getIV();

        final long n =
            r.getAccessPath(null, predicateIV, null, null).rangeCount(false /* exact */);

        ivs.add(predicateIV);

        counts.put(predicateIV, new IVCount(predicateIV, n));
      }

      // Batch resolve IVs to Values.
      // NOTE(review): itr2 is described as already resolving terms during the scan; confirm
      // whether this second lookup is still required before removing either one.
      final Map<IV<?, ?>, BigdataValue> resolved = kb.getLexiconRelation().getTerms(ivs);

      for (Map.Entry<IV<?, ?>, BigdataValue> e : resolved.entrySet()) {

        counts.get(e.getKey()).setValue(e.getValue());
      }

      final IVCount[] a = counts.values().toArray(new IVCount[counts.size()]);

      // Natural ordering of IVCount (intended to be descending count).
      Arrays.sort(a);

      return a;

    } finally {

      itr2.close();
    }
  }