Пример #1
0
  /**
   * Look up the {@link IV} recorded for a {@link Value} in the <code>val2iv</code> map.
   *
   * @param value The value to look up (must not be <code>null</code>).
   * @return The {@link IV} reported by the {@link BigdataValue} mapped to that value, or <code>null
   *     </code> if <code>val2iv</code> has no mapping for the value.
   * @throws IllegalStateException if <code>val2iv</code> is <code>null</code>.
   * @throws IllegalArgumentException if <code>value</code> is <code>null</code>.
   */
  @SuppressWarnings("rawtypes")
  public final IV get(final Value value) {

    // The state check comes first, so a null map is reported before a null argument.
    if (val2iv == null) {
      throw new IllegalStateException();
    }

    if (value == null) {
      throw new IllegalArgumentException();
    }

    final BigdataValue declared = val2iv.get(value);

    // No mapping => no IV.
    return declared == null ? null : declared.getIV();
  }
Пример #2
0
  /**
   * Assign an {@link IV} to every {@link BigdataValue} held in <code>val2iv</code>.
   *
   * <p>The values are visited in the iteration order of <code>val2iv</code> (presumably the order
   * in which they were declared -- this depends on the map implementation, which is not visible
   * here). The position of a value in that order becomes its identifier: positions 0..255 are
   * encoded as a {@link VocabURIByteIV} and all later positions as a {@link VocabURIShortIV}. Each
   * generated {@link IV} is set on its value and the pair is recorded in <code>iv2val</code>.
   *
   * @throws UnsupportedOperationException if {@link #size()} exceeds <code>MAX_ITEMS</code>.
   */
  private void generateIVs() {

    // Refuse to proceed when there are more items than the encoding allows.
    final int n = size();

    if (n > MAX_ITEMS) {
      throw new UnsupportedOperationException(
          "Too many vocabulary items: n=" + n + ", but maximum is " + MAX_ITEMS);
    }

    // Position of the next value; also the number of IVs generated so far.
    int index = 0;

    for (BigdataValue declared : val2iv.values()) {

      // One byte suffices for the first 256 items; a short is used for everything after that.
      @SuppressWarnings("rawtypes")
      final IV iv =
          index <= 255
              ? new VocabURIByteIV<BigdataURI>((byte) index)
              : new VocabURIShortIV<BigdataURI>((short) index);

      // Cache the IV on the Value. The reverse link (Value cached on the IV) is deliberately
      // NOT established here.
      declared.setIV(iv);

      // Maintain the reverse lookup.
      iv2val.put(iv, declared);

      index++;
    }

    // Every value must have received a distinct IV.
    assert iv2val.size() == val2iv.size();
  }
  /**
   * Pass the given values to the lexicon via <code>addTerms</code> (with <code>readOnly</code> set
   * to <code>false</code>) and then cache each value on its own {@link IV}.
   *
   * <p>Caching the value on the IV mirrors what the SPARQL parser does (per the original author,
   * BatchRDFValueResolver behaves this way). It is needed so that the AST obtained by parsing a
   * generated SPARQL query is an exact structural match during verification.
   *
   * @param values The values to add and whose IVs receive the back reference.
   */
  @SuppressWarnings("unchecked")
  private void addResolveIVs(final BigdataValue... values) {

    tripleStore.getLexiconRelation().addTerms(values, values.length, false /* readOnly */);

    // Link each IV back to the value it was assigned to.
    for (int i = 0; i < values.length; i++) {

      final BigdataValue term = values[i];

      term.getIV().setValue(term);
    }
  }
Пример #4
0
  /**
   * Return a statistical summary of the class partitions. The equivalent SPARQL query is
   *
   * <pre>
   * SELECT  ?class (COUNT(?s) AS ?count ) { ?s a ?class } GROUP BY ?class ORDER BY ?count
   * </pre>
   *
   * However, it is much more efficient to scan the POS (or POCS, for quads) index for the distinct
   * values of <code>?o</code> in
   *
   * <pre>
   * rdf:type ?o ?s
   * </pre>
   *
   * and to report, for each such <code>?o</code>, the range count of that same pattern. The range
   * counts are requested with <code>exact == false</code>.
   *
   * @param kb The KB instance.
   * @return One {@link IVCount} per distinct class, sorted with {@link Arrays#sort(Object[])}
   *     (the natural ordering of {@link IVCount}; described by the original author as descending
   *     count). The array is empty when <code>rdf:type</code> has no {@link IV} in the lexicon.
   * @throws UnsupportedOperationException if the relation maintains only one access path, in
   *     which case the required index does not exist.
   */
  protected static IVCount[] classUsage(final AbstractTripleStore kb) {

    final SPORelation spo = kb.getSPORelation();

    if (spo.oneAccessPath) {

      // Neither POS nor POCS is available.
      throw new UnsupportedOperationException();
    }

    // Choose the predicate-leading index that matches the KB mode.
    final SPOKeyOrder keyOrder;
    if (kb.isQuads()) {
      keyOrder = SPOKeyOrder.POCS;
    } else {
      keyOrder = SPOKeyOrder.POS;
    }

    // Look up (read-only, never insert) the IV for rdf:type.
    final BigdataURI rdfType = kb.getValueFactory().asValue(RDF.TYPE);

    kb.getLexiconRelation()
        .addTerms(new BigdataValue[] {rdfType}, 1 /* numTerms */, true /* readOnly */);

    @SuppressWarnings("rawtypes")
    final IV typeIV = rdfType.getIV();

    if (typeIV == null) {

      // rdf:type is not a known term, hence there cannot be any rdf:type assertions.
      return new IVCount[0];
    }

    // Distinct term identifiers that follow rdf:type in the chosen index.
    @SuppressWarnings("rawtypes")
    final IChunkedIterator<IV> distinctClasses =
        spo.distinctMultiTermScan(keyOrder, new IV[] {typeIV} /* knownTerms */);

    // Wrap the scan so that terms are resolved while iterating.
    final BigdataValueIterator resolved =
        new BigdataValueIteratorImpl(kb /* resolveTerms */, distinctClasses);

    try {

      // Both collections preserve the order in which the scan reported the classes.
      final Set<IV<?, ?>> classIVs = new LinkedHashSet<IV<?, ?>>();

      final Map<IV<?, ?>, IVCount> countsByIV = new LinkedHashMap<IV<?, ?>, IVCount>();

      while (resolved.hasNext()) {

        final IV<?, ?> classIV = resolved.next().getIV();

        // Range count (not exact) for (?s rdf:type classIV).
        final long rangeCount =
            spo.getAccessPath(null, typeIV /* p */, classIV /* o */, null)
                .rangeCount(false /* exact */);

        classIVs.add(classIV);

        countsByIV.put(classIV, new IVCount(classIV, rangeCount));
      }

      // Resolve all collected IVs to Values in one batch and attach them to the counters.
      final Map<IV<?, ?>, BigdataValue> terms = kb.getLexiconRelation().getTerms(classIVs);

      for (Map.Entry<IV<?, ?>, BigdataValue> entry : terms.entrySet()) {

        countsByIV.get(entry.getKey()).setValue(entry.getValue());
      }

      final IVCount[] result = countsByIV.values().toArray(new IVCount[countsByIV.size()]);

      // Natural ordering of IVCount.
      Arrays.sort(result);

      return result;

    } finally {

      resolved.close();
    }
  }
Пример #5
0
  /**
   * Return one {@link IVCount} for each distinct predicate in the KB.
   *
   * <p>The distinct predicates are found with a distinct term scan on the POS (or POCS, for quads)
   * index. For each predicate a range count is requested with <code>exact == false</code>. The
   * predicate {@link IV}s are then resolved to {@link BigdataValue}s in a single batch, and each
   * resolved value is attached to its counter with <code>IVCount.setValue</code>.
   *
   * @param kb The KB instance.
   * @return The per-predicate counters, sorted with {@link Arrays#sort(Object[])} (the natural
   *     ordering of {@link IVCount}; described by the original author as descending count).
   * @throws UnsupportedOperationException if the relation maintains only one access path, in
   *     which case the required index does not exist.
   */
  protected static IVCount[] predicateUsage(final AbstractTripleStore kb) {

    final SPORelation spo = kb.getSPORelation();

    if (spo.oneAccessPath) {

      // Neither POS nor POCS is available.
      throw new UnsupportedOperationException();
    }

    // Choose the index for the distinct predicate scan according to the KB mode.
    final SPOKeyOrder keyOrder;
    if (kb.isQuads()) {
      keyOrder = SPOKeyOrder.POCS;
    } else {
      keyOrder = SPOKeyOrder.POS;
    }

    // Distinct term identifiers in the leading (predicate) position of that index.
    @SuppressWarnings("rawtypes")
    final IChunkedIterator<IV> distinctPredicates = spo.distinctTermScan(keyOrder);

    // Wrap the scan so that terms are resolved while iterating.
    final BigdataValueIterator resolved =
        new BigdataValueIteratorImpl(kb /* resolveTerms */, distinctPredicates);

    try {

      // Both collections preserve the order in which the scan reported the predicates.
      final Set<IV<?, ?>> predicateIVs = new LinkedHashSet<IV<?, ?>>();

      final Map<IV<?, ?>, IVCount> countsByIV = new LinkedHashMap<IV<?, ?>, IVCount>();

      while (resolved.hasNext()) {

        final IV<?, ?> predicateIV = resolved.next().getIV();

        // Range count (not exact) for all statements using this predicate.
        final long rangeCount =
            spo.getAccessPath(null, predicateIV, null, null).rangeCount(false /* exact */);

        predicateIVs.add(predicateIV);

        countsByIV.put(predicateIV, new IVCount(predicateIV, rangeCount));
      }

      // Resolve all collected IVs to Values in one batch and attach them to the counters.
      final Map<IV<?, ?>, BigdataValue> terms = kb.getLexiconRelation().getTerms(predicateIVs);

      for (Map.Entry<IV<?, ?>, BigdataValue> entry : terms.entrySet()) {

        countsByIV.get(entry.getKey()).setValue(entry.getValue());
      }

      final IVCount[] result = countsByIV.values().toArray(new IVCount[countsByIV.size()]);

      // Natural ordering of IVCount.
      Arrays.sort(result);

      return result;

    } finally {

      resolved.close();
    }
  }