@SuppressWarnings("rawtypes") public final IV get(final Value value) { if (val2iv == null) throw new IllegalStateException(); if (value == null) throw new IllegalArgumentException(); final BigdataValue tmp = val2iv.get(value); if (tmp == null) return null; return tmp.getIV(); }
/**
 * Make a stable assignment of {@link IV}s to declared {@link Value}s.
 *
 * <p>Note: The {@link Value}s are converted to {@link BigdataValue}s by {@link #add(Value)} so
 * that we can invoke {@link AbstractTripleStore#addTerms(BigdataValue[])} directly and get back
 * the assigned {@link IV}s. We rely on the <code>namespace</code> of the {@link
 * AbstractTripleStore} to deserialize {@link BigdataValue}s using the appropriate {@link
 * BigdataValueFactory}.
 */
private void generateIVs() {

    /*
     * Assign IVs to each vocabulary item.
     */
    final int n = size();

    if (n > MAX_ITEMS)
        throw new UnsupportedOperationException(
                "Too many vocabulary items: n=" + n + ", but maximum is " + MAX_ITEMS);

    // The #of generated IVs.
    int i = 0;

    // The Values in the order in which they were declared.
    for (Map.Entry<Value, BigdataValue> e : val2iv.entrySet()) {

        final BigdataValue value = e.getValue();

        @SuppressWarnings("rawtypes")
        final IV iv;

        if (i <= 255) {

            // Use a byte for the 1st 256 declared vocabulary items.
            iv = new VocabURIByteIV<BigdataURI>((byte) i);

        } else {

            // Use a short for the next 64k declared vocabulary items.
            iv = new VocabURIShortIV<BigdataURI>((short) i);

        }

        // Cache the IV on the Value.
        value.setIV(iv);

        // Note: Do not cache the Value on the IV.
        // iv.setValue(value);

        iv2val.put(iv, value);

        i++;

    }

    assert iv2val.size() == val2iv.size();

}
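/*
 * Sketch (illustrative; this helper is hypothetical and not part of the
 * original class): the width of the assigned IV depends purely on the
 * declaration order, mirroring the rule in generateIVs() above.
 */
@SuppressWarnings("rawtypes")
private static IV exampleAssignIV(final int declarationIndex) {

    if (declarationIndex <= 255) {
        // Items 0..255 get compact single-byte IVs, so the most heavily
        // used vocabulary items should be declared first.
        return new VocabURIByteIV<BigdataURI>((byte) declarationIndex);
    }

    // Later items get two-byte IVs.
    return new VocabURIShortIV<BigdataURI>((short) declarationIndex);
}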
@SuppressWarnings("unchecked") private void addResolveIVs(final BigdataValue... values) { tripleStore.getLexiconRelation().addTerms(values, values.length, false /* readOnly */); /* * Cache value on IVs to align with behavior of the SPARQL parser. * * Note: BatchRDFValueResolver does this, so we have to do it to in * order to have an exact structural match when we parse the generated * SPARQL query and then verify the AST model. */ for (BigdataValue v : values) { v.getIV().setValue(v); } }
/**
 * Return an efficient statistical summary for the class partitions. The SPARQL query for this
 * is
 *
 * <pre>
 * SELECT ?class (COUNT(?s) AS ?count ) { ?s a ?class } GROUP BY ?class ORDER BY ?count
 * </pre>
 *
 * However, it is much more efficient to scan POS for
 *
 * <pre>
 * rdf:type ?o ?s
 * </pre>
 *
 * and report the range count of
 *
 * <pre>
 * rdf:type ?o ?s
 * </pre>
 *
 * for each distinct value of <code>?o</code>.
 *
 * @param kb The KB instance.
 * @return The class usage statistics.
 */
protected static IVCount[] classUsage(final AbstractTripleStore kb) {

    final SPORelation r = kb.getSPORelation();

    if (r.oneAccessPath) {

        // The necessary index (POS or POCS) does not exist.
        throw new UnsupportedOperationException();

    }

    final boolean quads = kb.isQuads();

    final SPOKeyOrder keyOrder = quads ? SPOKeyOrder.POCS : SPOKeyOrder.POS;

    // Resolve the IV for rdf:type.
    final BigdataURI rdfType = kb.getValueFactory().asValue(RDF.TYPE);

    kb.getLexiconRelation()
            .addTerms(new BigdataValue[] {rdfType}, 1 /* numTerms */, true /* readOnly */);

    if (rdfType.getIV() == null) {

        // No rdf:type assertions since rdf:type is an unknown term.
        return new IVCount[0];

    }

    // Visit the distinct term identifiers for the rdf:type predicate.
    @SuppressWarnings("rawtypes")
    final IChunkedIterator<IV> itr =
            r.distinctMultiTermScan(keyOrder, new IV[] {rdfType.getIV()} /* knownTerms */);

    // Resolve term identifiers to terms efficiently during iteration.
    final BigdataValueIterator itr2 = new BigdataValueIteratorImpl(kb /* resolveTerms */, itr);

    try {

        final Set<IV<?, ?>> ivs = new LinkedHashSet<IV<?, ?>>();

        final Map<IV<?, ?>, IVCount> counts = new LinkedHashMap<IV<?, ?>, IVCount>();

        while (itr2.hasNext()) {

            final BigdataValue term = itr2.next();

            final IV<?, ?> iv = term.getIV();

            final long n =
                    r.getAccessPath(null, rdfType.getIV() /* p */, iv /* o */, null)
                            .rangeCount(false /* exact */);

            ivs.add(iv);

            counts.put(iv, new IVCount(iv, n));

        }

        // Batch resolve the IVs to Values.
        final Map<IV<?, ?>, BigdataValue> x = kb.getLexiconRelation().getTerms(ivs);

        for (Map.Entry<IV<?, ?>, BigdataValue> e : x.entrySet()) {

            final IVCount count = counts.get(e.getKey());

            count.setValue(e.getValue());

        }

        final IVCount[] a = counts.values().toArray(new IVCount[counts.size()]);

        // Order by descending count.
        Arrays.sort(a);

        return a;

    } finally {

        itr2.close();

    }

}
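/*
 * Usage sketch (illustrative; this helper is not part of the original
 * class): print the class partition summary. The IVCount accessors shown
 * here (getValue() and a public count field) are assumptions based on how
 * the counts are constructed above, not a confirmed API.
 */
private static void printClassUsage(final AbstractTripleStore kb) {

    for (IVCount c : classUsage(kb)) {

        // Each IV was batch-resolved to its BigdataValue by classUsage().
        System.out.println(c.getValue() + " : " + c.count); // assumed accessors

    }
}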
/**
 * Return an array of the distinct predicates in the KB ordered by their descending frequency of
 * use. The {@link IV}s in the returned array will have been resolved to the corresponding
 * {@link BigdataURI}s which can be accessed using {@link IV#getValue()}.
 *
 * @param kb The KB instance.
 */
protected static IVCount[] predicateUsage(final AbstractTripleStore kb) {

    final SPORelation r = kb.getSPORelation();

    if (r.oneAccessPath) {

        // The necessary index (POS or POCS) does not exist.
        throw new UnsupportedOperationException();

    }

    final boolean quads = kb.isQuads();

    // The index to use for the distinct predicate scan.
    final SPOKeyOrder keyOrder = quads ? SPOKeyOrder.POCS : SPOKeyOrder.POS;

    // Visit the distinct term identifiers for the predicate position on that index.
    @SuppressWarnings("rawtypes")
    final IChunkedIterator<IV> itr = r.distinctTermScan(keyOrder);

    // Resolve term identifiers to terms efficiently during iteration.
    final BigdataValueIterator itr2 = new BigdataValueIteratorImpl(kb /* resolveTerms */, itr);

    try {

        final Set<IV<?, ?>> ivs = new LinkedHashSet<IV<?, ?>>();

        final Map<IV<?, ?>, IVCount> counts = new LinkedHashMap<IV<?, ?>, IVCount>();

        while (itr2.hasNext()) {

            final BigdataValue term = itr2.next();

            final IV<?, ?> iv = term.getIV();

            final long n = r.getAccessPath(null, iv, null, null).rangeCount(false /* exact */);

            ivs.add(iv);

            counts.put(iv, new IVCount(iv, n));

        }

        // Batch resolve the IVs to Values.
        final Map<IV<?, ?>, BigdataValue> x = kb.getLexiconRelation().getTerms(ivs);

        for (Map.Entry<IV<?, ?>, BigdataValue> e : x.entrySet()) {

            final IVCount count = counts.get(e.getKey());

            count.setValue(e.getValue());

        }

        final IVCount[] a = counts.values().toArray(new IVCount[counts.size()]);

        // Order by descending count.
        Arrays.sort(a);

        return a;

    } finally {

        itr2.close();

    }

}
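/*
 * Usage sketch (illustrative; this helper is not part of the original
 * class): report the top-N most frequently used predicates. As above, the
 * IVCount accessors are assumptions made for the purpose of this example.
 */
private static void printTopPredicates(final AbstractTripleStore kb, final int topN) {

    final IVCount[] a = predicateUsage(kb); // already sorted, descending

    for (int i = 0; i < Math.min(topN, a.length); i++) {

        System.out.println(a[i].getValue() + " : " + a[i].count); // assumed accessors

    }
}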