/**
 * Look up the {@link IV} associated with the given {@link Value} using the
 * <code>val2iv</code> map.
 *
 * @param value The value to look up (must not be <code>null</code>).
 * @return The {@link IV} reported by the mapped {@link BigdataValue}, or
 *         <code>null</code> if the map has no entry for <i>value</i>.
 * @throws IllegalStateException if the <code>val2iv</code> map is <code>null</code>.
 * @throws IllegalArgumentException if <i>value</i> is <code>null</code>.
 */
@SuppressWarnings("rawtypes")
public final IV get(final Value value) {

    // The state check deliberately precedes the argument check.
    if (val2iv == null) {
        throw new IllegalStateException();
    }

    if (value == null) {
        throw new IllegalArgumentException();
    }

    final BigdataValue resolved = val2iv.get(value);

    // Unknown value => null; otherwise whatever IV the mapped value carries.
    return (resolved == null) ? null : resolved.getIV();
}
/**
 * Add the given values to the lexicon of the triple store (not read-only)
 * and then set each value on its own {@link IV}.
 *
 * <p>Note: The value is cached on the IV to align with the behavior of the
 * SPARQL parser. BatchRDFValueResolver does this, so it has to be done here
 * as well in order to get an exact structural match when the generated
 * SPARQL query is parsed and the AST model is then verified.
 *
 * @param values The values to be added and whose IVs are to be resolved.
 */
@SuppressWarnings("unchecked")
private void addResolveIVs(final BigdataValue... values) {

    tripleStore
            .getLexiconRelation()
            .addTerms(values, values.length, false /* readOnly */);

    // Cache each value on its IV (see the note in the method documentation).
    for (int i = 0; i < values.length; i++) {
        final BigdataValue term = values[i];
        term.getIV().setValue(term);
    }
}
/** * Return an efficient statistical summary for the class partitions. The SPARQL query for this is * * <pre> * SELECT ?class (COUNT(?s) AS ?count ) { ?s a ?class } GROUP BY ?class ORDER BY ?count * </pre> * * However, it is much efficient to scan POS for * * <pre> * rdf:type ?o ?s * </pre> * * and report the range count of * * <pre> * rdf:type ?o ?s * </pre> * * for each distinct value of <code>?o</code>. * * @param kb The KB instance. * @return The class usage statistics. */ protected static IVCount[] classUsage(final AbstractTripleStore kb) { final SPORelation r = kb.getSPORelation(); if (r.oneAccessPath) { // The necessary index (POS or POCS) does not exist. throw new UnsupportedOperationException(); } final boolean quads = kb.isQuads(); final SPOKeyOrder keyOrder = quads ? SPOKeyOrder.POCS : SPOKeyOrder.POS; // Resolve IV for rdf:type final BigdataURI rdfType = kb.getValueFactory().asValue(RDF.TYPE); kb.getLexiconRelation() .addTerms(new BigdataValue[] {rdfType}, 1 /* numTerms */, true /* readOnly */); if (rdfType.getIV() == null) { // No rdf:type assertions since rdf:type is unknown term. return new IVCount[0]; } // visit distinct term identifiers for the rdf:type predicate. @SuppressWarnings("rawtypes") final IChunkedIterator<IV> itr = r.distinctMultiTermScan(keyOrder, new IV[] {rdfType.getIV()} /* knownTerms */); // resolve term identifiers to terms efficiently during iteration. 
final BigdataValueIterator itr2 = new BigdataValueIteratorImpl(kb /* resolveTerms */, itr); try { final Set<IV<?, ?>> ivs = new LinkedHashSet<IV<?, ?>>(); final Map<IV<?, ?>, IVCount> counts = new LinkedHashMap<IV<?, ?>, IVCount>(); while (itr2.hasNext()) { final BigdataValue term = itr2.next(); final IV<?, ?> iv = term.getIV(); final long n = r.getAccessPath(null, rdfType.getIV() /* p */, iv /* o */, null) .rangeCount(false /* exact */); ivs.add(iv); counts.put(iv, new IVCount(iv, n)); } // Batch resolve IVs to Values final Map<IV<?, ?>, BigdataValue> x = kb.getLexiconRelation().getTerms(ivs); for (Map.Entry<IV<?, ?>, BigdataValue> e : x.entrySet()) { final IVCount count = counts.get(e.getKey()); count.setValue(e.getValue()); } final IVCount[] a = counts.values().toArray(new IVCount[counts.size()]); // Order by descending count. Arrays.sort(a); return a; } finally { itr2.close(); } }
/** * Return an array of the distinct predicates in the KB ordered by their descending frequency of * use. The {@link IV}s in the returned array will have been resolved to the corresponding {@link * BigdataURI}s which can be accessed using {@link IV#getValue()}. * * @param kb The KB instance. */ protected static IVCount[] predicateUsage(final AbstractTripleStore kb) { final SPORelation r = kb.getSPORelation(); if (r.oneAccessPath) { // The necessary index (POS or POCS) does not exist. throw new UnsupportedOperationException(); } final boolean quads = kb.isQuads(); // the index to use for distinct predicate scan. final SPOKeyOrder keyOrder = quads ? SPOKeyOrder.POCS : SPOKeyOrder.POS; // visit distinct term identifiers for predicate position on that index. @SuppressWarnings("rawtypes") final IChunkedIterator<IV> itr = r.distinctTermScan(keyOrder); // resolve term identifiers to terms efficiently during iteration. final BigdataValueIterator itr2 = new BigdataValueIteratorImpl(kb /* resolveTerms */, itr); try { final Set<IV<?, ?>> ivs = new LinkedHashSet<IV<?, ?>>(); final Map<IV<?, ?>, IVCount> counts = new LinkedHashMap<IV<?, ?>, IVCount>(); while (itr2.hasNext()) { final BigdataValue term = itr2.next(); final IV<?, ?> iv = term.getIV(); final long n = r.getAccessPath(null, iv, null, null).rangeCount(false /* exact */); ivs.add(iv); counts.put(iv, new IVCount(iv, n)); } // Batch resolve IVs to Values final Map<IV<?, ?>, BigdataValue> x = kb.getLexiconRelation().getTerms(ivs); for (Map.Entry<IV<?, ?>, BigdataValue> e : x.entrySet()) { final IVCount count = counts.get(e.getKey()); count.setValue(e.getValue()); } final IVCount[] a = counts.values().toArray(new IVCount[counts.size()]); // Order by descending count. Arrays.sort(a); return a; } finally { itr2.close(); } }