Java SPORelation.distinctTermScan Examples

Programming Language: Java

Namespace/Package Name: com.bigdata.rdf.spo

Class/Type: SPORelation

Method/Function: distinctTermScan

Examples at hotexamples.com: 2

Java SPORelation.distinctTermScan - 2 examples found. These are the top-rated real-world Java examples of com.bigdata.rdf.spo.SPORelation.distinctTermScan extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

getAccessPath(4)

distinctTermScan(2)

distinctMultiTermScan(1)

Example #1

Show file

File: VoID.java Project: kietly/database

  /**
   * Report how often each distinct predicate is used in the KB.
   *
   * <p>The distinct predicate {@link IV}s are visited with a distinct term scan on the POS index
   * (POCS when the KB is in quads mode). For each one, a range count (requested with
   * <code>exact == false</code>) is taken on the access path bound to that predicate. The
   * collected {@link IV}s are then batch resolved through the lexicon and the resulting {@link
   * BigdataValue} is attached to the matching {@link IVCount}.
   *
   * @param kb The KB instance.
   * @return One {@link IVCount} per distinct predicate, sorted by the natural ordering of {@link
   *     IVCount} (intended to be descending frequency of use).
   * @throws UnsupportedOperationException if the {@link SPORelation} reports that it maintains
   *     only one access path, in which case the POS / POCS index is not available.
   */
  protected static IVCount[] predicateUsage(final AbstractTripleStore kb) {

    final SPORelation spoRelation = kb.getSPORelation();

    if (spoRelation.oneAccessPath) {
      // Without the POS (or POCS) index there is nothing to scan.
      throw new UnsupportedOperationException();
    }

    // Choose the index on which the distinct predicate scan is run.
    final SPOKeyOrder predicateIndex;
    if (kb.isQuads()) {
      predicateIndex = SPOKeyOrder.POCS;
    } else {
      predicateIndex = SPOKeyOrder.POS;
    }

    // Distinct term identifiers found in the predicate position of that index.
    @SuppressWarnings("rawtypes")
    final IChunkedIterator<IV> predicateIVs = spoRelation.distinctTermScan(predicateIndex);

    // Wrap the scan so that term identifiers are resolved to terms while iterating.
    final BigdataValueIterator predicates =
        new BigdataValueIteratorImpl(kb /* resolveTerms */, predicateIVs);

    try {

      // Insertion-ordered collections: the IVs to resolve and their usage records.
      final Set<IV<?, ?>> seen = new LinkedHashSet<IV<?, ?>>();
      final Map<IV<?, ?>, IVCount> usageByIV = new LinkedHashMap<IV<?, ?>, IVCount>();

      while (predicates.hasNext()) {

        final IV<?, ?> predicateIV = predicates.next().getIV();

        // Range count (not requested as exact) of the statements using this predicate.
        final long uses =
            spoRelation.getAccessPath(null, predicateIV, null, null).rangeCount(false /* exact */);

        seen.add(predicateIV);
        usageByIV.put(predicateIV, new IVCount(predicateIV, uses));
      }

      // Resolve all collected IVs to Values in a single batch.
      final Map<IV<?, ?>, BigdataValue> resolved = kb.getLexiconRelation().getTerms(seen);

      for (Map.Entry<IV<?, ?>, BigdataValue> entry : resolved.entrySet()) {
        usageByIV.get(entry.getKey()).setValue(entry.getValue());
      }

      final IVCount[] sorted = usageByIV.values().toArray(new IVCount[usageByIV.size()]);

      // Natural ordering of IVCount; intended to yield descending count.
      Arrays.sort(sorted);

      return sorted;

    } finally {

      predicates.close();
    }
  }

Example #2

Show file

File: VoID.java Project: kietly/database

  /**
   * Write the VoID description of the default data set (the one identified by the namespace
   * associated with the {@link AbstractTripleStore}) into the graph being built.
   *
   * <p>The data set type, title, KB namespace, one SPARQL end point per service URI and the URI
   * regex pattern are always emitted. Everything else depends on the arguments.
   *
   * @param describeStatistics When <code>true</code>, the VoID description will include the {@link
   *     VoidVocabularyDecl#vocabulary} declarations, the property partition statistics, and the
   *     class partition statistics. When <code>false</code>, the method returns after the basic
   *     description and no named graph is described either.
   * @param describeNamedGraphs When <code>true</code>, each named graph will also be described in
   *     the same level of detail as the default graph. Otherwise only the default graph will be
   *     described. Named graphs are only visited when the triple store is in quads mode.
   */
  public void describeDataSet(final boolean describeStatistics, final boolean describeNamedGraphs) {

    final String kbNamespace = tripleStore.getNamespace();

    // Declare the VoID data set, using the namespace both as its title and as an
    // unambiguous KB namespace property.
    g.add(aDataset, RDF.TYPE, VoidVocabularyDecl.Dataset);
    g.add(aDataset, DCTermsVocabularyDecl.title, f.createLiteral(kbNamespace));
    g.add(aDataset, SD.KB_NAMESPACE, f.createLiteral(kbNamespace));

    /*
     * One SPARQL service end point per service URI. The namespace is URL encoded.
     *
     * See https://sourceforge.net/apps/trac/bigdata/ticket/689 (Missing URL encoding in
     * RemoteRepositoryManager).
     */
    for (String baseURI : serviceURI) {
      final String endpoint = baseURI + "/" + ConnectOptions.urlEncode(kbNamespace) + "/sparql";
      g.add(aDataset, VoidVocabularyDecl.sparqlEndpoint, f.createURI(endpoint));
    }

    // Any URI is considered to be an entity.
    g.add(aDataset, VoidVocabularyDecl.uriRegexPattern, f.createLiteral("^.*"));

    if (!describeStatistics) {
      // Caller asked for the basic description only.
      return;
    }

    // Predicate and class frequency counts for the default graph.
    final IVCount[] defaultPredicateCounts = predicateUsage(tripleStore);
    final IVCount[] defaultClassCounts = classUsage(tripleStore);

    // Vocabularies are derived from the predicate partitions.
    describeVocabularies(defaultPredicateCounts);

    // Attach the default graph to the data set and describe it using the statistics.
    g.add(aDataset, SD.defaultGraph, aDefaultGraph);
    describeGraph(aDefaultGraph, defaultPredicateCounts, defaultClassCounts);

    // Disabled term/URI/literal/bnode count reporting, retained from the original source:
    //
    // sb.append("termCount\t = " + tripleStore.getTermCount() + "\n");
    //
    // sb.append("uriCount\t = " + tripleStore.getURICount() + "\n");
    //
    // sb.append("literalCount\t = " + tripleStore.getLiteralCount() +
    // "\n");
    //
    // /*
    // * Note: The blank node count is only available when using the told
    // * bnodes mode.
    // */
    // sb
    // .append("bnodeCount\t = "
    // + (tripleStore.getLexiconRelation()
    // .isStoreBlankNodes() ? ""
    // + tripleStore.getBNodeCount() : "N/A")
    // + "\n");

    // Named graphs are only reported on request, and only for a quads store.
    if (!describeNamedGraphs || !tripleStore.isQuads()) {
      return;
    }

    // Visit the distinct IVs in the context position using the CSPO index.
    @SuppressWarnings("rawtypes")
    final IChunkedIterator<IV> contextIVs =
        tripleStore.getSPORelation().distinctTermScan(SPOKeyOrder.CSPO);

    // Wrap the scan so that IVs are resolved to terms while iterating.
    final BigdataValueIterator namedGraphs =
        new BigdataValueIteratorImpl(tripleStore /* resolveTerms */, contextIVs);

    try {

      while (namedGraphs.hasNext()) {

        /*
         * Note: The default graph (RDF merge) partition statistics supply the set of
         * all candidate predicates and classes. Each candidate is then tested against
         * the named graph and those which are not present in that graph are ignored.
         * This is being done because we do not have a CPxx index.
         */
        final BigdataResource namedGraph = (BigdataResource) namedGraphs.next();

        final IVCount[] graphPredicateCounts =
            predicateUsage(tripleStore, namedGraph.getIV(), defaultPredicateCounts);

        final IVCount[] graphClassCounts =
            classUsage(tripleStore, namedGraph.getIV(), defaultClassCounts);

        // Blank node standing for this named graph within the description.
        final BNode graphNode = f.createBNode();

        // Link the named graph to the data set and record its name.
        g.add(aDataset, SD.namedGraph, graphNode);
        g.add(graphNode, SD.name, namedGraph);

        describeGraph(graphNode, graphPredicateCounts, graphClassCounts);
      }

    } finally {

      namedGraphs.close();
    }
  }