Exemple #1
0
  /**
   * Declares the vocabularies used by the KB. One {@link VoidVocabularyDecl#vocabulary} statement
   * is added to the data set for each distinct namespace found among the predicates of the given
   * partition statistics.
   *
   * <p>A single trailing {@code '#'} is removed from each namespace, and the resulting namespaces
   * are emitted in sorted (natural {@link String}) order.
   *
   * @param predicatePartitionCounts The predicate partition statistics.
   */
  protected void describeVocabularies(final IVCount[] predicatePartitionCounts) {

    // The set collapses predicates which share a namespace into one entry.
    final Set<String> distinct = new LinkedHashSet<String>();

    for (int i = 0; i < predicatePartitionCounts.length; i++) {

      final URI predicate = (URI) predicatePartitionCounts[i].getValue();

      final String ns = predicate.getNamespace();

      // Strip trailing '#' per VoID specification.
      distinct.add(ns.endsWith("#") ? ns.substring(0, ns.length() - 1) : ns);
    }

    // Emit in dictionary order rather than in encounter order.
    final String[] sorted = distinct.toArray(new String[0]);

    Arrays.sort(sorted);

    for (int i = 0; i < sorted.length; i++) {

      g.add(aDataset, VoidVocabularyDecl.vocabulary, f.createURI(sorted[i]));
    }
  }
Exemple #2
0
  /**
   * Adds the description of a single graph (named or default) to the output graph.
   *
   * <p>The graph is typed as {@code SD.Graph} and annotated with triple, entity, property and class
   * counts. One blank-node property partition is then added per entry of {@code
   * predicatePartitionCounts}, followed by one blank-node class partition per entry of {@code
   * classPartitionCounts}.
   *
   * @param graph The resource standing for the graph being described.
   * @param predicatePartitionCounts The predicate partition statistics for that graph.
   * @param classPartitionCounts The class partition statistics for that graph.
   */
  protected void describeGraph(
      final Resource graph,
      final IVCount[] predicatePartitionCounts,
      final IVCount[] classPartitionCounts) {

    g.add(graph, RDF.TYPE, SD.Graph);

    /*
     * Summary counts.
     *
     * NOTE(review): the triple and entity counts are read from the triple store itself and not
     * from the graph argument, so every graph described here reports the same two values.
     * Confirm that this is intended when a named graph is being described.
     */
    g.add(graph, VoidVocabularyDecl.triples, f.createLiteral(tripleStore.getStatementCount()));
    g.add(graph, VoidVocabularyDecl.entities, f.createLiteral(tripleStore.getURICount()));

    // The #of distinct predicates / classes is the #of partitions of each kind.
    g.add(graph, VoidVocabularyDecl.properties, f.createLiteral(predicatePartitionCounts.length));
    g.add(graph, VoidVocabularyDecl.classes, f.createLiteral(classPartitionCounts.length));

    // One property partition per predicate: the predicate and its triple count.
    for (int i = 0; i < predicatePartitionCounts.length; i++) {

      final IVCount stat = predicatePartitionCounts[i];

      final BNode partition = f.createBNode();

      final URI predicate = (URI) stat.getValue();

      g.add(graph, VoidVocabularyDecl.propertyPartition, partition);
      g.add(partition, VoidVocabularyDecl.property, predicate);
      g.add(partition, VoidVocabularyDecl.triples, f.createLiteral(stat.count));
    }

    // One class partition per class: the class and its triple count.
    for (int i = 0; i < classPartitionCounts.length; i++) {

      final IVCount stat = classPartitionCounts[i];

      final BNode partition = f.createBNode();

      final BigdataValue clazz = stat.getValue();

      g.add(graph, VoidVocabularyDecl.classPartition, partition);
      g.add(partition, VoidVocabularyDecl.class_, clazz);
      g.add(partition, VoidVocabularyDecl.triples, f.createLiteral(stat.count));
    }
  }
Exemple #3
0
  /**
   * Describe the default data set (the one identified by the namespace associated with the {@link
   * AbstractTripleStore}).
   *
   * <p>The data set type, title, KB namespace, one SPARQL end point per configured service URI and
   * the URI regex pattern are always emitted. Everything else depends on the flags below.
   *
   * @param describeStatistics When <code>true</code>, the VoID description will include the {@link
   *     VoidVocabularyDecl#vocabulary} declarations, the property partition statistics, and the
   *     class partition statistics. When <code>false</code>, nothing beyond the basic data set
   *     description is emitted and <code>describeNamedGraphs</code> has no effect.
   * @param describeNamedGraphs When <code>true</code> (and the triple store is in quads mode), each
   *     named graph will also be described in the same level of detail as the default graph.
   *     Otherwise only the default graph will be described.
   */
  public void describeDataSet(final boolean describeStatistics, final boolean describeNamedGraphs) {

    final String kbNamespace = tripleStore.getNamespace();

    // Basic description: a VoID data set, titled with and identified by the KB namespace.
    g.add(aDataset, RDF.TYPE, VoidVocabularyDecl.Dataset);
    g.add(aDataset, DCTermsVocabularyDecl.title, f.createLiteral(kbNamespace));
    g.add(aDataset, SD.KB_NAMESPACE, f.createLiteral(kbNamespace));

    /*
     * Service end point(s) for this namespace. The namespace is URL encoded before it is spliced
     * into the end point URI.
     *
     * See https://sourceforge.net/apps/trac/bigdata/ticket/689 (Missing URL encoding in
     * RemoteRepositoryManager).
     */
    for (String serviceBase : serviceURI) {
      final String endpoint = serviceBase + "/" + ConnectOptions.urlEncode(kbNamespace) + "/sparql";
      g.add(aDataset, VoidVocabularyDecl.sparqlEndpoint, f.createURI(endpoint));
    }

    // any URI is considered to be an entity.
    g.add(aDataset, VoidVocabularyDecl.uriRegexPattern, f.createLiteral("^.*"));

    if (!describeStatistics) {
      // The caller asked for the basic description only.
      return;
    }

    // Frequency counts of the predicates and of the classes in the default graph.
    final IVCount[] defaultPredicateCounts = predicateUsage(tripleStore);
    final IVCount[] defaultClassCounts = classUsage(tripleStore);

    // The vocabularies are derived from the predicate partitions.
    describeVocabularies(defaultPredicateCounts);

    // The default graph of the data set, described using those statistics.
    g.add(aDataset, SD.defaultGraph, aDefaultGraph);
    describeGraph(aDefaultGraph, defaultPredicateCounts, defaultClassCounts);

    if (!describeNamedGraphs || !tripleStore.isQuads()) {
      // Named graphs were not requested, or the store is not in quads mode.
      return;
    }

    /*
     * Report for each named graph.
     */
    final SPORelation spoRelation = tripleStore.getSPORelation();

    // the index to use for distinct term scan.
    final SPOKeyOrder contextIndex = SPOKeyOrder.CSPO;

    // visit distinct IVs for context position on that index.
    @SuppressWarnings("rawtypes")
    final IChunkedIterator<IV> contextIVs = spoRelation.distinctTermScan(contextIndex);

    // resolve IVs to terms efficiently during iteration.
    final BigdataValueIterator contexts =
        new BigdataValueIteratorImpl(tripleStore /* resolveTerms */, contextIVs);

    try {

      while (contexts.hasNext()) {

        /*
         * Note: The predicate and class partition statistics of the default graph (RDF merge)
         * are passed in as the set of all possible predicates and classes. Per the original
         * author's note, each one is then tested against the named graph and those which are
         * not present in it are ignored. This is being done because we do not have a CPxx
         * index.
         */
        final BigdataResource namedGraph = (BigdataResource) contexts.next();

        final IVCount[] graphPredicateCounts =
            predicateUsage(tripleStore, namedGraph.getIV(), defaultPredicateCounts);

        final IVCount[] graphClassCounts =
            classUsage(tripleStore, namedGraph.getIV(), defaultClassCounts);

        // A blank node stands for the named graph within the data set ...
        final BNode graphNode = f.createBNode();
        g.add(aDataset, SD.namedGraph, graphNode);

        // ... and carries the name of that named graph.
        g.add(graphNode, SD.name, namedGraph);

        describeGraph(graphNode, graphPredicateCounts, graphClassCounts);
      }

    } finally {

      // Always release the resolving iterator, even if describing a graph fails.
      contexts.close();
    }
  }
Exemple #4
0
  /**
   * Parses an RDF/JSON document into a Sesame {@link Graph}. Implementation using the json.org
   * API.
   *
   * <p>Subjects starting with {@code "_:"} become blank nodes; all other subjects become URIs. An
   * object entry is skipped when it has no {@code "value"} or no {@code "type"} key, or when its
   * type is not one of {@code "literal"}, {@code "bnode"} or {@code "uri"}. For literals a
   * {@code "lang"} key takes precedence over a {@code "datatype"} key.
   *
   * <p>When an object entry carries a {@code "graphs"} array, one statement is added per array
   * element. A JSON {@code null} or the string {@code "null"} denotes the default context; any
   * other element is used as a context URI.
   *
   * @param json The RDF/JSON string to be parsed and converted into a Sesame Graph.
   * @return A Sesame Graph if successful, otherwise null (the input was <code>null</code> or was
   *     not well-formed for this parser; a parse error is logged).
   */
  public static Graph rdfJsonToGraph(String json) {
    if (json == null) {
      // Honor the documented "otherwise null" contract instead of failing inside the parser.
      return null;
    }

    final Graph result = new GraphImpl();
    final ValueFactory vf = result.getValueFactory();

    try {
      final JSONObject input = new JSONObject(json);
      final Iterator<String> subjects = input.keys();
      while (subjects.hasNext()) {
        final String subjStr = subjects.next();
        final Resource subject =
            subjStr.startsWith("_:") ? vf.createBNode(subjStr.substring(2)) : vf.createURI(subjStr);
        final JSONObject pObj = input.getJSONObject(subjStr);
        final Iterator<String> predicates = pObj.keys();
        while (predicates.hasNext()) {
          final String predStr = predicates.next();
          final URI predicate = vf.createURI(predStr);
          final JSONArray predArr = pObj.getJSONArray(predStr);
          for (int i = 0; i < predArr.length(); i++) {
            final JSONObject obj = predArr.getJSONObject(i);
            if (!obj.has("value")) {
              continue;
            }
            final String value = obj.getString("value");
            if (!obj.has("type")) {
              continue;
            }
            final String type = obj.getString("type");
            final String lang = obj.has("lang") ? obj.getString("lang") : null;
            final String datatype = obj.has("datatype") ? obj.getString("datatype") : null;

            final Value object;
            if ("literal".equals(type)) {
              if (lang != null) {
                object = vf.createLiteral(value, lang);
              } else if (datatype != null) {
                object = vf.createLiteral(value, vf.createURI(datatype));
              } else {
                object = vf.createLiteral(value);
              }
            } else if ("bnode".equals(type)) {
              // Only strip the "_:" prefix when it is really there; a blind substring(2) would
              // mangle unprefixed ids and throw on ids shorter than two characters.
              object = vf.createBNode(value.startsWith("_:") ? value.substring(2) : value);
            } else if ("uri".equals(type)) {
              object = vf.createURI(value);
            } else {
              // Never add a statement with a null object for a type we do not understand.
              log.warn("Skipping RDF/JSON object with unsupported type: " + type);
              continue;
            }

            if (obj.has("graphs")) {
              final JSONArray graphs = obj.getJSONArray("graphs");
              for (int j = 0; j < graphs.length(); j++) {
                // Note: any nulls here will result in statements in the default context.
                // isNull() is checked first so a JSON null is handled the same way regardless
                // of how getString() treats it.
                Resource context = null;
                if (!graphs.isNull(j)) {
                  final String s = graphs.getString(j);
                  context = "null".equals(s) ? null : vf.createURI(s);
                }
                result.add(subject, predicate, object, context);
              }
            } else {
              result.add(subject, predicate, object);
            }
          }
        }
      }
    } catch (JSONException e) {
      log.error(e.getMessage(), e);
      return null;
    }

    return result;
  }