/**
 * Collect a type together with every resource declared equivalent to it.
 *
 * <p>The result starts with <code>type</code> itself, followed by the subject of each statement
 * in <code>type</code>'s model of the form <code>(?, owl:equivalentClass, type)</code>.
 *
 * @param type the type whose equivalents are wanted; its model is the one searched
 * @return a fresh list holding <code>type</code> and the subjects found
 */
private static List<Resource> equivalentTypes(Resource type) {
   List<Resource> result = new ArrayList<Resource>();
   result.add(type);
   StmtIterator equivalents = type.getModel().listStatements(ANY, OWL.equivalentClass, type);
   while (equivalents.hasNext()) {
     Resource equivalent = equivalents.nextStatement().getSubject();
     result.add(equivalent);
   }
   return result;
 }
 /**
  * Give every candidate resource an <code>rdf:type</code> of <code>type</code> and of each
  * resource returned by {@link #equivalentTypes(Resource)} for it.
  *
  * @param type the type to assert
  * @param candidates the resources that receive the type statements
  */
 private static void addTypeToAll(Resource type, Set<Resource> candidates) {
   List<Resource> typesToAssert = equivalentTypes(type);
   for (Resource candidate : candidates) {
     for (Resource assertedType : typesToAssert) {
       candidate.addProperty(RDF.type, assertedType);
     }
   }
 }
 /**
  * Type every resource in <code>result</code> that carries all of the types listed by statement
  * <code>s</code> with the subject of <code>s</code> (and its equivalents, via
  * {@link #addTypeToAll}).
  *
  * <p>Candidates are the subjects typed with the first listed type; each further listed type
  * then removes the candidates that lack it.
  *
  * @param result the model whose resources are examined and updated
  * @param schema not used by this method
  * @param s the statement whose subject is the type to add and whose object resource is
  *     converted to a Java list of operand types (presumably an RDF list; confirm against
  *     <code>asJavaList</code>)
  */
 private static void addIntersections(Model result, Model schema, Statement s) {
   Resource intersectionType = s.getSubject();
   List<RDFNode> operands = asJavaList(AssemblerHelp.getResource(s));
   Set<Resource> members = subjectSet(result, ANY, RDF.type, operands.get(0));
   // Narrow the candidate set by each remaining operand in turn.
   for (RDFNode operand : operands.subList(1, operands.size())) {
     removeElementsWithoutType(members, (Resource) operand);
   }
   addTypeToAll(intersectionType, members);
 }
  /**
   * Send one index request per subject of a Jena Model to ES, in bulk batches.
   *
   * <p>A first pass over the statements gathers the predicates allowed by the property filters
   * ({@link #rdfPropList}, its white/black-list flag, and the normalisation map). A second pass
   * builds a document for each subject and queues it; the queue is executed every
   * <code>EEASettings.DEFAULT_BULK_SIZE</code> documents and once more at the end for whatever
   * remains. Bulk failures are passed to <code>processBulkResponseFailure</code>.
   *
   * @param model the model to index
   * @param bulkRequest a BulkRequestBuilder that receives the first batch of requests
   * @param getPropLabel if set to true all URI property values will be indexed as their label.
   *     The label is taken as the value of one of the properties set in
   *     {@link #uriDescriptionList}.
   */
  private void addModelToES(Model model, BulkRequestBuilder bulkRequest, boolean getPropLabel) {
    final long startedAt = System.currentTimeMillis();
    long indexedCount = 0;

    // Pass 1: keep the predicates that survive the configured property filters.
    HashSet<Property> properties = new HashSet<Property>();
    for (StmtIterator statements = model.listStatements(); statements.hasNext(); ) {
      Property predicate = statements.nextStatement().getPredicate();
      String predicateName = predicate.toString();

      // A white list keeps listed names, a black list keeps unlisted ones; normalised
      // properties are always kept, and an empty list disables filtering altogether.
      boolean keep =
          rdfPropList.isEmpty()
              || isWhitePropList == rdfPropList.contains(predicateName)
              || normalizeProp.containsKey(predicateName);
      if (keep) {
        properties.add(predicate);
      }
    }

    // Pass 2: one document per subject, flushed in fixed-size batches.
    for (ResIterator subjects = model.listSubjects(); subjects.hasNext(); ) {
      Resource subject = subjects.nextResource();
      Map<String, ArrayList<String>> document =
          getJsonMap(subject, properties, model, getPropLabel);

      bulkRequest.add(
          client
              .prepareIndex(indexName, typeName, subject.toString())
              .setSource(mapToString(document)));
      indexedCount++;

      if (indexedCount % EEASettings.DEFAULT_BULK_SIZE != 0) {
        continue;
      }

      // Batch is full: send it and start a fresh builder for the next one.
      BulkResponse batchResponse = bulkRequest.execute().actionGet();
      bulkRequest = client.prepareBulk();
      if (batchResponse.hasFailures()) {
        processBulkResponseFailure(batchResponse);
      }
    }

    // Send whatever is left over from the last, partially filled batch.
    if (bulkRequest.numberOfActions() > 0) {
      BulkResponse finalResponse = bulkRequest.execute().actionGet();
      if (finalResponse.hasFailures()) {
        processBulkResponseFailure(finalResponse);
      }
    }

    double elapsedSeconds = (System.currentTimeMillis() - startedAt) / 1000.0;
    logger.info(
        "Indexed {} documents on {}/{} in {} seconds",
        indexedCount,
        indexName,
        typeName,
        elapsedSeconds);
  }
  /**
   * Answer the named classes that sit at the top of the class hierarchy of an {@link OntModel}.
   * The starting point is {@link OntModel#listHierarchyRootClasses()}; any anonymous class
   * expression found there is replaced by its direct sub-classes, repeatedly, until only named
   * classes remain. The result is therefore the shallowest fringe of named classes in the
   * hierarchy.
   *
   * @param m An ontology model
   * @return A list of the named root classes of the class hierarchy in <code>m</code>
   */
  public static List<OntClass> namedHierarchyRoots(OntModel m) {
    List<OntClass> namedRoots = new ArrayList<OntClass>();
    List<OntClass> pendingAnonRoots = new ArrayList<OntClass>();

    // seed both lists from the model's own hierarchy roots
    partitionByNamed(m.listHierarchyRootClasses(), namedRoots, pendingAnonRoots);

    // each anonymous root is swapped for its direct sub-classes until none are left
    while (pendingAnonRoots.size() > 0) {
      OntClass anonRoot = pendingAnonRoots.remove(0);
      partitionByNamed(anonRoot.listSubClasses(true), namedRoots, pendingAnonRoots);
    }

    return namedRoots;
  }
 /**
  * List the facet URIs attached to a need's content.
  *
  * <p>The need is loaded from the repository and its RDF content from the storage service. The
  * facets are the URI-resource objects of <code>WON.HAS_FACET</code> statements on the resource
  * named by the content's default (empty-prefix) namespace.
  *
  * @param needUri the URI of the need to inspect
  * @return the facet URIs found; empty when the need has no stored content
  * @throws NoSuchNeedException declared for the need lookup
  */
 public Collection<URI> getSupportedFacets(URI needUri) throws NoSuchNeedException {
   List<URI> facets = new LinkedList<URI>();
   Model content = rdfStorageService.loadContent(DataAccessUtils.loadNeed(needRepository, needUri));
   if (content != null) {
     Resource base = content.getResource(content.getNsPrefixURI(""));
     for (StmtIterator facetStmts = base.listProperties(WON.HAS_FACET); facetStmts.hasNext(); ) {
       RDFNode facet = facetStmts.nextStatement().getObject();
       // literals and blank nodes cannot be turned into a facet URI
       if (!facet.isURIResource()) {
         continue;
       }
       facets.add(URI.create(facet.toString()));
     }
   }
   return facets;
 }
  /**
   * Sort the classes produced by an iterator into a named list and an anonymous list. A class is
   * dropped instead of sorted when it is already in <code>named</code>, or when it has a
   * super-class that is neither the profile's Thing, nor anonymous, nor the class itself.
   *
   * @param i An iterator to partition
   * @param named Receives the accepted named classes
   * @param anon Receives the accepted anonymous classes
   */
  protected static void partitionByNamed(
      Iterator<? extends OntClass> i, List<OntClass> named, List<OntClass> anon) {
    while (i.hasNext()) {
      OntClass candidate = i.next();

      // duplicate check: a class already collected as named is not added again
      boolean rejected = named.contains(candidate);

      // subsumption check: a root may only have Thing, anonymous classes or itself above it
      Resource thing = candidate.getProfile().THING();
      Iterator<OntClass> supers = candidate.listSuperClasses();
      while (!rejected && supers.hasNext()) {
        OntClass sup = supers.next();
        boolean isThing = thing != null && sup.equals(thing);
        boolean harmless = isThing || sup.isAnon() || sup.equals(candidate);
        rejected = !harmless;
      }

      if (rejected) {
        continue;
      }

      if (candidate.isAnon()) {
        anon.add(candidate);
      } else {
        named.add(candidate);
      }
    }
  }
  /**
   * Build a rewindable result set holding each distinct binding of <code>results</code> once,
   * in order of first appearance. Consumes <code>results</code>.
   *
   * @param results the result set to de-duplicate
   * @return a new rewindable result set over the distinct bindings, with the same result vars
   */
  private static ResultSetRewindable unique(ResultSetRewindable results) {
    // VERY crude.  Relies on bindings having value equality, so a hash set detects repeats.
    Set<Binding> alreadySeen = new HashSet<Binding>();
    List<Binding> distinct = new ArrayList<Binding>();

    while (results.hasNext()) {
      Binding row = results.nextBinding();
      // add() answers false for a repeat, so only first occurrences are kept
      if (alreadySeen.add(row)) {
        distinct.add(row);
      }
    }

    QueryIterator rows = new QueryIterPlainWrapper(distinct.iterator());
    ResultSet stream =
        new ResultSetStream(results.getResultVars(), ModelFactory.createDefaultModel(), rows);
    return ResultSetFactory.makeRewindable(stream);
  }
  /**
   * Runs the harvester for queries and/or URLs until it is marked closed.
   *
   * <p>Each pass harvests from the SPARQL endpoint when queries are configured, then from the
   * RDF dumps, and then sets the closed flag, so a harvester that was not already closed makes
   * one pass. Start and end of the harvest are logged.
   *
   * @return always <code>true</code>
   */
  public boolean runIndexAll() {
    logger.info(
        "Starting RDF harvester: endpoint [{}], queries [{}],"
            + "URIs [{}], index name [{}], typeName [{}]",
        rdfEndpoint,
        rdfQueries,
        rdfUris,
        indexName,
        typeName);

    while (!this.closed) {
      // Harvest from a SPARQL endpoint
      if (!rdfQueries.isEmpty()) {
        harvestFromEndpoint();
      }

      // Harvest from RDF dumps
      harvestFromDumps();

      closed = true;
    }

    logger.info(
        "Ended harvest for endpoint [{}], queries [{}],"
            + "URIs [{}], index name {}, type name {}",
        rdfEndpoint,
        rdfQueries,
        rdfUris,
        indexName,
        typeName);
    return true;
  }
Exemple #10
0
  /**
   * Answer the shortest path from the <code>start</code> resource to the <code>end</code> RDF node,
   * such that every step on the path is accepted by the given filter. A path is a {@link List} of
   * RDF {@link Statement}s. The subject of the first statement in the list is <code>start</code>,
   * and the object of the last statement in the list is <code>end</code>.
   *
   * <p>The <code>onPath</code> argument is a {@link Filter}, which accepts a statement and returns
   * true if the statement should be considered to be on the path. To search for an unconstrained
   * path, pass {@link Filter#any} as an argument. To search for a path whose predicates match a
   * fixed restricted set of property names, pass an instance of {@link PredicatesFilter}.
   *
   * <p>If there is more than one path of minimal length from <code>start</code> to
   * <code>end</code>, this method returns an arbitrary one. The algorithm is blind breadth-first
   * search, with loop detection. Each node is expanded at most once: a later candidate ending at
   * an already-expanded node is still tested against <code>end</code>, but is not expanded again,
   * because every extension it could contribute was already queued, earlier, by the first
   * expansion of that node.
   *
   * @param m The model in which we are seeking a path
   * @param start The starting resource
   * @param end The end, or goal, node
   * @param onPath A filter which determines whether a given statement can be considered part of the
   *     path
   * @return A path, consisting of a list of statements whose first subject is <code>start</code>,
   *     and whose last object is <code>end</code>, or null if no such path exists.
   */
  public static Path findShortestPath(
      Model m, Resource start, RDFNode end, Filter<Statement> onPath) {
    List<Path> bfs = new LinkedList<Path>();
    Set<Resource> seen = new HashSet<Resource>();

    // initialise the paths: one single-step path per accepted statement leaving start
    for (Iterator<Statement> i = m.listStatements(start, null, (RDFNode) null).filterKeep(onPath);
        i.hasNext(); ) {
      bfs.add(new Path().append(i.next()));
    }

    // search
    Path solution = null;
    while (solution == null && !bfs.isEmpty()) {
      Path candidate = bfs.remove(0);

      if (candidate.hasTerminus(end)) {
        solution = candidate;
      } else {
        Resource terminus = candidate.getTerminalResource();

        // Expand a node only the first time a path reaches it. seen.add() answers false for a
        // node that was already expanded; re-expanding it would only queue longer-or-equal
        // duplicates of paths already in the queue, which blows up on graphs where many
        // equal-length paths converge on the same node.
        if (terminus != null && seen.add(terminus)) {
          // breadth-first expansion
          for (Iterator<Statement> i = terminus.listProperties().filterKeep(onPath);
              i.hasNext(); ) {
            Statement link = i.next();

            // no looping allowed, so we skip this link if it takes us to a node we've seen
            if (!seen.contains(link.getObject())) {
              bfs.add(candidate.append(link));
            }
          }
        }
      }
    }

    return solution;
  }
Exemple #11
0
  /**
   * Copy a result set, replacing every value with a plain literal of its string form. An unbound
   * variable becomes the empty string, a blank node becomes <code>_:</code> followed by its
   * label, and anything else becomes the result of <code>NodeFunctions.str</code>. Consumes
   * <code>resultsActual</code>.
   *
   * @param resultsActual the result set to convert
   * @return a new rewindable result set with the same result vars and string-valued bindings
   */
  private ResultSetRewindable convertToStrings(ResultSetRewindable resultsActual) {
    List<Binding> stringRows = new ArrayList<Binding>();
    for (; resultsActual.hasNext(); ) {
      Binding sourceRow = resultsActual.nextBinding();
      BindingMap stringRow = BindingFactory.create();

      for (String varName : resultsActual.getResultVars()) {
        Var column = Var.alloc(varName);
        Node value = sourceRow.get(column);

        String text;
        if (value != null && value.isBlank()) {
          text = "_:" + value.getBlankNodeLabel();
        } else if (value != null) {
          text = NodeFunctions.str(value);
        } else {
          text = "";
        }
        stringRow.add(column, NodeFactory.createLiteral(text));
      }
      stringRows.add(stringRow);
    }

    QueryIterPlainWrapper rows = new QueryIterPlainWrapper(stringRows.iterator());
    ResultSet stream = new ResultSetStream(resultsActual.getResultVars(), null, rows);
    return ResultSetFactory.makeRewindable(stream);
  }
 /**
  * Return all values for the given option as Strings. The locally held values win; the parent
  * options object is consulted only when there are no local values and a parent exists.
  */
 @Override
 protected List<String> getAllValues(OPT option) {
   List<String> local = super.getAllValues(option);
   if (!local.isEmpty() || !hasParent()) {
     return local;
   }
   return getParent().getAllValues(option);
 }
 /**
  * Assert that the model contains none of the given statements.
  *
  * @param m the model to check
  * @param statements the statements that must all be absent from <code>m</code>
  */
 public void testOmits(Model m, List<Statement> statements) {
   for (Statement unwanted : statements) {
     assertFalse("it should not be here", m.contains(unwanted));
   }
 }
  /**
   * Build a CONSTRUCT query returning all triples whose subject is one of the given URIs.
   *
   * <p>When {@link #uriDescriptionList} is empty the query is just that subject filter. Otherwise
   * the base pattern additionally excludes objects that have a value for any of the description
   * properties, and one UNION branch per description property adds those triples back with the
   * object replaced by <code>str(?label)</code>, the label being the value of that property.
   *
   * @param uris URIs for queried resources
   * @return a CONSTRUCT query string
   */
  private String getSyncQueryStr(Iterable<String> uris) {
    /* Render the URIs as "(<u1>, <u2>, ...)" for use inside FILTER (?s in ...) */
    StringBuilder uriSetBuilder = new StringBuilder("(");
    boolean first = true;
    for (String uri : uris) {
      if (!first) {
        uriSetBuilder.append(", ");
      }
      uriSetBuilder.append(String.format("<%s>", uri));
      first = false;
    }
    String uriSet = uriSetBuilder.append(")").toString();

    /* Get base triplets having any element from uris as subject */
    StringBuilder query = new StringBuilder();
    query.append("CONSTRUCT { ?s ?p ?o } WHERE {");
    query.append("{?s ?p ?o");
    query.append(String.format(" . FILTER (?s in %s )", uriSet));

    /* Perform uri label resolution only if desired */
    if (uriDescriptionList.isEmpty()) {
      return query.append("}}").toString();
    }

    /* Filter out properties having a label; each property gets its own numbered variable */
    String filterTemplate = " . OPTIONAL { ?o <%s> ?o%d } " + " . FILTER(!BOUND(?o%d))";
    int suffix = 1;
    for (String prop : uriDescriptionList) {
      query.append(String.format(filterTemplate, prop, suffix, suffix));
      suffix++;
    }
    query.append("}");

    /* We need this redundant clause as UNION queries can't handle sub-selects
     * without a prior clause.
     */
    String redundantClause =
        "<http://www.w3.org/2000/01/rdf-schema#Class> "
            + "a <http://www.w3.org/2000/01/rdf-schema#Class>";

    /* Resolve ?o as str(?label) for the resource ?res, where the label is taken
     * as being ?res <prop> ?label.
     *
     * str(?label) drops the language tag of the term, so that the document is
     * indexed with a language present only in its top-level properties.
     *
     * Some Virtuoso versions do not allow BIND, so a sub-select is used to bind
     * ?o to str(?label). The sub-select works only with a prior clause, hence
     * the always-true redundant clause in front of it.
     */
    String partQueryTemplate =
        " UNION "
            + "{ "
            + redundantClause
            + " . "
            + "{ SELECT ?s ?p (str(?label) as ?o) { "
            + "   ?s ?p ?res"
            + "   . FILTER (?s in %s)"
            + "   . ?res <%s> ?label }}}";

    /* Add labels for filtered out properties, one UNION branch per property */
    for (String prop : uriDescriptionList) {
      query.append(String.format(partQueryTemplate, uriSet, prop));
    }

    return query.append("}").toString();
  }
 /**
  * Sets the {@link Harvester}'s {@link #rdfPropList} parameter. An empty list is ignored and
  * leaves the current value in place; otherwise a copy of the list is stored.
  *
  * @param list - a list of properties names that are either required in the object description,
  *     or undesired, depending on its {@link #isWhitePropList}
  * @return the same {@link Harvester} with the {@link #rdfPropList} parameter set
  */
 public Harvester rdfPropList(List<String> list) {
   if (list.isEmpty()) {
     return this;
   }
   rdfPropList = new ArrayList<String>(list);
   return this;
 }