/**
   * Index all the resources in a Jena Model to ES
   *
   * @param model the model to index
   * @param bulkRequest a BulkRequestBuilder
   * @param getPropLabel if set to true all URI property values will be indexed as their label. The
   *     label is taken as the value of one of the properties set in {@link #uriDescriptionList}.
   */
  private void addModelToES(Model model, BulkRequestBuilder bulkRequest, boolean getPropLabel) {
    long startTime = System.currentTimeMillis();
    long bulkLength = 0;
    HashSet<Property> properties = new HashSet<Property>();

    StmtIterator it = model.listStatements();
    while (it.hasNext()) {
      Statement st = it.nextStatement();
      Property prop = st.getPredicate();
      String property = prop.toString();

      if (rdfPropList.isEmpty()
          || (isWhitePropList && rdfPropList.contains(property))
          || (!isWhitePropList && !rdfPropList.contains(property))
          || (normalizeProp.containsKey(property))) {
        properties.add(prop);
      }
    }

    ResIterator resIt = model.listSubjects();

    while (resIt.hasNext()) {
      Resource rs = resIt.nextResource();
      Map<String, ArrayList<String>> jsonMap = getJsonMap(rs, properties, model, getPropLabel);

      bulkRequest.add(
          client.prepareIndex(indexName, typeName, rs.toString()).setSource(mapToString(jsonMap)));
      bulkLength++;

      // We want to execute the bulk for every  DEFAULT_BULK_SIZE requests
      if (bulkLength % EEASettings.DEFAULT_BULK_SIZE == 0) {
        BulkResponse bulkResponse = bulkRequest.execute().actionGet();
        // After executing, flush the BulkRequestBuilder.
        bulkRequest = client.prepareBulk();

        if (bulkResponse.hasFailures()) {
          processBulkResponseFailure(bulkResponse);
        }
      }
    }

    // Execute remaining requests
    if (bulkRequest.numberOfActions() > 0) {
      BulkResponse response = bulkRequest.execute().actionGet();
      // Handle failure by iterating through each bulk response item
      if (response.hasFailures()) {
        processBulkResponseFailure(response);
      }
    }

    // Show time taken to index the documents
    logger.info(
        "Indexed {} documents on {}/{} in {} seconds",
        bulkLength,
        indexName,
        typeName,
        (System.currentTimeMillis() - startTime) / 1000.0);
  }
예제 #2
0
  private static ResultSetRewindable unique(ResultSetRewindable results) {
    // VERY crude.  Utilises the fact that bindings have value equality.
    List<Binding> x = new ArrayList<Binding>();
    Set<Binding> seen = new HashSet<Binding>();

    for (; results.hasNext(); ) {
      Binding b = results.nextBinding();
      if (seen.contains(b)) continue;
      seen.add(b);
      x.add(b);
    }
    QueryIterator qIter = new QueryIterPlainWrapper(x.iterator());
    ResultSet rs =
        new ResultSetStream(results.getResultVars(), ModelFactory.createDefaultModel(), qIter);
    return ResultSetFactory.makeRewindable(rs);
  }
예제 #3
0
 private void LoadQueriesBtnMouseClicked(
     java.awt.event.MouseEvent evt) { // GEN-FIRST:event_LoadQueriesBtnMouseClicked
   // Algorithm
   /*
    * Goto queries' directory and load all queryfile.query to Combobox
    * Change local directory to repository diretory
    * List all file there
    * Try to load all file and add to combobox
    *
    *
    */
   String sQueryRepositoryPath = "/home/natuan/Documents/OntologyMysql/QueriesRepository/";
   File dir = new File(sQueryRepositoryPath);
   String[] children = dir.list();
   if (children == null) {
     // Either dir does not exist or is not a directory
   } else {
     System.out.println("list files");
     for (int i = 0; i < children.length; i++) {
       // Get filename of file or directory
       String filename = children[i];
       String sContent = ReadWholeFileToString(sQueryRepositoryPath + filename);
       queryList.add(sContent);
       QueriesCmb.addItem(filename);
       SparqlTxtArea.setText(sContent);
       //   System.out.println(filename);
     }
     QueriesCmb.setSelectedIndex(children.length - 1);
   }
 } // GEN-LAST:event_LoadQueriesBtnMouseClicked
  /** Starts the harvester for queries and/or URLs */
  public boolean runIndexAll() {
    logger.info(
        "Starting RDF harvester: endpoint [{}], queries [{}],"
            + "URIs [{}], index name [{}], typeName [{}]",
        rdfEndpoint,
        rdfQueries,
        rdfUris,
        indexName,
        typeName);

    while (true) {
      if (this.closed) {
        logger.info(
            "Ended harvest for endpoint [{}], queries [{}],"
                + "URIs [{}], index name {}, type name {}",
            rdfEndpoint,
            rdfQueries,
            rdfUris,
            indexName,
            typeName);
        return true;
      }

      /** Harvest from a SPARQL endpoint */
      if (!rdfQueries.isEmpty()) {
        harvestFromEndpoint();
      }

      /** Harvest from RDF dumps */
      harvestFromDumps();

      closed = true;
    }
  }
예제 #5
0
  private ResultSetRewindable convertToStrings(ResultSetRewindable resultsActual) {
    List<Binding> bindings = new ArrayList<Binding>();
    while (resultsActual.hasNext()) {
      Binding b = resultsActual.nextBinding();
      BindingMap b2 = BindingFactory.create();

      for (String vn : resultsActual.getResultVars()) {
        Var v = Var.alloc(vn);
        Node n = b.get(v);
        String s;
        if (n == null) s = "";
        else if (n.isBlank()) s = "_:" + n.getBlankNodeLabel();
        else s = NodeFunctions.str(n);
        b2.add(v, NodeFactory.createLiteral(s));
      }
      bindings.add(b2);
    }
    ResultSet rs =
        new ResultSetStream(
            resultsActual.getResultVars(), null, new QueryIterPlainWrapper(bindings.iterator()));
    return ResultSetFactory.makeRewindable(rs);
  }
예제 #6
0
 private void QueriesCmbItemStateChanged(
     java.awt.event.ItemEvent evt) { // GEN-FIRST:event_QueriesCmbItemStateChanged
   // TODO add your handling code here:
   int iQueryNumber = QueriesCmb.getSelectedIndex();
   SparqlTxtArea.setText(queryList.get(iQueryNumber).toString());
 } // GEN-LAST:event_QueriesCmbItemStateChanged
  /**
   * Build a query returning all triples in which members of uris are the subjects of the triplets.
   *
   * <p>If toDescribeURIs is true the query will automatically add logic to retrieve the labels
   * directly from the SPARQL endpoint.
   *
   * @param uris URIs for queried resources
   * @return a CONSTRUCT query string
   */
  private String getSyncQueryStr(Iterable<String> uris) {
    StringBuilder uriSetStrBuilder = new StringBuilder();
    String delimiter = "";

    uriSetStrBuilder.append("(");
    for (String uri : uris) {
      uriSetStrBuilder.append(delimiter).append(String.format("<%s>", uri));
      delimiter = ", ";
    }
    uriSetStrBuilder.append(")");

    String uriSet = uriSetStrBuilder.toString();

    /* Get base triplets having any element from uris as subject */
    StringBuilder queryBuilder = new StringBuilder();
    queryBuilder
        .append("CONSTRUCT { ?s ?p ?o } WHERE {")
        .append("{?s ?p ?o")
        .append(String.format(" . FILTER (?s in %s )", uriSet));

    /* Perform uri label resolution only if desired */
    if (uriDescriptionList.isEmpty()) {
      queryBuilder.append("}}");
      return queryBuilder.toString();
    }

    /* Filter out properties having a label */
    int index = 0;
    for (String prop : uriDescriptionList) {
      index++;
      String filterTemplate = " . OPTIONAL { ?o <%s> ?o%d } " + " . FILTER(!BOUND(?o%d))";
      queryBuilder.append(String.format(filterTemplate, prop, index, index));
    }
    queryBuilder.append("}");

    /* We need this redundant clause as UNION queries can't handle sub-selects
     * without a prior clause.
     */
    String redundantClause =
        "<http://www.w3.org/2000/01/rdf-schema#Class> "
            + "a <http://www.w3.org/2000/01/rdf-schema#Class>";

    /* Add labels for filtered out properties */
    for (String prop : uriDescriptionList) {
      /* Resolve ?o as str(?label) for the resource ?res
       * label is taken as being ?res <prop> ?label
       *
       * We need to take str(?label) in order to drop
       * language references of the terms so that the document
       * is indexed with a language present only in it's top-level
       * properties.
       *
       * As some Virtuoso versions do not allow the usage
       * of BIND so we have to create a sub-select in order to bind
       * ?o to str(?label)
       *
       * The sub-select works only with a prior clause.
       * We are using a redundant clause that is always true
       */
      String partQueryTemplate =
          " UNION "
              + "{ "
              + redundantClause
              + " . "
              + "{ SELECT ?s ?p (str(?label) as ?o) { "
              + "   ?s ?p ?res"
              + "   . FILTER (?s in %s)"
              + "   . ?res <%s> ?label }}}";
      queryBuilder.append(String.format(partQueryTemplate, uriSet, prop));
    }

    queryBuilder.append("}");
    return queryBuilder.toString();
  }
 /**
  * Sets the {@link Harvester}'s {@link #rdfPropList} parameter
  *
  * @param list - a list of properties names that are either required in the object description, or
  *     undesired, depending on its {@link #isWhitePropList}
  * @return the same {@link Harvester} with the {@link #rdfPropList} parameter set
  */
 public Harvester rdfPropList(List<String> list) {
   if (!list.isEmpty()) {
     rdfPropList = new ArrayList<String>(list);
   }
   return this;
 }