/**
   * Index all the resources in a Jena Model to ES
   *
   * @param model the model to index
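   * @param bulkRequest the bulk request the index actions are added to; requests are sent in
   *     batches of {@code EEASettings.DEFAULT_BULK_SIZE}, with a fresh builder obtained from the
   *     client after each batch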
   * @param getPropLabel if set to true all URI property values will be indexed as their label. The
   *     label is taken as the value of one of the properties set in {@link #uriDescriptionList}.
   */
  private void addModelToES(Model model, BulkRequestBuilder bulkRequest, boolean getPropLabel) {
    long startTime = System.currentTimeMillis();
    long bulkLength = 0;

    // First pass: collect the predicates whose values are to be indexed.
    HashSet<Property> properties = new HashSet<Property>();
    StmtIterator it = model.listStatements();
    try {
      while (it.hasNext()) {
        Property prop = it.nextStatement().getPredicate();
        if (properties.contains(prop)) {
          // Already accepted; the filter below would give the same answer again.
          continue;
        }

        String property = prop.toString();
        boolean listed = rdfPropList.contains(property);

        // A predicate is kept when no list is configured, when the list is a white list that
        // names it, when the list is a black list that does not name it, or when it has a
        // normalization entry.
        if (rdfPropList.isEmpty()
            || (isWhitePropList && listed)
            || (!isWhitePropList && !listed)
            || normalizeProp.containsKey(property)) {
          properties.add(prop);
        }
      }
    } finally {
      // Jena iterators only close themselves when exhausted; release it on early exit too.
      it.close();
    }

    // Second pass: one index request per subject, sent in batches.
    ResIterator resIt = model.listSubjects();
    try {
      while (resIt.hasNext()) {
        Resource rs = resIt.nextResource();
        Map<String, ArrayList<String>> jsonMap = getJsonMap(rs, properties, model, getPropLabel);

        bulkRequest.add(
            client.prepareIndex(indexName, typeName, rs.toString()).setSource(mapToString(jsonMap)));
        bulkLength++;

        // We want to execute the bulk for every DEFAULT_BULK_SIZE requests
        if (bulkLength % EEASettings.DEFAULT_BULK_SIZE == 0) {
          BulkResponse bulkResponse = bulkRequest.execute().actionGet();
          // After executing, continue with a fresh builder. Only the local reference is
          // replaced; the caller's builder is the one that has just been executed.
          bulkRequest = client.prepareBulk();

          if (bulkResponse.hasFailures()) {
            processBulkResponseFailure(bulkResponse);
          }
        }
      }
    } finally {
      // Covers failures thrown while building or executing a batch.
      resIt.close();
    }

    // Execute remaining requests
    if (bulkRequest.numberOfActions() > 0) {
      BulkResponse response = bulkRequest.execute().actionGet();
      // Handle failure by iterating through each bulk response item
      if (response.hasFailures()) {
        processBulkResponseFailure(response);
      }
    }

    // Show time taken to index the documents
    logger.info(
        "Indexed {} documents on {}/{} in {} seconds",
        bulkLength,
        indexName,
        typeName,
        (System.currentTimeMillis() - startTime) / 1000.0);
  }
Пример #2
0
  /**
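   * Partition the members of an iterator into two lists, according to whether they are named or
   * anonymous classes. A class is skipped if it is already in {@code named}, or if it has a
   * super-class other than itself that is neither anonymous nor the profile's Thing.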
   *
   * @param i An iterator to partition
   * @param named A list of named classes
   * @param anon A list of anonymous classes
   */
  protected static void partitionByNamed(
      Iterator<? extends OntClass> i, List<OntClass> named, List<OntClass> anon) {
    while (i.hasNext()) {
      final OntClass candidate = i.next();

      // Duplicate check: a class already present in the named list is not added again.
      boolean rejected = named.contains(candidate);

      // Subsumption check: every super-class must be Thing, anonymous, or the class itself.
      // The first super-class that is none of these disqualifies the candidate and ends the scan.
      final Resource top = candidate.getProfile().THING();
      final Iterator<OntClass> parents = candidate.listSuperClasses();
      while (!rejected && parents.hasNext()) {
        final OntClass parent = parents.next();
        final boolean isTop = top != null && parent.equals(top);
        rejected = !isTop && !parent.isAnon() && !parent.equals(candidate);
      }

      if (rejected) {
        continue;
      }

      // Survivors go into the list matching their kind.
      if (candidate.isAnon()) {
        anon.add(candidate);
      } else {
        named.add(candidate);
      }
    }
  }