Beispiel #1
0
 /**
  * Stores {@code lca} in the index under the ordered pair ({@code u}, {@code v}).
  *
  * <p>The index is a two-level map: the outer key is {@code u}, the inner key is {@code v}. The
  * inner map for {@code u} is created the first time {@code u} is seen.
  *
  * @param u outer key of the index entry
  * @param v inner key of the index entry
  * @param lca value to record for the pair
  */
 public void setLCA(Resource u, Resource v, Resource lca) {
   Map<Resource, Resource> row = m_lcaIndex.get(u);
   if (row != null) {
     row.put(v, lca);
     return;
   }

   // First entry for u: build its row and register it in the index.
   Map<Resource, Resource> freshRow = new HashMap<Resource, Resource>();
   freshRow.put(v, lca);
   m_lcaIndex.put(u, freshRow);
 }
 /**
  * Sets the {@link Harvester}'s {@link #whiteMap} parameter. A whiteMap contains all the pairs
  * property - list of objects that are meant to be indexed.
  *
  * <p>A {@code null} or empty argument leaves the current {@link #whiteMap} untouched. Otherwise
  * the current map is replaced by a fresh one in which the list of objects of every property is
  * copied into a {@link Set}.
  *
  * @param whiteMap - a new value for the parameter; every value is cast to {@code List<String>}
  * @return the same {@link Harvester} with the {@link #whiteMap} parameter set
  */
 @SuppressWarnings("unchecked")
 public Harvester rdfWhiteMap(Map<String, Object> whiteMap) {
   if (whiteMap != null && !whiteMap.isEmpty()) {
     this.whiteMap = new HashMap<String, Set<String>>();
     for (Map.Entry<String, Object> entry : whiteMap.entrySet()) {
       // Unchecked cast: the incoming map only exposes its values as Object.
       List<String> allowedObjects = (List<String>) entry.getValue();
       // Parameterized HashSet instead of the raw type so the element type is checked.
       this.whiteMap.put(entry.getKey(), new HashSet<String>(allowedObjects));
     }
   }
   return this;
 }
Beispiel #3
0
    /**
     * Looks up the value recorded for the pair ({@code u}, {@code v}), trying both orderings.
     *
     * <p>The entry stored under ({@code u}, {@code v}) wins; only when it is absent is the entry
     * under ({@code v}, {@code u}) consulted.
     *
     * @param u first resource of the pair
     * @param v second resource of the pair
     * @return the recorded value, or {@code null} when neither ordering has an entry
     */
    public Resource getLCA(Resource u, Resource v) {
      Map<Resource, Resource> forward = m_lcaIndex.get(u);
      if (forward != null) {
        Resource hit = forward.get(v);
        if (hit != null) {
          return hit;
        }
      }

      // Nothing under (u, v): fall back to the reversed pair.
      Map<Resource, Resource> reverse = m_lcaIndex.get(v);
      if (reverse == null) {
        return null;
      }
      return reverse.get(u);
    }
Beispiel #4
0
    /**
     * Returns the {@link DisjointSet} registered for {@code r}, creating and registering a new
     * one when the index has none yet.
     *
     * @param r resource whose set is requested
     * @return the existing set for {@code r}, or the newly created one
     */
    public DisjointSet getSet(Resource r) {
      DisjointSet known = m_setIndex.get(r);
      if (known != null) {
        return known;
      }

      // No set registered for r yet: create one and remember it for later lookups.
      DisjointSet created = new DisjointSet(r);
      m_setIndex.put(r, created);
      return created;
    }
  /**
   * Indexes every subject of a Jena Model into ES, one document per resource.
   *
   * <p>Works in two passes. The first pass collects the predicates accepted by the property
   * filter. The second pass converts each subject into a JSON map and queues it on the bulk
   * request, which is executed every {@code EEASettings.DEFAULT_BULK_SIZE} documents and once
   * more at the end for whatever is left.
   *
   * @param model the model to index
   * @param bulkRequest a BulkRequestBuilder that receives the first batch of index requests;
   *     later batches use builders obtained from {@code client.prepareBulk()}
   * @param getPropLabel if set to true all URI property values will be indexed as their label. The
   *     label is taken as the value of one of the properties set in {@link #uriDescriptionList}.
   */
  private void addModelToES(Model model, BulkRequestBuilder bulkRequest, boolean getPropLabel) {
    final long startedAt = System.currentTimeMillis();
    long indexedCount = 0;

    // Pass 1: gather the predicates that should end up in the documents.
    Set<Property> indexable = new HashSet<Property>();
    for (StmtIterator statements = model.listStatements(); statements.hasNext(); ) {
      Property predicate = statements.nextStatement().getPredicate();
      String predicateName = predicate.toString();

      // Accepted when there is no list at all, when list membership agrees with the list's
      // white/black role, or when the predicate is going to be renamed anyway.
      boolean accepted =
          rdfPropList.isEmpty()
              || (isWhitePropList == rdfPropList.contains(predicateName))
              || normalizeProp.containsKey(predicateName);
      if (accepted) {
        indexable.add(predicate);
      }
    }

    // Pass 2: one index request per subject, flushed in batches.
    for (ResIterator subjects = model.listSubjects(); subjects.hasNext(); ) {
      Resource subject = subjects.nextResource();
      Map<String, ArrayList<String>> document =
          getJsonMap(subject, indexable, model, getPropLabel);

      bulkRequest.add(
          client
              .prepareIndex(indexName, typeName, subject.toString())
              .setSource(mapToString(document)));
      indexedCount++;

      if (indexedCount % EEASettings.DEFAULT_BULK_SIZE != 0) {
        continue;
      }

      // A full batch has been queued: send it and start over with an empty builder.
      BulkResponse batchResponse = bulkRequest.execute().actionGet();
      bulkRequest = client.prepareBulk();
      if (batchResponse.hasFailures()) {
        processBulkResponseFailure(batchResponse);
      }
    }

    // Send the requests that did not fill a whole batch.
    if (bulkRequest.numberOfActions() > 0) {
      BulkResponse tailResponse = bulkRequest.execute().actionGet();
      if (tailResponse.hasFailures()) {
        processBulkResponseFailure(tailResponse);
      }
    }

    double elapsedSeconds = (System.currentTimeMillis() - startedAt) / 1000.0;
    logger.info(
        "Indexed {} documents on {}/{} in {} seconds",
        indexedCount,
        indexName,
        typeName,
        elapsedSeconds);
  }
  /**
   * Get JSON map for a given resource by applying the river settings.
   *
   * <p>For every property in {@code properties} the objects of {@code rs} are read from the
   * model, filtered through {@code whiteMap} / {@code blackMap}, replaced through {@code
   * normalizeObj} and stored under the property name or, when {@code normalizeProp} has an entry
   * for it, under the replacement name. Values that end up under the same key are always grouped
   * in one list, regardless of the order in which the properties are visited. Properties listed
   * in {@code normalizeMissing} that are still absent at the end are added with their default
   * value.
   *
   * @param rs resource being processed
   * @param properties properties to be indexed
   * @param model model returned by the indexing query
   * @param getPropLabel if set to true all URI property values will be indexed as their label. The
   *     label is taken as the value of one of the properties set in {@link #uriDescriptionList}.
   * @return map of properties to be indexed for rs
   */
  private Map<String, ArrayList<String>> getJsonMap(
      Resource rs, Set<Property> properties, Model model, boolean getPropLabel) {
    Map<String, ArrayList<String>> jsonMap = new HashMap<String, ArrayList<String>>();
    ArrayList<String> results = new ArrayList<String>();

    if (addUriForResource) {
      results.add("\"" + rs.toString() + "\"");
      jsonMap.put("http://www.w3.org/1999/02/22-rdf-syntax-ns#about", results);
    }

    Set<String> rdfLanguages = new HashSet<String>();

    for (Property prop : properties) {
      NodeIterator niter = model.listObjectsOfProperty(rs, prop);
      String property = prop.toString();
      results = new ArrayList<String>();

      while (niter.hasNext()) {
        RDFNode node = niter.next();
        String currValue = getStringForResult(node, getPropLabel);

        // Remember the language tag of every literal so it can be indexed with the document.
        if (addLanguage && node.isLiteral()) {
          String lang = node.asLiteral().getLanguage();
          if (!lang.isEmpty()) {
            rdfLanguages.add("\"" + lang + "\"");
          }
        }

        // Drop the first and last character (the surrounding quotes) to get the bare value
        // that the white/black maps and normalizeObj are keyed on.
        String shortValue = currValue;
        int currLen = currValue.length();
        if (currLen > 1) {
          shortValue = currValue.substring(1, currLen - 1);
        }

        // Skip the value when the property has a whiteMap entry that does not contain it,
        // or a blackMap entry that does contain it.
        boolean whiteMapCond =
            whiteMap.containsKey(property) && !whiteMap.get(property).contains(shortValue);
        boolean blackMapCond =
            blackMap.containsKey(property) && blackMap.get(property).contains(shortValue);
        if (whiteMapCond || blackMapCond) {
          continue;
        }

        if (normalizeObj.containsKey(shortValue)) {
          results.add("\"" + normalizeObj.get(shortValue) + "\"");
        } else {
          results.add(currValue);
        }
      }

      // Do not index empty properties
      if (results.isEmpty()) {
        continue;
      }

      // Store under the replacement name when one is configured, and always merge into an
      // entry that already exists. The properties come from a set with no defined iteration
      // order, so a property renamed to X may be visited before X itself; overwriting here
      // would silently drop the values stored first.
      String targetKey =
          normalizeProp.containsKey(property) ? normalizeProp.get(property) : property;
      ArrayList<String> existing = jsonMap.get(targetKey);
      if (existing == null) {
        jsonMap.put(targetKey, results);
      } else {
        existing.addAll(results);
      }
    }

    if (addLanguage) {
      // Fall back to the configured default language when no literal carried a language tag.
      if (rdfLanguages.isEmpty() && !language.isEmpty()) {
        rdfLanguages.add(language);
      }
      if (!rdfLanguages.isEmpty()) {
        jsonMap.put("language", new ArrayList<String>(rdfLanguages));
      }
    }

    // Fill in the configured default for every property the resource does not have.
    for (Map.Entry<String, String> it : normalizeMissing.entrySet()) {
      if (!jsonMap.containsKey(it.getKey())) {
        ArrayList<String> res = new ArrayList<String>();
        res.add("\"" + it.getValue() + "\"");
        jsonMap.put(it.getKey(), res);
      }
    }

    return jsonMap;
  }
 /**
  * Configures the {@link Harvester}'s {@link #normalizeMissing} parameter, a map of
  * property-value pairs. A property missing from a resource is indexed with the value given
  * here.
  *
  * <p>A {@code null} or empty map is ignored and the current setting is kept.
  *
  * @param normalizeMissing - new value for the parameter
  * @return the same {@link Harvester} with the {@link #normalizeMissing} parameter set
  */
 public Harvester rdfNormalizationMissing(Map<String, String> normalizeMissing) {
   if (normalizeMissing == null || normalizeMissing.isEmpty()) {
     // Nothing to apply: keep whatever is currently configured.
     return this;
   }
   this.normalizeMissing = normalizeMissing;
   return this;
 }
 /**
  * Configures the {@link Harvester}'s {@link #normalizeObj} parameter, a map of
  * object-replacement pairs. An object is replaced by its mapped value regardless of the
  * property it is the value of.
  *
  * <p>A {@code null} or empty map is ignored and the current setting is kept.
  *
  * @param normalizeObj - new value for the parameter
  * @return the same {@link Harvester} with the {@link #normalizeObj} parameter set
  */
 public Harvester rdfNormalizationObj(Map<String, String> normalizeObj) {
   if (normalizeObj == null || normalizeObj.isEmpty()) {
     // Nothing to apply: keep whatever is currently configured.
     return this;
   }
   this.normalizeObj = normalizeObj;
   return this;
 }
 /**
  * Configures the {@link Harvester}'s {@link #normalizeProp} parameter, a map of
  * property-replacement pairs. A property is indexed under its replacement name, and when a
  * resource has both properties their values are grouped in a list.
  *
  * <p>A {@code null} or empty map is ignored and the current setting is kept.
  *
  * @param normalizeProp - new value for the parameter
  * @return the same {@link Harvester} with the {@link #normalizeProp} parameter set
  */
 public Harvester rdfNormalizationProp(Map<String, String> normalizeProp) {
   if (normalizeProp == null || normalizeProp.isEmpty()) {
     // Nothing to apply: keep whatever is currently configured.
     return this;
   }
   this.normalizeProp = normalizeProp;
   return this;
 }