/** * Sets the {@link Harvester}'s {@link #whiteMap} parameter. A whiteMap contains all the pairs * property - list of objects that are meant to be indexed. * * @param whiteMap - a new value for the parameter * @return the same {@link Harvester} with the {@link #whiteMap} parameter set */ @SuppressWarnings("unchecked") public Harvester rdfWhiteMap(Map<String, Object> whiteMap) { if (whiteMap != null && !whiteMap.isEmpty()) { this.whiteMap = new HashMap<String, Set<String>>(); for (Map.Entry<String, Object> entry : whiteMap.entrySet()) { this.whiteMap.put(entry.getKey(), new HashSet((List<String>) entry.getValue())); } } return this; }
/** * Get JSON map for a given resource by applying the river settings * * @param rs resource being processed * @param properties properties to be indexed * @param model model returned by the indexing query * @param getPropLabel if set to true all URI property values will be indexed as their label. The * label is taken as the value of one of the properties set in {@link #uriDescriptionList}. * @return map of properties to be indexed for res */ private Map<String, ArrayList<String>> getJsonMap( Resource rs, Set<Property> properties, Model model, boolean getPropLabel) { Map<String, ArrayList<String>> jsonMap = new HashMap<String, ArrayList<String>>(); ArrayList<String> results = new ArrayList<String>(); if (addUriForResource) { results.add("\"" + rs.toString() + "\""); jsonMap.put("http://www.w3.org/1999/02/22-rdf-syntax-ns#about", results); } Set<String> rdfLanguages = new HashSet<String>(); for (Property prop : properties) { NodeIterator niter = model.listObjectsOfProperty(rs, prop); String property = prop.toString(); results = new ArrayList<String>(); String lang; String currValue; while (niter.hasNext()) { RDFNode node = niter.next(); currValue = getStringForResult(node, getPropLabel); if (addLanguage) { if (node.isLiteral()) { lang = node.asLiteral().getLanguage(); if (!lang.isEmpty()) { rdfLanguages.add("\"" + lang + "\""); } } } String shortValue = currValue; int currLen = currValue.length(); // Unquote string if (currLen > 1) shortValue = currValue.substring(1, currLen - 1); // If either whiteMap does contains shortValue // or blackMap contains the value // skip adding it to the index boolean whiteMapCond = whiteMap.containsKey(property) && !whiteMap.get(property).contains(shortValue); boolean blackMapCond = blackMap.containsKey(property) && blackMap.get(property).contains(shortValue); if (whiteMapCond || blackMapCond) { continue; } if (normalizeObj.containsKey(shortValue)) { results.add("\"" + normalizeObj.get(shortValue) + "\""); } else { results.add(currValue); } } // Do not index empty properties if (results.isEmpty()) continue; if (normalizeProp.containsKey(property)) { property = normalizeProp.get(property); if (jsonMap.containsKey(property)) { jsonMap.get(property).addAll(results); } else { jsonMap.put(property, results); } } else { jsonMap.put(property, results); } } if (addLanguage) { if (rdfLanguages.isEmpty() && !language.isEmpty()) rdfLanguages.add(language); if (!rdfLanguages.isEmpty()) jsonMap.put("language", new ArrayList<String>(rdfLanguages)); } for (Map.Entry<String, String> it : normalizeMissing.entrySet()) { if (!jsonMap.containsKey(it.getKey())) { ArrayList<String> res = new ArrayList<String>(); res.add("\"" + it.getValue() + "\""); jsonMap.put(it.getKey(), res); } } return jsonMap; }