public static List<Element> handlePropertyName(String[] propertyNames,
                                               ServiceContext context, boolean freq, int maxRecords,
                                               String cswServiceSpecificConstraint,
                                               LuceneConfig luceneConfig) throws Exception {

    List<Element> domainValuesList = null;

    if (Log.isDebugEnabled(Geonet.CSW)) {
        Log.debug(Geonet.CSW, "Handling property names '" + Arrays.toString(propertyNames)
                + "' with max records of " + maxRecords);
    }

    for (int i = 0; i < propertyNames.length; i++) {

        if (i == 0) {
            domainValuesList = new ArrayList<Element>();
        }

        // Initialize list of values element.
        Element listOfValues = null;

        // Generate DomainValues element
        Element domainValues = new Element("DomainValues", Csw.NAMESPACE_CSW);

        // FIXME what should be the type ?
        domainValues.setAttribute("type", "csw:Record");

        String property = propertyNames[i].trim();

        // Set propertyName in any case.
        Element pn = new Element("PropertyName", Csw.NAMESPACE_CSW);
        domainValues.addContent(pn.setText(property));

        GeonetContext gc = (GeonetContext) context.getHandlerContext(Geonet.CONTEXT_NAME);
        SearchManager sm = gc.getSearchmanager();
        IndexAndTaxonomy indexAndTaxonomy = sm.getNewIndexReader(null);

        try {
            GeonetworkMultiReader reader = indexAndTaxonomy.indexReader;
            BooleanQuery groupsQuery = (BooleanQuery) CatalogSearcher.getGroupsQuery(context);
            BooleanQuery query = null;

            // Apply CSW service specific constraint
            if (StringUtils.isNotEmpty(cswServiceSpecificConstraint)) {
                Query constraintQuery = CatalogSearcher.getCswServiceSpecificConstraintQuery(
                        cswServiceSpecificConstraint, luceneConfig);

                query = new BooleanQuery();

                BooleanClause.Occur occur = LuceneUtils.convertRequiredAndProhibitedToOccur(true, false);
                query.add(groupsQuery, occur);
                query.add(constraintQuery, occur);
            } else {
                query = groupsQuery;
            }

            List<Pair<String, Boolean>> sortFields =
                    Collections.singletonList(Pair.read(Geonet.SearchResult.SortBy.RELEVANCE, true));
            Sort sort = LuceneSearcher.makeSort(sortFields, context.getLanguage(), false);
            CachingWrapperFilter filter = null;

            Pair<TopDocs, Element> searchResults = LuceneSearcher.doSearchAndMakeSummary(
                    maxRecords, 0, maxRecords, context.getLanguage(),
                    null, reader, query, filter, sort, null,
                    false, false, false, false // Scoring is useless for GetDomain operation
            );
            TopDocs hits = searchResults.one();

            try {
                // Get the Lucene field mapped to this property in the CSW configuration
                String indexField = CatalogConfiguration.getFieldMapping().get(property.toLowerCase());
                if (indexField != null) {
                    property = indexField;
                }

                // Skip the property if it is not present in the index
                FieldInfos fi = new SlowCompositeReaderWrapper(reader).getFieldInfos();
                if (fi.fieldInfo(property) == null) {
                    continue;
                }

                boolean isRange = CatalogConfiguration.getGetRecordsRangeFields().contains(property);

                if (isRange) {
                    listOfValues = new Element("RangeOfValues", Csw.NAMESPACE_CSW);
                } else {
                    listOfValues = new Element("ListOfValues", Csw.NAMESPACE_CSW);
                }

                Set<String> fields = new HashSet<String>();
                fields.add(property);
                fields.add("_isTemplate");

                // Parse each matching document in the index
                String[] fieldValues;
                SortedSet<String> sortedValues = new TreeSet<String>();
                HashMap<String, Integer> duplicateValues = new HashMap<String, Integer>();

                for (int j = 0; j < hits.scoreDocs.length; j++) {
                    DocumentStoredFieldVisitor selector = new DocumentStoredFieldVisitor(fields);
                    reader.document(hits.scoreDocs[j].doc, selector);
                    Document doc = selector.getDocument();

                    // Skip templates and subTemplates
                    String[] isTemplate = doc.getValues("_isTemplate");
                    if (isTemplate.length > 0 && isTemplate[0] != null && !isTemplate[0].equals("n")) {
                        continue;
                    }

                    // Get doc values for specified property
                    fieldValues = doc.getValues(property);
                    if (fieldValues == null || fieldValues.length == 0) {
                        continue;
                    }

                    addtoSortedSet(sortedValues, fieldValues, duplicateValues);
                }

                SummaryComparator valuesComparator =
                        new SummaryComparator(SortOption.FREQUENCY, Type.STRING, context.getLanguage(), null);
                TreeSet<Map.Entry<String, Integer>> sortedValuesFrequency =
                        new TreeSet<Map.Entry<String, Integer>>(valuesComparator);
                sortedValuesFrequency.addAll(duplicateValues.entrySet());

                if (freq) {
                    return createValuesByFrequency(sortedValuesFrequency);
                } else {
                    listOfValues.addContent(createValuesElement(sortedValues, isRange));
                }
            } finally {
                // Omitting the children means that the catalog was unable to
                // determine anything about the specified parameter, so only
                // add the element when values were collected.
                if (listOfValues != null && listOfValues.getChildren().size() != 0) {
                    domainValues.addContent(listOfValues);
                }

                // Add current DomainValues to the list
                domainValuesList.add(domainValues);
            }
        } finally {
            sm.releaseIndexReader(indexAndTaxonomy);
        }
    }
    return domainValuesList;
}
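A minimal sketch of how this method might be invoked when answering a CSW GetDomain request. The surrounding service class, the serviceContext and luceneConfig variables, and the "title,abstract" parameter list are assumptions used only to illustrate the call; in the real handler the property names would come from the ParameterName/PropertyName element of the incoming request.

// Hypothetical caller, for illustration only.
String[] propertyNames = "title,abstract".split(",");

List<Element> domainValues = handlePropertyName(
        propertyNames,
        serviceContext,   // assumed ServiceContext of the current request
        false,            // freq: return plain ListOfValues, not frequency counts
        1000,             // maxRecords scanned to collect distinct values
        null,             // no CSW service specific constraint
        luceneConfig);    // assumed LuceneConfig from the application context

// Each returned element is a csw:DomainValues block ready to be attached
// to the GetDomainResponse.
Element response = new Element("GetDomainResponse", Csw.NAMESPACE_CSW);
for (Element dv : domainValues) {
    response.addContent(dv);
}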
/**
 * Check for metadata in the catalog having the same resource identifier as the harvested record.
 *
 * <p>One dataset (same MD_Metadata/../identificationInfo/../identifier/../code), e.g. an NMA
 * layer for roads, may be described in two or more catalogs with different metadata UUIDs. The
 * metadata may be slightly different depending on the author, but the resource is the same. When
 * harvesting, some users would like the capability to exclude "duplicate" descriptions of the
 * same dataset.
 *
 * <p>The check searches the identifier field in the index using {@link
 * LuceneSearcher#getAllMetadataFromIndexFor(String, String, String, Set, boolean)}.
 *
 * @param uuid the metadata unique identifier
 * @param response the XML document to check
 * @return true if a record with the same resource identifier is found, false otherwise.
 */
private boolean foundDuplicateForResource(String uuid, Element response) {
    String schema = dataMan.autodetectSchema(response);

    if (schema.startsWith("iso19139")) {
        String resourceIdentifierXPath =
                "gmd:identificationInfo/*/gmd:citation/gmd:CI_Citation/gmd:identifier/*/gmd:code/gco:CharacterString";
        String resourceIdentifierLuceneIndexField = "identifier";
        String defaultLanguage = "eng";

        try {
            // Extract resource identifier
            XPath xp = XPath.newInstance(resourceIdentifierXPath);
            xp.addNamespace("gmd", "http://www.isotc211.org/2005/gmd");
            xp.addNamespace("gco", "http://www.isotc211.org/2005/gco");

            @SuppressWarnings("unchecked")
            List<Element> resourceIdentifiers = xp.selectNodes(response);

            if (resourceIdentifiers.size() > 0) {
                // Check if the metadata to import has a resource identifier
                // existing in the current catalog for a record with a different UUID
                log.debug(" - Resource identifiers found : " + resourceIdentifiers.size());

                for (Element identifierNode : resourceIdentifiers) {
                    String identifier = identifierNode.getTextTrim();
                    log.debug(" - Searching for duplicates for resource identifier: " + identifier);

                    Map<String, Map<String, String>> values =
                            LuceneSearcher.getAllMetadataFromIndexFor(defaultLanguage,
                                    resourceIdentifierLuceneIndexField, identifier,
                                    Collections.singleton("_uuid"), true);
                    log.debug(" - Number of resources with same identifier: " + values.size());

                    for (Map<String, String> recordFieldValues : values.values()) {
                        String indexRecordUuid = recordFieldValues.get("_uuid");
                        if (!indexRecordUuid.equals(uuid)) {
                            log.debug(" - UUID " + indexRecordUuid
                                    + " in index does not match harvested record UUID " + uuid);
                            log.warning(" - Duplicates found. Skipping record with UUID " + uuid
                                    + " and resource identifier " + identifier);
                            result.duplicatedResource++;
                            return true;
                        }
                    }
                }
            }
        } catch (Exception e) {
            log.warning(" - Error when searching for resource duplicate " + uuid
                    + ". Error is: " + e.getMessage());
            e.printStackTrace();
        }
    }
    return false;
}
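A hedged sketch of where this check could sit in a harvester's alignment loop. The names checkForDuplicateResource, remoteUuid, remoteMetadata, and addMetadata are hypothetical and stand in for whatever option flag, identifiers, and insert helper the surrounding aligner class actually uses; only the control flow is illustrated.

// Hypothetical alignment step, for illustration only: skip the harvested
// record when the duplicate-resource check is enabled and another catalog
// entry already describes the same resource.
if (checkForDuplicateResource && foundDuplicateForResource(remoteUuid, remoteMetadata)) {
    // result.duplicatedResource was already incremented inside
    // foundDuplicateForResource; just move on to the next record.
    return;
}

// Otherwise insert (or update) the record as usual.
addMetadata(remoteUuid, remoteMetadata);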