/**
 * Builds a result set for an incoming Z39.50 query, backed by a local Lucene
 * {@code MetaSearcher} obtained from the {@link SearchManager}.
 *
 * @param query the parsed Z39.50/XML query this result set will serve
 * @param userInfo opaque user information (unused here, kept for the caller's contract)
 * @param observers observers forwarded to the superclass
 * @param srvctx service context used to reach the GeoNetwork handler context
 * @throws Exception if the searcher cannot be created
 */
public GNResultSet(GNXMLQuery query, Object userInfo, Observer[] observers, ServiceContext srvctx)
    throws Exception {
  super(observers);
  this.query = query;
  // NOTE: field name "srvxtx" is a pre-existing typo kept for compatibility
  // with the rest of the class.
  this.srvxtx = srvctx;
  try {
    GeonetContext gc = (GeonetContext) this.srvxtx.getHandlerContext(Geonet.CONTEXT_NAME);
    SearchManager searchMan = gc.getBean(SearchManager.class);
    metasearcher = searchMan.newSearcher(SearchManager.LUCENE, Geonet.File.SEARCH_Z3950_SERVER);
  } catch (Exception e) {
    // Previously this swallowed the exception (debug log + printStackTrace),
    // leaving `metasearcher` null and deferring the failure to a later NPE.
    // Log at error level and rethrow: the constructor already declares
    // `throws Exception`, so callers are prepared for this.
    Log.error(Geonet.Z3950_SERVER, "error constructing GNresult set: " + e.getMessage());
    throw e;
  }
}
/**
 * Runs one full Z39.50 harvest cycle.
 *
 * <p>Uses a remove-then-add strategy: all metadata previously harvested for
 * {@code params.uuid} is deleted, then the configured remote repositories are
 * searched and every retrieved record is transformed (optionally), inserted,
 * categorized, privileged, optionally validated, and indexed.
 *
 * @return per-repository counters (removed/added/failed/etc.) for this run
 * @throws Exception if the remote search returns no hits twice in a row, or on
 *     unrecoverable search/DB errors
 */
public Z3950ServerResults harvest() throws Exception {
  // uuids inserted during THIS run, to detect duplicates within the result set itself
  Set<String> newUuids = new HashSet<String>();
  // page size for fetching remote results
  int groupSize = 10;
  log.info("Retrieving remote metadata information:" + params.uuid);
  Z3950ServerResults serverResults = new Z3950ServerResults();
  // --- Clean all before harvest : Remove/Add mechanism
  localUuids = new UUIDMapper(dbms, params.uuid);
  // --- remove old metadata
  for (String uuid : localUuids.getUUIDs()) {
    String id = localUuids.getID(uuid);
    if (log.isDebugEnabled()) log.debug(" - Removing old metadata before update with id: " + id);
    dataMan.deleteMetadataGroup(context, dbms, id);
    serverResults.locallyRemoved++;
  }
  // commit the deletions before the (potentially long) remote search
  if (serverResults.locallyRemoved > 0) dbms.commit();
  // --- Search remote node
  MetaSearcher s = searchMan.newSearcher(SearchManager.Z3950, Geonet.File.SEARCH_Z3950_CLIENT);
  ServiceConfig config = new ServiceConfig();
  Element request = new Element("request");
  // --- Z39.50 servers from harvest params
  for (String id : params.getRepositories()) {
    request.addContent(new Element(Geonet.SearchResult.SERVERS).setText(id));
  }
  // --- Z39.50 query from harvest params
  request.addContent(new Element(Geonet.SearchResult.ZQUERY).setText(params.query));
  // --- don't get html presentations (get them later)
  request.addContent(new Element(Geonet.SearchResult.SERVERHTML).setText("off"));
  // --- set timeout to be 100 seconds
  request.addContent(new Element(Geonet.SearchResult.TIMEOUT).setText("100"));
  // --- set hitsPerPage
  request.addContent(new Element(Geonet.SearchResult.HITS_PER_PAGE).setText(groupSize + ""));
  // --- do the search; one retry on an empty result before giving up
  s.search(context, request, config);
  if (s.getSize() == 0) {
    log.error("Search failed or returned 0 results, trying again");
    s.search(context, request, config);
    if (s.getSize() == 0) {
      throw new Exception("Bad luck, Search failed or returned 0 results");
    }
  }
  if (log.isDebugEnabled()) log.debug("Search returned " + s.getSize() + " hits");
  // -- process the hits in groups of groupSize, capped by params.maximumHits
  int numberOfHits = Math.min(Integer.parseInt(params.maximumHits), s.getSize());
  // -- add from and to placeholders to request (filled per page below)
  request.addContent(new Element("from"));
  request.addContent(new Element("to"));
  Element categories = Lib.local.retrieve(dbms, "Categories");
  if (log.isDebugEnabled()) log.debug("categories " + Xml.getString(categories));
  Element repositories = new Info().getZRepositories(context, settingMan);
  if (log.isDebugEnabled()) log.debug("repos " + Xml.getString(repositories));
  // -- build a map of collection code versus repository name for
  // -- assigning the categories
  Map<String, String> codes = new HashMap<String, String>();
  Map<String, String> catCodes = new HashMap<String, String>();
  // -- add new category for each repository (created lazily if missing)
  boolean addcateg = false;
  for (String repo : params.getRepositories()) {
    Element repoElem = Xml.selectElement(repositories, "record[id='" + repo + "']");
    if (repoElem != null) {
      Element repoId = repoElem.getChild("id");
      String repoName = repoElem.getChildText("name");
      // key is "serverCode:collectionCode" — matched against each record's geonet:info
      codes.put(
          repoId.getAttributeValue("serverCode") + ":" + repoId.getAttributeValue("code"),
          repoName);
      // create a result holder for this repository
      serverResults.getServerResult(repoName);
      // sanitize the name of the category (strip non-word chars, lowercase)
      String categName = repoName.replaceAll("[^\\w]", "");
      categName = categName.toLowerCase();
      catCodes.put(
          repoId.getAttributeValue("serverCode") + ":" + repoId.getAttributeValue("code"),
          categName);
      // create the local category if it does not exist yet
      if (Xml.selectElement(categories, "record[name='" + categName + "']") == null) {
        int newId = context.getSerialFactory().getSerial(dbms, "Categories");
        dbms.execute("INSERT INTO Categories(id, name) VALUES (?, ?)", newId, categName);
        Lib.local.insert(dbms, "Categories", newId, repoName);
        addcateg = true;
      }
    }
  }
  if (addcateg) dbms.commit();
  // --- return only maximum hits as directed by the harvest params
  int nrGroups = (numberOfHits / groupSize) + 1;
  for (int i = 1; i <= nrGroups; i++) {
    // 1-based inclusive window [lower, upper] into the remote result set
    int lower = ((i - 1) * groupSize) + 1;
    int upper = Math.min((i * groupSize), numberOfHits);
    request.getChild("from").setText("" + lower);
    request.getChild("to").setText("" + upper);
    // --- Loading results
    List<Document> list = s.presentDocuments(context, request, config);
    // --- Loading categories and groups
    localCateg = new CategoryMapper(dbms);
    localGroups = new GroupMapper(dbms);
    if (log.isDebugEnabled())
      log.debug(
          "There are "
              + (list.size() - 1)
              + " children in the results ("
              + lower
              + " to "
              + upper
              + ")");
    boolean transformIt = false;
    String thisXslt = context.getAppPath() + Geonet.Path.IMPORT_STYLESHEETS + "/";
    if (!params.importXslt.equals("none")) {
      thisXslt = thisXslt + params.importXslt;
      transformIt = true;
    }
    // --- For each record....
    for (Document doc : list) {
      Element md = doc.getRootElement();
      String eName = md.getQualifiedName();
      // the "summary" element is search metadata, not a record — skip it
      if (eName.equals("summary")) continue;
      // -- Remove existing geonet:info children as for example
      // -- GeoNetwork Z39.50 server return when full mode
      // -- an extra element with server info not needed
      // -- once harvested
      String colCode = "";
      Element info = md.getChild(Edit.RootChild.INFO, Edit.NAMESPACE);
      if (info != null) {
        String serverCode = info.getChildText("server");
        int colPos = serverCode.indexOf(':');
        if (colPos != -1) {
          // rebuild the "serverCode:collection" key used in `codes`/`catCodes`
          colCode = serverCode.substring(0, colPos) + ":" + info.getChildText("collection");
        }
      }
      md.removeChildren(Edit.RootChild.INFO, Edit.NAMESPACE);
      String repoName = codes.get(colCode);
      if (log.isDebugEnabled()) log.debug("Processing record from server " + repoName);
      HarvestResult result = serverResults.getServerResult(repoName);
      result.totalMetadata++;
      if (eName.equals("error")) {
        log.error("JZKit could not retrieve record - returned " + Xml.getString(md));
        result.unretrievable++;
        continue;
      }
      // transform using importxslt if not none
      if (transformIt) {
        try {
          if (log.isDebugEnabled()) log.debug("Before transform: " + Xml.getString(md));
          md = Xml.transform(md, thisXslt);
          if (log.isDebugEnabled()) log.debug("After transform: " + Xml.getString(md));
        } catch (Exception e) {
          // NOTE(review): uses System.out instead of the harvester logger —
          // consider log.error here
          System.out.println("Cannot transform XML, ignoring. Error was: " + e.getMessage());
          result.badFormat++;
          continue; // skip this one
        }
      }
      // detect schema, extract uuid and add
      String schema = dataMan.autodetectSchema(md, null);
      if (schema == null) {
        log.warning("Skipping metadata with unknown schema.");
        result.unknownSchema++;
        continue;
      }
      String uuid = null;
      try {
        uuid = dataMan.extractUUID(schema, md);
      } catch (Exception e) {
        // non-fatal: the null/empty check below counts it as unretrievable
        log.error("Unable to extract UUID: " + e.getMessage());
        e.printStackTrace();
      }
      if (uuid == null || uuid.equals("")) {
        log.warning("Skipping metadata due to failure extracting uuid (uuid null or empty).");
        result.unretrievable++;
        continue;
      }
      log.info(" - Adding metadata with " + uuid);
      // --- generate a new metadata id
      int id = context.getSerialFactory().getSerial(dbms, "Metadata");
      // TODO end confusion about datatypes
      String id$ = Integer.toString(id);
      // keep the doctype only when the record was NOT transformed (the
      // transform output no longer matches the original doctype)
      String docType = "";
      if (!transformIt && (doc.getDocType() != null)) {
        docType = Xml.getString(doc.getDocType());
      }
      // --- check for duplicate uuid - violates constraints on metadata table
      // --- if we attempt insert
      boolean alreadyAdded = !newUuids.add(uuid);
      boolean alreadyInDb = (dataMan.getMetadataId(dbms, uuid) != null);
      if (alreadyAdded || alreadyInDb) {
        log.error("Uuid " + uuid + " already exists in this set/database - cannot insert");
        result.couldNotInsert++;
        continue;
      }
      //
      // insert metadata
      //
      try {
        // fixed ownership: group "1", owner 1; indexing is deferred to the
        // explicit indexMetadata call below (indexImmediate == false)
        String groupOwner = "1", isTemplate = "n", title = null;
        int owner = 1;
        String category = null, createDate = new ISODate().toString(), changeDate = createDate;
        boolean ufo = false, indexImmediate = false;
        dataMan.insertMetadata(
            context, dbms, schema, md, id, uuid, owner, groupOwner, params.uuid, isTemplate,
            docType, title, category, createDate, changeDate, ufo, indexImmediate);
      } catch (Exception e) {
        log.error("Unable to insert metadata " + e.getMessage());
        e.printStackTrace();
        result.couldNotInsert++;
        continue;
      }
      addPrivileges(id$, params.getPrivileges(), localGroups, dataMan, context, dbms, log);
      addCategories(
          id$,
          params.getCategories(),
          localCateg,
          dataMan,
          dbms,
          context,
          log,
          catCodes.get(colCode));
      dataMan.setTemplateExt(dbms, id, "n", null);
      dataMan.setHarvestedExt(dbms, id, params.uuid, params.name);
      // validate it here if requested
      if (params.validate) {
        Document docVal;
        if (!transformIt && (doc.getDocType() != null)) {
          // re-attach the original doctype so DTD-based validation works
          docVal = new Document(md, (DocType) doc.getDocType().detach());
        } else {
          docVal = new Document(md);
        }
        if (!dataMan.doValidate(dbms, schema, id$, docVal, context.getLanguage())) {
          result.doesNotValidate++;
        }
      }
      dataMan.indexMetadata(dbms, id$);
      result.addedMetadata++;
    }
  }
  // single commit for all inserts of this run
  dbms.commit();
  return serverResults;
}
/**
 * Create a MEF2 file in ZIP format.
 *
 * <p>For each uuid the record's schema/title/abstract/type are looked up in the
 * Lucene index (preferring the request language), appended to an {@code
 * index.csv} and an {@code index.html} written into the ZIP, and the record's
 * MEF folder is added via {@code createMetadataFolder}.
 *
 * @param context service context (language, beans)
 * @param uuids List of records to export.
 * @param format {@link Format} to export.
 * @param skipUUID whether to omit the uuid from the exported metadata
 * @param stylePath path to export stylesheets
 * @param resolveXlink whether xlinks are resolved before export
 * @param removeXlinkAttribute whether xlink attributes are stripped
 * @return MEF2 File
 * @throws Exception on index or I/O errors
 */
public static Path doExport(
    ServiceContext context,
    Set<String> uuids,
    Format format,
    boolean skipUUID,
    Path stylePath,
    boolean resolveXlink,
    boolean removeXlinkAttribute)
    throws Exception {
  Path file = Files.createTempFile("mef-", ".mef");
  SearchManager searchManager = context.getBean(SearchManager.class);
  String contextLang =
      context.getLanguage() == null ? Geonet.DEFAULT_LANGUAGE : context.getLanguage();
  // both the ZIP filesystem and the index reader are released on exit
  try (FileSystem zipFs = ZipUtil.createZipFs(file);
      IndexAndTaxonomy indexReaderAndTaxonomy = searchManager.getNewIndexReader(contextLang); ) {
    // semicolon-separated, double-quoted CSV header for index.csv
    StringBuilder csvBuilder =
        new StringBuilder(
            "\"schema\";\"uuid\";\"id\";\"type\";\"isHarvested\";\"title\";\"abstract\"\n");
    // static skeleton of index.html (bootstrap CSS + inline styles)
    Element html =
        new Element("html")
            .addContent(
                new Element("head")
                    .addContent(
                        Arrays.asList(
                            new Element("title").setText("Export Index"),
                            new Element("link")
                                .setAttribute("rel", "stylesheet")
                                .setAttribute(
                                    "href",
                                    "https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/css/bootstrap.min.css"),
                            new Element("style")
                                .setText(
                                    "body {\n"
                                        + " padding-left: 10px;\n"
                                        + "}\n"
                                        + "p.abstract {\n"
                                        + " font-style: italic;\n"
                                        + "}\n"
                                        + ".entry {\n"
                                        + " padding: 20px;\n"
                                        + " margin: 20px 0;\n"
                                        + " border: 1px solid #eee;\n"
                                        + " border-left-width: 5px;\n"
                                        + " border-radius: 3px;\n"
                                        + " border-left-color: #1b809e;\n"
                                        + "}\n"
                                        + ".entry:hover {\n"
                                        + " background-color: #f5f5f5;\n"
                                        + "}\n")))); 
    Element body = new Element("body");
    html.addContent(body);
    for (Object uuid1 : uuids) {
      String uuid = (String) uuid1;
      IndexSearcher searcher = new IndexSearcher(indexReaderAndTaxonomy.indexReader);
      // uuid MUST match; the context language is only a ranking preference (SHOULD)
      BooleanQuery query = new BooleanQuery();
      query.add(new BooleanClause(new TermQuery(new Term(UUID, uuid)), BooleanClause.Occur.MUST));
      query.add(
          new BooleanClause(
              new TermQuery(new Term(LOCALE, contextLang)), BooleanClause.Occur.SHOULD));
      TopDocs topDocs = searcher.search(query, NoFilterFilter.instance(), 5);
      String mdSchema = null, mdTitle = null, mdAbstract = null, id = null, isHarvested = null;
      MetadataType mdType = null;
      // collect fields across the (possibly multilingual) index docs;
      // title/abstract prefer the doc whose locale matches contextLang
      for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
        Document doc = searcher.doc(scoreDoc.doc);
        String locale = doc.get(Geonet.IndexFieldNames.LOCALE);
        if (mdSchema == null) {
          mdSchema = doc.get(Geonet.IndexFieldNames.SCHEMA);
        }
        if (mdTitle == null || contextLang.equals(locale)) {
          mdTitle = doc.get(LuceneIndexField.TITLE);
        }
        if (mdAbstract == null || contextLang.equals(locale)) {
          mdAbstract = doc.get(LuceneIndexField.ABSTRACT);
        }
        if (id == null) {
          id = doc.get(LuceneIndexField.ID);
        }
        if (isHarvested == null) {
          isHarvested = doc.get(Geonet.IndexFieldNames.IS_HARVESTED);
        }
        if (mdType == null) {
          // NOTE(review): assumes IS_TEMPLATE is always stored and non-empty;
          // a missing field would NPE on charAt(0) — confirm against the index schema
          String tmp = doc.get(LuceneIndexField.IS_TEMPLATE);
          mdType = MetadataType.lookup(tmp.charAt(0));
        }
      }
      if (mdType == null) {
        mdType = MetadataType.METADATA;
      }
      // one CSV row per uuid
      csvBuilder
          .append('"')
          .append(cleanForCsv(mdSchema))
          .append("\";\"")
          .append(cleanForCsv(uuid))
          .append("\";\"")
          .append(cleanForCsv(id))
          .append("\";\"")
          .append(mdType.toString())
          .append("\";\"")
          .append(cleanForCsv(isHarvested))
          .append("\";\"")
          .append(cleanForCsv(mdTitle))
          .append("\";\"")
          .append(cleanForCsv(mdAbstract))
          .append("\"\n");
      // one HTML "entry" card per uuid: linked title, abstract, detail table
      body.addContent(
          new Element("div")
              .setAttribute("class", "entry")
              .addContent(
                  Arrays.asList(
                      new Element("h4")
                          .setAttribute("class", "title")
                          .addContent(
                              new Element("a")
                                  .setAttribute("href", uuid)
                                  .setText(cleanXml(mdTitle))),
                      new Element("p")
                          .setAttribute("class", "abstract")
                          .setText(cleanXml(mdAbstract)),
                      new Element("table")
                          .setAttribute("class", "table")
                          .addContent(
                              Arrays.asList(
                                  new Element("thead")
                                      .addContent(
                                          new Element("tr")
                                              .addContent(
                                                  Arrays.asList(
                                                      new Element("th").setText("ID"),
                                                      new Element("th").setText("UUID"),
                                                      new Element("th").setText("Type"),
                                                      new Element("th").setText("isHarvested")))),
                                  new Element("tbody")
                                      .addContent(
                                          new Element("tr")
                                              .addContent(
                                                  Arrays.asList(
                                                      new Element("td")
                                                          .setAttribute("class", "id")
                                                          .setText(id),
                                                      new Element("td")
                                                          .setAttribute("class", "uuid")
                                                          .setText(
                                                              xmlContentEscaper().escape(uuid)),
                                                      new Element("td")
                                                          .setAttribute("class", "type")
                                                          .setText(mdType.toString()),
                                                      new Element("td")
                                                          .setAttribute("class", "isHarvested")
                                                          .setText(isHarvested))))))))); 
      // write this record's MEF folder into the ZIP
      createMetadataFolder(
          context, uuid, zipFs, skipUUID, stylePath, format, resolveXlink, removeXlinkAttribute);
    }
    Files.write(zipFs.getPath("/index.csv"), csvBuilder.toString().getBytes(Constants.CHARSET));
    Files.write(zipFs.getPath("/index.html"), Xml.getString(html).getBytes(Constants.CHARSET));
  }
  return file;
}
/**
 * Builds CSW GetDomain {@code DomainValues} elements for the requested property
 * names by scanning the Lucene index.
 *
 * <p>For each property: maps it to its index field, searches with the caller's
 * group (and optional service-specific) constraints, collects the distinct
 * stored values (skipping templates), and emits either a list or a range of
 * values.
 *
 * @param propertyNames properties requested by the client (comma-split upstream)
 * @param context service context used for search privileges and language
 * @param freq when true, return values-by-frequency instead (NOTE(review): this
 *     returns immediately from inside the loop, discarding DomainValues built
 *     for earlier properties — confirm this is intended)
 * @param maxRecords maximum index documents examined per property
 * @param cswServiceSpecificConstraint optional extra CSW filter
 * @param luceneConfig Lucene configuration used to parse the constraint
 * @return one DomainValues element per property, or null if propertyNames is empty
 * @throws Exception on search or index access errors
 */
public static List<Element> handlePropertyName(
    String[] propertyNames,
    ServiceContext context,
    boolean freq,
    int maxRecords,
    String cswServiceSpecificConstraint,
    LuceneConfig luceneConfig)
    throws Exception {
  List<Element> domainValuesList = null;
  if (Log.isDebugEnabled(Geonet.CSW))
    Log.debug(
        Geonet.CSW,
        "Handling property names '"
            + Arrays.toString(propertyNames)
            + "' with max records of "
            + maxRecords);
  for (int i = 0; i < propertyNames.length; i++) {
    // lazily create the result list on the first property
    if (i == 0) domainValuesList = new ArrayList<Element>();
    // Initialize list of values element.
    Element listOfValues = null;
    // Generate DomainValues element
    Element domainValues = new Element("DomainValues", Csw.NAMESPACE_CSW);
    // FIXME what should be the type ???
    domainValues.setAttribute("type", "csw:Record");
    String property = propertyNames[i].trim();
    // Set propertyName in any case.
    Element pn = new Element("PropertyName", Csw.NAMESPACE_CSW);
    domainValues.addContent(pn.setText(property));
    GeonetContext gc = (GeonetContext) context.getHandlerContext(Geonet.CONTEXT_NAME);
    SearchManager sm = gc.getSearchmanager();
    // reader acquired per property; released in the outer finally below
    IndexAndTaxonomy indexAndTaxonomy = sm.getNewIndexReader(null);
    try {
      GeonetworkMultiReader reader = indexAndTaxonomy.indexReader;
      // restrict results to what the caller's groups may see
      BooleanQuery groupsQuery = (BooleanQuery) CatalogSearcher.getGroupsQuery(context);
      BooleanQuery query = null;
      // Apply CSW service specific constraint
      if (StringUtils.isNotEmpty(cswServiceSpecificConstraint)) {
        Query constraintQuery =
            CatalogSearcher.getCswServiceSpecificConstraintQuery(
                cswServiceSpecificConstraint, luceneConfig);
        query = new BooleanQuery();
        BooleanClause.Occur occur = LuceneUtils.convertRequiredAndProhibitedToOccur(true, false);
        query.add(groupsQuery, occur);
        query.add(constraintQuery, occur);
      } else {
        query = groupsQuery;
      }
      List<Pair<String, Boolean>> sortFields =
          Collections.singletonList(Pair.read(Geonet.SearchResult.SortBy.RELEVANCE, true));
      Sort sort = LuceneSearcher.makeSort(sortFields, context.getLanguage(), false);
      CachingWrapperFilter filter = null;
      Pair<TopDocs, Element> searchResults =
          LuceneSearcher.doSearchAndMakeSummary(
              maxRecords,
              0,
              maxRecords,
              context.getLanguage(),
              null,
              reader,
              query,
              filter,
              sort,
              null,
              false,
              false,
              false,
              false // Scoring is useless for GetDomain operation
              );
      TopDocs hits = searchResults.one();
      try {
        // Get mapped lucene field in CSW configuration
        String indexField = CatalogConfiguration.getFieldMapping().get(property.toLowerCase());
        if (indexField != null) property = indexField;
        // check if params asked is in the index using getFieldNames ?
        FieldInfos fi = new SlowCompositeReaderWrapper(reader).getFieldInfos();
        // unknown field: the inner finally still appends the (empty) DomainValues
        if (fi.fieldInfo(property) == null) continue;
        boolean isRange = false;
        if (CatalogConfiguration.getGetRecordsRangeFields().contains(property)) isRange = true;
        if (isRange) listOfValues = new Element("RangeOfValues", Csw.NAMESPACE_CSW);
        else listOfValues = new Element("ListOfValues", Csw.NAMESPACE_CSW);
        // only load the property and the template flag from each document
        Set<String> fields = new HashSet<String>();
        fields.add(property);
        fields.add("_isTemplate");
        // parse each document in the index
        String[] fieldValues;
        SortedSet<String> sortedValues = new TreeSet<String>();
        HashMap<String, Integer> duplicateValues = new HashMap<String, Integer>();
        for (int j = 0; j < hits.scoreDocs.length; j++) {
          DocumentStoredFieldVisitor selector = new DocumentStoredFieldVisitor(fields);
          reader.document(hits.scoreDocs[j].doc, selector);
          Document doc = selector.getDocument();
          // Skip templates and subTemplates
          // NOTE(review): assumes every document stores _isTemplate —
          // getValues returning an empty array would make isTemplate[0] throw;
          // confirm the field is mandatory in this index
          String[] isTemplate = doc.getValues("_isTemplate");
          if (isTemplate[0] != null && !isTemplate[0].equals("n")) continue;
          // Get doc values for specified property
          fieldValues = doc.getValues(property);
          if (fieldValues == null) continue;
          // accumulate distinct values and their occurrence counts
          addtoSortedSet(sortedValues, fieldValues, duplicateValues);
        }
        SummaryComparator valuesComparator =
            new SummaryComparator(SortOption.FREQUENCY, Type.STRING, context.getLanguage(), null);
        TreeSet<Map.Entry<String, Integer>> sortedValuesFrequency =
            new TreeSet<Map.Entry<String, Integer>>(valuesComparator);
        sortedValuesFrequency.addAll(duplicateValues.entrySet());
        if (freq) return createValuesByFrequency(sortedValuesFrequency);
        else listOfValues.addContent(createValuesElement(sortedValues, isRange));
      } finally {
        // any children means that the catalog was unable to determine
        // anything about the specified parameter
        if (listOfValues != null && listOfValues.getChildren().size() != 0)
          domainValues.addContent(listOfValues);
        // Add current DomainValues to the list
        domainValuesList.add(domainValues);
      }
    } finally {
      // always release the index reader taken at the top of this iteration
      sm.releaseIndexReader(indexAndTaxonomy);
    }
  }
  return domainValuesList;
}