public int evaluate(int timeout) { try { if (Log.isDebugEnabled(Geonet.Z3950_SERVER)) Log.debug(Geonet.Z3950_SERVER, "INCOMING XML QUERY:\n" + query); Element request = new Element("request"); request.addContent(query.toGNXMLRep()); List<String> categories = query.getCollections(); for (String category : categories) { if (!category.equals("geonetwork") && !category.equals("Default")) request.addContent(new Element("category").setText(category)); } ServiceConfig config = new ServiceConfig(); // perform the search and save search results metasearcher.search(this.srvxtx, request, config); // System.out.println("summary:\n" + Xml.getString(s.getSummary())); // // DEBUG // Random number of records.. Set up the result set setFragmentCount(metasearcher.getSize()); setTaskStatusCode(IRResultSetStatus.COMPLETE); this.srvxtx.getResourceManager().close(); } catch (Throwable e) { Log.error(Geonet.Z3950_SERVER, "error evaluating query.." + e); e.printStackTrace(); try { this.srvxtx.getResourceManager().abort(); } catch (Exception e2) { e2.printStackTrace(); } } return (getStatus()); }
public Z3950ServerResults harvest() throws Exception { Set<String> newUuids = new HashSet<String>(); int groupSize = 10; log.info("Retrieving remote metadata information:" + params.uuid); Z3950ServerResults serverResults = new Z3950ServerResults(); // --- Clean all before harvest : Remove/Add mechanism localUuids = new UUIDMapper(dbms, params.uuid); // --- remove old metadata for (String uuid : localUuids.getUUIDs()) { String id = localUuids.getID(uuid); if (log.isDebugEnabled()) log.debug(" - Removing old metadata before update with id: " + id); dataMan.deleteMetadataGroup(context, dbms, id); serverResults.locallyRemoved++; } if (serverResults.locallyRemoved > 0) dbms.commit(); // --- Search remote node MetaSearcher s = searchMan.newSearcher(SearchManager.Z3950, Geonet.File.SEARCH_Z3950_CLIENT); ServiceConfig config = new ServiceConfig(); Element request = new Element("request"); // --- Z39.50 servers from harvest params for (String id : params.getRepositories()) { request.addContent(new Element(Geonet.SearchResult.SERVERS).setText(id)); } // --- Z39.50 query from harvest params request.addContent(new Element(Geonet.SearchResult.ZQUERY).setText(params.query)); // --- don't get html presentations (get them later) request.addContent(new Element(Geonet.SearchResult.SERVERHTML).setText("off")); // --- set timeout to be 100 seconds request.addContent(new Element(Geonet.SearchResult.TIMEOUT).setText("100")); // --- set hitsPerPage request.addContent(new Element(Geonet.SearchResult.HITS_PER_PAGE).setText(groupSize + "")); // --- do the search s.search(context, request, config); if (s.getSize() == 0) { log.error("Search failed or returned 0 results, trying again"); s.search(context, request, config); if (s.getSize() == 0) { throw new Exception("Bad luck, Search failed or returned 0 results"); } } if (log.isDebugEnabled()) log.debug("Search returned " + s.getSize() + " hits"); // -- process the hits in groups of groupSize int numberOfHits = Math.min(Integer.parseInt(params.maximumHits), s.getSize()); // -- add from and to placeholders to request request.addContent(new Element("from")); request.addContent(new Element("to")); Element categories = Lib.local.retrieve(dbms, "Categories"); if (log.isDebugEnabled()) log.debug("categories " + Xml.getString(categories)); Element repositories = new Info().getZRepositories(context, settingMan); if (log.isDebugEnabled()) log.debug("repos " + Xml.getString(repositories)); // -- build a map of collection code versus repository name for // -- assigning the categories Map<String, String> codes = new HashMap<String, String>(); Map<String, String> catCodes = new HashMap<String, String>(); // -- add new category for each repository boolean addcateg = false; for (String repo : params.getRepositories()) { Element repoElem = Xml.selectElement(repositories, "record[id='" + repo + "']"); if (repoElem != null) { Element repoId = repoElem.getChild("id"); String repoName = repoElem.getChildText("name"); codes.put( repoId.getAttributeValue("serverCode") + ":" + repoId.getAttributeValue("code"), repoName); // create a result holder for this repository serverResults.getServerResult(repoName); // sanitize the name of the category String categName = repoName.replaceAll("[^\\w]", ""); categName = categName.toLowerCase(); catCodes.put( repoId.getAttributeValue("serverCode") + ":" + repoId.getAttributeValue("code"), categName); if (Xml.selectElement(categories, "record[name='" + categName + "']") == null) { int newId = context.getSerialFactory().getSerial(dbms, "Categories"); dbms.execute("INSERT INTO Categories(id, name) VALUES (?, ?)", newId, categName); Lib.local.insert(dbms, "Categories", newId, repoName); addcateg = true; } } } if (addcateg) dbms.commit(); // --- return only maximum hits as directed by the harvest params int nrGroups = (numberOfHits / groupSize) + 1; for (int i = 1; i <= nrGroups; i++) { int lower = ((i - 1) * groupSize) + 1; int upper = Math.min((i * groupSize), numberOfHits); request.getChild("from").setText("" + lower); request.getChild("to").setText("" + upper); // --- Loading results List<Document> list = s.presentDocuments(context, request, config); // --- Loading categories and groups localCateg = new CategoryMapper(dbms); localGroups = new GroupMapper(dbms); if (log.isDebugEnabled()) log.debug( "There are " + (list.size() - 1) + " children in the results (" + lower + " to " + upper + ")"); boolean transformIt = false; String thisXslt = context.getAppPath() + Geonet.Path.IMPORT_STYLESHEETS + "/"; if (!params.importXslt.equals("none")) { thisXslt = thisXslt + params.importXslt; transformIt = true; } // --- For each record.... for (Document doc : list) { Element md = doc.getRootElement(); String eName = md.getQualifiedName(); if (eName.equals("summary")) continue; // -- Remove existing geonet:info children as for example // -- GeoNetwork Z39.50 server return when full mode // -- an extra element with server info not needed // -- once harvested String colCode = ""; Element info = md.getChild(Edit.RootChild.INFO, Edit.NAMESPACE); if (info != null) { String serverCode = info.getChildText("server"); int colPos = serverCode.indexOf(':'); if (colPos != -1) { colCode = serverCode.substring(0, colPos) + ":" + info.getChildText("collection"); } } md.removeChildren(Edit.RootChild.INFO, Edit.NAMESPACE); String repoName = codes.get(colCode); if (log.isDebugEnabled()) log.debug("Processing record from server " + repoName); HarvestResult result = serverResults.getServerResult(repoName); result.totalMetadata++; if (eName.equals("error")) { log.error("JZKit could not retrieve record - returned " + Xml.getString(md)); result.unretrievable++; continue; } // transform using importxslt if not none if (transformIt) { try { if (log.isDebugEnabled()) log.debug("Before transform: " + Xml.getString(md)); md = Xml.transform(md, thisXslt); if (log.isDebugEnabled()) log.debug("After transform: " + Xml.getString(md)); } catch (Exception e) { System.out.println("Cannot transform XML, ignoring. Error was: " + e.getMessage()); result.badFormat++; continue; // skip this one } } // detect schema, extract uuid and add String schema = dataMan.autodetectSchema(md, null); if (schema == null) { log.warning("Skipping metadata with unknown schema."); result.unknownSchema++; continue; } String uuid = null; try { uuid = dataMan.extractUUID(schema, md); } catch (Exception e) { log.error("Unable to extract UUID: " + e.getMessage()); e.printStackTrace(); } if (uuid == null || uuid.equals("")) { log.warning("Skipping metadata due to failure extracting uuid (uuid null or empty)."); result.unretrievable++; continue; } log.info(" - Adding metadata with " + uuid); // --- generate a new metadata id int id = context.getSerialFactory().getSerial(dbms, "Metadata"); // TODO end confusion about datatypes String id$ = Integer.toString(id); String docType = ""; if (!transformIt && (doc.getDocType() != null)) { docType = Xml.getString(doc.getDocType()); } // --- check for duplicate uuid - violates constraints on metadata table // --- if we attempt insert boolean alreadyAdded = !newUuids.add(uuid); boolean alreadyInDb = (dataMan.getMetadataId(dbms, uuid) != null); if (alreadyAdded || alreadyInDb) { log.error("Uuid " + uuid + " already exists in this set/database - cannot insert"); result.couldNotInsert++; continue; } // // insert metadata // try { String groupOwner = "1", isTemplate = "n", title = null; int owner = 1; String category = null, createDate = new ISODate().toString(), changeDate = createDate; boolean ufo = false, indexImmediate = false; dataMan.insertMetadata( context, dbms, schema, md, id, uuid, owner, groupOwner, params.uuid, isTemplate, docType, title, category, createDate, changeDate, ufo, indexImmediate); } catch (Exception e) { log.error("Unable to insert metadata " + e.getMessage()); e.printStackTrace(); result.couldNotInsert++; continue; } addPrivileges(id$, params.getPrivileges(), localGroups, dataMan, context, dbms, log); addCategories( id$, params.getCategories(), localCateg, dataMan, dbms, context, log, catCodes.get(colCode)); dataMan.setTemplateExt(dbms, id, "n", null); dataMan.setHarvestedExt(dbms, id, params.uuid, params.name); // validate it here if requested if (params.validate) { Document docVal; if (!transformIt && (doc.getDocType() != null)) { docVal = new Document(md, (DocType) doc.getDocType().detach()); } else { docVal = new Document(md); } if (!dataMan.doValidate(dbms, schema, id$, docVal, context.getLanguage())) { result.doesNotValidate++; } } dataMan.indexMetadata(dbms, id$); result.addedMetadata++; } } dbms.commit(); return serverResults; }