Пример #1
0
  /**
   * Creates a result set for the given query and prepares the searcher that will serve it.
   *
   * <p>The searcher is obtained from the {@link SearchManager} bean of the Geonet handler context.
   * A failure while obtaining it is not propagated: it is reported on the debug log (when enabled)
   * and on the standard error stream, and {@code metasearcher} is left unassigned.
   *
   * @param query the query this result set answers
   * @param userInfo not read by this constructor
   * @param observers observers handed to the superclass constructor
   * @param srvctx service context used to look up the Geonet handler context
   * @throws Exception declared by the signature; failures of the searcher lookup are caught here
   */
  public GNResultSet(GNXMLQuery query, Object userInfo, Observer[] observers, ServiceContext srvctx)
      throws Exception {
    super(observers);
    this.query = query;
    this.srvxtx = srvctx;

    try {
      Object handlerContext = this.srvxtx.getHandlerContext(Geonet.CONTEXT_NAME);
      SearchManager searchManager = ((GeonetContext) handlerContext).getBean(SearchManager.class);

      metasearcher =
          searchManager.newSearcher(SearchManager.LUCENE, Geonet.File.SEARCH_Z3950_SERVER);
    } catch (Exception e) {
      // Best effort: the failure is reported but construction still completes.
      if (Log.isDebugEnabled(Geonet.Z3950_SERVER)) {
        Log.debug(Geonet.Z3950_SERVER, "error constructing GNresult set: " + e);
      }
      e.printStackTrace();
    }
  }
Пример #2
0
  /**
   * Harvests metadata from the configured Z39.50 repositories using a remove-then-add strategy.
   *
   * <p>Steps, in order:
   *
   * <ol>
   *   <li>every metadata record mapped to {@code params.uuid} is deleted locally;
   *   <li>the remote repositories are searched (one retry if the first search returns no hits);
   *   <li>a category is created for each repository whose sanitized name is not yet known;
   *   <li>the hits, capped at {@code params.maximumHits}, are fetched in pages of ten, optionally
   *       transformed with the import stylesheet, inserted, given privileges and categories,
   *       optionally validated, and indexed.
   * </ol>
   *
   * <p>Problems with a single record (retrieval error, failed transform, unknown schema, missing
   * or duplicate uuid, failed insert) are counted in that record's repository result and the
   * record is skipped; they do not abort the harvest.
   *
   * @return the per-repository counters collected during the run
   * @throws Exception if both searches return no hits, or if an operation outside the per-record
   *     handling fails
   */
  public Z3950ServerResults harvest() throws Exception {
    Set<String> newUuids = new HashSet<String>();

    int groupSize = 10;

    log.info("Retrieving remote metadata information:" + params.uuid);

    Z3950ServerResults serverResults = new Z3950ServerResults();

    // --- Clean all before harvest : Remove/Add mechanism
    localUuids = new UUIDMapper(dbms, params.uuid);

    // --- remove old metadata
    for (String uuid : localUuids.getUUIDs()) {
      String id = localUuids.getID(uuid);
      if (log.isDebugEnabled()) log.debug("  - Removing old metadata before update with id: " + id);
      dataMan.deleteMetadataGroup(context, dbms, id);
      serverResults.locallyRemoved++;
    }

    if (serverResults.locallyRemoved > 0) dbms.commit();

    // --- Search remote node
    MetaSearcher s = searchMan.newSearcher(SearchManager.Z3950, Geonet.File.SEARCH_Z3950_CLIENT);

    ServiceConfig config = new ServiceConfig();

    Element request = new Element("request");

    // --- Z39.50 servers from harvest params
    for (String id : params.getRepositories()) {
      request.addContent(new Element(Geonet.SearchResult.SERVERS).setText(id));
    }

    // --- Z39.50 query from harvest params
    request.addContent(new Element(Geonet.SearchResult.ZQUERY).setText(params.query));

    // --- don't get html presentations (get them later)
    request.addContent(new Element(Geonet.SearchResult.SERVERHTML).setText("off"));

    // --- set timeout to be 100 seconds
    request.addContent(new Element(Geonet.SearchResult.TIMEOUT).setText("100"));
    // --- set hitsPerPage
    request.addContent(new Element(Geonet.SearchResult.HITS_PER_PAGE).setText(groupSize + ""));

    // --- do the search
    s.search(context, request, config);

    if (s.getSize() == 0) {
      log.error("Search failed or returned 0 results, trying again");
      s.search(context, request, config);
      if (s.getSize() == 0) {
        throw new Exception("Bad luck, Search failed or returned 0 results");
      }
    }

    if (log.isDebugEnabled()) log.debug("Search returned " + s.getSize() + " hits");

    // -- process the hits in groups of groupSize
    int numberOfHits = Math.min(Integer.parseInt(params.maximumHits), s.getSize());
    // -- add from and to placeholders to request

    request.addContent(new Element("from"));
    request.addContent(new Element("to"));

    Element categories = Lib.local.retrieve(dbms, "Categories");
    if (log.isDebugEnabled()) log.debug("categories " + Xml.getString(categories));

    Element repositories = new Info().getZRepositories(context, settingMan);
    if (log.isDebugEnabled()) log.debug("repos " + Xml.getString(repositories));

    // -- build a map of collection code versus repository name for
    // -- assigning the categories
    Map<String, String> codes = new HashMap<String, String>();
    Map<String, String> catCodes = new HashMap<String, String>();

    // -- add new category for each repository
    boolean addcateg = false;
    for (String repo : params.getRepositories()) {
      Element repoElem = Xml.selectElement(repositories, "record[id='" + repo + "']");
      if (repoElem != null) {
        Element repoId = repoElem.getChild("id");
        String repoName = repoElem.getChildText("name");
        String collectionKey =
            repoId.getAttributeValue("serverCode") + ":" + repoId.getAttributeValue("code");
        codes.put(collectionKey, repoName);
        // create a result holder for this repository
        serverResults.getServerResult(repoName);

        // sanitize the name of the category
        String categName = repoName.replaceAll("[^\\w]", "");
        categName = categName.toLowerCase();
        catCodes.put(collectionKey, categName);

        if (Xml.selectElement(categories, "record[name='" + categName + "']") == null) {
          int newId = context.getSerialFactory().getSerial(dbms, "Categories");
          dbms.execute("INSERT INTO Categories(id, name) VALUES (?, ?)", newId, categName);
          Lib.local.insert(dbms, "Categories", newId, repoName);
          addcateg = true;
        }
      }
    }

    if (addcateg) dbms.commit();

    // --- the import stylesheet depends only on the harvest params, so resolve it once
    boolean transformIt = !params.importXslt.equals("none");
    String thisXslt = context.getAppPath() + Geonet.Path.IMPORT_STYLESHEETS + "/";
    if (transformIt) {
      thisXslt = thisXslt + params.importXslt;
    }

    // --- return only maximum hits as directed by the harvest params.
    // --- Ceiling division: "hits / groupSize + 1" asked for one page too many (from > to)
    // --- whenever the number of hits was an exact multiple of groupSize.
    int nrGroups = (numberOfHits + groupSize - 1) / groupSize;
    for (int i = 1; i <= nrGroups; i++) {
      int lower = ((i - 1) * groupSize) + 1;
      int upper = Math.min((i * groupSize), numberOfHits);
      request.getChild("from").setText("" + lower);
      request.getChild("to").setText("" + upper);

      // --- Loading results
      List<Document> list = s.presentDocuments(context, request, config);

      // --- Loading categories and groups
      localCateg = new CategoryMapper(dbms);
      localGroups = new GroupMapper(dbms);

      if (log.isDebugEnabled())
        log.debug(
            "There are "
                + (list.size() - 1)
                + " children in the results ("
                + lower
                + " to "
                + upper
                + ")");

      // --- For each record....
      for (Document doc : list) {
        Element md = doc.getRootElement();
        String eName = md.getQualifiedName();
        if (eName.equals("summary")) continue;

        // -- Remove existing geonet:info children as for example
        // -- GeoNetwork Z39.50 server return when full mode
        // -- an extra element with server info not needed
        // -- once harvested
        String colCode = "";
        Element info = md.getChild(Edit.RootChild.INFO, Edit.NAMESPACE);
        if (info != null) {
          // the "server" child may be absent, in which case no collection code can be derived
          String serverCode = info.getChildText("server");
          int colPos = (serverCode == null) ? -1 : serverCode.indexOf(':');
          if (colPos != -1) {
            colCode = serverCode.substring(0, colPos) + ":" + info.getChildText("collection");
          }
        }
        md.removeChildren(Edit.RootChild.INFO, Edit.NAMESPACE);
        // NOTE(review): repoName is null when colCode matches no configured repository;
        // confirm that getServerResult accepts a null name.
        String repoName = codes.get(colCode);
        if (log.isDebugEnabled()) log.debug("Processing record from server " + repoName);
        HarvestResult result = serverResults.getServerResult(repoName);
        result.totalMetadata++;

        if (eName.equals("error")) {
          log.error("JZKit could not retrieve record - returned " + Xml.getString(md));
          result.unretrievable++;
          continue;
        }

        // transform using importxslt if not none
        if (transformIt) {
          try {
            if (log.isDebugEnabled()) log.debug("Before transform: " + Xml.getString(md));
            md = Xml.transform(md, thisXslt);
            if (log.isDebugEnabled()) log.debug("After transform: " + Xml.getString(md));
          } catch (Exception e) {
            // report through the harvester log like every other per-record failure
            log.error("Cannot transform XML, ignoring. Error was: " + e.getMessage());
            result.badFormat++;
            continue; // skip this one
          }
        }

        // detect schema, extract uuid and add
        String schema = dataMan.autodetectSchema(md, null);
        if (schema == null) {
          log.warning("Skipping metadata with unknown schema.");
          result.unknownSchema++;
          continue;
        }

        String uuid = null;
        try {
          uuid = dataMan.extractUUID(schema, md);
        } catch (Exception e) {
          log.error("Unable to extract UUID: " + e.getMessage());
          e.printStackTrace();
        }

        if (uuid == null || uuid.equals("")) {
          log.warning("Skipping metadata due to failure extracting uuid (uuid null or empty).");
          result.unretrievable++;
          continue;
        }

        log.info("  - Adding metadata with " + uuid);

        // --- generate a new metadata id

        int id = context.getSerialFactory().getSerial(dbms, "Metadata");
        // TODO end confusion about datatypes
        String id$ = Integer.toString(id);

        String docType = "";
        if (!transformIt && (doc.getDocType() != null)) {
          docType = Xml.getString(doc.getDocType());
        }

        // --- check for duplicate uuid - violates constraints on metadata table
        // --- if we attempt insert
        boolean alreadyAdded = !newUuids.add(uuid);
        boolean alreadyInDb = (dataMan.getMetadataId(dbms, uuid) != null);
        if (alreadyAdded || alreadyInDb) {
          log.error("Uuid " + uuid + " already exists in this set/database - cannot insert");
          result.couldNotInsert++;
          continue;
        }

        //
        // insert metadata
        //
        try {
          String groupOwner = "1", isTemplate = "n", title = null;
          int owner = 1;
          String category = null, createDate = new ISODate().toString(), changeDate = createDate;
          boolean ufo = false, indexImmediate = false;
          dataMan.insertMetadata(
              context,
              dbms,
              schema,
              md,
              id,
              uuid,
              owner,
              groupOwner,
              params.uuid,
              isTemplate,
              docType,
              title,
              category,
              createDate,
              changeDate,
              ufo,
              indexImmediate);

        } catch (Exception e) {
          log.error("Unable to insert metadata " + e.getMessage());
          e.printStackTrace();
          result.couldNotInsert++;
          continue;
        }

        addPrivileges(id$, params.getPrivileges(), localGroups, dataMan, context, dbms, log);
        addCategories(
            id$,
            params.getCategories(),
            localCateg,
            dataMan,
            dbms,
            context,
            log,
            catCodes.get(colCode));

        dataMan.setTemplateExt(dbms, id, "n", null);
        dataMan.setHarvestedExt(dbms, id, params.uuid, params.name);

        // validate it here if requested
        if (params.validate) {
          Document docVal;
          if (!transformIt && (doc.getDocType() != null)) {
            docVal = new Document(md, (DocType) doc.getDocType().detach());
          } else {
            docVal = new Document(md);
          }

          if (!dataMan.doValidate(dbms, schema, id$, docVal, context.getLanguage())) {
            result.doesNotValidate++;
          }
        }

        dataMan.indexMetadata(dbms, id$);

        result.addedMetadata++;
      }
    }

    dbms.commit();
    return serverResults;
  }
  /**
   * Creates a MEF2 file in ZIP format.
   *
   * <p>For every uuid the index is queried for schema, title, abstract, id, harvested flag and
   * template type; these are written as one row of {@code /index.csv} and one entry of
   * {@code /index.html} at the root of the archive, and the record itself is added through
   * {@code createMetadataFolder}. Title and abstract prefer the index document whose locale
   * equals the context language.
   *
   * <p>The archive is built in a temporary file. If the export fails, that file is deleted before
   * the exception is rethrown.
   *
   * @param context service context providing the {@link SearchManager} bean and the language
   * @param uuids List of records to export.
   * @param format {@link Format} to export.
   * @param skipUUID passed through to {@code createMetadataFolder}
   * @param stylePath passed through to {@code createMetadataFolder}
   * @param resolveXlink passed through to {@code createMetadataFolder}
   * @param removeXlinkAttribute passed through to {@code createMetadataFolder}
   * @return MEF2 File
   * @throws Exception if the index cannot be read or the archive cannot be written
   */
  public static Path doExport(
      ServiceContext context,
      Set<String> uuids,
      Format format,
      boolean skipUUID,
      Path stylePath,
      boolean resolveXlink,
      boolean removeXlinkAttribute)
      throws Exception {

    Path file = Files.createTempFile("mef-", ".mef");
    try {
      SearchManager searchManager = context.getBean(SearchManager.class);
      String contextLang =
          context.getLanguage() == null ? Geonet.DEFAULT_LANGUAGE : context.getLanguage();
      try (FileSystem zipFs = ZipUtil.createZipFs(file);
          IndexAndTaxonomy indexReaderAndTaxonomy = searchManager.getNewIndexReader(contextLang); ) {
        StringBuilder csvBuilder =
            new StringBuilder(
                "\"schema\";\"uuid\";\"id\";\"type\";\"isHarvested\";\"title\";\"abstract\"\n");
        Element html =
            new Element("html")
                .addContent(
                    new Element("head")
                        .addContent(
                            Arrays.asList(
                                new Element("title").setText("Export Index"),
                                new Element("link")
                                    .setAttribute("rel", "stylesheet")
                                    .setAttribute(
                                        "href",
                                        "https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/css/bootstrap.min.css"),
                                new Element("style")
                                    .setText(
                                        "body {\n"
                                            + "  padding-left: 10px;\n"
                                            + "}\n"
                                            + "p.abstract {\n"
                                            + "  font-style: italic;\n"
                                            + "}\n"
                                            + ".entry {\n"
                                            + "  padding: 20px;\n"
                                            + "  margin: 20px 0;\n"
                                            + "  border: 1px solid #eee;\n"
                                            + "  border-left-width: 5px;\n"
                                            + "  border-radius: 3px;\n"
                                            + "  border-left-color: #1b809e;\n"
                                            + "}\n"
                                            + ".entry:hover {\n"
                                            + "  background-color: #f5f5f5;\n"
                                            + "}\n"))));
        Element body = new Element("body");
        html.addContent(body);

        // One searcher serves every lookup: the underlying reader is the same for all uuids.
        IndexSearcher searcher = new IndexSearcher(indexReaderAndTaxonomy.indexReader);

        for (String uuid : uuids) {
          BooleanQuery query = new BooleanQuery();
          query.add(
              new BooleanClause(new TermQuery(new Term(UUID, uuid)), BooleanClause.Occur.MUST));
          query.add(
              new BooleanClause(
                  new TermQuery(new Term(LOCALE, contextLang)), BooleanClause.Occur.SHOULD));
          TopDocs topDocs = searcher.search(query, NoFilterFilter.instance(), 5);
          String mdSchema = null, mdTitle = null, mdAbstract = null, id = null, isHarvested = null;
          MetadataType mdType = null;

          for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
            Document doc = searcher.doc(scoreDoc.doc);
            String locale = doc.get(Geonet.IndexFieldNames.LOCALE);
            if (mdSchema == null) {
              mdSchema = doc.get(Geonet.IndexFieldNames.SCHEMA);
            }
            if (mdTitle == null || contextLang.equals(locale)) {
              mdTitle = doc.get(LuceneIndexField.TITLE);
            }
            if (mdAbstract == null || contextLang.equals(locale)) {
              mdAbstract = doc.get(LuceneIndexField.ABSTRACT);
            }
            if (id == null) {
              id = doc.get(LuceneIndexField.ID);
            }
            if (isHarvested == null) {
              isHarvested = doc.get(Geonet.IndexFieldNames.IS_HARVESTED);
            }
            if (mdType == null) {
              // A document without a usable template flag leaves mdType unset, so the
              // METADATA default below applies instead of failing on charAt(0).
              String tmp = doc.get(LuceneIndexField.IS_TEMPLATE);
              if (tmp != null && !tmp.isEmpty()) {
                mdType = MetadataType.lookup(tmp.charAt(0));
              }
            }
          }

          if (mdType == null) {
            mdType = MetadataType.METADATA;
          }
          csvBuilder
              .append('"')
              .append(cleanForCsv(mdSchema))
              .append("\";\"")
              .append(cleanForCsv(uuid))
              .append("\";\"")
              .append(cleanForCsv(id))
              .append("\";\"")
              .append(mdType.toString())
              .append("\";\"")
              .append(cleanForCsv(isHarvested))
              .append("\";\"")
              .append(cleanForCsv(mdTitle))
              .append("\";\"")
              .append(cleanForCsv(mdAbstract))
              .append("\"\n");

          body.addContent(
              new Element("div")
                  .setAttribute("class", "entry")
                  .addContent(
                      Arrays.asList(
                          new Element("h4")
                              .setAttribute("class", "title")
                              .addContent(
                                  new Element("a")
                                      .setAttribute("href", uuid)
                                      .setText(cleanXml(mdTitle))),
                          new Element("p")
                              .setAttribute("class", "abstract")
                              .setText(cleanXml(mdAbstract)),
                          new Element("table")
                              .setAttribute("class", "table")
                              .addContent(
                                  Arrays.asList(
                                      new Element("thead")
                                          .addContent(
                                              new Element("tr")
                                                  .addContent(
                                                      Arrays.asList(
                                                          new Element("th").setText("ID"),
                                                          new Element("th").setText("UUID"),
                                                          new Element("th").setText("Type"),
                                                          new Element("th")
                                                              .setText("isHarvested")))),
                                      new Element("tbody")
                                          .addContent(
                                              new Element("tr")
                                                  .addContent(
                                                      Arrays.asList(
                                                          new Element("td")
                                                              .setAttribute("class", "id")
                                                              .setText(id),
                                                          new Element("td")
                                                              .setAttribute("class", "uuid")
                                                              .setText(
                                                                  xmlContentEscaper().escape(uuid)),
                                                          new Element("td")
                                                              .setAttribute("class", "type")
                                                              .setText(mdType.toString()),
                                                          new Element("td")
                                                              .setAttribute("class", "isHarvested")
                                                              .setText(isHarvested)))))))));
          createMetadataFolder(
              context, uuid, zipFs, skipUUID, stylePath, format, resolveXlink, removeXlinkAttribute);
        }
        Files.write(zipFs.getPath("/index.csv"), csvBuilder.toString().getBytes(Constants.CHARSET));
        Files.write(zipFs.getPath("/index.html"), Xml.getString(html).getBytes(Constants.CHARSET));
      }
    } catch (Exception e) {
      // Do not leave a half-written archive behind in the temp directory.
      try {
        Files.deleteIfExists(file);
      } catch (Exception cleanupFailure) {
        e.addSuppressed(cleanupFailure);
      }
      throw e;
    }
    return file;
  }
Пример #4
0
  /**
   * Builds the CSW {@code DomainValues} elements for the requested property names.
   *
   * <p>For each property name a search restricted by the groups query (and by the service specific
   * constraint, when one is given) is run, and the values of the mapped index field are collected
   * from the hits that are not templates. A property whose field is not present in the index
   * still yields a {@code DomainValues} element, but without a list of values.
   *
   * <p>When {@code freq} is {@code true} the method returns as soon as the first property found in
   * the index has been processed, and the result is whatever {@code createValuesByFrequency}
   * produces for that property rather than the accumulated {@code DomainValues} list.
   *
   * @param propertyNames property names to describe; each one is trimmed before use
   * @param context service context used for the search manager, the groups query and the language
   * @param freq whether to return the values ordered by frequency (see above)
   * @param maxRecords limit handed to the index search
   * @param cswServiceSpecificConstraint optional extra constraint; ignored when empty
   * @param luceneConfig configuration used to build the constraint query
   * @return one {@code DomainValues} element per property name, or {@code null} when
   *     {@code propertyNames} is empty
   * @throws Exception if the search or the index access fails
   */
  public static List<Element> handlePropertyName(
      String[] propertyNames,
      ServiceContext context,
      boolean freq,
      int maxRecords,
      String cswServiceSpecificConstraint,
      LuceneConfig luceneConfig)
      throws Exception {

    List<Element> domainValuesList = null;

    if (Log.isDebugEnabled(Geonet.CSW))
      Log.debug(
          Geonet.CSW,
          "Handling property names '"
              + Arrays.toString(propertyNames)
              + "' with max records of "
              + maxRecords);

    for (int i = 0; i < propertyNames.length; i++) {

      if (i == 0) domainValuesList = new ArrayList<Element>();

      // Initialize list of values element.
      Element listOfValues = null;

      // Generate DomainValues element
      Element domainValues = new Element("DomainValues", Csw.NAMESPACE_CSW);

      // FIXME what should be the type ???
      domainValues.setAttribute("type", "csw:Record");

      String property = propertyNames[i].trim();

      // Set propertyName in any case.
      Element pn = new Element("PropertyName", Csw.NAMESPACE_CSW);
      domainValues.addContent(pn.setText(property));

      GeonetContext gc = (GeonetContext) context.getHandlerContext(Geonet.CONTEXT_NAME);
      SearchManager sm = gc.getSearchmanager();

      IndexAndTaxonomy indexAndTaxonomy = sm.getNewIndexReader(null);
      try {
        GeonetworkMultiReader reader = indexAndTaxonomy.indexReader;
        BooleanQuery groupsQuery = (BooleanQuery) CatalogSearcher.getGroupsQuery(context);
        BooleanQuery query = null;

        // Apply CSW service specific constraint
        if (StringUtils.isNotEmpty(cswServiceSpecificConstraint)) {
          Query constraintQuery =
              CatalogSearcher.getCswServiceSpecificConstraintQuery(
                  cswServiceSpecificConstraint, luceneConfig);

          query = new BooleanQuery();

          BooleanClause.Occur occur = LuceneUtils.convertRequiredAndProhibitedToOccur(true, false);

          query.add(groupsQuery, occur);
          query.add(constraintQuery, occur);

        } else {
          query = groupsQuery;
        }

        List<Pair<String, Boolean>> sortFields =
            Collections.singletonList(Pair.read(Geonet.SearchResult.SortBy.RELEVANCE, true));
        Sort sort = LuceneSearcher.makeSort(sortFields, context.getLanguage(), false);
        CachingWrapperFilter filter = null;

        Pair<TopDocs, Element> searchResults =
            LuceneSearcher.doSearchAndMakeSummary(
                maxRecords,
                0,
                maxRecords,
                context.getLanguage(),
                null,
                reader,
                query,
                filter,
                sort,
                null,
                false,
                false,
                false,
                false // Scoring is useless for GetDomain operation
                );
        TopDocs hits = searchResults.one();

        try {
          // Get mapped lucene field in CSW configuration
          String indexField = CatalogConfiguration.getFieldMapping().get(property.toLowerCase());
          if (indexField != null) property = indexField;

          // check if params asked is in the index using getFieldNames ?
          // (the finally block below still records the DomainValues element on this continue)
          FieldInfos fi = new SlowCompositeReaderWrapper(reader).getFieldInfos();
          if (fi.fieldInfo(property) == null) continue;

          boolean isRange = CatalogConfiguration.getGetRecordsRangeFields().contains(property);

          if (isRange) listOfValues = new Element("RangeOfValues", Csw.NAMESPACE_CSW);
          else listOfValues = new Element("ListOfValues", Csw.NAMESPACE_CSW);

          Set<String> fields = new HashSet<String>();
          fields.add(property);
          fields.add("_isTemplate");

          // parse each document in the index
          String[] fieldValues;
          SortedSet<String> sortedValues = new TreeSet<String>();
          HashMap<String, Integer> duplicateValues = new HashMap<String, Integer>();
          for (int j = 0; j < hits.scoreDocs.length; j++) {
            DocumentStoredFieldVisitor selector = new DocumentStoredFieldVisitor(fields);
            reader.document(hits.scoreDocs[j].doc, selector);
            Document doc = selector.getDocument();

            // Skip templates and subTemplates. getValues yields an empty array when the
            // field is missing, so the length must be checked before reading element 0.
            String[] isTemplate = doc.getValues("_isTemplate");
            if (isTemplate.length > 0 && isTemplate[0] != null && !isTemplate[0].equals("n"))
              continue;

            // Get doc values for specified property
            fieldValues = doc.getValues(property);
            if (fieldValues == null) continue;

            addtoSortedSet(sortedValues, fieldValues, duplicateValues);
          }

          if (freq) {
            // The frequency ordering is only needed for this branch, so build it here.
            SummaryComparator valuesComparator =
                new SummaryComparator(
                    SortOption.FREQUENCY, Type.STRING, context.getLanguage(), null);
            TreeSet<Map.Entry<String, Integer>> sortedValuesFrequency =
                new TreeSet<Map.Entry<String, Integer>>(valuesComparator);
            sortedValuesFrequency.addAll(duplicateValues.entrySet());
            return createValuesByFrequency(sortedValuesFrequency);
          } else {
            listOfValues.addContent(createValuesElement(sortedValues, isRange));
          }

        } finally {
          // no children means that the catalog was unable to determine
          // anything about the specified parameter, so the list is left out
          if (listOfValues != null && listOfValues.getChildren().size() != 0)
            domainValues.addContent(listOfValues);

          // Add current DomainValues to the list
          domainValuesList.add(domainValues);
        }
      } finally {
        sm.releaseIndexReader(indexAndTaxonomy);
      }
    }
    return domainValuesList;
  }