/**
   * Reads the URN from the MODS metadata embedded in a METS file.
   *
   * <p>The document is parsed and the first {@code dmdSec}, {@code mdWrap} and {@code xmlData}
   * elements (METS namespace) are followed down to the first {@code mods} element (MODS
   * namespace). Among the {@code identifier} children of that element, the value of the last one
   * whose {@code type} attribute equals {@code urn} (ignoring case) is returned. Identifiers
   * that carry no {@code type} attribute are skipped.
   *
   * @param file the METS file to read
   * @return the URN specified in the METS file, or {@code null} if an element on the path is
   *     missing, no identifier of type {@code urn} exists, or the selected value is empty
   * @throws IOException if the file cannot be opened or read, the SAX parser cannot be created,
   *     or the document cannot be parsed (parsing failures are wrapped as the cause)
   * @throws ParseException declared in the signature; no statement in this method throws it
   *     directly
   * @author Thomas Kleinke
   */
  public String readURN(File file) throws IOException, ParseException {

    // try-with-resources closes both streams on every exit path, including failures while the
    // parser is still being set up, and keeps a close() failure from hiding the primary error.
    try (FileInputStream fileInputStream = new FileInputStream(file);
        BOMInputStream bomInputStream = new BOMInputStream(fileInputStream)) {

      // NOTE(review): the parser is created with the factory's default settings. If METS files
      // can come from untrusted sources, consider disabling DTDs/external entities here.
      SAXParserFactory spf = SAXParserFactory.newInstance();
      XMLReader xmlReader;
      try {
        xmlReader = spf.newSAXParser().getXMLReader();
      } catch (Exception e) {
        throw new IOException("Error creating SAX parser", e);
      }
      xmlReader.setErrorHandler(err);
      NodeFactory nodeFactory = new PremisXmlReaderNodeFactory();
      Builder parser = new Builder(xmlReader, false, nodeFactory);
      logger.trace("Successfully built builder and XML reader");

      Document doc = parser.build(bomInputStream);
      Element root = doc.getRootElement();

      Element dmdSecEl = root.getFirstChildElement("dmdSec", METS_NS);
      if (dmdSecEl == null) {
        return null;
      }

      Element mdWrapEl = dmdSecEl.getFirstChildElement("mdWrap", METS_NS);
      if (mdWrapEl == null) {
        return null;
      }

      Element xmlDataEl = mdWrapEl.getFirstChildElement("xmlData", METS_NS);
      if (xmlDataEl == null) {
        return null;
      }

      Element modsEl = xmlDataEl.getFirstChildElement("mods", MODS_NS);
      if (modsEl == null) {
        return null;
      }

      // No early exit: when several identifiers of type "urn" exist, the last one wins.
      String urn = null;
      Elements identifierEls = modsEl.getChildElements("identifier", MODS_NS);
      for (int i = 0; i < identifierEls.size(); i++) {
        Element element = identifierEls.get(i);
        Attribute attribute = element.getAttribute("type");
        // An identifier without a type attribute yields null here; skip it instead of failing.
        if (attribute != null && "urn".equalsIgnoreCase(attribute.getValue())) {
          urn = element.getValue();
        }
      }

      // An empty URN is reported the same way as a missing one.
      if (urn != null && urn.isEmpty()) {
        urn = null;
      }

      return urn;
    } catch (ValidityException ve) {
      throw new IOException(ve);
    } catch (ParsingException pe) {
      throw new IOException(pe);
    }
  }
// Example #2
// 0
  /**
   * Decompress the gzipped content and process the resulting XML Sitemap.
   *
   * <p>The decompressed stream is handed to {@code processXml} together with the original
   * {@code url}; the URL with a trailing {@code .gz} removed is used only as the system id of
   * the XML input source. The decompression stream is closed on every exit path.
   *
   * @param url - URL of the gzipped content
   * @param response - Gzipped content
   * @return the sitemap produced by {@code processXml} for the decompressed content
   * @throws MalformedURLException declared in the signature; presumably propagated from
   *     {@code processXml}
   * @throws IOException if the content cannot be read as gzip data or the stream cannot be
   *     closed
   * @throws UnknownFormatException declared in the signature; presumably propagated from
   *     {@code processXml}
   */
  private AbstractSiteMap processGzip(URL url, byte[] response)
      throws MalformedURLException, IOException, UnknownFormatException {

    LOG.debug("Processing gzip");

    // Remove .gz ending
    String xmlUrl = url.toString().replaceFirst("\\.gz$", "");

    LOG.debug("XML url = {}", xmlUrl);

    // try-with-resources guarantees the stream is closed even when processXml throws.
    try (BOMInputStream decompressed =
        new BOMInputStream(new GZIPInputStream(new ByteArrayInputStream(response)))) {
      InputSource in = new InputSource(decompressed);
      in.setSystemId(xmlUrl);
      return processXml(url, in);
    }
  }