/**
 * Reads the URN from the MODS metadata embedded in a METS file.
 *
 * <p>The lookup follows a fixed path: the first {@code dmdSec} child of the
 * root element, then its first {@code mdWrap}, {@code xmlData} (all in the
 * METS namespace) and {@code mods} (MODS namespace). Among the
 * {@code identifier} children of that {@code mods} element, those whose
 * {@code type} attribute equals "urn" (case-insensitively) are considered;
 * if several match, the value of the last one is returned. Identifiers
 * without a {@code type} attribute are ignored.
 *
 * @param file the METS file to read
 * @return The URN specified in the METS file, or null if the METS file
 *         doesn't specify an URN (a path element is missing, no identifier
 *         of type "urn" exists, or its value is empty)
 * @throws IOException if the file cannot be opened or read, if the SAX
 *         parser cannot be created, or if the document cannot be parsed
 *         (parsing and validity errors are wrapped as the cause)
 * @throws ParseException declared for callers; nothing in this method body
 *         throws it directly
 * @author Thomas Kleinke
 */
public String readURN(File file) throws IOException, ParseException {
    FileInputStream fileInputStream = new FileInputStream(file);
    BOMInputStream bomInputStream = new BOMInputStream(fileInputStream);

    // Everything after opening the streams runs inside try/finally so that
    // the file handle is released on every exit path, including failures
    // while setting up the parser.
    try {
        XMLReader xmlReader;
        try {
            SAXParserFactory spf = SAXParserFactory.newInstance();
            xmlReader = spf.newSAXParser().getXMLReader();
        } catch (Exception e) {
            throw new IOException("Error creating SAX parser", e);
        }
        xmlReader.setErrorHandler(err);

        NodeFactory nodeFactory = new PremisXmlReaderNodeFactory();
        Builder parser = new Builder(xmlReader, false, nodeFactory);
        logger.trace("Successfully built builder and XML reader");

        Document doc = parser.build(bomInputStream);
        Element root = doc.getRootElement();

        Element dmdSecEl = root.getFirstChildElement("dmdSec", METS_NS);
        if (dmdSecEl == null) {
            return null;
        }

        Element mdWrapEl = dmdSecEl.getFirstChildElement("mdWrap", METS_NS);
        if (mdWrapEl == null) {
            return null;
        }

        Element xmlDataEl = mdWrapEl.getFirstChildElement("xmlData", METS_NS);
        if (xmlDataEl == null) {
            return null;
        }

        Element modsEl = xmlDataEl.getFirstChildElement("mods", MODS_NS);
        if (modsEl == null) {
            return null;
        }

        String urn = null;
        Elements identifierEls = modsEl.getChildElements("identifier", MODS_NS);
        for (int i = 0; i < identifierEls.size(); i++) {
            Element identifierEl = identifierEls.get(i);
            Attribute typeAttr = identifierEl.getAttribute("type");
            // An identifier without a type attribute yields null here; skip
            // it instead of failing with a NullPointerException.
            if (typeAttr != null && "urn".equalsIgnoreCase(typeAttr.getValue())) {
                // No break: a later matching identifier overrides an earlier one.
                urn = identifierEl.getValue();
            }
        }

        // An empty URN is reported the same way as a missing one.
        if (urn != null && urn.isEmpty()) {
            urn = null;
        }
        return urn;
    } catch (ValidityException ve) {
        throw new IOException(ve);
    } catch (ParsingException pe) {
        throw new IOException(pe);
    } finally {
        // Attempt both closes even if the first one throws.
        try {
            bomInputStream.close();
        } finally {
            fileInputStream.close();
        }
    }
}
/** * Decompress the gzipped content and process the resulting XML Sitemap. * * @param url - URL of the gzipped content * @param response - Gzipped content * @throws MalformedURLException * @throws IOException * @throws UnknownFormatException */ private AbstractSiteMap processGzip(URL url, byte[] response) throws MalformedURLException, IOException, UnknownFormatException { LOG.debug("Processing gzip"); AbstractSiteMap smi; InputStream is = new ByteArrayInputStream(response); // Remove .gz ending String xmlUrl = url.toString().replaceFirst("\\.gz$", ""); LOG.debug("XML url = {}", xmlUrl); BOMInputStream decompressed = new BOMInputStream(new GZIPInputStream(is)); InputSource in = new InputSource(decompressed); in.setSystemId(xmlUrl); smi = processXml(url, in); decompressed.close(); return smi; }