/** * Parse XML that contains a Sitemap Index. Example Sitemap Index: * * <p><?xml version="1.0" encoding="UTF-8"?> <sitemapindex * xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"> <sitemap> * <loc>http://www.example.com/sitemap1.xml.gz</loc> <lastmod>2004-10-01T18:23:17+00:00</lastmod> * </sitemap> <sitemap> <loc>http://www.example.com/sitemap2.xml.gz</loc> * <lastmod>2005-01-01</lastmod> </sitemap> </sitemapindex> * * @param url - URL of Sitemap Index * @param nodeList */ private SiteMapIndex parseSitemapIndex(URL url, NodeList nodeList) { LOG.debug("Parsing Sitemap Index"); SiteMapIndex sitemapIndex = new SiteMapIndex(url); sitemapIndex.setType(SitemapType.INDEX); // Loop through the <sitemap>s for (int i = 0; i < nodeList.getLength() && i < MAX_URLS; i++) { Node firstNode = nodeList.item(i); if (firstNode.getNodeType() == Node.ELEMENT_NODE) { Element elem = (Element) firstNode; String loc = getElementValue(elem, "loc"); // try the text content when no loc element // has been specified if (loc == null) { loc = elem.getTextContent().trim(); } try { URL sitemapUrl = new URL(loc); String lastmod = getElementValue(elem, "lastmod"); Date lastModified = SiteMap.convertToDate(lastmod); // Right now we are not worried about sitemapUrls that point // to different websites. SiteMap s = new SiteMap(sitemapUrl, lastModified); sitemapIndex.addSitemap(s); LOG.debug(" {}. {}", (i + 1), s); } catch (MalformedURLException e) { LOG.trace("Don't create an entry with a bad URL", e); LOG.debug("Bad url: [{}]", loc); } } } sitemapIndex.setProcessed(true); return sitemapIndex; }
/**
 * Records when this URL was last modified, given the value in textual form.
 *
 * <p>The text is handed to {@code SiteMap.convertToDate} and whatever that call returns is
 * stored as the last-modified value.
 *
 * @param lastModified textual last-modified value to convert and store
 */
public void setLastModified(String lastModified) {
    final Date converted = SiteMap.convertToDate(lastModified);
    this.lastModified = converted;
}