/** * Update page information. * * @param node Element for the page. * @param page Page. * @throws JDOMException */ public void updatePageInformation(Element node, Page page) throws JDOMException { // Retrieve basic page information Attribute attrPageId = node.getAttribute("pageid"); if (attrPageId != null) { page.setPageId(attrPageId.getValue()); } Attribute attrTitle = node.getAttribute("title"); if (attrTitle != null) { page.setTitle(attrTitle.getValue()); } page.setStartTimestamp(node.getAttributeValue("starttimestamp")); Attribute attrRedirect = node.getAttribute("redirect"); if (attrRedirect != null) { page.isRedirect(true); } Attribute attrMissing = node.getAttribute("missing"); if (attrMissing != null) { page.setExisting(Boolean.FALSE); } // Retrieve protection information XPath xpaProtection = XPath.newInstance("protection/pr[@type=\"edit\"]"); Element protectionNode = (Element) xpaProtection.selectSingleNode(node); if (protectionNode != null) { XPath xpaLevel = XPath.newInstance("./@level"); page.setEditProtectionLevel(xpaLevel.valueOf(protectionNode)); } }
/** * Update redirect and missing information of a list of pages. * * @param root Root element. * @param pages List of pages. * @throws JDOMException */ public void updateRedirect(Element root, Collection<Page> pages) throws JDOMException { // Retrieving redirects XPath xpaRedirects = XPath.newInstance("/api/query/redirects/r"); List listRedirects = xpaRedirects.selectNodes(root); XPath xpaFrom = XPath.newInstance("./@from"); XPath xpaTo = XPath.newInstance("./@to"); // Retrieving pages XPath xpaPages = XPath.newInstance("/api/query/pages"); Element listPages = (Element) xpaPages.selectSingleNode(root); XPath xpaPageId = XPath.newInstance("./@pageid"); XPath xpaNamespace = XPath.newInstance("./@ns"); XPath xpaTitle = XPath.newInstance("./@title"); // Retrieving normalization information Map<String, String> normalization = new HashMap<String, String>(); retrieveNormalization(root, normalization); // Analyzing redirects Iterator itRedirect = listRedirects.iterator(); while (itRedirect.hasNext()) { Element currentRedirect = (Element) itRedirect.next(); String fromPage = xpaFrom.valueOf(currentRedirect); String toPage = xpaTo.valueOf(currentRedirect); for (Page p : pages) { // Find if the redirect is already taken into account boolean exists = false; Iterator<Page> itPage = p.getRedirectIteratorWithPage(); while (itPage.hasNext()) { Page tmp = itPage.next(); String title = getNormalizedTitle(tmp.getTitle(), normalization); if (Page.areSameTitle(title, toPage)) { exists = true; } } // Add the redirect if needed itPage = p.getRedirectIteratorWithPage(); while (itPage.hasNext()) { Page tmp = itPage.next(); String title = getNormalizedTitle(tmp.getTitle(), normalization); if (!exists && Page.areSameTitle(title, fromPage)) { XPath xpaPage = createXPath("page", "title", toPage); List listTo = xpaPage.selectNodes(listPages); if (!listTo.isEmpty()) { Element to = (Element) listTo.get(0); Page pageTo = DataManager.getPage(p.getWikipedia(), xpaTitle.valueOf(to), null, null, null); pageTo.setNamespace(xpaNamespace.valueOf(to)); pageTo.setPageId(xpaPageId.valueOf(to)); p.addRedirect(pageTo); } } } } } // Analyzing missing pages for (Page p : pages) { Iterator<Page> itPage = p.getRedirectIteratorWithPage(); while (itPage.hasNext()) { Page tmp = itPage.next(); String title = getNormalizedTitle(tmp.getTitle(), normalization); XPath xpaPage = createXPath("page", "title", title); Element page = (Element) xpaPage.selectSingleNode(listPages); if (page != null) { List pageId = xpaPageId.selectNodes(page); if ((pageId != null) && (!pageId.isEmpty())) { tmp.setExisting(Boolean.TRUE); } else { Attribute attrMissing = page.getAttribute("missing"); if (attrMissing != null) { tmp.setExisting(Boolean.FALSE); } } } } } }