Пример #1
0
  /**
   * Update page information.
   *
   * @param node Element for the page.
   * @param page Page.
   * @throws JDOMException
   */
  public void updatePageInformation(Element node, Page page) throws JDOMException {

    // Retrieve basic page information
    Attribute attrPageId = node.getAttribute("pageid");
    if (attrPageId != null) {
      page.setPageId(attrPageId.getValue());
    }
    Attribute attrTitle = node.getAttribute("title");
    if (attrTitle != null) {
      page.setTitle(attrTitle.getValue());
    }
    page.setStartTimestamp(node.getAttributeValue("starttimestamp"));
    Attribute attrRedirect = node.getAttribute("redirect");
    if (attrRedirect != null) {
      page.isRedirect(true);
    }
    Attribute attrMissing = node.getAttribute("missing");
    if (attrMissing != null) {
      page.setExisting(Boolean.FALSE);
    }

    // Retrieve protection information
    XPath xpaProtection = XPath.newInstance("protection/pr[@type=\"edit\"]");
    Element protectionNode = (Element) xpaProtection.selectSingleNode(node);
    if (protectionNode != null) {
      XPath xpaLevel = XPath.newInstance("./@level");
      page.setEditProtectionLevel(xpaLevel.valueOf(protectionNode));
    }
  }
Пример #2
0
  /**
   * Update redirect and missing information of a list of pages.
   *
   * @param root Root element.
   * @param pages List of pages.
   * @throws JDOMException
   */
  public void updateRedirect(Element root, Collection<Page> pages) throws JDOMException {

    // Retrieving redirects
    XPath xpaRedirects = XPath.newInstance("/api/query/redirects/r");
    List listRedirects = xpaRedirects.selectNodes(root);
    XPath xpaFrom = XPath.newInstance("./@from");
    XPath xpaTo = XPath.newInstance("./@to");

    // Retrieving pages
    XPath xpaPages = XPath.newInstance("/api/query/pages");
    Element listPages = (Element) xpaPages.selectSingleNode(root);
    XPath xpaPageId = XPath.newInstance("./@pageid");
    XPath xpaNamespace = XPath.newInstance("./@ns");
    XPath xpaTitle = XPath.newInstance("./@title");

    // Retrieving normalization information
    Map<String, String> normalization = new HashMap<String, String>();
    retrieveNormalization(root, normalization);

    // Analyzing redirects
    Iterator itRedirect = listRedirects.iterator();
    while (itRedirect.hasNext()) {
      Element currentRedirect = (Element) itRedirect.next();
      String fromPage = xpaFrom.valueOf(currentRedirect);
      String toPage = xpaTo.valueOf(currentRedirect);
      for (Page p : pages) {

        // Find if the redirect is already taken into account
        boolean exists = false;
        Iterator<Page> itPage = p.getRedirectIteratorWithPage();
        while (itPage.hasNext()) {
          Page tmp = itPage.next();
          String title = getNormalizedTitle(tmp.getTitle(), normalization);
          if (Page.areSameTitle(title, toPage)) {
            exists = true;
          }
        }

        // Add the redirect if needed
        itPage = p.getRedirectIteratorWithPage();
        while (itPage.hasNext()) {
          Page tmp = itPage.next();
          String title = getNormalizedTitle(tmp.getTitle(), normalization);
          if (!exists && Page.areSameTitle(title, fromPage)) {
            XPath xpaPage = createXPath("page", "title", toPage);
            List listTo = xpaPage.selectNodes(listPages);
            if (!listTo.isEmpty()) {
              Element to = (Element) listTo.get(0);
              Page pageTo =
                  DataManager.getPage(p.getWikipedia(), xpaTitle.valueOf(to), null, null, null);
              pageTo.setNamespace(xpaNamespace.valueOf(to));
              pageTo.setPageId(xpaPageId.valueOf(to));
              p.addRedirect(pageTo);
            }
          }
        }
      }
    }

    // Analyzing missing pages
    for (Page p : pages) {
      Iterator<Page> itPage = p.getRedirectIteratorWithPage();
      while (itPage.hasNext()) {
        Page tmp = itPage.next();
        String title = getNormalizedTitle(tmp.getTitle(), normalization);
        XPath xpaPage = createXPath("page", "title", title);
        Element page = (Element) xpaPage.selectSingleNode(listPages);
        if (page != null) {
          List pageId = xpaPageId.selectNodes(page);
          if ((pageId != null) && (!pageId.isEmpty())) {
            tmp.setExisting(Boolean.TRUE);
          } else {
            Attribute attrMissing = page.getAttribute("missing");
            if (attrMissing != null) {
              tmp.setExisting(Boolean.FALSE);
            }
          }
        }
      }
    }
  }