예제 #1
0
 /**
  * Queues one {@code EmbeddedInCallable} per page of the given list, merges
  * every list-typed task result into a single list, then sorts that list and
  * drops consecutive entries that share the same title.
  * 
  * @param wikipedia Wikipedia, handed to each task.
  * @param pageList List of pages; one task is created for each of them.
  * @param namespaces List of name spaces, handed to each task.
  * @param limit Flag indicating if the number of results should be limited
  *              (handed to each task as is).
  * @return {@code null} when {@code pageList} is {@code null} or empty;
  *         otherwise the merged, sorted and de-duplicated list. Collection
  *         stops as soon as {@code shouldStop()} reports true, so the list
  *         may then hold only the results gathered so far.
  * @throws APIException Declared for callers; presumably propagated by the
  *         task helpers used here (not visible in this method).
  */
 @SuppressWarnings("unchecked")
 public List<Page> retrieveAllEmbeddedIn(
     EnumWikipedia wikipedia, List<Page> pageList,
     List<Integer> namespaces,
     boolean limit) throws APIException {

   // Nothing to do without input pages
   if ((pageList == null) || pageList.isEmpty()) {
     return null;
   }

   // Schedule one task per page
   final API api = APIFactory.getAPI();
   for (final Page source : pageList) {
     addTask(new EmbeddedInCallable(wikipedia, this, api, source, namespaces, limit));
   }

   // Gather the results; anything that is not a list is ignored
   List<Page> resultList = new ArrayList<Page>();
   while (hasRemainingTask() && !shouldStop()) {
     Object taskResult = getNextResult();
     if (!(taskResult instanceof List<?>)) {
       continue;
     }
     for (Page found : (List<Page>) taskResult) {
       resultList.add(found);
     }
   }

   // Sorting first lets a single pass remove entries with the same title
   // as the last entry that was kept
   Collections.sort(resultList);
   Page lastKept = null;
   for (Iterator<Page> it = resultList.iterator(); it.hasNext();) {
     Page current = it.next();
     boolean sameAsLastKept =
         (lastKept != null) &&
         Page.areSameTitle(lastKept.getTitle(), current.getTitle());
     if (sameAsLastKept) {
       it.remove();
       continue;
     }
     lastKept = current;
   }
   return resultList;
 }
예제 #2
0
  /**
   * Update redirect and missing information of a list of pages from an API
   * answer.
   * <p>
   * First pass: for every {@code /api/query/redirects/r} node, each page whose
   * own title or one of its redirects' titles (after normalization) matches
   * the {@code from} attribute gets the {@code to} page added as a redirect,
   * unless a title in that chain already matches {@code to}.
   * <p>
   * Second pass: every page and redirect yielded by
   * {@code getRedirectIteratorWithPage()} is looked up by normalized title
   * under {@code /api/query/pages}; it is flagged as existing when the node
   * has a {@code pageid} attribute, as not existing when the node has a
   * {@code missing} attribute, and left untouched otherwise.
   *
   * @param root Root element.
   * @param pages List of pages.
   * @throws JDOMException Declared for the XPath operations used here.
   */
  public void updateRedirect(Element root, Collection<Page> pages) throws JDOMException {

    // Redirect nodes and the attributes read from them
    XPath redirectsPath = XPath.newInstance("/api/query/redirects/r");
    List<?> redirectNodes = redirectsPath.selectNodes(root);
    XPath fromPath = XPath.newInstance("./@from");
    XPath toPath = XPath.newInstance("./@to");

    // Pages container and the attributes read from its page nodes
    XPath pagesPath = XPath.newInstance("/api/query/pages");
    Element pagesNode = (Element) pagesPath.selectSingleNode(root);
    XPath pageIdPath = XPath.newInstance("./@pageid");
    XPath namespacePath = XPath.newInstance("./@ns");
    XPath titlePath = XPath.newInstance("./@title");

    // Title normalization table, filled from the same answer
    Map<String, String> normalization = new HashMap<String, String>();
    retrieveNormalization(root, normalization);

    // First pass: redirects
    for (Object redirectNode : redirectNodes) {
      Element redirect = (Element) redirectNode;
      String source = fromPath.valueOf(redirect);
      String target = toPath.valueOf(redirect);

      for (Page p : pages) {

        // Is the target already part of the page's redirect chain?
        boolean targetKnown = false;
        for (Iterator<Page> chain = p.getRedirectIteratorWithPage(); chain.hasNext();) {
          Page link = chain.next();
          String linkTitle = getNormalizedTitle(link.getTitle(), normalization);
          if (Page.areSameTitle(linkTitle, target)) {
            targetKnown = true;
          }
        }

        // Attach the target wherever the chain contains the source
        for (Iterator<Page> chain = p.getRedirectIteratorWithPage(); chain.hasNext();) {
          Page link = chain.next();
          String linkTitle = getNormalizedTitle(link.getTitle(), normalization);
          if (targetKnown || !Page.areSameTitle(linkTitle, source)) {
            continue;
          }
          // Presumably selects the page nodes whose title attribute is the target
          XPath targetPath = createXPath("page", "title", target);
          List<?> targetNodes = targetPath.selectNodes(pagesNode);
          if (targetNodes.isEmpty()) {
            continue;
          }
          Element targetNode = (Element) targetNodes.get(0);
          Page targetPage = DataManager.getPage(
              p.getWikipedia(), titlePath.valueOf(targetNode), null, null, null);
          targetPage.setNamespace(namespacePath.valueOf(targetNode));
          targetPage.setPageId(pageIdPath.valueOf(targetNode));
          p.addRedirect(targetPage);
        }
      }
    }

    // Second pass: existing / missing flags
    for (Page p : pages) {
      for (Iterator<Page> chain = p.getRedirectIteratorWithPage(); chain.hasNext();) {
        Page link = chain.next();
        String linkTitle = getNormalizedTitle(link.getTitle(), normalization);
        XPath linkPath = createXPath("page", "title", linkTitle);
        Element linkNode = (Element) linkPath.selectSingleNode(pagesNode);
        if (linkNode == null) {
          // No information about this title in the answer
          continue;
        }
        List<?> idNodes = pageIdPath.selectNodes(linkNode);
        if ((idNodes != null) && !idNodes.isEmpty()) {
          link.setExisting(Boolean.TRUE);
        } else if (linkNode.getAttribute("missing") != null) {
          link.setExisting(Boolean.FALSE);
        }
      }
    }
  }