/**
 * Verifies the number of articles produced by the article iterator and how
 * many of them are HTML-only or PDF-only.
 *
 * <p>The simulated AU ({@code sau}) is crawled and its content is copied into
 * {@code nau} under journal-style URLs (presumably {@code copyAu} rewrites
 * each source URL matching the pattern with the replacement -- confirm against
 * {@code PluginTestUtil}). Eight files are then deleted so that some articles
 * are left with only one of their two full-text formats.
 *
 * @throws Exception if crawling, copying, or iterating fails
 */
public void testArticleCountAndType() throws Exception {
    int expCount = 28;
    PluginTestUtil.crawlSimAu(sau);
    String pat1 = "branch(\\d+)/(\\d+file\\.html)";
    String rep1 = "aps/journal/v123/n$1/full/$2";
    PluginTestUtil.copyAu(sau, nau, ".*[^.][^p][^d][^f]$", pat1, rep1);
    String pat2 = "branch(\\d+)/(\\d+file\\.pdf)";
    String rep2 = "aps/journal/v123/n$1/pdf/$2";
    PluginTestUtil.copyAu(sau, nau, ".*\\.pdf$", pat2, rep2);

    // Remove some URLs: every "1file.html" and "2file.pdf" under /journal/.
    int deleted = 0;
    for (Iterator<?> nodeIter = nau.getAuCachedUrlSet().contentHashIterator();
        nodeIter.hasNext(); ) {
      CachedUrlSetNode cusn = (CachedUrlSetNode) nodeIter.next();
      if (cusn instanceof CachedUrl) {
        CachedUrl cu = (CachedUrl) cusn;
        String url = cu.getUrl();
        if (url.contains("/journal/")
            && (url.endsWith("1file.html") || url.endsWith("2file.pdf"))) {
          deleteBlock(cu);
          ++deleted;
        }
      }
    }
    assertEquals(8, deleted);

    Iterator<ArticleFiles> it = nau.getArticleIterator();
    int count = 0;
    int countHtmlOnly = 0;
    int countPdfOnly = 0;
    while (it.hasNext()) {
      ArticleFiles af = it.next();
      log.info(af.toString());
      CachedUrl cu = af.getFullTextCu();
      // Check for null before dereferencing, so that a missing full-text CU
      // is reported as an assertion failure rather than a NullPointerException.
      assertNotNull(cu);
      String url = cu.getUrl();
      String contentType = cu.getContentType();
      log.debug("count " + count + " url " + url + " " + contentType);
      count++;
      String pdfUrl = af.getRoleUrl(ArticleFiles.ROLE_FULL_TEXT_PDF);
      if (pdfUrl == null) {
        // No PDF role: the article has HTML only.
        ++countHtmlOnly;
      } else if (pdfUrl.equals(url)) {
        // The PDF is itself the full-text URL: the article has PDF only.
        // Compare by value; == would only test reference identity.
        ++countPdfOnly;
      }
    }
    log.debug("Article count is " + count);
    assertEquals(expCount, count);
    assertEquals(4, countHtmlOnly);
    assertEquals(4, countPdfOnly);
  }
  /**
   * Stores landing-page, citation-export, PDF and EPUB URLs for three books,
   * builds the {@link ArticleFiles} expected for each book, and then walks the
   * sub-tree article iterator logging the full-text and PDF URLs it reports.
   *
   * <p>The per-URL comparison against the expected URLs is currently disabled
   * (see the commented-out {@code assertEquals} in the final loop), so this
   * test only checks that storing content and iterating complete without
   * error.
   *
   * @throws Exception if storing content or iterating fails
   */
  public void testCreateArticleFiles() throws Exception {
    // create urls to store in UrlCacher
    String[] au_urls = {
      BASE_URL + "pc/doifinder/10.1057/9780123456789",
      BASE_URL + "pc/browse/citationExport?doi=10.1057/9780123456789",
      BASE_URL + "pc/doifinder/download/10.1057/9780123456789",
      BASE_URL + "pc/doifinder/download/10.1057/9780123456789.epub",
      BASE_URL + "pc/doifinder/10.1057/9781234567890",
      BASE_URL + "pc/browse/citationExport?doi=10.1057/9781234567890",
      BASE_URL + "pc/doifinder/download/10.1057/9781234567890",
      BASE_URL + "pc/doifinder/download/10.1057/9781234567890.epub",
      BASE_URL + "pc/doifinder/10.1057/9782345678901",
      BASE_URL + "pc/browse/citationExport?doi=10.1057/9782345678901",
      BASE_URL + "pc/doifinder/download/10.1057/9782345678901",
      BASE_URL + "pc/doifinder/download/10.1057/9782345678901.epub"
    };

    // Store content for each URL, choosing the header by URL shape:
    // ".../download/<doi>" is a PDF, ".../download/<doi>.epub" is an EPUB,
    // and everything else is stored with the text header.
    for (String url : au_urls) {
      if (url.contains("download") && !url.endsWith(".epub")) {
        storeContent(random_content_stream, pdfHeader, url);
      } else if (url.contains("download")) { // epub
        storeContent(random_content_stream, epubHeader, url);
      } else {
        storeContent(random_content_stream, textHeader, url);
      }
    }

    // book 9780123456789
    ArticleFiles af1 = new ArticleFiles();
    af1.setRoleString(
        ArticleFiles.ROLE_FULL_TEXT_PDF, BASE_URL + "pc/doifinder/download/10.1057/9780123456789");
    af1.setRoleString(
        ArticleFiles.ROLE_ARTICLE_METADATA,
        BASE_URL + "pc/browse/citationExport?doi=10.1057/9780123456789");
    af1.setRoleString(
        ArticleFiles.ROLE_FULL_TEXT_EPUB,
        BASE_URL + "pc/doifinder/download/10.1057/9780123456789.epub");
    // book 9781234567890
    ArticleFiles af2 = new ArticleFiles();
    af2.setRoleString(
        ArticleFiles.ROLE_FULL_TEXT_PDF, BASE_URL + "pc/doifinder/download/10.1057/9781234567890");
    af2.setRoleString(
        ArticleFiles.ROLE_ARTICLE_METADATA,
        BASE_URL + "pc/browse/citationExport?doi=10.1057/9781234567890");
    af2.setRoleString(
        ArticleFiles.ROLE_FULL_TEXT_EPUB,
        BASE_URL + "pc/doifinder/download/10.1057/9781234567890.epub");
    // book 9782345678901
    ArticleFiles af3 = new ArticleFiles();
    af3.setRoleString(
        ArticleFiles.ROLE_FULL_TEXT_PDF, BASE_URL + "pc/doifinder/download/10.1057/9782345678901");
    af3.setRoleString(
        ArticleFiles.ROLE_ARTICLE_METADATA,
        BASE_URL + "pc/browse/citationExport?doi=10.1057/9782345678901");
    af3.setRoleString(
        ArticleFiles.ROLE_FULL_TEXT_EPUB,
        BASE_URL + "pc/doifinder/download/10.1057/9782345678901.epub");

    // key the expected content to the fullTextUrl for the ArticleFiles
    // NOTE(review): this map is populated but not yet consulted by the loop
    // below; it is kept for when the per-article comparison is enabled.
    HashMap<String, ArticleFiles> fullUrlToAF = new HashMap<String, ArticleFiles>();
    fullUrlToAF.put(BASE_URL + "pc/doifinder/download/10.1057/9780123456789", af1);
    fullUrlToAF.put(BASE_URL + "pc/doifinder/download/10.1057/9781234567890", af2);
    fullUrlToAF.put(BASE_URL + "pc/doifinder/download/10.1057/9782345678901", af3);

    // get article iterator, get article files and the appropriate urls according
    // to their roles.
    String[] expectedUrls = {
      EXPECTED_FULL_TEXT_URL, EXPECTED_PDF_URL,
    };
    for (SubTreeArticleIterator artIter = createSubTreeIter(); artIter.hasNext(); ) {
      ArticleFiles af = artIter.next();
      String[] actualUrls = {
        af.getFullTextUrl(), af.getRoleUrl(ArticleFiles.ROLE_FULL_TEXT_PDF),
        // af.getRoleUrl(ArticleFiles.ROLE_FULL_TEXT_PDF_LANDING_PAGE)
      };
      log.info("actualUrls: " + actualUrls.length);
      // expectedUrls and actualUrls are parallel arrays of the same length.
      for (int i = 0; i < actualUrls.length; i++) {
        log.info("e_url: " + expectedUrls[i]);

        log.info("url: " + actualUrls[i]);
        // Comparison intentionally left disabled; URLs are only logged.
        // assertEquals(expectedUrls[i], actualUrls[i]);
      }
    }
  }