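/** Tests the Nature article iterator against simulated content. */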
public class TestNatureArticleIteratorFactory extends LockssTestCase {
  static final Logger log = Logger.getLogger("TestNatureArticleIteratorFactory");

  private SimulatedArchivalUnit sau; // Simulated AU to generate content
  private ArchivalUnit nau; // Nature AU
  private MockLockssDaemon theDaemon;
  private static final int DEFAULT_FILESIZE = 3000;
  private static int fileSize = DEFAULT_FILESIZE;

  private static final String PLUGIN_NAME =
      "org.lockss.plugin.nature.ClockssNaturePublishingGroupPlugin";

  private static final String BASE_URL = "http://www.nature.com/";

  public void setUp() throws Exception {
    super.setUp();
    String tempDirPath = getTempDir().getAbsolutePath() + File.separator;
    ConfigurationUtil.setFromArgs(LockssRepositoryImpl.PARAM_CACHE_LOCATION, tempDirPath);
    theDaemon = getMockLockssDaemon();
    theDaemon.getAlertManager();
    theDaemon.getPluginManager().setLoadablePluginsReady(true);
    theDaemon.setDaemonInited(true);
    theDaemon.getPluginManager().startService();
    theDaemon.getCrawlManager();

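    // Create the simulated source AU and the Nature AU under test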
    sau = PluginTestUtil.createAndStartSimAu(simAuConfig(tempDirPath));
    nau = PluginTestUtil.createAndStartAu(PLUGIN_NAME, natureAuConfig());
  }

  public void tearDown() throws Exception {
    sau.deleteContentTree();
    theDaemon.stopDaemon();
    super.tearDown();
  }

  Configuration simAuConfig(String rootPath) {
    Configuration conf = ConfigManager.newConfiguration();
    conf.put("root", rootPath);
    conf.put("base_url", BASE_URL);
    conf.put("depth", "1");
    conf.put("branch", "4");
    conf.put("numFiles", "7");
    conf.put(
        "fileTypes",
        "" + (SimulatedContentGenerator.FILE_TYPE_HTML | SimulatedContentGenerator.FILE_TYPE_PDF));
    conf.put("binFileSize", "" + fileSize);
    return conf;
  }

  Configuration natureAuConfig() {
    Configuration conf = ConfigManager.newConfiguration();
    conf.put("base_url", BASE_URL);
    conf.put("journal_id", "aps");
    conf.put("volume_name", "123");
    conf.put("year", "2008");
    return conf;
  }

  public void testArticleCountAndType() throws Exception {
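    // simAuConfig: depth 1, branch 4, numFiles 7 => 4 branches x 7 files = 28 articles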
    int expCount = 28;
    PluginTestUtil.crawlSimAu(sau);
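    // Copy the simulated crawl into the Nature AU, rewriting branch URLs into
    // journal-style paths: HTML files (the first filter pattern approximates
    // "not ending in .pdf") go under .../full/, PDFs under .../pdf/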
    String pat1 = "branch(\\d+)/(\\d+file\\.html)";
    String rep1 = "aps/journal/v123/n$1/full/$2";
    PluginTestUtil.copyAu(sau, nau, ".*[^.][^p][^d][^f]$", pat1, rep1);
    String pat2 = "branch(\\d+)/(\\d+file\\.pdf)";
    String rep2 = "aps/journal/v123/n$1/pdf/$2";
    PluginTestUtil.copyAu(sau, nau, ".*\\.pdf$", pat2, rep2);

    // Remove some URLs so that some articles are left with only HTML or only PDF
    int deleted = 0;
    for (Iterator it = nau.getAuCachedUrlSet().contentHashIterator(); it.hasNext(); ) {
      CachedUrlSetNode cusn = (CachedUrlSetNode) it.next();
      if (cusn instanceof CachedUrl) {
        CachedUrl cu = (CachedUrl) cusn;
        String url = cu.getUrl();
        if (url.contains("/journal/")
            && (url.endsWith("1file.html") || url.endsWith("2file.pdf"))) {
          deleteBlock(cu);
          ++deleted;
        }
      }
    }
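    // One HTML file and one PDF deleted per branch: 4 branches x 2 = 8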
    assertEquals(8, deleted);

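    // countHtmlOnly: articles left with no PDF role (their PDF was deleted);
    // countPdfOnly: articles whose full-text CU is the PDF (their HTML was deleted)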
    Iterator<ArticleFiles> it = nau.getArticleIterator();
    int count = 0;
    int countHtmlOnly = 0;
    int countPdfOnly = 0;
    while (it.hasNext()) {
      ArticleFiles af = it.next();
      log.info(af.toString());
      CachedUrl cu = af.getFullTextCu();
      assertNotNull(cu);
      String url = cu.getUrl();
      String contentType = cu.getContentType();
      log.debug("count " + count + " url " + url + " " + contentType);
      count++;
      if (af.getRoleUrl(ArticleFiles.ROLE_FULL_TEXT_PDF) == null) {
        ++countHtmlOnly;
      }
      if (url.equals(af.getRoleUrl(ArticleFiles.ROLE_FULL_TEXT_PDF))) {
        ++countPdfOnly;
      }
    }
    log.debug("Article count is " + count);
    assertEquals(expCount, count);
    assertEquals(4, countHtmlOnly);
    assertEquals(4, countPdfOnly);
  }

  //  public void testArticleCountAndDefaultType() throws Exception {
  //    testArticleCountAndType("text/html", true, 24);
  //  }
  //
  //  public void testArticleCountAndPdf() throws Exception {
  //    testArticleCountAndType("application/pdf", false, 0);
  //  }

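  /** Deletes a single URL's content node from the repository. */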
  private void deleteBlock(CachedUrl cu) throws IOException {
    log.info("deleting " + cu.getUrl());
    CachedUrlSetSpec cuss = new SingleNodeCachedUrlSetSpec(cu.getUrl());
    ArchivalUnit au = cu.getArchivalUnit();
    CachedUrlSet cus = au.makeCachedUrlSet(cuss);
    NodeManager nm = au.getPlugin().getDaemon().getNodeManager(au);
    nm.deleteNode(cus);
  }
}
/** Functional tests on the simulated content generator. */
public class FuncSimulatedContent extends LockssTestCase {
  static final Logger log = Logger.getLogger("FuncSimulatedContent");

  private PluginManager pluginMgr;
  private Plugin simPlugin;
  private SimulatedArchivalUnit sau1;
  private SimulatedContentGenerator scgen = null;
  private MockLockssDaemon theDaemon;
  String tempDirPath;
  String tempDirPath2;

  private static final String DAMAGED_CACHED_URL = "/branch2/branch2/002file.txt";

  public FuncSimulatedContent(String msg) {
    super(msg);
  }

  public void setUp() throws Exception {
    super.setUp();
    tempDirPath = getTempDir().getAbsolutePath() + File.separator;

    theDaemon = getMockLockssDaemon();
    theDaemon.getAlertManager();
    theDaemon.getPluginManager().setLoadablePluginsReady(true);
    theDaemon.getHashService();
    MockSystemMetrics metrics = new MyMockSystemMetrics();
    metrics.initService(theDaemon);
    theDaemon.setSystemMetrics(metrics);

    theDaemon.setDaemonInited(true);

    Properties props = new Properties();
    props.setProperty(SystemMetrics.PARAM_HASH_TEST_DURATION, "1000");
    props.setProperty(SystemMetrics.PARAM_HASH_TEST_BYTE_STEP, "1024");
    props.setProperty(ConfigManager.PARAM_PLATFORM_DISK_SPACE_LIST, tempDirPath);
    ConfigurationUtil.setCurrentConfigFromProps(props);

    pluginMgr = theDaemon.getPluginManager();
    pluginMgr.startService();
    theDaemon.getHashService().startService();
    metrics.startService();
    metrics.setHashSpeed(100);

    simPlugin = PluginTestUtil.findPlugin(SimulatedPlugin.class);
  }

  public void tearDown() throws Exception {
    theDaemon.getLockssRepository(sau1).stopService();
    theDaemon.getNodeManager(sau1).stopService();
    theDaemon.getPluginManager().stopService();
    theDaemon.getHashService().stopService();
    theDaemon.getSystemMetrics().stopService();
    theDaemon.stopDaemon();
    super.tearDown();
  }

  SimulatedArchivalUnit setupSimAu(Configuration auConfig)
      throws ArchivalUnit.ConfigurationException {
    ArchivalUnit au = PluginTestUtil.createAndStartAu(simPlugin, auConfig);
    return (SimulatedArchivalUnit) au;
  }

  Configuration simAuConfig(String rootPath) {
    Configuration conf = ConfigManager.newConfiguration();
    conf.put("root", rootPath);
    conf.put("depth", "2");
    conf.put("branch", "2");
    conf.put("numFiles", "2");
    conf.put("badCachedFileLoc", "2,2");
    conf.put("badCachedFileNum", "2");
    return conf;
  }

  void enableFilter(SimulatedArchivalUnit sau, boolean enable)
      throws ArchivalUnit.ConfigurationException {
    Configuration auConfig = sau.getConfiguration().copy();
    // no bad file when playing with filtering
    auConfig.remove("badCachedFileLoc");
    auConfig.remove("badCachedFileNum");
    if (enable) {
      auConfig.put(SimulatedPlugin.AU_PARAM_HASH_FILTER_SPEC, "true");
    } else {
      auConfig.remove(SimulatedPlugin.AU_PARAM_HASH_FILTER_SPEC);
    }
    sau.setConfiguration(auConfig);
  }

  public void testSimulatedContent() throws Exception {
    sau1 = setupSimAu(simAuConfig(tempDirPath));
    createContent(sau1);
    crawlContent(sau1);
    checkContent(sau1);
    doDamageRemoveTest(sau1); // must be before content read again
    checkFilter(sau1);
    // hashContent() resets the AU's config via enableFilter(); do it last to
    // avoid messing up the toBeDamaged set
    hashContent(sau1);
  }

  public void testDualContentHash() throws Exception {
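    // Two AUs generated from identical configs (in different temp dirs) should
    // produce identical name and content hashes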
    sau1 = setupSimAu(simAuConfig(tempDirPath));
    createContent(sau1);
    crawlContent(sau1);
    CachedUrlSet set = sau1.getAuCachedUrlSet();
    byte[] nameH = getHash(set, true);
    byte[] contentH = getHash(set, false);

    tempDirPath2 = getTempDir().getAbsolutePath() + File.separator;
    SimulatedArchivalUnit sau2 = setupSimAu(simAuConfig(tempDirPath2));

    createContent(sau2);
    crawlContent(sau2);
    set = sau2.getAuCachedUrlSet();
    byte[] nameH2 = getHash(set, true);
    byte[] contentH2 = getHash(set, false);
    assertEquals(nameH, nameH2);
    assertEquals(contentH, contentH2);
  }

  public void testBaseUrl() throws Exception {
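    // Changing the base URL should change only the host part of each stored
    // URL, leaving the paths identical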
    sau1 = setupSimAu(simAuConfig(tempDirPath));
    createContent(sau1);
    crawlContent(sau1);

    tempDirPath2 = getTempDir().getAbsolutePath() + File.separator;
    Configuration config2 = simAuConfig(tempDirPath2);
    config2.put("base_url", "http://anotherhost.org/");
    SimulatedArchivalUnit sau2 = setupSimAu(config2);
    createContent(sau2);
    crawlContent(sau2);

    Pattern pat = Pattern.compile("http://([^/]+)(/.*)$");
    List<String> l1 = auUrls(sau1);
    List<String> l2 = auUrls(sau2);
    assertEquals(l1.size(), l2.size());
    for (int ix = 0; ix < l1.size(); ix++) {
      Matcher m1 = pat.matcher(l1.get(ix));
      assertTrue(m1.matches());
      Matcher m2 = pat.matcher(l2.get(ix));
      assertTrue(m2.matches());
      assertEquals("www.example.com", m1.group(1));
      assertEquals("anotherhost.org", m2.group(1));
      assertEquals(m1.group(2), m2.group(2));
    }
  }

  public void testBaseUrlPath() throws Exception {
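    // A base URL with a path component should leave the URL tails identical
    // relative to that prefix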
    sau1 = setupSimAu(simAuConfig(tempDirPath));
    createContent(sau1);
    crawlContent(sau1);

    tempDirPath2 = getTempDir().getAbsolutePath() + File.separator;
    Configuration config2 = simAuConfig(tempDirPath2);
    config2.put("base_url", "http://anotherhost.org/some/path/");
    SimulatedArchivalUnit sau2 = setupSimAu(config2);
    createContent(sau2);
    crawlContent(sau2);

    Pattern pat1 = Pattern.compile("http://www\\.example\\.com(/.*)$");
    Pattern pat2 = Pattern.compile("http://anotherhost\\.org/some/path(/.*)$");
    List<String> l1 = auUrls(sau1);
    List<String> l2 = auUrls(sau2);
    assertEquals(l1.size(), l2.size());
    for (int ix = 0; ix < l1.size(); ix++) {
      Matcher m1 = pat1.matcher(l1.get(ix));
      assertTrue(m1.matches());
      Matcher m2 = pat2.matcher(l2.get(ix));
      assertTrue(m2.matches());
      assertEquals(m1.group(1), m2.group(1));
    }
  }

  List<String> auUrls(ArchivalUnit au) {
    List<String> res = new ArrayList<String>();
    for (Iterator iter = au.getAuCachedUrlSet().contentHashIterator(); iter.hasNext(); ) {
      CachedUrlSetNode cusn = (CachedUrlSetNode) iter.next();
      if (cusn.hasContent()) {
        res.add(cusn.getUrl());
      }
    }
    return res;
  }

  protected void createContent(SimulatedArchivalUnit sau) {
    log.debug("createContent()");
    scgen = sau.getContentGenerator();
    scgen.setFileTypes(
        SimulatedContentGenerator.FILE_TYPE_HTML + SimulatedContentGenerator.FILE_TYPE_TXT);
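    // Make file 1 under branch1/branch1 "abnormal"; checkStoredContent() verifies it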
    scgen.setAbnormalFile("1,1", 1);
    scgen.setOddBranchesHaveContent(true);

    sau.deleteContentTree();
    sau.generateContentTree();
    assertTrue(scgen.isContentTree());
  }

  protected void crawlContent(SimulatedArchivalUnit sau) {
    log.debug("crawlContent()");
    CrawlSpec spec = new SpiderCrawlSpec(sau.getNewContentCrawlUrls(), null);
    Crawler crawler = new NoCrawlEndActionsNewContentCrawler(sau, spec, new MockAuState());
    crawler.doCrawl();
  }

  protected void checkContent(SimulatedArchivalUnit sau) throws IOException {
    log.debug("checkContent()");
    checkRoot(sau);
    checkLeaf(sau);
    checkStoredContent(sau);
    checkDepth(sau);
  }

  protected void checkFilter(SimulatedArchivalUnit sau) throws Exception {
    log.debug("checkFilter()");
    CachedUrl cu = sau.makeCachedUrl(sau.getUrlRoot() + "/001file.html");

    enableFilter(sau, true);
    InputStream is = cu.openForHashing();
    String expected = "001file.html This is file 1, depth 0, branch 0. foobar ";
    assertEquals(expected, StringUtil.fromInputStream(is));
    is.close();
    enableFilter(sau, false);
    cu = sau.makeCachedUrl(sau.getUrlRoot() + "/001file.html");
    is = cu.openForHashing();
    expected =
        "<HTML><HEAD><TITLE>001file.html</TITLE></HEAD><BODY>\n"
            + "This is file 1, depth 0, branch 0.<br><!-- comment -->    "
            + "Citation String   foobar<br><script>"
            + "(defun fact (n) (cond ((= n 0) 1) (t (fact (sub1 n)))))</script>\n"
            + "</BODY></HTML>";
    assertEquals(expected, StringUtil.fromInputStream(is));
    is.close();
  }

  private byte[] fromHex(String hex) {
    return ByteArray.fromHexString(hex);
  }

  protected void hashContent(SimulatedArchivalUnit sau) throws Exception {
    log.debug("hashContent()");
    measureHashSpeed(sau);

    // If any changes are made to the contents or shape of the simulated
    // content tree, these hash values will have to be changed
    checkHashSet(sau, true, false, fromHex("6AB258B4E1FFD9F9B45316B4F54111FF5E5948D2"));
    checkHashSet(sau, true, true, fromHex("6AB258B4E1FFD9F9B45316B4F54111FF5E5948D2"));
    checkHashSet(sau, false, false, fromHex("409893F1A603F4C276632694DB1621B639BD5164"));
    checkHashSet(sau, false, true, fromHex("85E6213C3771BEAC5A4602CAF7982C6C222800D5"));
  }

  protected void checkDepth(SimulatedArchivalUnit sau) {
    log.debug("checkDepth()");
    String URL_ROOT = sau.getUrlRoot();
    assertEquals(0, sau.getLinkDepth(URL_ROOT + "/index.html"));
    assertEquals(0, sau.getLinkDepth(URL_ROOT + "/"));
    assertEquals(1, sau.getLinkDepth(URL_ROOT + "/001file.html"));
    assertEquals(1, sau.getLinkDepth(URL_ROOT + "/branch1/index.html"));
    assertEquals(1, sau.getLinkDepth(URL_ROOT + "/branch1/"));
    assertEquals(2, sau.getLinkDepth(URL_ROOT + "/branch1/001file.html"));
  }

  protected void checkRoot(SimulatedArchivalUnit sau) {
    log.debug("checkRoot()");
    CachedUrlSet set = sau.getAuCachedUrlSet();
    Iterator setIt = set.flatSetIterator();
    ArrayList childL = new ArrayList(1);
    CachedUrlSet cus = null;
    while (setIt.hasNext()) {
      cus = (CachedUrlSet) setIt.next();
      childL.add(cus.getUrl());
    }

    String urlRoot = sau.getUrlRoot();

    String[] expectedA = new String[1];
    expectedA[0] = urlRoot;
    assertIsomorphic(expectedA, childL);

    setIt = cus.flatSetIterator();
    childL = new ArrayList(7);
    while (setIt.hasNext()) {
      childL.add(((CachedUrlSetNode) setIt.next()).getUrl());
    }

    expectedA =
        new String[] {
          urlRoot + "/001file.html",
          urlRoot + "/001file.txt",
          urlRoot + "/002file.html",
          urlRoot + "/002file.txt",
          urlRoot + "/branch1",
          urlRoot + "/branch2",
          urlRoot + "/index.html"
        };
    assertIsomorphic(expectedA, childL);
  }

  protected void checkLeaf(SimulatedArchivalUnit sau) {
    log.debug("checkLeaf()");
    String parent = sau.getUrlRoot() + "/branch1";
    CachedUrlSetSpec spec = new RangeCachedUrlSetSpec(parent);
    CachedUrlSet set = sau.makeCachedUrlSet(spec);
    Iterator setIt = set.contentHashIterator();
    ArrayList childL = new ArrayList(16);
    while (setIt.hasNext()) {
      childL.add(((CachedUrlSetNode) setIt.next()).getUrl());
    }
    String[] expectedA =
        new String[] {
          parent,
          parent + "/001file.html",
          parent + "/001file.txt",
          parent + "/002file.html",
          parent + "/002file.txt",
          parent + "/branch1",
          parent + "/branch1/001file.html",
          parent + "/branch1/001file.txt",
          parent + "/branch1/002file.html",
          parent + "/branch1/002file.txt",
          parent + "/branch1/index.html",
          parent + "/branch2",
          parent + "/branch2/001file.html",
          parent + "/branch2/001file.txt",
          parent + "/branch2/002file.html",
          parent + "/branch2/002file.txt",
          parent + "/branch2/index.html",
          parent + "/index.html",
        };
    assertIsomorphic(expectedA, childL);
  }

  protected void checkUrlContent(
      SimulatedArchivalUnit sau,
      String path,
      int fileNum,
      int depth,
      int branchNum,
      boolean isAbnormal,
      boolean isDamaged)
      throws IOException {
    String file = sau.getUrlRoot() + path;
    CachedUrl url = sau.makeCachedUrl(file);
    String content = getUrlContent(url);
    String expectedContent;
    if (path.endsWith(".html")) {
      String fn = path.substring(path.lastIndexOf("/") + 1);
      expectedContent = scgen.getHtmlFileContent(fn, fileNum, depth, branchNum, isAbnormal);
    } else {
      expectedContent = scgen.getTxtContent(fileNum, depth, branchNum, isAbnormal);
    }
    if (isDamaged) {
      assertNotEquals(expectedContent, content);
    } else {
      assertEquals(expectedContent, content);
    }
  }

  protected void checkStoredContent(SimulatedArchivalUnit sau) throws IOException {
    checkUrlContent(sau, "/001file.txt", 1, 0, 0, false, false);
    checkUrlContent(sau, "/branch1/branch1/001file.txt", 1, 2, 1, true, false);
    checkUrlContent(sau, DAMAGED_CACHED_URL, 2, 2, 2, false, true);
  }

  protected void doDamageRemoveTest(SimulatedArchivalUnit sau) throws Exception {
    /* Cache the file again; this time the damage should be gone */
    String file = sau.getUrlRoot() + DAMAGED_CACHED_URL;
    UrlCacher uc = sau.makeUrlCacher(file);
    BitSet fetchFlags = new BitSet();
    fetchFlags.set(UrlCacher.REFETCH_FLAG);
    uc.setFetchFlags(fetchFlags);
    uc.cache();
    checkUrlContent(sau, DAMAGED_CACHED_URL, 2, 2, 2, false, false);
  }

  private void measureHashSpeed(SimulatedArchivalUnit sau) throws Exception {
    MessageDigest dig = null;
    try {
      dig = MessageDigest.getInstance("SHA-1");
    } catch (NoSuchAlgorithmException ex) {
      fail("No algorithm.");
    }
    CachedUrlSet set = sau.getAuCachedUrlSet();
    CachedUrlSetHasher hasher = set.getContentHasher(dig);
    SystemMetrics metrics = theDaemon.getSystemMetrics();
    int estimate = metrics.getBytesPerMsHashEstimate(hasher, dig);
    // should be protected against this being zero by MyMockSystemMetrics,
    // but otherwise use the proper calculation.  This avoids test failure
    // due to really slow machines
    assertTrue(estimate > 0);
    long estimatedTime = set.estimatedHashDuration();
    long size = ((Long) PrivilegedAccessor.getValue(set, "totalNodeSize")).longValue();
    assertTrue(size > 0);
    System.out.println("b/ms: " + estimate);
    System.out.println("size: " + size);
    System.out.println("estimate: " + estimatedTime);
    assertEquals(estimatedTime, theDaemon.getHashService().padHashEstimate(size / estimate));
  }

  private void checkHashSet(
      SimulatedArchivalUnit sau, boolean namesOnly, boolean filter, byte[] expected)
      throws Exception {
    enableFilter(sau, filter);
    CachedUrlSet set = sau.getAuCachedUrlSet();
    byte[] hash = getHash(set, namesOnly);
    assertEquals(expected, hash);

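    // Hashing only the branch1 subtree must yield a different digest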
    String parent = sau.getUrlRoot() + "/branch1";
    CachedUrlSetSpec spec = new RangeCachedUrlSetSpec(parent);
    set = sau.makeCachedUrlSet(spec);
    byte[] hash2 = getHash(set, namesOnly);
    assertFalse(Arrays.equals(hash, hash2));
  }

  private byte[] getHash(CachedUrlSet set, boolean namesOnly) throws IOException {
    MessageDigest dig = null;
    try {
      dig = MessageDigest.getInstance("SHA-1");
    } catch (NoSuchAlgorithmException ex) {
      fail("No algorithm.");
    }
    hash(set, dig, namesOnly);
    return dig.digest();
  }

  private void hash(CachedUrlSet set, MessageDigest dig, boolean namesOnly) throws IOException {
    CachedUrlSetHasher hasher = null;
    if (namesOnly) {
      hasher = set.getNameHasher(dig);
    } else {
      hasher = set.getContentHasher(dig);
    }
    int bytesHashed = 0;
    long timeTaken = System.currentTimeMillis();
    while (!hasher.finished()) {
      bytesHashed += hasher.hashStep(256);
    }
    timeTaken = System.currentTimeMillis() - timeTaken;
    if ((timeTaken > 0) && (bytesHashed > 500)) {
      System.out.println("Bytes hashed: " + bytesHashed);
      System.out.println("Time taken: " + timeTaken + "ms");
      System.out.println("Bytes/sec: " + (bytesHashed * 1000 / timeTaken));
    } else {
      System.out.println("No time taken, or insufficient bytes hashed.");
      System.out.println("Bytes hashed: " + bytesHashed);
      System.out.println("Time taken: " + timeTaken + "ms");
    }
  }

  private String getUrlContent(CachedUrl url) throws IOException {
    InputStream content = url.getUnfilteredInputStream();
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    StreamUtil.copy(content, baos);
    content.close();
    String contentStr = new String(baos.toByteArray());
    baos.close();
    return contentStr;
  }

  // this version doesn't fully override the 'measureHashSpeed()' function, but
  // protects against it returning '0' by returning the set speed
  private class MyMockSystemMetrics extends MockSystemMetrics {
    public int measureHashSpeed(CachedUrlSetHasher hasher, MessageDigest digest)
        throws IOException {
      int speed = super.measureHashSpeed(hasher, digest);
      if (speed == 0) {
        speed = getHashSpeed();
        if (speed <= 0) {
          throw new RuntimeException("No hash speed set.");
        }
      }
      return speed;
    }
  }

  public static void main(String[] argv) {
    String[] testCaseList = {FuncSimulatedContent.class.getName()};
    junit.swingui.TestRunner.main(testCaseList);
  }

  public static Test suite() {
    return new TestSuite(FuncSimulatedContent.class);
  }
}