public void testExtractGoodHtmlContent() throws Exception {

    List<ArticleMetadata> mdlist = setupContentForAU(bau1, ABS_URL, goodHtmlContent, true);
    assertNotEmpty(mdlist);
    ArticleMetadata md = mdlist.get(0);
    assertNotNull(md);
    assertEquals(goodPublisher, md.get(MetadataField.FIELD_PUBLISHER));
    assertEquals(goodTitle, md.get(MetadataField.DC_FIELD_TITLE));
    assertEquals(goodDate, md.get(MetadataField.FIELD_DATE));
    assertEquals(goodFormat, md.get(MetadataField.DC_FIELD_FORMAT));
    assertEquals(goodType, md.get(MetadataField.DC_FIELD_TYPE));
    assertEquals(Arrays.asList(goodAuthors), md.getList(MetadataField.FIELD_AUTHOR));
    assertEquals(goodAuthors[0], md.get(MetadataField.DC_FIELD_CREATOR));
  }
  private void checkHashSet(
      SimulatedArchivalUnit sau, boolean namesOnly, boolean filter, byte[] expected)
      throws Exception {
    enableFilter(sau, filter);
    CachedUrlSet set = sau.getAuCachedUrlSet();
    byte[] hash = getHash(set, namesOnly);
    assertEquals(expected, hash);

    String parent = sau.getUrlRoot() + "/branch1";
    CachedUrlSetSpec spec = new RangeCachedUrlSetSpec(parent);
    set = sau.makeCachedUrlSet(spec);
    byte[] hash2 = getHash(set, namesOnly);
    assertFalse(Arrays.equals(hash, hash2));
  }