public void testExtractGoodHtmlContent() throws Exception { List<ArticleMetadata> mdlist = setupContentForAU(bau1, ABS_URL, goodHtmlContent, true); assertNotEmpty(mdlist); ArticleMetadata md = mdlist.get(0); assertNotNull(md); assertEquals(goodPublisher, md.get(MetadataField.FIELD_PUBLISHER)); assertEquals(goodTitle, md.get(MetadataField.DC_FIELD_TITLE)); assertEquals(goodDate, md.get(MetadataField.FIELD_DATE)); assertEquals(goodFormat, md.get(MetadataField.DC_FIELD_FORMAT)); assertEquals(goodType, md.get(MetadataField.DC_FIELD_TYPE)); assertEquals(Arrays.asList(goodAuthors), md.getList(MetadataField.FIELD_AUTHOR)); assertEquals(goodAuthors[0], md.get(MetadataField.DC_FIELD_CREATOR)); }
private void checkHashSet( SimulatedArchivalUnit sau, boolean namesOnly, boolean filter, byte[] expected) throws Exception { enableFilter(sau, filter); CachedUrlSet set = sau.getAuCachedUrlSet(); byte[] hash = getHash(set, namesOnly); assertEquals(expected, hash); String parent = sau.getUrlRoot() + "/branch1"; CachedUrlSetSpec spec = new RangeCachedUrlSetSpec(parent); set = sau.makeCachedUrlSet(spec); byte[] hash2 = getHash(set, namesOnly); assertFalse(Arrays.equals(hash, hash2)); }