public void testFunctionalFromTarHierarchy() throws Exception { log.debug3("in testFromTarHierarchy"); // load the tarballs InputStream file_input = null; try { file_input = getResourceAsStream(realTARFile_A); // UrlCacher uc = au.makeUrlCacher(TAR_A_BASE); // uc.storeContent(file_input, tarHeader); UrlCacher uc = tarAu.makeUrlCacher(new UrlData(file_input, tarHeader, TAR_A_BASE)); uc.storeContent(); IOUtil.safeClose(file_input); file_input = getResourceAsStream(realTARFile_B); // uc = au.makeUrlCacher(TAR_B_BASE); // uc.storeContent(file_input, tarHeader); uc = tarAu.makeUrlCacher(new UrlData(file_input, tarHeader, TAR_B_BASE)); uc.storeContent(); IOUtil.safeClose(file_input); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } finally { IOUtil.safeClose(file_input); } CachedUrlSet cus = tarAu.getAuCachedUrlSet(); for (CachedUrl cu : cus.getCuIterable()) { log.debug3("AU - cu is: " + cu.getUrl()); cu.release(); } // We need to start from the level of the ArticleMetadataExtractor MyListEmitter emitter = new MyListEmitter(); ArticleMetadataExtractor amEx = new ElsevierDeferredArticleMetadataExtractor(ArticleFiles.ROLE_ARTICLE_METADATA); Iterator<ArticleFiles> it = tarAu.getArticleIterator(MetadataTarget.Any()); while (it.hasNext()) { ArticleFiles af = it.next(); log.debug3("Metadata test - articlefiles " + af.toString()); // CachedUrl cu = af.getFullTextCu(); CachedUrl cu = af.getRoleCu(ArticleFiles.ROLE_ARTICLE_METADATA); log.debug3("metadata cu is " + cu.getUrl()); // List<ArticleMetadata> mdlist = mle.extract(MetadataTarget.Any(), cu); amEx.extract(MetadataTarget.Any(), af, emitter); List<ArticleMetadata> returnList = emitter.getAmList(); assertNotNull(returnList); log.debug3("size of returnList is " + returnList.size()); Iterator<ArticleMetadata> mdIt = returnList.iterator(); ArticleMetadata mdRecord = null; while (mdIt.hasNext()) { mdRecord = (ArticleMetadata) mdIt.next(); validateCompleteMetadataRecord(mdRecord); } } }
public void testCreateArticleFiles() throws Exception { PluginTestUtil.crawlSimAu(sau); String pat0 = "001file[.]html"; String rep0 = "52/1/S1"; PluginTestUtil.copyAu(sau, au, ".*[.]html$", pat0, rep0); String pat1 = "001file[.]pdf"; String rep1 = "52/1/S1.pdf"; PluginTestUtil.copyAu(sau, au, ".*[.]pdf$", pat1, rep1); String pdfurl = "http://pediatrics.aappublications.org/cgi/reprint/52/1/S1.pdf"; String url = "http://pediatrics.aappublications.org/cgi/reprint/52/1/S1"; au.makeCachedUrl(url); CachedUrl cu = au.makeCachedUrl(pdfurl); assertNotNull(cu); SubTreeArticleIterator artIter = createSubTreeIter(); assertNotNull(artIter); ArticleFiles af = artIter.next(); assertNotNull(af); System.out.println("article files::" + af); assertEquals(url, af.getRoleCu(ArticleFiles.ROLE_FULL_TEXT_PDF_LANDING_PAGE).getUrl()); assertEquals(pdfurl, af.getRoleCu(ArticleFiles.ROLE_FULL_TEXT_PDF).getUrl()); }