コード例 #1
0
  /* private support methods */
  private List<ArticleMetadata> setupContentForAU(
      ArchivalUnit au, String url, String content, boolean isHtmlExtractor)
      throws IOException, PluginException {
    FileMetadataExtractor me;

    InputStream input = null;
    CIProperties props = null;
    if (isHtmlExtractor) {
      input = IOUtils.toInputStream(content, "utf-8");
      props = getContentHtmlProperties();
      me =
          new BaseAtyponHtmlMetadataExtractorFactory()
              .createFileMetadataExtractor(MetadataTarget.Any(), "text/html");
    } else {
      input = IOUtils.toInputStream(content, "utf-8");
      props = getContentRisProperties();
      me =
          new BaseAtyponRisMetadataExtractorFactory()
              .createFileMetadataExtractor(MetadataTarget.Any(), "text/plain");
    }
    UrlData ud = new UrlData(input, props, url);
    UrlCacher uc = au.makeUrlCacher(ud);
    uc.storeContent();
    CachedUrl cu = uc.getCachedUrl();
    FileMetadataListExtractor mle = new FileMetadataListExtractor(me);
    return mle.extract(MetadataTarget.Any(), cu);
  }
コード例 #2
0
  public void testFunctionalFromTarHierarchy() throws Exception {
    log.debug3("in testFromTarHierarchy");
    // load the tarballs
    InputStream file_input = null;
    try {
      file_input = getResourceAsStream(realTARFile_A);
      // UrlCacher uc = au.makeUrlCacher(TAR_A_BASE);
      // uc.storeContent(file_input, tarHeader);
      UrlCacher uc = tarAu.makeUrlCacher(new UrlData(file_input, tarHeader, TAR_A_BASE));
      uc.storeContent();
      IOUtil.safeClose(file_input);

      file_input = getResourceAsStream(realTARFile_B);
      // uc = au.makeUrlCacher(TAR_B_BASE);
      // uc.storeContent(file_input, tarHeader);
      uc = tarAu.makeUrlCacher(new UrlData(file_input, tarHeader, TAR_B_BASE));
      uc.storeContent();
      IOUtil.safeClose(file_input);

    } catch (IOException e) {
      // TODO Auto-generated catch block
      e.printStackTrace();
    } finally {
      IOUtil.safeClose(file_input);
    }

    CachedUrlSet cus = tarAu.getAuCachedUrlSet();
    for (CachedUrl cu : cus.getCuIterable()) {
      log.debug3("AU - cu is: " + cu.getUrl());
      cu.release();
    }

    // We need to start from the level of the ArticleMetadataExtractor
    MyListEmitter emitter = new MyListEmitter();
    ArticleMetadataExtractor amEx =
        new ElsevierDeferredArticleMetadataExtractor(ArticleFiles.ROLE_ARTICLE_METADATA);

    Iterator<ArticleFiles> it = tarAu.getArticleIterator(MetadataTarget.Any());
    while (it.hasNext()) {
      ArticleFiles af = it.next();
      log.debug3("Metadata test - articlefiles " + af.toString());
      // CachedUrl cu = af.getFullTextCu();
      CachedUrl cu = af.getRoleCu(ArticleFiles.ROLE_ARTICLE_METADATA);
      log.debug3("metadata cu is " + cu.getUrl());
      // List<ArticleMetadata> mdlist = mle.extract(MetadataTarget.Any(), cu);
      amEx.extract(MetadataTarget.Any(), af, emitter);
      List<ArticleMetadata> returnList = emitter.getAmList();

      assertNotNull(returnList);
      log.debug3("size of returnList is " + returnList.size());
      Iterator<ArticleMetadata> mdIt = returnList.iterator();
      ArticleMetadata mdRecord = null;
      while (mdIt.hasNext()) {
        mdRecord = (ArticleMetadata) mdIt.next();
        validateCompleteMetadataRecord(mdRecord);
      }
    }
  }
コード例 #3
0
 protected void doDamageRemoveTest(SimulatedArchivalUnit sau) throws Exception {
   /* Cache the file again; this time the damage should be gone */
   String file = sau.getUrlRoot() + DAMAGED_CACHED_URL;
   UrlCacher uc = sau.makeUrlCacher(file);
   BitSet fetchFlags = new BitSet();
   fetchFlags.set(UrlCacher.REFETCH_FLAG);
   uc.setFetchFlags(fetchFlags);
   uc.cache();
   checkUrlContent(sau, DAMAGED_CACHED_URL, 2, 2, 2, false, false);
 }