protected void checkLeaf(SimulatedArchivalUnit sau) {
   log.debug("checkLeaf()");
   String parent = sau.getUrlRoot() + "/branch1";
   CachedUrlSetSpec spec = new RangeCachedUrlSetSpec(parent);
   CachedUrlSet set = sau.makeCachedUrlSet(spec);
   Iterator setIt = set.contentHashIterator();
   List<String> childL = new ArrayList<String>(16);
   while (setIt.hasNext()) {
     childL.add(((CachedUrlSetNode) setIt.next()).getUrl());
   }
   String[] expectedA =
       new String[] {
         parent,
         parent + "/001file.html",
         parent + "/001file.txt",
         parent + "/002file.html",
         parent + "/002file.txt",
         parent + "/branch1",
         parent + "/branch1/001file.html",
         parent + "/branch1/001file.txt",
         parent + "/branch1/002file.html",
         parent + "/branch1/002file.txt",
         parent + "/branch1/index.html",
         parent + "/branch2",
         parent + "/branch2/001file.html",
         parent + "/branch2/001file.txt",
         parent + "/branch2/002file.html",
         parent + "/branch2/002file.txt",
         parent + "/branch2/index.html",
         parent + "/index.html",
       };
   assertIsomorphic(expectedA, childL);
 }
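
Because the spec above is built with only a prefix, the iterator walks the entire branch1 subtree. For contrast, here is a minimal sketch of the bounded form, assuming RangeCachedUrlSetSpec's three-argument (prefix, lower bound, upper bound) constructor; the method name and the bound strings are purely illustrative:

 protected void checkLeafRange(SimulatedArchivalUnit sau) {
   log.debug("checkLeafRange()");
   String parent = sau.getUrlRoot() + "/branch1";
   // Only URLs under parent that fall between the two bounds are iterated.
   CachedUrlSetSpec spec =
       new RangeCachedUrlSetSpec(parent, "/001file.html", "/002file.txt");
   CachedUrlSet set = sau.makeCachedUrlSet(spec);
   for (Iterator setIt = set.contentHashIterator(); setIt.hasNext(); ) {
     log.debug(((CachedUrlSetNode) setIt.next()).getUrl());
   }
 }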
  protected void checkRoot(SimulatedArchivalUnit sau) {
    log.debug("checkRoot()");
    CachedUrlSet set = sau.getAuCachedUrlSet();
    Iterator setIt = set.flatSetIterator();
     List<String> childL = new ArrayList<String>(1);
    CachedUrlSet cus = null;
    while (setIt.hasNext()) {
      cus = (CachedUrlSet) setIt.next();
      childL.add(cus.getUrl());
    }

    String urlRoot = sau.getUrlRoot();

    String[] expectedA = new String[1];
    expectedA[0] = urlRoot;
    assertIsomorphic(expectedA, childL);

    setIt = cus.flatSetIterator();
     childL = new ArrayList<String>(7);
    while (setIt.hasNext()) {
      childL.add(((CachedUrlSetNode) setIt.next()).getUrl());
    }

    expectedA =
        new String[] {
          urlRoot + "/001file.html",
          urlRoot + "/001file.txt",
          urlRoot + "/002file.html",
          urlRoot + "/002file.txt",
          urlRoot + "/branch1",
          urlRoot + "/branch2",
          urlRoot + "/index.html"
        };
    assertIsomorphic(expectedA, childL);
  }
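
checkLeaf and checkRoot repeat the same collect-the-URLs loop. A small utility like the following (hypothetical, not part of the original class) would factor it out; it covers both loops because CachedUrlSet extends CachedUrlSetNode:

 /** Collect the URLs yielded by an iterator of CachedUrlSetNodes.
  *  Hypothetical helper, assuming every element is a CachedUrlSetNode. */
 private static List<String> collectUrls(Iterator it) {
   List<String> urls = new ArrayList<String>();
   while (it.hasNext()) {
     urls.add(((CachedUrlSetNode) it.next()).getUrl());
   }
   return urls;
 }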
  public void testFunctionalFromTarHierarchy() throws Exception {
    log.debug3("in testFromTarHierarchy");
    // load the tarballs
    InputStream fileInput = null;
    try {
      fileInput = getResourceAsStream(realTARFile_A);
      UrlCacher uc = tarAu.makeUrlCacher(new UrlData(fileInput, tarHeader, TAR_A_BASE));
      uc.storeContent();
      IOUtil.safeClose(fileInput);

      fileInput = getResourceAsStream(realTARFile_B);
      uc = tarAu.makeUrlCacher(new UrlData(fileInput, tarHeader, TAR_B_BASE));
      uc.storeContent();
      IOUtil.safeClose(fileInput);
    } catch (IOException e) {
      // Storing the tarballs is a precondition for the rest of the test
      fail("Unable to store tar file content: " + e);
    } finally {
      IOUtil.safeClose(fileInput);
    }

    CachedUrlSet cus = tarAu.getAuCachedUrlSet();
    for (CachedUrl cu : cus.getCuIterable()) {
      log.debug3("AU - cu is: " + cu.getUrl());
      cu.release();
    }

    // We need to start from the level of the ArticleMetadataExtractor
    MyListEmitter emitter = new MyListEmitter();
    ArticleMetadataExtractor amEx =
        new ElsevierDeferredArticleMetadataExtractor(ArticleFiles.ROLE_ARTICLE_METADATA);

    Iterator<ArticleFiles> it = tarAu.getArticleIterator(MetadataTarget.Any());
    while (it.hasNext()) {
      ArticleFiles af = it.next();
      log.debug3("Metadata test - articlefiles " + af.toString());
      CachedUrl cu = af.getRoleCu(ArticleFiles.ROLE_ARTICLE_METADATA);
      log.debug3("metadata cu is " + cu.getUrl());
      amEx.extract(MetadataTarget.Any(), af, emitter);
      List<ArticleMetadata> returnList = emitter.getAmList();

      assertNotNull(returnList);
      log.debug3("size of returnList is " + returnList.size());
      for (ArticleMetadata mdRecord : returnList) {
        validateCompleteMetadataRecord(mdRecord);
      }
    }
  }
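
The test depends on MyListEmitter, which is not shown here. A minimal sketch of what it plausibly looks like, assuming the standard ArticleMetadataExtractor.Emitter callback interface; the null guard is a defensive assumption:

class MyListEmitter implements ArticleMetadataExtractor.Emitter {
  private final List<ArticleMetadata> amList = new ArrayList<ArticleMetadata>();

  /** Collect each metadata record the extractor emits. */
  public void emitMetadata(ArticleFiles af, ArticleMetadata md) {
    if (md != null) {  // defensive assumption, not from the original test
      amList.add(md);
    }
  }

  public List<ArticleMetadata> getAmList() {
    return amList;
  }
}

Note that the test constructs a single emitter outside the article loop, so a collector like this accumulates records across all articles rather than per article.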
Example #4
 /** Setup common to most constructors */
 private void commonSetup(CachedUrlSet cus, String lwrBound, String uprBound, int pollType) {
   CachedUrlSetSpec cuss = cus.getSpec();
   if (cuss instanceof PrunedCachedUrlSetSpec) {
     throw new IllegalArgumentException("Polls do not support PrunedCachedUrlSetSpec");
   }
   this.cus = cus;
   ArchivalUnit au = cus.getArchivalUnit();
   auId = au.getAuId();
   this.pluginVersion = AuUtil.getPollVersion(au);
   url = cuss.getUrl();
   this.lwrBound = lwrBound;
   this.uprBound = uprBound;
   this.protocolVersion = protocolVersionFromPollType(pollType);
   this.pollType = pollType;
 }
Example #5
 /**
  * Construct a PollSpec from a CachedUrlSet.
  *
   * @param cus the CachedUrlSet which defines the range of interest
  * @param pollType one of the types defined by Poll
  */
 public PollSpec(CachedUrlSet cus, int pollType) {
   CachedUrlSetSpec cuss = cus.getSpec();
   if (cuss instanceof RangeCachedUrlSetSpec) {
     RangeCachedUrlSetSpec rcuss = (RangeCachedUrlSetSpec) cuss;
     commonSetup(cus, rcuss.getLowerBound(), rcuss.getUpperBound(), pollType);
   } else if (cuss.isSingleNode()) {
     commonSetup(cus, SINGLE_NODE_LWRBOUND, null, pollType);
   } else {
     commonSetup(cus, null, null, pollType);
   }
 }
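
For illustration, a sketch of how the first two branches above are reached; the AU, the URL, the bounds, and the use of Poll.V3_POLL as the poll-type constant are all assumptions about the caller's context:

void makePollSpecs(ArchivalUnit au) {
  String url = "http://www.example.com/base";  // illustrative
  // A RangeCachedUrlSetSpec with bounds takes the first branch.
  CachedUrlSet ranged =
      au.makeCachedUrlSet(new RangeCachedUrlSetSpec(url, "/a", "/m"));
  PollSpec rangedSpec = new PollSpec(ranged, Poll.V3_POLL);
  // A SingleNodeCachedUrlSetSpec takes the isSingleNode() branch.
  CachedUrlSet single =
      au.makeCachedUrlSet(new SingleNodeCachedUrlSetSpec(url));
  PollSpec singleSpec = new PollSpec(single, Poll.V3_POLL);
}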
  /** Hash the set's names or its content, reporting throughput to stdout. */
  private void hash(CachedUrlSet set, MessageDigest dig, boolean namesOnly) throws IOException {
    // A name hasher covers only the URL names; a content hasher also covers the bytes.
    CachedUrlSetHasher hasher =
        namesOnly ? set.getNameHasher(dig) : set.getContentHasher(dig);
   int bytesHashed = 0;
   long timeTaken = System.currentTimeMillis();
   while (!hasher.finished()) {
     bytesHashed += hasher.hashStep(256);
   }
   timeTaken = System.currentTimeMillis() - timeTaken;
   if ((timeTaken > 0) && (bytesHashed > 500)) {
     System.out.println("Bytes hashed: " + bytesHashed);
     System.out.println("Time taken: " + timeTaken + "ms");
     System.out.println("Bytes/sec: " + (bytesHashed * 1000 / timeTaken));
   } else {
     System.out.println("No time taken, or insufficient bytes hashed.");
     System.out.println("Bytes hashed: " + bytesHashed);
     System.out.println("Time taken: " + timeTaken + "ms");
   }
 }
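
A sketch of how the helper might be driven, hashing names first and then content; a fresh digest is used for each pass so the two hashes don't mix state:

  private void hashNamesThenContent(CachedUrlSet set) throws Exception {
    // Names-only pass: hashes just the URL names in the set.
    hash(set, MessageDigest.getInstance("SHA-1"), true);
    // Content pass: hashes the stored bytes as well.
    hash(set, MessageDigest.getInstance("SHA-1"), false);
  }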
 private void measureHashSpeed(SimulatedArchivalUnit sau) throws Exception {
   MessageDigest dig = null;
   try {
     dig = MessageDigest.getInstance("SHA-1");
   } catch (NoSuchAlgorithmException ex) {
     fail("No algorithm.");
   }
   CachedUrlSet set = sau.getAuCachedUrlSet();
   CachedUrlSetHasher hasher = set.getContentHasher(dig);
   SystemMetrics metrics = theDaemon.getSystemMetrics();
   int estimate = metrics.getBytesPerMsHashEstimate(hasher, dig);
    // MyMockSystemMetrics should guarantee a nonzero estimate; the assert
    // below guards against a zero divisor, which would otherwise make the
    // test fail spuriously on very slow machines.
   assertTrue(estimate > 0);
   long estimatedTime = set.estimatedHashDuration();
   long size = ((Long) PrivilegedAccessor.getValue(set, "totalNodeSize")).longValue();
   assertTrue(size > 0);
   System.out.println("b/ms: " + estimate);
   System.out.println("size: " + size);
   System.out.println("estimate: " + estimatedTime);
   assertEquals(estimatedTime, theDaemon.getHashService().padHashEstimate(size / estimate));
 }
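
As a worked example of the final assertion: with size = 1,000,000 bytes and estimate = 100 bytes/ms, size / estimate gives a raw duration of 10,000 ms, which padHashEstimate then pads by the hash service's safety margin before it is compared to the set's own estimate.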