コード例 #1
0
  /**
   * Fills the bundle obtained from {@code getBundleForDomain(domainFP)} with the given items,
   * stamps it with the highest page rank seen, and then calls {@code flushCurrentBundle}.
   *
   * <p>Each item is appended to the bundle's URL list and counted in {@code
   * currentDomainURLCount}. While that counter is at most 200, a tab-separated debug line (URL,
   * modified status, page rank) is also appended to {@code urlDebugURLWriter}. The counter is not
   * reset in this method.
   *
   * <p>The maximum starts at {@code 0.0f}, so an empty list yields a bundle page rank of zero.
   *
   * @param domainFP key used to look up the bundle via {@code getBundleForDomain}
   * @param items items to add to the bundle, in iteration order
   * @param reporter passed through unchanged to {@code flushCurrentBundle}
   * @throws IOException declared by this method; presumably raised by the debug writer or by the
   *     flush (confirm against those members)
   */
  void generateABundle(long domainFP, List<SegmentGeneratorItem> items, Reporter reporter)
      throws IOException {
    // Only the first entries counted for a domain are echoed to the debug writer.
    final int debugLineLimit = 200;

    final SegmentGeneratorItemBundle domainBundle = getBundleForDomain(domainFP);

    float highestRank = 0.0f;
    for (SegmentGeneratorItem entry : items) {
      domainBundle.getUrls().add(entry);
      currentDomainURLCount++;
      highestRank = Math.max(highestRank, entry.getPageRank());

      // Past the debug cap: the item is still bundled and ranked, just not logged.
      if (currentDomainURLCount > debugLineLimit) {
        continue;
      }

      String debugLine =
          entry.getUrl() + "\t" + entry.getModifiedStatus() + "\t" + entry.getPageRank() + "\n";
      urlDebugURLWriter.append(debugLine);
    }

    // The bundle carries the best page rank among the items added in this call.
    domainBundle.setMaxPageRank(highestRank);

    flushCurrentBundle(reporter);
  }