Exemple #1
0
 /**
  * Hands a copy of {@code tuple} to the collector while collection is still
  * enabled; otherwise logs a warning and drops the tuple.
  *
  * @param tuple the tuple to collect; it is cloned via
  *              {@code BixoPlatform.clone} before being added
  */
 @Override
 public void collect(Tuple tuple) {
   // Holding the lock on _keepCollecting covers two hazards:
   //   - the flag being changed between our check and the add below, and
   //   - concurrent calls into the collector's add(), which is not thread safe.
   synchronized (_keepCollecting) {
     if (!_keepCollecting.get()) {
       // Collection has been switched off, so this entry is discarded.
       LOGGER.warn("Losing an entry: " + tuple);
       return;
     }

     _collector.add(BixoPlatform.clone(tuple, _flowProcess));
   }
 }
Exemple #2
0
  /**
   * Emits one result tuple per URL in {@code urls}, each tagged with the given
   * status, then updates the skip counters and optionally writes a trace message.
   *
   * @param urls     the URLs being skipped
   * @param status   status whose {@code toString()} value is appended to every
   *                 emitted tuple
   * @param traceMsg format string that receives the number of URLs as its only
   *                 argument; may be {@code null} to suppress trace logging
   */
  private void skipUrls(List<ScoredUrlDatum> urls, UrlStatus status, String traceMsg) {
    // The loop below never modifies the list, so the size can be read once.
    final int skippedCount = urls.size();

    for (ScoredUrlDatum skippedUrl : urls) {
      Tuple skippedTuple = new FetchedDatum(skippedUrl).getTuple();
      skippedTuple.add(status.toString());
      _collector.add(BixoPlatform.clone(skippedTuple, _flowProcess));
    }

    _flowProcess.increment(FetchCounters.URLS_SKIPPED, skippedCount);
    if (status == UrlStatus.SKIPPED_PER_SERVER_LIMIT) {
      // Per-server-limit skips are also tracked in their own counter.
      _flowProcess.increment(FetchCounters.URLS_SKIPPED_PER_SERVER_LIMIT, skippedCount);
    }

    // Nothing to log without a message, or when trace output is disabled.
    if ((traceMsg == null) || !LOGGER.isTraceEnabled()) {
      return;
    }

    LOGGER.trace(String.format(traceMsg, skippedCount));
  }