Ejemplo n.º 1
0
    /**
     * Adds a fetch set to {@code _queue} and records it in the flow counters.
     *
     * <p>A {@code null} datum is ignored: no counter is touched and nothing is queued.
     *
     * @param datum the fetch set to enqueue; may be {@code null}
     */
    private void addToQueue(FetchSetDatum datum) {
      if (datum == null) {
        return;
      }

      // Counters are bumped before the datum is handed to the queue:
      // one fetch set first, then the number of URLs it carries.
      _flowProcess.increment(FetchCounters.FETCHSETS_QUEUED, 1);
      final int urlCount = datum.getUrls().size();
      _flowProcess.increment(FetchCounters.URLS_QUEUED, urlCount);

      _queue.add(datum);
    }
Ejemplo n.º 2
0
  /**
   * Handles one tuple group: gathers the group's argument tuples into a {@link DiskQueue} and
   * submits a {@link ProcessRobotsTask} for them to {@code _executor}.
   *
   * <p>If building or submitting the task throws, the failure is logged, the
   * {@code DOMAINS_REJECTED} and {@code URLS_REJECTED} counters are incremented, and the queued
   * URLs are passed to {@code ProcessRobotsTask.emptyQueue} with
   * {@code GroupingKey.DEFERRED_GROUPING_KEY}.
   *
   * @param flowProcess the flow process supplied by the caller; forwarded to
   *     {@code ProcessRobotsTask.emptyQueue} on failure
   * @param bufferCall supplies the group, its argument tuples and the output collector
   */
  @Override
  public void operate(FlowProcess flowProcess, BufferCall<NullContext> bufferCall) {
    TupleEntry group = bufferCall.getGroup();
    String protocolAndDomain = group.getString(0);
    LOGGER.info("Processing tuple group: " + group);

    // Wrap a copy of each incoming tuple entry as a GroupedUrlDatum and queue it.
    DiskQueue<GroupedUrlDatum> urls = new DiskQueue<GroupedUrlDatum>(MAX_URLS_IN_MEMORY);
    for (Iterator<TupleEntry> it = bufferCall.getArgumentsIterator(); it.hasNext(); ) {
      TupleEntry entry = it.next();
      urls.add(new GroupedUrlDatum(new TupleEntry(entry)));
    }

    try {
      _executor.execute(
          new ProcessRobotsTask(
              protocolAndDomain,
              _scorer,
              urls,
              _fetcher,
              _parser,
              bufferCall.getOutputCollector(),
              _flowProcess));
    } catch (RejectedExecutionException e) {
      // should never happen.
      LOGGER.error("Robots handling pool rejected our request for " + protocolAndDomain);
      deferRejectedUrls(urls, bufferCall, flowProcess);
    } catch (Throwable t) {
      LOGGER.error(
          "Caught an unexpected throwable - robots handling rejected our request for "
              + protocolAndDomain,
          t);
      deferRejectedUrls(urls, bufferCall, flowProcess);
    }
  }

  /**
   * Records a rejected group in the counters and empties its URL queue under the deferred
   * grouping key.
   */
  private void deferRejectedUrls(
      DiskQueue<GroupedUrlDatum> urls,
      BufferCall<NullContext> bufferCall,
      FlowProcess flowProcess) {
    _flowProcess.increment(FetchCounters.DOMAINS_REJECTED, 1);
    _flowProcess.increment(FetchCounters.URLS_REJECTED, urls.size());
    ProcessRobotsTask.emptyQueue(
        urls, GroupingKey.DEFERRED_GROUPING_KEY, bufferCall.getOutputCollector(), flowProcess);
  }