/**
 * Appends a fetch set to the internal queue and bumps the queued-item counters.
 * A null datum is ignored: nothing is counted and nothing is queued.
 *
 * @param datum fetch set to enqueue, may be null
 */
private void addToQueue(FetchSetDatum datum) {
    if (datum == null) {
        return;
    }

    // Counters are updated before the datum actually lands in the queue.
    _flowProcess.increment(FetchCounters.FETCHSETS_QUEUED, 1);
    int urlCount = datum.getUrls().size();
    _flowProcess.increment(FetchCounters.URLS_QUEUED, urlCount);

    _queue.add(datum);
}
/**
 * Empty the buffer, then the iterator, without worrying about mode/state.
 *
 * @return the next fetch set taken from the queue if it is not empty; otherwise
 *         a fetch set built from the next iterator value if one is available;
 *         otherwise null
 */
public FetchSetDatum drain() {
    // Queued items always go out first.
    if (!_queue.isEmpty()) {
        return removeFromQueue();
    }

    // Queue exhausted - fall back to whatever the iterator still holds.
    if (safeHasNext()) {
        return new FetchSetDatum(new TupleEntry(_values.next()));
    }

    return null;
}
/**
 * Polls the head of the queue and, when something was actually removed,
 * decrements the queued-item counters to match.
 *
 * @return fetch set taken from the queue, or null if the queue is empty
 */
private FetchSetDatum removeFromQueue() {
    FetchSetDatum head = _queue.poll();
    if (head == null) {
        // Nothing was removed, so the counters stay untouched.
        return null;
    }

    _flowProcess.increment(FetchCounters.FETCHSETS_QUEUED, -1);
    int urlCount = head.getUrls().size();
    _flowProcess.increment(FetchCounters.URLS_QUEUED, -urlCount);
    return head;
}
@Override public void operate(FlowProcess flowProcess, BufferCall<NullContext> bufferCall) { TupleEntry group = bufferCall.getGroup(); String protocolAndDomain = group.getString(0); LOGGER.info("Processing tuple group: " + group); DiskQueue<GroupedUrlDatum> urls = new DiskQueue<GroupedUrlDatum>(MAX_URLS_IN_MEMORY); Iterator<TupleEntry> values = bufferCall.getArgumentsIterator(); while (values.hasNext()) { urls.add(new GroupedUrlDatum(new TupleEntry(values.next()))); } try { Runnable doRobots = new ProcessRobotsTask( protocolAndDomain, _scorer, urls, _fetcher, _parser, bufferCall.getOutputCollector(), _flowProcess); _executor.execute(doRobots); } catch (RejectedExecutionException e) { // should never happen. LOGGER.error("Robots handling pool rejected our request for " + protocolAndDomain); _flowProcess.increment(FetchCounters.DOMAINS_REJECTED, 1); _flowProcess.increment(FetchCounters.URLS_REJECTED, urls.size()); ProcessRobotsTask.emptyQueue( urls, GroupingKey.DEFERRED_GROUPING_KEY, bufferCall.getOutputCollector(), flowProcess); } catch (Throwable t) { LOGGER.error( "Caught an unexpected throwable - robots handling rejected our request for " + protocolAndDomain, t); _flowProcess.increment(FetchCounters.DOMAINS_REJECTED, 1); _flowProcess.increment(FetchCounters.URLS_REJECTED, urls.size()); ProcessRobotsTask.emptyQueue( urls, GroupingKey.DEFERRED_GROUPING_KEY, bufferCall.getOutputCollector(), flowProcess); } }
/**
 * Reports whether there is nothing left to hand out, neither in the queue
 * nor from the underlying iterator.
 *
 * @return true only if the queue is empty and safeHasNext() reports no more values
 */
public boolean isEmpty() {
    // The iterator is only consulted once the queue itself is empty.
    if (!_queue.isEmpty()) {
        return false;
    }
    return !safeHasNext();
}