/**
 * Drops the {@code _field} date from the content when it lies more than
 * {@code _grace} past the VISIT_START time.
 *
 * @param content map holding the date under {@code _field} and the
 *                VISIT_START date; modified in place
 * @return always {@code true}
 */
@Override
public boolean filter(UniMap content) {
    final Date candidate = content.get(_field);
    if (candidate == null) {
        // Nothing to check when the field is absent.
        return true;
    }

    final long visitStart = content.get(VISIT_START).getTime();
    final long candidateTime = candidate.getTime();

    // Only dates strictly beyond the grace window are removed.
    if (candidateTime > (visitStart + _grace)) {
        content.remove(_field);
    }
    return true;
}
/**
 * Removes any REVISIT_ORDER value from the given order. When one was
 * present it is handed to {@code _visitCounter} and {@code false} is
 * returned; otherwise the order is left as is and {@code true} is
 * returned.
 *
 * @param order the order to inspect; its REVISIT_ORDER entry is always
 *              removed
 * @return {@code false} if a revisit order was found and added to the
 *         visit counter, {@code true} otherwise
 */
@Override
public boolean filter(UniMap order) {
    final UniMap revisit = order.remove(REVISIT_ORDER);
    if (revisit == null) {
        return true;
    }

    _visitCounter.add(revisit);
    return false;
}
@Override public void run() { // FIXME: Replace with Runnable for privacy? _running = true; try { long now = System.currentTimeMillis(); while (_running) { if (checkWorkPoll(now)) { _log.warn("Shutting down after {} generations", _generation); synchronized (this) { _manager = null; _running = false; } shutdown(); return; } UniMap order = _visitQ.acquire(50); if (_running && (order != null)) { now = order.get(ContentKeys.VISIT_START).getTime(); _executor.execute(new VisitTask(order)); _log.debug( "Queued order for rldomain {}, depth: {}", order.get(ContentKeys.URL).domain(), _executor.getQueue().size()); } else { now = System.currentTimeMillis(); } } } catch (InterruptedException x) { _log.warn("Executor run loop: " + x); } finally { _log.info("Manager thread exit"); _manager = null; _running = false; } }
public boolean filter(UniMap content) { HTTPSession session = _client.createSession(); session.setUrl(content.get(URL).toString()); HTTPSession.Method method = content.get(HTTP_METHOD); if (method != null) { session.setMethod(method); } RequestContent reqContent = content.get(REQUEST_CONTENT); if (reqContent != null) { session.setRequestContent(reqContent); } CharSequence etag = content.get(ETAG); if (etag != null) { session.addRequestHeader(new Header("If-None-Match", etag)); } // Add If-Modified-Since (from LAST_VISIT) in RFC 822 format Date lastVisit = content.get(LAST_VISIT); if (lastVisit != null) { session.addRequestHeader(Headers.createDateHeader("If-Modified-Since", lastVisit)); } // FIXME: Use recorded LAST_SUCCESS_VISIT // (i.e. last actual fetch for lastVisit) or Last-Modified from // last success stored? // REQUEST_HEADERS may be added to input content and override same // named statically set "fixed" headers. List<Header> headers = content.get(REQUEST_HEADERS); if (headers != null) { for (Header f : _fixedRequestHeaders) { if (Headers.getFirst(headers, f.name().toString()) == null) { session.addRequestHeader(f); } } session.addRequestHeaders(headers); } else { session.addRequestHeaders(_fixedRequestHeaders); } session.setMaxContentLength(_maxContentLength); session.setAcceptedContentTypes(_acceptedContentTypes); _client.request(session, new Handler(content)); return true; }