Пример #1
0
  /**
   * Drops the {@code _field} date from the content when it lies more than
   * {@code _grace} past the content's {@code VISIT_START} time.
   *
   * <p>Both dates are compared as {@link Date#getTime()} millisecond values,
   * with {@code _grace} added to the visit start. A missing {@code _field}
   * leaves the content untouched.</p>
   *
   * @param content the map to inspect and, possibly, remove {@code _field} from
   * @return always {@code true}
   */
  @Override
  public boolean filter(UniMap content) {
    final Date candidate = content.get(_field);
    if (candidate == null) {
      // Nothing to check; VISIT_START is not even consulted in this case.
      return true;
    }

    final long visitStart = content.get(VISIT_START).getTime();
    final long candidateTime = candidate.getTime();

    // Strictly later than the grace window => discard the field.
    final boolean beyondGrace = candidateTime > (visitStart + _grace);
    if (beyondGrace) {
      content.remove(_field);
    }

    return true;
  }
Пример #2
0
  /**
   * Detaches any {@code REVISIT_ORDER} value from the given order and, when
   * one was present, passes it to {@code _visitCounter.add}.
   *
   * @param order the map from which {@code REVISIT_ORDER} is removed
   * @return {@code false} when a revisit order was found and handed to the
   *         visit counter, {@code true} when none was present
   */
  @Override
  public boolean filter(UniMap order) {
    final UniMap revisit = order.remove(REVISIT_ORDER);
    final boolean noRevisit = (revisit == null);

    if (!noRevisit) {
      _visitCounter.add(revisit);
    }

    return noRevisit;
  }
Пример #3
0
  /**
   * Manager loop: repeatedly acquires visit orders from {@code _visitQ} and
   * submits each one to {@code _executor} wrapped in a {@code VisitTask}.
   *
   * <p>The loop ends when {@code _running} is cleared, when
   * {@code checkWorkPoll} returns {@code true} (in which case
   * {@code shutdown()} is called before returning), or when an
   * {@link InterruptedException} is raised inside the loop. On every exit
   * path {@code _manager} is set to {@code null} and {@code _running} to
   * {@code false}.</p>
   *
   * <p>If the loop is interrupted, the thread's interrupt status is restored
   * before this method returns.</p>
   */
  @Override
  public void run() {
    // FIXME: Replace with Runnable for privacy?

    _running = true;

    try {
      long now = System.currentTimeMillis();
      while (_running) {

        // A true result from checkWorkPoll ends this manager run.
        if (checkWorkPoll(now)) {
          _log.warn("Shutting down after {} generations", _generation);

          synchronized (this) {
            _manager = null;
            _running = false;
          }

          shutdown();
          return;
        }

        // acquire may return null, handled below.
        // NOTE(review): 50 is presumably a wait timeout in milliseconds -- confirm.
        UniMap order = _visitQ.acquire(50);
        if (_running && (order != null)) {
          // The next checkWorkPoll sees the order's VISIT_START time rather
          // than the wall clock.
          now = order.get(ContentKeys.VISIT_START).getTime();
          _executor.execute(new VisitTask(order));
          _log.debug(
              "Queued order for rldomain {}, depth: {}",
              order.get(ContentKeys.URL).domain(),
              _executor.getQueue().size());
        } else {
          now = System.currentTimeMillis();
        }
      }
    } catch (InterruptedException x) {
      _log.warn("Executor run loop: " + x);
      // Catching InterruptedException clears the interrupt flag; restore it
      // so that any code running after run() on this thread (e.g. a direct
      // caller) can still observe the interruption.
      Thread.currentThread().interrupt();
    } finally {
      _log.info("Manager thread exit");
      _manager = null;
      _running = false;
    }
  }
Пример #4
0
  /**
   * Builds an {@code HTTPSession} from the given content and issues the
   * request through {@code _client}, with a new {@code Handler} bound to the
   * same content.
   *
   * <p>The session is configured from these content keys, when present:
   * {@code URL} (required), {@code HTTP_METHOD}, {@code REQUEST_CONTENT},
   * {@code ETAG} (sent as {@code If-None-Match}), {@code LAST_VISIT} (sent as
   * {@code If-Modified-Since}) and {@code REQUEST_HEADERS}.</p>
   *
   * @param content the map supplying the request parameters
   * @return always {@code true}
   */
  public boolean filter(UniMap content) {
    final HTTPSession session = _client.createSession();
    session.setUrl(content.get(URL).toString());

    final HTTPSession.Method httpMethod = content.get(HTTP_METHOD);
    if (httpMethod != null) {
      session.setMethod(httpMethod);
    }

    final RequestContent requestBody = content.get(REQUEST_CONTENT);
    if (requestBody != null) {
      session.setRequestContent(requestBody);
    }

    // Conditional request on the previously seen entity tag, if any.
    final CharSequence entityTag = content.get(ETAG);
    if (entityTag != null) {
      final Header ifNoneMatch = new Header("If-None-Match", entityTag);
      session.addRequestHeader(ifNoneMatch);
    }

    // Add If-Modified-Since (from LAST_VISIT); the date formatting is
    // delegated to Headers.createDateHeader.
    final Date previousVisit = content.get(LAST_VISIT);
    if (previousVisit != null) {
      session.addRequestHeader(Headers.createDateHeader("If-Modified-Since", previousVisit));
    }
    // FIXME: Use recorded LAST_SUCCESS_VISIT
    // (i.e. last actual fetch for lastVisit) or Last-Modified from
    // last success stored?

    // Headers supplied via REQUEST_HEADERS take precedence over the
    // statically configured "fixed" headers of the same name.
    final List<Header> customHeaders = content.get(REQUEST_HEADERS);
    if (customHeaders == null) {
      session.addRequestHeaders(_fixedRequestHeaders);
    } else {
      for (Header fixed : _fixedRequestHeaders) {
        final boolean overridden =
            Headers.getFirst(customHeaders, fixed.name().toString()) != null;
        if (!overridden) {
          session.addRequestHeader(fixed);
        }
      }
      session.addRequestHeaders(customHeaders);
    }

    session.setMaxContentLength(_maxContentLength);
    session.setAcceptedContentTypes(_acceptedContentTypes);

    _client.request(session, new Handler(content));

    return true;
  }