示例#1
0
  /**
   * Routes a crawled URL according to the {@code Location} header of its response.
   *
   * <ul>
   *   <li>Header absent: the pipeline simply proceeds.</li>
   *   <li>Header present and usable: a candidate is registered from the header value
   *       and processing jumps directly to the candidate handler, bypassing the
   *       handlers in between (per the original note, the HTTP extractor and the
   *       ones after it).</li>
   *   <li>Header present but malformed: the URL is treated as broken; it is flagged
   *       as not needing a retry and processing is finished.</li>
   * </ul>
   *
   * @param ctx handler context used to proceed, jump, or finish
   * @param url the crawled URL whose response is inspected
   */
  @Override
  public void process(HandlerContext ctx, CrawlURL url) {
    final String redirectTarget = url.getResponse().getHeader(HttpHeaders.Names.LOCATION);

    if (redirectTarget != null) {
      try {
        // A location was announced: register it, then go straight to the candidate handler.
        addCandidateFromHeader(url, redirectTarget);
        ctx.jumpTo(candidateHandlerName);
      } catch (MalformedURLException e) {
        // The location is present but unusable: consider this URL broken and stop here.
        url.setNeedRetry(false);
        ctx.finish();
      }
      return;
    }

    // No location header: nothing special to do, let the pipeline continue.
    ctx.proceed();
  }