Exemple #1
0
 /**
  * Handles a key that is ready for writing by pushing the pending HTTP request bytes to the
  * socket channel.
  *
  * <p>Makes up to {@code WRITE_SPIN_COUNT} write attempts and stops at the first one that
  * transfers any bytes, then updates the send statistics kept in the URL's handler attributes.
  * Once the request buffer has no bytes remaining, the buffer attribute is removed, the
  * completion time is recorded and {@code OP_WRITE} is cleared from the key's interest set. On an
  * {@link IOException} the key is cancelled and closed, the URL is marked {@code FETCH_FAILED}
  * and its pipeline is resumed with an empty message.
  *
  * @param key the writable selection key; its attachment is cast to {@code CrawlURL} and its
  *     channel to {@code SocketChannel}
  */
 private void processWritableKey(SelectionKey key) {
   CrawlURL url = (CrawlURL) key.attachment();
   SocketChannel channel = (SocketChannel) key.channel();
   ByteBuffer buffer = (ByteBuffer) url.getHandlerAttr(_REQUEST_BUFFER);
   try {
     // Send the HTTP request; once it has been sent completely, stop listening for OP_WRITE.
     for (int i = WRITE_SPIN_COUNT; i > 0; i--) {
       int writtenBytes = channel.write(buffer);
       if (writtenBytes != 0) {
         // Something was written: record when, the running byte total, and the write count.
         url.setHandlerAttr(_LAST_SEND_REQUEST_MILLIS, System.currentTimeMillis());
         // A counter that has not been initialised yet is treated as 0 instead of failing with
         // a NullPointerException on unboxing.
         Integer sentSoFar = (Integer) url.getHandlerAttr(_REQUEST_ALREADY_SEND_SIZE);
         url.setHandlerAttr(
             _REQUEST_ALREADY_SEND_SIZE, (sentSoFar == null ? 0 : sentSoFar) + writtenBytes);
         Integer sendTimes = (Integer) url.getHandlerAttr(_REQUEST_SEND_TIMES);
         url.setHandlerAttr(_REQUEST_SEND_TIMES, (sendTimes == null ? 0 : sendTimes) + 1);
         break;
       }
     }
     boolean reqSendFinished = !buffer.hasRemaining();
     url.setHandlerAttr(_REQUEST_SEND_FINISHED, reqSendFinished);
     if (reqSendFinished) {
       // Store a timestamp (not the boolean flag) under the *_MILLIS attribute.
       url.setHandlerAttr(_REQUEST_SEND_FINISHED_MILLIS, System.currentTimeMillis());
       url.removeHandlerAttr(_REQUEST_BUFFER);
       key.interestOps(key.interestOps() & ~SelectionKey.OP_WRITE);
     }
   } catch (IOException e) {
     // Pass the exception to the logger so the cause and stack trace are not lost.
     logger.error("error send http request ! URL: " + url, e);
     cancelAndClose(key);
     url.setFetchStatus(FETCH_FAILED);
     url.getPipeline().resume(DefaultPipeline.EMPTY_MSG);
   }
 }
Exemple #2
0
  /**
   * Handles a key that is ready for reading by draining the socket channel into the shared
   * receive buffer and handing the bytes read to the URL's pipeline.
   *
   * <p>Reads until the channel returns no more data, signals end-of-stream, or the receive buffer
   * is full. The fetch status is set to {@code FETCH_SUCCESSED} on end-of-stream and to
   * {@code FETCH_ING} otherwise. If any bytes were read in this pass they are copied into a new
   * heap buffer and passed to the pipeline. On end-of-stream the key is cancelled and closed. On
   * an {@link IOException} the failure is logged with send/connect diagnostics, the key is
   * cancelled and closed, the URL is marked {@code FETCH_FAILED} and the pipeline is resumed with
   * an empty message.
   *
   * @param key the readable selection key; its attachment is cast to {@code CrawlURL} and its
   *     channel to {@code SocketChannel}
   */
  private void processReadableKey(SelectionKey key) {
    /*
     * For now a series of heap-based ByteBuffers is used to hold the page data read on each pass.
     *
     * TODO:
     * 1. Use direct ByteBuffers; pool them, or allocate one large buffer up front and slice it?
     * 2. Read-size predictor -- stored in the CrawlURL, one per server.
     * 3. Do not pass a ByteBuffer as the resume message; copy into a HeapChannelBuffer of the
     *    same size and use ByteBuffers only for the actual network I/O.
     * 4. Keep a composite channel buffer in the CrawlURL and merge the buffers from step 3 into
     *    it, to reduce the number of copies.
     */
    ByteBuffer buffer = this.receiveBuffer;
    buffer.clear();
    SocketChannel channel = (SocketChannel) key.channel();
    CrawlURL uri = (CrawlURL) key.attachment();

    int ret = 0;
    int readBytes = 0;
    try {
      // On slow networks this read may throw java.io.IOException ("An existing connection was
      // forcibly closed by the remote host").
      while ((ret = channel.read(buffer)) > 0) {
        readBytes += ret;
        if (!buffer.hasRemaining()) {
          // Receive buffer is full; hand over what we have and continue on the next ready event.
          break;
        }
      }
      // A negative result means end-of-stream, i.e. the response has been read completely.
      uri.setFetchStatus(ret < 0 ? FETCH_SUCCESSED : FETCH_ING);
      // If data was read in this pass, resume the pipeline with it whether or not the response
      // is complete.
      // NOTE(review): when end-of-stream arrives with no data in this pass the pipeline is not
      // resumed here -- confirm that completion is signalled some other way.
      if (readBytes > 0) {
        // Copy the received bytes into an exactly-sized heap ByteBuffer and pass that on.
        // NOTE(review): msg is handed over without flip(), so its position equals its limit;
        // presumably the pipeline flips/rewinds it before reading -- confirm.
        ByteBuffer msg = ByteBuffer.allocate(buffer.position());
        buffer.flip();
        msg.put(buffer);
        uri.getPipeline().resume(msg);
      }

    } catch (IOException e) {
      logger.error(
          "error read http response ! URL: " + uri + describeSendStateForReadFailure(uri), e);
      cancelAndClose(key);
      // TODO: retry after a failed response read?
      uri.setFetchStatus(FETCH_FAILED);
      uri.getPipeline().resume(DefaultPipeline.EMPTY_MSG);
    }

    // If the response has been read completely, deregister the key and close the connection.
    if (ret < 0) {
      cancelAndClose(key);
    }
  }

  /**
   * Builds the diagnostic text appended to the read-failure log entry.
   *
   * <p>Missing attributes are tolerated so that building the message can never throw and thereby
   * skip the cleanup that follows the log call.
   *
   * @param uri the URL whose handler attributes describe the request/connection state
   * @return a string starting with a newline, describing either the request-send statistics or,
   *     if no request was sent, the time elapsed since the connection succeeded
   */
  private String describeSendStateForReadFailure(CrawlURL uri) {
    long now = System.currentTimeMillis();
    StringBuilder debug = new StringBuilder("\n");
    Object lastSendTime = uri.getHandlerAttr(_LAST_SEND_REQUEST_MILLIS);
    if (lastSendTime != null) {
      debug.append("距上次发送request时间(s):").append((now - (Long) lastSendTime) / 1000);
      // These values are only printed, so no casts are needed and null is rendered as "null".
      debug.append("\n一共发送request次数:").append(uri.getHandlerAttr(_REQUEST_SEND_TIMES));
      debug.append("\n一共发送字节:").append(uri.getHandlerAttr(_REQUEST_ALREADY_SEND_SIZE));
      debug.append("\n请求共有字节:").append(uri.getHandlerAttr(_REQUEST_SIZE));
    } else {
      debug.append("未发送过request,距连接成功时间(s):");
      Long conTime = (Long) uri.getHandlerAttr(_CONNECT_SUCCESS_MILLIS);
      if (conTime != null) {
        debug.append((now - conTime) / 1000);
      } else {
        // Connect time was never recorded; avoid unboxing null.
        debug.append("unknown");
      }
    }
    return debug.toString();
  }