Example #1
0
 private void closeReader() throws IOException {
   if (reader != null) {
     try {
       readBytes += reader.getReadBytes();
       reader.close();
     } catch (Exception e) {
       // not much to do here but skip the task
     } finally {
       reader = null;
     }
   }
 }
Example #2
0
 private long getPos() throws IOException {
   if (reader != null) {
     return readBytes + reader.getReadBytes();
   } else {
     return readBytes;
   }
 }
Example #3
0
  @Override
  public boolean nextKeyValue() throws IOException, InterruptedException {

    // we only pull for a specified time. unfinished work will be
    // rescheduled in the next
    // run.
    if (System.currentTimeMillis() > maxPullTime) {
      if (reader != null) {
        closeReader();
      }
      return false;
    }

    while (true) {
      try {
        if (reader == null || reader.hasNext() == false) {
          EtlRequest request = split.popRequest();
          if (request == null) {
            return false;
          }

          if (maxPullHours > 0) {
            endTimeStamp = 0;
          }

          key.set(
              request.getTopic(),
              request.getNodeId(),
              request.getPartition(),
              request.getOffset(),
              request.getOffset(),
              0);
          value = new AvroWrapper<Object>(new Object());

          System.out.println(
              "topic:"
                  + request.getTopic()
                  + " partition:"
                  + request.getPartition()
                  + " beginOffset:"
                  + request.getOffset()
                  + " estimatedLastOffset:"
                  + request.getLastOffset());

          statusMsg += statusMsg.length() > 0 ? "; " : "";
          statusMsg +=
              request.getTopic() + ":" + request.getNodeId() + ":" + request.getPartition();
          context.setStatus(statusMsg);

          if (reader != null) {
            closeReader();
          }
          reader =
              new KafkaReader(
                  request,
                  EtlInputFormat.getKafkaClientTimeout(mapperContext),
                  EtlInputFormat.getKafkaClientBufferSize(mapperContext));

          decoder =
              (MessageDecoder<Message, Record>)
                  MessageDecoderFactory.createMessageDecoder(context, request.getTopic());
        }

        while (reader.getNext(key, msgValue)) {
          context.progress();
          mapperContext.getCounter("total", "data-read").increment(msgValue.getLength());
          mapperContext.getCounter("total", "event-count").increment(1);
          byte[] bytes = getBytes(msgValue);

          // check the checksum of message
          Message message = new Message(bytes);
          long checksum = key.getChecksum();
          if (checksum != message.checksum()) {
            throw new ChecksumException(
                "Invalid message checksum "
                    + message.checksum()
                    + ". Expected "
                    + key.getChecksum(),
                key.getOffset());
          }

          long tempTime = System.currentTimeMillis();
          CamusWrapper wrapper;
          try {
            wrapper = getWrappedRecord(key.getTopic(), message);
          } catch (Exception e) {
            mapperContext.write(key, new ExceptionWritable(e));
            continue;
          }

          if (wrapper == null) {
            mapperContext.write(key, new ExceptionWritable(new RuntimeException("null record")));
            continue;
          }

          long timeStamp = wrapper.getTimestamp();
          try {
            key.setTime(timeStamp);
            key.setServer(wrapper.getServer());
            key.setService(wrapper.getService());
          } catch (Exception e) {
            mapperContext.write(key, new ExceptionWritable(e));
            continue;
          }

          if (timeStamp < beginTimeStamp) {
            mapperContext.getCounter("total", "skip-old").increment(1);
          } else if (endTimeStamp == 0) {
            DateTime time = new DateTime(timeStamp);
            statusMsg += " begin read at " + time.toString();
            context.setStatus(statusMsg);
            System.out.println(key.getTopic() + " begin read at " + time.toString());
            endTimeStamp = (time.plusHours(this.maxPullHours)).getMillis();
          } else if (timeStamp > endTimeStamp || System.currentTimeMillis() > maxPullTime) {
            statusMsg += " max read at " + new DateTime(timeStamp).toString();
            context.setStatus(statusMsg);
            System.out.println(
                key.getTopic() + " max read at " + new DateTime(timeStamp).toString());
            mapperContext.getCounter("total", "request-time(ms)").increment(reader.getFetchTime());
            closeReader();
          }

          long secondTime = System.currentTimeMillis();
          value.datum(wrapper.getRecord());
          long decodeTime = ((secondTime - tempTime));

          mapperContext.getCounter("total", "decode-time(ms)").increment(decodeTime);

          if (reader != null) {
            mapperContext.getCounter("total", "request-time(ms)").increment(reader.getFetchTime());
          }
          return true;
        }
        reader = null;
      } catch (Throwable t) {
        Exception e = new Exception(t.getLocalizedMessage(), t);
        e.setStackTrace(t.getStackTrace());
        mapperContext.write(key, new ExceptionWritable(e));
        reader = null;
        continue;
      }
    }
  }
Example #4
0
 @Override
 public synchronized void close() throws IOException {
   if (reader != null) {
     reader.close();
   }
 }