/**
 * Prepares this reader for a split by capturing the split/context and deriving the pull limits
 * from the job configuration.
 *
 * <p>Each limit is read through {@code EtlInputFormat}; a configured value of {@code -1} means
 * "no limit" for that setting:
 *
 * <ul>
 *   <li>max pull hours: stored in {@code maxPullHours}, otherwise {@code endTimeStamp} is set to
 *       {@code Long.MAX_VALUE};
 *   <li>max pull minutes per task: converted to an absolute deadline ({@code maxPullTime})
 *       relative to the current time, otherwise {@code Long.MAX_VALUE};
 *   <li>max historical days: converted to an absolute lower bound ({@code beginTimeStamp})
 *       relative to the current time, otherwise {@code 0}.
 * </ul>
 *
 * <p>{@code mapperContext} is only assigned when {@code context} is a {@code Mapper.Context}.
 *
 * @param split the input split; must be an {@code EtlSplit}
 * @param context the task attempt context supplying the configuration
 */
@SuppressWarnings({"rawtypes", "unchecked"})
@Override
public void initialize(InputSplit split, TaskAttemptContext context)
    throws IOException, InterruptedException {
  this.split = (EtlSplit) split;
  this.context = context;
  if (context instanceof Mapper.Context) {
    mapperContext = (Context) context;
  }

  this.skipSchemaErrors = EtlInputFormat.getEtlIgnoreSchemaErrors(context);

  // Upper bound on the event-time window, expressed in hours.
  final int configuredPullHours = EtlInputFormat.getKafkaMaxPullHrs(context);
  if (configuredPullHours == -1) {
    this.endTimeStamp = Long.MAX_VALUE;
  } else {
    this.maxPullHours = configuredPullHours;
  }

  // Wall-clock deadline for this task.
  final int configuredPullMinutes = EtlInputFormat.getKafkaMaxPullMinutesPerTask(context);
  this.maxPullTime =
      (configuredPullMinutes == -1)
          ? Long.MAX_VALUE
          : new DateTime().plusMinutes(configuredPullMinutes).getMillis();

  // Oldest event time that is still counted as current.
  final int configuredHistoricalDays = EtlInputFormat.getKafkaMaxHistoricalDays(context);
  beginTimeStamp =
      (configuredHistoricalDays == -1)
          ? 0
          : new DateTime().minusDays(configuredHistoricalDays).getMillis();

  this.totalBytes = this.split.getLength();
  System.out.println("Finished executing the initialize part");
}
@Override public boolean nextKeyValue() throws IOException, InterruptedException { // we only pull for a specified time. unfinished work will be // rescheduled in the next // run. if (System.currentTimeMillis() > maxPullTime) { if (reader != null) { closeReader(); } return false; } while (true) { try { if (reader == null || reader.hasNext() == false) { EtlRequest request = split.popRequest(); if (request == null) { return false; } if (maxPullHours > 0) { endTimeStamp = 0; } key.set( request.getTopic(), request.getNodeId(), request.getPartition(), request.getOffset(), request.getOffset(), 0); value = new AvroWrapper<Object>(new Object()); System.out.println( "topic:" + request.getTopic() + " partition:" + request.getPartition() + " beginOffset:" + request.getOffset() + " estimatedLastOffset:" + request.getLastOffset()); statusMsg += statusMsg.length() > 0 ? "; " : ""; statusMsg += request.getTopic() + ":" + request.getNodeId() + ":" + request.getPartition(); context.setStatus(statusMsg); if (reader != null) { closeReader(); } reader = new KafkaReader( request, EtlInputFormat.getKafkaClientTimeout(mapperContext), EtlInputFormat.getKafkaClientBufferSize(mapperContext)); decoder = (MessageDecoder<Message, Record>) MessageDecoderFactory.createMessageDecoder(context, request.getTopic()); } while (reader.getNext(key, msgValue)) { context.progress(); mapperContext.getCounter("total", "data-read").increment(msgValue.getLength()); mapperContext.getCounter("total", "event-count").increment(1); byte[] bytes = getBytes(msgValue); // check the checksum of message Message message = new Message(bytes); long checksum = key.getChecksum(); if (checksum != message.checksum()) { throw new ChecksumException( "Invalid message checksum " + message.checksum() + ". 
Expected " + key.getChecksum(), key.getOffset()); } long tempTime = System.currentTimeMillis(); CamusWrapper wrapper; try { wrapper = getWrappedRecord(key.getTopic(), message); } catch (Exception e) { mapperContext.write(key, new ExceptionWritable(e)); continue; } if (wrapper == null) { mapperContext.write(key, new ExceptionWritable(new RuntimeException("null record"))); continue; } long timeStamp = wrapper.getTimestamp(); try { key.setTime(timeStamp); key.setServer(wrapper.getServer()); key.setService(wrapper.getService()); } catch (Exception e) { mapperContext.write(key, new ExceptionWritable(e)); continue; } if (timeStamp < beginTimeStamp) { mapperContext.getCounter("total", "skip-old").increment(1); } else if (endTimeStamp == 0) { DateTime time = new DateTime(timeStamp); statusMsg += " begin read at " + time.toString(); context.setStatus(statusMsg); System.out.println(key.getTopic() + " begin read at " + time.toString()); endTimeStamp = (time.plusHours(this.maxPullHours)).getMillis(); } else if (timeStamp > endTimeStamp || System.currentTimeMillis() > maxPullTime) { statusMsg += " max read at " + new DateTime(timeStamp).toString(); context.setStatus(statusMsg); System.out.println( key.getTopic() + " max read at " + new DateTime(timeStamp).toString()); mapperContext.getCounter("total", "request-time(ms)").increment(reader.getFetchTime()); closeReader(); } long secondTime = System.currentTimeMillis(); value.datum(wrapper.getRecord()); long decodeTime = ((secondTime - tempTime)); mapperContext.getCounter("total", "decode-time(ms)").increment(decodeTime); if (reader != null) { mapperContext.getCounter("total", "request-time(ms)").increment(reader.getFetchTime()); } return true; } reader = null; } catch (Throwable t) { Exception e = new Exception(t.getLocalizedMessage(), t); e.setStackTrace(t.getStackTrace()); mapperContext.write(key, new ExceptionWritable(e)); reader = null; continue; } } }