private void markFileAsBad(Path file) {
    String fileName = file.toString();
    String fileNameMinusSuffix = fileName.substring(0, fileName.indexOf(inprogress_suffix));
    String originalName = new Path(fileNameMinusSuffix).getName();
    Path newFile = new Path(badFilesDirPath + Path.SEPARATOR + originalName);

    LOG.info("Moving bad file {} to {}. Processed it till offset {}. SpoutID= {}",
             originalName, newFile, tracker.getCommitPosition(), spoutId);
    try {
        if (!hdfs.rename(file, newFile)) { // seems this can fail by returning false or throwing exception
            throw new IOException("Move failed for bad file: " + file); // convert false ret value to exception
        }
    } catch (IOException e) {
        LOG.warn("Error moving bad file: " + file + " to destination " + newFile + " SpoutId =" + spoutId, e);
    }
    closeReaderAndResetTrackers();
}
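// A minimal sketch of the closeReaderAndResetTrackers() helper called above, assuming the fields
// (inflight, retryList, reader, tracker, fileReadCompletely) and the ProgressTracker class seen in
// the surrounding methods; the real implementation may differ.
private void closeReaderAndResetTrackers() {
    // Drop all per-file bookkeeping: unACKed tuples, acked-offset tracking, retry candidates
    inflight.clear();
    retryList.clear();

    // Release the current reader so pickNextFile() selects a fresh file on the next nextTuple()
    reader.close();
    reader = null;
    tracker = new ProgressTracker();
    fileReadCompletely = false;
}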
@Override
public void ack(Object msgId) {
    LOG.trace("Ack received for msg {} on spout {}", msgId, spoutId);
    if (!ackEnabled) {
        return;
    }
    MessageId id = (MessageId) msgId;
    inflight.remove(id);
    ++acksSinceLastCommit;
    tracker.recordAckedOffset(id.offset);
    commitProgress(tracker.getCommitPosition());
    if (fileReadCompletely && inflight.isEmpty()) {
        markFileAsDone(reader.getFilePath());
        reader = null;
    }
    super.ack(msgId);
}
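// A sketch of the MessageId type that ack() casts to and nextTuple() constructs. The constructor
// signature follows the call site in nextTuple(); ordering by emission number (msgNumber) is an
// assumption, made so the retry list and progress tracker can keep ids in the order they were emitted.
static class MessageId implements Comparable<MessageId> {
    public long msgNumber;    // emission order, taken from tupleCounter
    public String fullPath;   // file the tuple was read from
    public FileOffset offset; // position within that file, fed to tracker.recordAckedOffset()

    MessageId(long msgNumber, Path fullPath, FileOffset offset) {
        this.msgNumber = msgNumber;
        this.fullPath = fullPath.toString();
        this.offset = offset;
    }

    @Override
    public int compareTo(MessageId rhs) {
        return Long.compare(msgNumber, rhs.msgNumber);
    }
}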
@Override
public void nextTuple() {
    LOG.trace("Next Tuple {}", spoutId);

    // 1) First re-emit any previously failed tuples (from retryList)
    if (!retryList.isEmpty()) {
        LOG.debug("Sending tuple from retry list");
        HdfsUtils.Pair<MessageId, List<Object>> pair = retryList.remove();
        emitData(pair.getValue(), pair.getKey());
        return;
    }

    if (ackEnabled && tracker.size() >= maxOutstanding) {
        LOG.warn("Waiting for more ACKs before generating new tuples. "
                 + "Progress tracker size has reached limit {}, SpoutID {}",
                 maxOutstanding, spoutId);
        // Don't emit anything; allow the configured spout wait strategy to kick in
        return;
    }

    // 2) If there are no failed tuples to retry, read tuples from HDFS
    while (true) {
        try {
            // 3) Select a new file if one is not open already
            if (reader == null) {
                reader = pickNextFile();
                if (reader == null) {
                    LOG.debug("Currently no new files to process under: " + sourceDirPath);
                    return;
                } else {
                    fileReadCompletely = false;
                }
            }
            if (fileReadCompletely) { // wait for more ACKs before proceeding
                return;
            }

            // 4) Read a record from the file, emit it to the collector and record progress
            List<Object> tuple = reader.next();
            if (tuple != null) {
                fileReadCompletely = false;
                ++tupleCounter;
                MessageId msgId = new MessageId(tupleCounter, reader.getFilePath(), reader.getFileOffset());
                emitData(tuple, msgId);

                if (!ackEnabled) {
                    ++acksSinceLastCommit; // assume message is immediately ACKed in non-ack mode
                    commitProgress(reader.getFileOffset());
                } else {
                    commitProgress(tracker.getCommitPosition());
                }
                return;
            } else {
                fileReadCompletely = true;
                if (!ackEnabled) {
                    markFileAsDone(reader.getFilePath());
                }
            }
        } catch (IOException e) {
            LOG.error("I/O Error processing at file location " + getFileProgress(reader), e);
            // Don't emit anything; allow the configured spout wait strategy to kick in
            return;
        } catch (ParseException e) {
            LOG.error("Parsing error when processing at file location " + getFileProgress(reader)
                      + ". Skipping remainder of file.", e);
            markFileAsBad(reader.getFilePath());
            // Note: We don't return from this method on ParseException, to avoid triggering the
            // spout wait strategy (due to no emits). Instead we loop around and generate a
            // tuple from the next file.
        }
    } // while
}
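// A hedged sketch of commitProgress(), invoked from both ack() and nextTuple(). It assumes progress
// is persisted via a file-lock heartbeat and that a canCommitNow() predicate throttles commit
// frequency (e.g. by acksSinceLastCommit or elapsed time); lock and canCommitNow() are assumed names.
private void commitProgress(FileOffset position) {
    if (position == null) {
        return; // nothing ACKed yet, so there is no resume point to record
    }
    if (lock != null && canCommitNow()) {
        try {
            lock.heartbeat(position.toString()); // persist the latest safe resume point
            acksSinceLastCommit = 0;
        } catch (IOException e) {
            LOG.error("Unable to commit progress. Will retry later. Spout ID = " + spoutId, e);
        }
    }
}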