Example #1
  private void markFileAsBad(Path file) {
    String fileName = file.toString();
    String fileNameMinusSuffix = fileName.substring(0, fileName.indexOf(inprogress_suffix));
    String originalName = new Path(fileNameMinusSuffix).getName();
    Path newFile = new Path(badFilesDirPath + Path.SEPARATOR + originalName);

    LOG.info(
        "Moving bad file {} to {}. Processed it till offset {}. SpoutID= {}",
        originalName,
        newFile,
        tracker.getCommitPosition(),
        spoutId);
    try {
      // rename() can fail either by returning false or by throwing an IOException
      if (!hdfs.rename(file, newFile)) {
        // convert the false return value into an exception as well
        throw new IOException("Move failed for bad file: " + file);
      }
    } catch (IOException e) {
      LOG.warn(
          "Error moving bad file: " + file + " to destination " + newFile + " SpoutId =" + spoutId,
          e);
    }
    closeReaderAndResetTrackers();
  }
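To make the path handling at the top of markFileAsBad concrete, here is a hypothetical walk-through; the ".inprogress" suffix and the directory names are assumptions for illustration only (the actual values are configurable):

  // Hypothetical example of the renaming above, assuming
  // inprogress_suffix = ".inprogress" and badFilesDirPath = "/data/bad":
  //   file                = /data/in/events.log.inprogress
  //   fileNameMinusSuffix = /data/in/events.log
  //   originalName        = events.log
  //   newFile             = /data/bad/events.log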
Example #2
  @Override
  public void ack(Object msgId) {
    LOG.trace("Ack received for msg {} on spout {}", msgId, spoutId);
    if (!ackEnabled) {
      return;
    }
    MessageId id = (MessageId) msgId;
    inflight.remove(id);
    ++acksSinceLastCommit;
    tracker.recordAckedOffset(id.offset);
    commitProgress(tracker.getCommitPosition());
    if (fileReadCompletely && inflight.isEmpty()) {
      markFileAsDone(reader.getFilePath());
      reader = null;
    }
    super.ack(msgId);
  }
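ack() casts the opaque msgId back to the spout's own MessageId type and reads its offset field, and nextTuple() in the next example constructs one from a tuple counter, the file path, and the file offset. Inferred purely from those call sites, a minimal sketch of such a value class could look like the following; the actual nested class in storm-hdfs may differ:

  // Minimal sketch of a MessageId value object, inferred from the calls in
  // these snippets; not necessarily the real storm-hdfs class.
  class MessageId implements Comparable<MessageId> {
    public final long msgNumber; // tuple counter, gives a total order on emits
    public final Path fullPath;  // file the tuple was read from
    public final long offset;    // position within that file, used when acking

    MessageId(long msgNumber, Path fullPath, long offset) {
      this.msgNumber = msgNumber;
      this.fullPath = fullPath;
      this.offset = offset;
    }

    @Override
    public int compareTo(MessageId rhs) {
      return Long.compare(msgNumber, rhs.msgNumber);
    }
    // if 'inflight' is a hash-based set, equals() and hashCode() on msgNumber
    // would also be needed for inflight.remove(id) to work as intended
  }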
Example #3
  public void nextTuple() {
    LOG.trace("Next Tuple {}", spoutId);
    // 1) First re-emit any previously failed tuples (from retryList)
    if (!retryList.isEmpty()) {
      LOG.debug("Sending tuple from retry list");
      HdfsUtils.Pair<MessageId, List<Object>> pair = retryList.remove();
      emitData(pair.getValue(), pair.getKey());
      return;
    }

    if (ackEnabled && tracker.size() >= maxOutstanding) {
      LOG.warn(
          "Waiting for more ACKs before generating new tuples. "
              + "Progress tracker size has reached limit {}, SpoutID {}",
          maxOutstanding,
          spoutId);
      // Don't emit anything .. allow configured spout wait strategy to kick in
      return;
    }

    // 2) If no failed tuples to be retried, then send tuples from hdfs
    while (true) {
      try {
        // 3) Select a new file if one is not open already
        if (reader == null) {
          reader = pickNextFile();
          if (reader == null) {
            LOG.debug("Currently no new files to process under : " + sourceDirPath);
            return;
          } else {
            fileReadCompletely = false;
          }
        }
        if (fileReadCompletely) { // wait for more ACKs before proceeding
          return;
        }
        // 4) Read record from file, emit to collector and record progress
        List<Object> tuple = reader.next();
        if (tuple != null) {
          fileReadCompletely = false;
          ++tupleCounter;
          MessageId msgId =
              new MessageId(tupleCounter, reader.getFilePath(), reader.getFileOffset());
          emitData(tuple, msgId);

          if (!ackEnabled) {
            ++acksSinceLastCommit; // assume message is immediately ACKed in non-ack mode
            commitProgress(reader.getFileOffset());
          } else {
            commitProgress(tracker.getCommitPosition());
          }
          return;
        } else {
          fileReadCompletely = true;
          if (!ackEnabled) {
            markFileAsDone(reader.getFilePath());
          }
        }
      } catch (IOException e) {
        LOG.error("I/O Error processing at file location " + getFileProgress(reader), e);
        // don't emit anything .. allow configured spout wait strategy to kick in
        return;
      } catch (ParseException e) {
        LOG.error(
            "Parsing error when processing at file location "
                + getFileProgress(reader)
                + ". Skipping remainder of file.",
            e);
        markFileAsBad(reader.getFilePath());
        // Note: We don't return from this method on ParseException to avoid triggering the
        // spout wait strategy (due to no emits). Instead we go back into the loop and
        // generate a tuple from next file
      }
    } // while
  }
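Both ack() and nextTuple() lean on a tracker whose implementation is not shown. As a rough illustration of the technique (turning out-of-order acks into a monotonically advancing commit position), here is a minimal, self-contained sketch under the assumption that offsets are simple record sequence numbers; recordEmittedOffset is a hypothetical hook for the emit side, and the real storm-hdfs ProgressTracker works on file offsets and may differ:

  // Minimal sketch, not the storm-hdfs ProgressTracker: remember every emitted
  // offset, drop it again when acked, and report the earliest offset that is
  // still outstanding as the position to resume from after a restart.
  class ProgressTracker {
    private final java.util.TreeSet<Long> pending = new java.util.TreeSet<>();
    private long nextToEmit = 0;

    // hypothetical hook for the emit side, which the snippets above do not show
    synchronized void recordEmittedOffset(long offset) {
      pending.add(offset);
      nextToEmit = Math.max(nextToEmit, offset + 1);
    }

    synchronized void recordAckedOffset(long offset) {
      pending.remove(offset);
    }

    // earliest offset not yet acked; everything before it is safe to commit
    synchronized long getCommitPosition() {
      return pending.isEmpty() ? nextToEmit : pending.first();
    }

    // number of emitted-but-unacked offsets, compared against maxOutstanding
    synchronized int size() {
      return pending.size();
    }
  }

Because the commit position only ever advances to the first unacked offset, a restart can at worst replay records after that point, never skip them, which is the at-least-once behavior the acking spout aims for.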