/** * Aligns segments to match record boundaries (where a record boundary = a new line). If not * possible (max record size = 4MB), throws an exception. * * @param metadata The metadata to realign * @throws IOException Thrown if the input file path in the metadata is invalid or inaccessible. * @throws UploadFailedException Thrown if the length adjustment cannot be determined. */ private void alignSegmentsToRecordBoundaries(UploadMetadata metadata) throws IOException, UploadFailedException { int remainingSegments = 0; try (RandomAccessFile stream = new RandomAccessFile(metadata.getInputFilePath(), "r")) { long offset = 0; for (int i = 0; i < metadata.getSegments().length; i++) { UploadSegmentMetadata segment = metadata.getSegments()[i]; // updating segment lengths means that both the offset and the length of the next segment // needs to be recalculated, to keep the segment lengths somewhat balanced long diff = segment.getOffset() - offset; segment.setOffset(offset); segment.setLength(segment.getLength() + diff); if (segment.getOffset() >= metadata.getFileLength()) { continue; } if (segment.getSegmentNumber() == metadata.getSegments().length - 1) { // last segment picks up the slack segment.setLength(metadata.getFileLength() - segment.getOffset()); } else { // figure out how much do we need to adjust the length of the segment so it ends on a // record boundary (this can be negative or positive) int lengthAdjustment = determineLengthAdjustment( segment, stream, Charset.forName(metadata.getEncodingName()), metadata.getDelimiter()) + 1; // adjust segment length and offset segment.setLength(segment.getLength() + lengthAdjustment); } offset += segment.getLength(); remainingSegments++; } } // since we adjusted the segment lengths, it's possible that the last segment(s) became of zero // length; so remove it UploadSegmentMetadata[] segments = metadata.getSegments(); if (remainingSegments < segments.length) { ArrayUtils.subarray(segments, 0, remainingSegments); metadata.setSegments(segments); metadata.setSegmentCount(segments.length); } // NOTE: we are not validating consistency here; this method is called by createNewMetadata // which calls save() after this, which validates consistency anyway. }