/** * Attempts to load the metadata from an existing file in its canonical location. * * @param metadataFilePath The metadata file path. * @return The deserialized {@link UploadMetadata} from the specified file path. * @throws FileNotFoundException Thrown if the specified metadataFilePath is invalid. * @throws InvalidMetadataException Thrown if the metadata itself is invalid. */ public UploadMetadata getExistingMetadata(String metadataFilePath) throws FileNotFoundException, InvalidMetadataException { // load from file (based on input parameters) UploadMetadata metadata = UploadMetadata.loadFrom(metadataFilePath); metadata.validateConsistency(); return metadata; }
/** * Creates a new metadata based on the given input parameters, and saves it to its canonical * location. * * @param metadataFilePath Where the serialized metadata will be saved * @return A new {@link UploadMetadata} object. * @throws IOException Thrown if there is an issue saving the metadata to disk. * @throws UploadFailedException Thrown if there is an issue aligning the segment record * boundaries * @throws InvalidMetadataException Thrown if the metadata is invalid. */ public UploadMetadata createNewMetadata(String metadataFilePath) throws IOException, UploadFailedException, InvalidMetadataException { // determine segment count, segment length and upload Id // create metadata UploadMetadata metadata = new UploadMetadata(metadataFilePath, parameters); if (!parameters.isBinary() && metadata.getSegmentCount() > 1) { this.alignSegmentsToRecordBoundaries(metadata); } // save the initial version metadata.save(); return metadata; }
/** * Aligns segments to match record boundaries (where a record boundary = a new line). If not * possible (max record size = 4MB), throws an exception. * * @param metadata The metadata to realign * @throws IOException Thrown if the input file path in the metadata is invalid or inaccessible. * @throws UploadFailedException Thrown if the length adjustment cannot be determined. */ private void alignSegmentsToRecordBoundaries(UploadMetadata metadata) throws IOException, UploadFailedException { int remainingSegments = 0; try (RandomAccessFile stream = new RandomAccessFile(metadata.getInputFilePath(), "r")) { long offset = 0; for (int i = 0; i < metadata.getSegments().length; i++) { UploadSegmentMetadata segment = metadata.getSegments()[i]; // updating segment lengths means that both the offset and the length of the next segment // needs to be recalculated, to keep the segment lengths somewhat balanced long diff = segment.getOffset() - offset; segment.setOffset(offset); segment.setLength(segment.getLength() + diff); if (segment.getOffset() >= metadata.getFileLength()) { continue; } if (segment.getSegmentNumber() == metadata.getSegments().length - 1) { // last segment picks up the slack segment.setLength(metadata.getFileLength() - segment.getOffset()); } else { // figure out how much do we need to adjust the length of the segment so it ends on a // record boundary (this can be negative or positive) int lengthAdjustment = determineLengthAdjustment( segment, stream, Charset.forName(metadata.getEncodingName()), metadata.getDelimiter()) + 1; // adjust segment length and offset segment.setLength(segment.getLength() + lengthAdjustment); } offset += segment.getLength(); remainingSegments++; } } // since we adjusted the segment lengths, it's possible that the last segment(s) became of zero // length; so remove it UploadSegmentMetadata[] segments = metadata.getSegments(); if (remainingSegments < segments.length) { ArrayUtils.subarray(segments, 0, remainingSegments); metadata.setSegments(segments); metadata.setSegmentCount(segments.length); } // NOTE: we are not validating consistency here; this method is called by createNewMetadata // which calls save() after this, which validates consistency anyway. }