/** * Lists objects from AmazonS3 in chronological order [lexicographical order if 2 files have same * timestamp] which are later than or equal to the timestamp of the previous offset object * * @param s3Client * @param s3ConfigBean * @param pathMatcher glob patterns to match file name against * @param s3Offset current offset which provides the timestamp of the previous object * @param fetchSize number of objects to fetch in one go * @return * @throws AmazonClientException */ static List<S3ObjectSummary> listObjectsChronologically( AmazonS3Client s3Client, S3ConfigBean s3ConfigBean, PathMatcher pathMatcher, AmazonS3Source.S3Offset s3Offset, int fetchSize) throws AmazonClientException { // Algorithm: // - Full scan all objects that match the file name pattern and which are later than the file in // the offset // - Select the oldest "fetchSize" number of files and return them. TreeSet<S3ObjectSummary> treeSet = new TreeSet<>( new Comparator<S3ObjectSummary>() { @Override public int compare(S3ObjectSummary o1, S3ObjectSummary o2) { int result = o1.getLastModified().compareTo(o2.getLastModified()); if (result != 0) { // same modified time. Use name to sort return result; } return o1.getKey().compareTo(o2.getKey()); } }); S3Objects s3ObjectSummaries = S3Objects.withPrefix(s3Client, s3ConfigBean.s3Config.bucket, s3ConfigBean.s3Config.folder) .withBatchSize(BATCH_SIZE); for (S3ObjectSummary s : s3ObjectSummaries) { String fileName = s.getKey().substring(s3ConfigBean.s3Config.folder.length(), s.getKey().length()); if (!fileName.isEmpty()) { // fileName can be empty. // If the user manually creates a folder "myFolder/mySubFolder" in bucket "myBucket" and // uploads "myObject", // then the first objects returned here are: // myFolder/mySubFolder // myFolder/mySubFolder/myObject // // All is good when pipeline is run but preview returns with no data. 
So we should ignore // the empty file as it // has no data if (pathMatcher.matches(Paths.get(fileName)) && isEligible(s, s3Offset)) { treeSet.add(s); } if (treeSet.size() > fetchSize) { treeSet.pollLast(); } } } return new ArrayList<>(treeSet); }
/**
 * Finds the summary of the object whose key is exactly {@code objectKey}.
 *
 * <p>The bucket is listed using {@code objectKey} as the prefix, so other keys that merely start
 * with {@code objectKey} may be returned by the listing; only an exact key match is accepted.
 *
 * @param s3Client client used to list the bucket
 * @param bucket name of the bucket to search
 * @param objectKey full key of the object to look up
 * @return the summary of the matching object, or {@code null} if no listed key equals {@code
 *     objectKey}
 */
static S3ObjectSummary getObjectSummary(
    AmazonS3Client s3Client, String bucket, String objectKey) {
  for (S3ObjectSummary candidate : S3Objects.withPrefix(s3Client, bucket, objectKey)) {
    if (candidate.getKey().equals(objectKey)) {
      // First exact match wins; stop iterating the listing.
      return candidate;
    }
  }
  return null;
}
private static void populateFakes3() throws IOException, InterruptedException, URISyntaxException { BasicAWSCredentials credentials = new BasicAWSCredentials("foo", "bar"); s3client = new AmazonS3Client(credentials); s3client.setEndpoint("http://localhost:" + port); s3client.setS3ClientOptions(new S3ClientOptions().withPathStyleAccess(true)); createBucket(s3client, BUCKET_NAME); createBucket(s3client, POSTPROCESS_BUCKET); createBucket(s3client, ERROR_BUCKET); // create directory structure // mybucket/NorthAmerica/USA // mybucket/NorthAmerica/Canada // // write 3 files each under myBucket, myBucket/NorthAmerica, mybucket/NorthAmerica/USA, // mybucket/NorthAmerica/Canada // 12 files in total InputStream in = new ByteArrayInputStream("Hello World".getBytes()); PutObjectRequest putObjectRequest = new PutObjectRequest(BUCKET_NAME, "file1.log", in, new ObjectMetadata()); s3client.putObject(putObjectRequest); in = new ByteArrayInputStream("Hello World".getBytes()); putObjectRequest = new PutObjectRequest(BUCKET_NAME, "file2.log", in, new ObjectMetadata()); s3client.putObject(putObjectRequest); in = new ByteArrayInputStream("Hello World".getBytes()); putObjectRequest = new PutObjectRequest(BUCKET_NAME, "file3.log", in, new ObjectMetadata()); s3client.putObject(putObjectRequest); in = new ByteArrayInputStream("Hello World".getBytes()); putObjectRequest = new PutObjectRequest(BUCKET_NAME, "NorthAmerica/file4.log", in, new ObjectMetadata()); s3client.putObject(putObjectRequest); in = new ByteArrayInputStream("Hello World".getBytes()); putObjectRequest = new PutObjectRequest(BUCKET_NAME, "NorthAmerica/file5.log", in, new ObjectMetadata()); s3client.putObject(putObjectRequest); in = new ByteArrayInputStream("Hello World".getBytes()); putObjectRequest = new PutObjectRequest(BUCKET_NAME, "NorthAmerica/file6.log", in, new ObjectMetadata()); s3client.putObject(putObjectRequest); in = new ByteArrayInputStream("Hello World".getBytes()); putObjectRequest = new 
PutObjectRequest(BUCKET_NAME, "NorthAmerica/USA/file7.log", in, new ObjectMetadata()); s3client.putObject(putObjectRequest); in = new ByteArrayInputStream("Hello World".getBytes()); putObjectRequest = new PutObjectRequest(BUCKET_NAME, "NorthAmerica/USA/file8.log", in, new ObjectMetadata()); s3client.putObject(putObjectRequest); in = new ByteArrayInputStream("Hello World".getBytes()); putObjectRequest = new PutObjectRequest(BUCKET_NAME, "NorthAmerica/USA/file9.log", in, new ObjectMetadata()); s3client.putObject(putObjectRequest); in = new ByteArrayInputStream("Hello World".getBytes()); putObjectRequest = new PutObjectRequest( BUCKET_NAME, "NorthAmerica/Canada/file10.log", in, new ObjectMetadata()); s3client.putObject(putObjectRequest); in = new ByteArrayInputStream("Hello World".getBytes()); putObjectRequest = new PutObjectRequest( BUCKET_NAME, "NorthAmerica/Canada/file11.log", in, new ObjectMetadata()); s3client.putObject(putObjectRequest); in = new ByteArrayInputStream("Hello World".getBytes()); putObjectRequest = new PutObjectRequest( BUCKET_NAME, "NorthAmerica/Canada/file12.log", in, new ObjectMetadata()); s3client.putObject(putObjectRequest); putObjectRequest = new PutObjectRequest( BUCKET_NAME, "NorthAmerica/logArchive1.zip", new FileInputStream(new File(Resources.getResource("logArchive.zip").toURI())), new ObjectMetadata()); s3client.putObject(putObjectRequest); putObjectRequest = new PutObjectRequest( BUCKET_NAME, "NorthAmerica/logArchive2.zip", new FileInputStream(new File(Resources.getResource("logArchive.zip").toURI())), new ObjectMetadata()); s3client.putObject(putObjectRequest); putObjectRequest = new PutObjectRequest( BUCKET_NAME, "NorthAmerica/logArchive1.tar.gz", new FileInputStream(new File(Resources.getResource("logArchive.tar.gz").toURI())), new ObjectMetadata()); s3client.putObject(putObjectRequest); putObjectRequest = new PutObjectRequest( BUCKET_NAME, "NorthAmerica/logArchive2.tar.gz", new FileInputStream(new 
File(Resources.getResource("logArchive.tar.gz").toURI())), new ObjectMetadata()); s3client.putObject(putObjectRequest); putObjectRequest = new PutObjectRequest( BUCKET_NAME, "NorthAmerica/testAvro1.tar.gz", new FileInputStream(new File(Resources.getResource("testAvro.tar.gz").toURI())), new ObjectMetadata()); s3client.putObject(putObjectRequest); putObjectRequest = new PutObjectRequest( BUCKET_NAME, "NorthAmerica/testAvro2.tar.gz", new FileInputStream(new File(Resources.getResource("testAvro.tar.gz").toURI())), new ObjectMetadata()); s3client.putObject(putObjectRequest); int count = 0; if (s3client.doesBucketExist(BUCKET_NAME)) { for (S3ObjectSummary s : S3Objects.withPrefix(s3client, BUCKET_NAME, "")) { System.out.println(s.getKey()); count++; } } Assert.assertEquals(18, count); // 12 files + 3 dirs }