private int getObjectCount(AmazonS3Client s3Client, String bucket) {
  int count = 0;
  for (S3ObjectSummary s : S3Objects.inBucket(s3Client, bucket)) {
    count++;
  }
  return count;
}
/**
 * Lists objects from Amazon S3 in chronological order (lexicographical order if two files have
 * the same timestamp) which are later than or equal to the timestamp of the previous offset
 * object.
 *
 * @param s3Client the S3 client used for listing
 * @param s3ConfigBean source configuration providing the bucket and folder to scan
 * @param pathMatcher glob patterns to match the file name against
 * @param s3Offset current offset, which provides the timestamp of the previous object
 * @param fetchSize number of objects to fetch in one go
 * @return up to {@code fetchSize} object summaries in chronological order
 * @throws AmazonClientException if the listing fails
 */
static List<S3ObjectSummary> listObjectsChronologically(
    AmazonS3Client s3Client,
    S3ConfigBean s3ConfigBean,
    PathMatcher pathMatcher,
    AmazonS3Source.S3Offset s3Offset,
    int fetchSize) throws AmazonClientException {
  // Algorithm:
  // - Full scan of all objects that match the file name pattern and are later than the file in
  //   the offset.
  // - Select the oldest "fetchSize" files and return them.
  TreeSet<S3ObjectSummary> treeSet = new TreeSet<>(new Comparator<S3ObjectSummary>() {
    @Override
    public int compare(S3ObjectSummary o1, S3ObjectSummary o2) {
      int result = o1.getLastModified().compareTo(o2.getLastModified());
      if (result != 0) {
        return result;
      }
      // Same modified time: fall back to the key (name) to keep the order stable.
      return o1.getKey().compareTo(o2.getKey());
    }
  });

  S3Objects s3ObjectSummaries =
      S3Objects.withPrefix(s3Client, s3ConfigBean.s3Config.bucket, s3ConfigBean.s3Config.folder)
          .withBatchSize(BATCH_SIZE);

  for (S3ObjectSummary s : s3ObjectSummaries) {
    String fileName = s.getKey().substring(s3ConfigBean.s3Config.folder.length());
    if (!fileName.isEmpty()) {
      // fileName can be empty. If the user manually creates a folder "myFolder/mySubFolder" in
      // bucket "myBucket" and uploads "myObject", then the first objects returned here are:
      //   myFolder/mySubFolder
      //   myFolder/mySubFolder/myObject
      // All is good when the pipeline is run, but preview returns no data, so the empty file
      // name is ignored because it has no data.
      if (pathMatcher.matches(Paths.get(fileName)) && isEligible(s, s3Offset)) {
        treeSet.add(s);
      }
      if (treeSet.size() > fetchSize) {
        // Keep only the oldest fetchSize entries.
        treeSet.pollLast();
      }
    }
  }
  return new ArrayList<>(treeSet);
}
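// Hedged illustration, not part of the original source: the comparator above orders summaries by
// last-modified time and only falls back to the key when the timestamps tie. The method name and
// the hand-built summaries below are made up for the example; S3ObjectSummary is a plain bean, so
// no S3 call is needed. Requires java.util.Date in addition to the imports used above.
static void illustrateChronologicalOrdering() {
  S3ObjectSummary older = new S3ObjectSummary();
  older.setKey("folder/z.log");
  older.setLastModified(new Date(1000L));

  S3ObjectSummary newer = new S3ObjectSummary();
  newer.setKey("folder/a.log");
  newer.setLastModified(new Date(2000L));

  // Timestamps differ, so the key is never consulted: "older" sorts first despite its larger key.
  int byTime = older.getLastModified().compareTo(newer.getLastModified()); // < 0

  S3ObjectSummary sameTime = new S3ObjectSummary();
  sameTime.setKey("folder/a.log");
  sameTime.setLastModified(new Date(1000L));

  // Timestamps tie, so the lexicographically smaller key ("folder/a.log") sorts first.
  int byKey = older.getKey().compareTo(sameTime.getKey()); // > 0
}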
static S3ObjectSummary getObjectSummary(AmazonS3Client s3Client, String bucket, String objectKey) {
  S3ObjectSummary s3ObjectSummary = null;
  // The prefix listing can return other keys that merely share the prefix, so match the exact key.
  S3Objects s3ObjectSummaries = S3Objects.withPrefix(s3Client, bucket, objectKey);
  for (S3ObjectSummary s : s3ObjectSummaries) {
    if (s.getKey().equals(objectKey)) {
      s3ObjectSummary = s;
      break;
    }
  }
  return s3ObjectSummary;
}
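// Hedged usage sketch, not part of the original source: resolving a single key back to its
// summary, e.g. to read its size or timestamp without downloading the object. The method name,
// bucket and key are placeholders (the key mirrors the test fixture further down).
static void printSummaryExample(AmazonS3Client s3Client) {
  S3ObjectSummary summary = getObjectSummary(s3Client, "mybucket", "NorthAmerica/file4.log");
  if (summary != null) {
    System.out.println(summary.getKey() + " is " + summary.getSize() + " bytes, last modified "
        + summary.getLastModified());
  }
}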
private static void createBucket(AmazonS3Client s3client, String bucketName) {
  if (s3client.doesBucketExist(bucketName)) {
    // Empty the bucket first; a non-empty bucket cannot be deleted.
    for (S3ObjectSummary s : S3Objects.inBucket(s3client, bucketName)) {
      s3client.deleteObject(bucketName, s.getKey());
    }
    s3client.deleteBucket(bucketName);
  }
  Assert.assertFalse(s3client.doesBucketExist(bucketName));
  // Note that CreateBucketRequest does not specify a region for the bucket bucketName.
  s3client.createBucket(new CreateBucketRequest(bucketName));
}
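// Hedged aside, not in the original helper: deleteBucket fails while object *versions* remain, so
// a versioned bucket would additionally need its versions removed, roughly as sketched below. The
// method name is made up; S3Versions and S3VersionSummary are the versioned counterparts of the
// S3Objects iterable and summaries used elsewhere in this code.
static void deleteAllVersions(AmazonS3Client s3client, String bucketName) {
  for (S3VersionSummary v : S3Versions.inBucket(s3client, bucketName)) {
    s3client.deleteVersion(bucketName, v.getKey(), v.getVersionId());
  }
}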
public static void main(String[] args) throws Exception {
  System.out.println("===========================================");
  System.out.println("Welcome to the AWS Java SDK!");
  System.out.println("===========================================");

  init();

  try {
    /*
     * The Amazon EC2 client allows you to easily launch and configure
     * computing capacity in AWS datacenters.
     *
     * In this sample, we use the EC2 client to list the availability zones
     * in a region, and then list the instances running in those zones.
     */
    DescribeAvailabilityZonesResult availabilityZonesResult = ec2.describeAvailabilityZones();
    List<AvailabilityZone> availabilityZones = availabilityZonesResult.getAvailabilityZones();
    System.out.println("You have access to " + availabilityZones.size() + " availability zones:");
    for (AvailabilityZone zone : availabilityZones) {
      System.out.println(" - " + zone.getZoneName() + " (" + zone.getRegionName() + ")");
    }

    DescribeInstancesResult describeInstancesResult = ec2.describeInstances();
    Set<Instance> instances = new HashSet<Instance>();
    for (Reservation reservation : describeInstancesResult.getReservations()) {
      instances.addAll(reservation.getInstances());
    }
    System.out.println("You have " + instances.size() + " Amazon EC2 instance(s) running.");

    /*
     * The Amazon S3 client allows you to manage and configure buckets
     * and to upload and download data.
     *
     * In this sample, we use the S3 client to list all the buckets in
     * your account, and then iterate over the object metadata for all
     * objects in one bucket to calculate the total object count and
     * space usage for that one bucket. Note that this sample only
     * retrieves the object's metadata and doesn't actually download the
     * object's content.
     *
     * In addition to the low-level Amazon S3 client in the SDK, there
     * is also a high-level TransferManager API that provides
     * asynchronous management of uploads and downloads with an easy to
     * use API:
     * http://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/services/s3/transfer/TransferManager.html
     */
    List<Bucket> buckets = s3.listBuckets();
    System.out.println("You have " + buckets.size() + " Amazon S3 bucket(s).");

    if (buckets.size() > 0) {
      Bucket bucket = buckets.get(0);

      long totalSize = 0;
      long totalItems = 0;
      /*
       * The S3Objects and S3Versions classes provide convenient APIs
       * for iterating over the contents of your buckets, without
       * having to manually deal with response pagination.
       */
      for (S3ObjectSummary objectSummary : S3Objects.inBucket(s3, bucket.getName())) {
        totalSize += objectSummary.getSize();
        totalItems++;
      }

      System.out.println("The bucket '" + bucket.getName() + "' contains " + totalItems
          + " objects with a total size of " + totalSize + " bytes.");
    }
  } catch (AmazonServiceException ase) {
    /*
     * AmazonServiceException represents an error response from an AWS
     * service, i.e. your request made it to AWS, but the AWS service
     * either found it invalid or encountered an error trying to execute it.
     */
    System.out.println("Error Message: " + ase.getMessage());
    System.out.println("HTTP Status Code: " + ase.getStatusCode());
    System.out.println("AWS Error Code: " + ase.getErrorCode());
    System.out.println("Error Type: " + ase.getErrorType());
    System.out.println("Request ID: " + ase.getRequestId());
  } catch (AmazonClientException ace) {
    /*
     * AmazonClientException represents an error that occurred inside
     * the client on the local host, either while trying to send the
     * request to AWS or interpret the response. For example, if no
     * network connection is available, the client won't be able to
     * connect to AWS to execute a request and will throw an
     * AmazonClientException.
     */
    System.out.println("Error Message: " + ace.getMessage());
  }
}
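/*
 * Hedged sketch, not part of the original sample: the high-level TransferManager mentioned in the
 * S3 comment above handles uploads and downloads asynchronously. The method name, bucket name,
 * key, and local file path are placeholders; TransferManager and Upload live in
 * com.amazonaws.services.s3.transfer.
 */
private static void transferManagerSketch(AmazonS3 s3) throws InterruptedException {
  TransferManager tm = new TransferManager(s3);
  try {
    // The upload runs on the TransferManager's background threads.
    Upload upload = tm.upload("my-bucket", "my-key", new File("/tmp/local-file.txt"));
    upload.waitForCompletion(); // block until the transfer finishes or fails
  } finally {
    tm.shutdownNow(false); // false: keep the shared AmazonS3 client alive
  }
}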
private static void populateFakes3() throws IOException, InterruptedException, URISyntaxException {
  BasicAWSCredentials credentials = new BasicAWSCredentials("foo", "bar");
  s3client = new AmazonS3Client(credentials);
  s3client.setEndpoint("http://localhost:" + port);
  s3client.setS3ClientOptions(new S3ClientOptions().withPathStyleAccess(true));

  createBucket(s3client, BUCKET_NAME);
  createBucket(s3client, POSTPROCESS_BUCKET);
  createBucket(s3client, ERROR_BUCKET);

  // Create the directory structure
  //   mybucket/NorthAmerica/USA
  //   mybucket/NorthAmerica/Canada
  // and write 3 files each under mybucket, mybucket/NorthAmerica, mybucket/NorthAmerica/USA and
  // mybucket/NorthAmerica/Canada -- 12 files in total.
  String[] textKeys = {
      "file1.log", "file2.log", "file3.log",
      "NorthAmerica/file4.log", "NorthAmerica/file5.log", "NorthAmerica/file6.log",
      "NorthAmerica/USA/file7.log", "NorthAmerica/USA/file8.log", "NorthAmerica/USA/file9.log",
      "NorthAmerica/Canada/file10.log", "NorthAmerica/Canada/file11.log",
      "NorthAmerica/Canada/file12.log"
  };
  for (String key : textKeys) {
    InputStream in = new ByteArrayInputStream("Hello World".getBytes());
    s3client.putObject(new PutObjectRequest(BUCKET_NAME, key, in, new ObjectMetadata()));
  }

  // Upload the archive fixtures from the test resources.
  String[][] archives = {
      {"NorthAmerica/logArchive1.zip", "logArchive.zip"},
      {"NorthAmerica/logArchive2.zip", "logArchive.zip"},
      {"NorthAmerica/logArchive1.tar.gz", "logArchive.tar.gz"},
      {"NorthAmerica/logArchive2.tar.gz", "logArchive.tar.gz"},
      {"NorthAmerica/testAvro1.tar.gz", "testAvro.tar.gz"},
      {"NorthAmerica/testAvro2.tar.gz", "testAvro.tar.gz"}
  };
  for (String[] archive : archives) {
    s3client.putObject(new PutObjectRequest(
        BUCKET_NAME,
        archive[0],
        new FileInputStream(new File(Resources.getResource(archive[1]).toURI())),
        new ObjectMetadata()));
  }

  int count = 0;
  if (s3client.doesBucketExist(BUCKET_NAME)) {
    for (S3ObjectSummary s : S3Objects.withPrefix(s3client, BUCKET_NAME, "")) {
      System.out.println(s.getKey());
      count++;
    }
  }
  Assert.assertEquals(18, count); // 12 log files + 6 archives
}