private int getObjectCount(AmazonS3Client s3Client, String bucket) {
   int count = 0;
   for (S3ObjectSummary s : S3Objects.inBucket(s3Client, bucket)) {
     count++;
   }
   return count;
 }
Example #2
  /**
   * Lists objects from Amazon S3 in chronological order (falling back to lexicographical order by
   * key when two objects share the same last-modified timestamp). Only objects whose timestamp is
   * later than or equal to the timestamp of the previous offset object are returned.
   *
   * @param s3Client client used to list the objects
   * @param s3ConfigBean configuration that provides the bucket and folder to scan
   * @param pathMatcher glob patterns to match file names against
   * @param s3Offset current offset, which provides the timestamp of the previous object
   * @param fetchSize number of objects to fetch in one go
   * @return the oldest {@code fetchSize} matching object summaries, in chronological order
   * @throws AmazonClientException if listing objects from Amazon S3 fails
   */
  static List<S3ObjectSummary> listObjectsChronologically(
      AmazonS3Client s3Client,
      S3ConfigBean s3ConfigBean,
      PathMatcher pathMatcher,
      AmazonS3Source.S3Offset s3Offset,
      int fetchSize)
      throws AmazonClientException {

    // Algorithm:
    // - Scan all objects that match the file name pattern and are newer than the object recorded
    //   in the offset.
    // - Select the oldest "fetchSize" objects and return them.
    TreeSet<S3ObjectSummary> treeSet =
        new TreeSet<>(
            new Comparator<S3ObjectSummary>() {
              @Override
              public int compare(S3ObjectSummary o1, S3ObjectSummary o2) {
                int result = o1.getLastModified().compareTo(o2.getLastModified());
                if (result != 0) {
                  return result;
                }
                // Same modified time; fall back to the object key for a stable order.
                return o1.getKey().compareTo(o2.getKey());
              }
            });

    S3Objects s3ObjectSummaries =
        S3Objects.withPrefix(s3Client, s3ConfigBean.s3Config.bucket, s3ConfigBean.s3Config.folder)
            .withBatchSize(BATCH_SIZE);
    for (S3ObjectSummary s : s3ObjectSummaries) {
      String fileName =
          s.getKey().substring(s3ConfigBean.s3Config.folder.length(), s.getKey().length());
      if (!fileName.isEmpty()) {
        // fileName can be empty.
        // If the user manually creates a folder "myFolder/mySubFolder" in bucket "myBucket" and
        // then uploads "myObject", the first objects returned here are:
        //   myFolder/mySubFolder
        //   myFolder/mySubFolder/myObject
        //
        // The pipeline runs fine in that case, but preview returns no data, so we ignore the
        // empty file name since it carries no data.
        if (pathMatcher.matches(Paths.get(fileName)) && isEligible(s, s3Offset)) {
          treeSet.add(s);
        }
        if (treeSet.size() > fetchSize) {
          treeSet.pollLast();
        }
      }
    }

    return new ArrayList<>(treeSet);
  }
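The isEligible helper used above is not part of this excerpt. A minimal sketch of what that check could look like, assuming the S3Offset exposes the previous object's last-modified time via a getTimestamp() accessor (that accessor and the millisecond format are assumptions, not the actual StreamSets API):

  // Hypothetical sketch, not the actual implementation: assumes S3Offset.getTimestamp()
  // returns the previous object's last-modified time in milliseconds as a String.
  private static boolean isEligible(S3ObjectSummary s, AmazonS3Source.S3Offset s3Offset) {
    if (s3Offset == null || s3Offset.getTimestamp() == null || s3Offset.getTimestamp().isEmpty()) {
      // No previous offset: every object is eligible.
      return true;
    }
    long offsetTimestamp = Long.parseLong(s3Offset.getTimestamp());
    // Keep objects whose last-modified time is later than or equal to the offset timestamp,
    // matching the contract described in the Javadoc above.
    return s.getLastModified().getTime() >= offsetTimestamp;
  }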
Example #3
 static S3ObjectSummary getObjectSummary(
     AmazonS3Client s3Client, String bucket, String objectKey) {
   S3ObjectSummary s3ObjectSummary = null;
   S3Objects s3ObjectSummaries = S3Objects.withPrefix(s3Client, bucket, objectKey);
   for (S3ObjectSummary s : s3ObjectSummaries) {
     if (s.getKey().equals(objectKey)) {
       s3ObjectSummary = s;
       break;
     }
   }
   return s3ObjectSummary;
 }
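The getObjectSummary method above returns null when no object with that exact key exists, so callers should guard for it. A small hypothetical usage sketch (bucket and key names are made up for illustration):

 // Hypothetical usage; bucket and key names are illustrative only.
 S3ObjectSummary summary = getObjectSummary(s3Client, "my-bucket", "NorthAmerica/file4.log");
 if (summary == null) {
   System.out.println("No such object");
 } else {
   System.out.println(summary.getKey() + " last modified at " + summary.getLastModified());
 }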
 private static void createBucket(AmazonS3Client s3client, String bucketName) {
   if (s3client.doesBucketExist(bucketName)) {
     for (S3ObjectSummary s : S3Objects.inBucket(s3client, bucketName)) {
       s3client.deleteObject(bucketName, s.getKey());
     }
     s3client.deleteBucket(bucketName);
   }
   Assert.assertFalse(s3client.doesBucketExist(bucketName));
    // Note that CreateBucketRequest does not specify a region, so the bucket is created in the
    // client's default region.
   s3client.createBucket(new CreateBucketRequest(bucketName));
 }
Example #5
  public static void main(String[] args) throws Exception {
    System.out.println("===========================================");
    System.out.println("Welcome to the AWS Java SDK!");
    System.out.println("===========================================");

    init();

    try {
      /*
       * The Amazon EC2 client allows you to easily launch and configure
       * computing capacity in AWS datacenters.
       *
       * In this sample, we use the EC2 client to list the availability zones
       * in a region, and then list the instances running in those zones.
       */
      DescribeAvailabilityZonesResult availabilityZonesResult = ec2.describeAvailabilityZones();
      List<AvailabilityZone> availabilityZones = availabilityZonesResult.getAvailabilityZones();
      System.out.println("You have access to " + availabilityZones.size() + " availability zones:");
      for (AvailabilityZone zone : availabilityZones) {
        System.out.println(" - " + zone.getZoneName() + " (" + zone.getRegionName() + ")");
      }

      DescribeInstancesResult describeInstancesResult = ec2.describeInstances();
      Set<Instance> instances = new HashSet<Instance>();
      for (Reservation reservation : describeInstancesResult.getReservations()) {
        instances.addAll(reservation.getInstances());
      }

      System.out.println("You have " + instances.size() + " Amazon EC2 instance(s) running.");

      /*
       * The Amazon S3 client allows you to manage and configure buckets
       * and to upload and download data.
       *
       * In this sample, we use the S3 client to list all the buckets in
       * your account, and then iterate over the object metadata for all
       * objects in one bucket to calculate the total object count and
       * space usage for that one bucket. Note that this sample only
       * retrieves the object's metadata and doesn't actually download the
       * object's content.
       *
       * In addition to the low-level Amazon S3 client in the SDK, there
       * is also a high-level TransferManager API that provides
       * asynchronous management of uploads and downloads with an
       * easy-to-use API (a minimal sketch follows this example):
       *   http://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/services/s3/transfer/TransferManager.html
       */
      List<Bucket> buckets = s3.listBuckets();
      System.out.println("You have " + buckets.size() + " Amazon S3 bucket(s).");

      if (buckets.size() > 0) {
        Bucket bucket = buckets.get(0);

        long totalSize = 0;
        long totalItems = 0;
        /*
         * The S3Objects and S3Versions classes provide convenient APIs
         * for iterating over the contents of your buckets, without
         * having to manually deal with response pagination.
         */
        for (S3ObjectSummary objectSummary : S3Objects.inBucket(s3, bucket.getName())) {
          totalSize += objectSummary.getSize();
          totalItems++;
        }

        System.out.println(
            "The bucket '"
                + bucket.getName()
                + "' contains "
                + totalItems
                + " objects "
                + "with a total size of "
                + totalSize
                + " bytes.");
      }
    } catch (AmazonServiceException ase) {
      /*
       * AmazonServiceExceptions represent an error response from an AWS
       * service, i.e. your request made it to AWS, but the AWS service
       * either found it invalid or encountered an error trying to execute
       * it.
       */
      System.out.println("Error Message:    " + ase.getMessage());
      System.out.println("HTTP Status Code: " + ase.getStatusCode());
      System.out.println("AWS Error Code:   " + ase.getErrorCode());
      System.out.println("Error Type:       " + ase.getErrorType());
      System.out.println("Request ID:       " + ase.getRequestId());
    } catch (AmazonClientException ace) {
      /*
       * AmazonClientExceptions represent an error that occurred inside
       * the client on the local host, either while trying to send the
       * request to AWS or interpret the response. For example, if no
       * network connection is available, the client won't be able to
       * connect to AWS to execute a request and will throw an
       * AmazonClientException.
       */
      System.out.println("Error Message: " + ace.getMessage());
    }
  }
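The comment in the sample above mentions the higher-level TransferManager API for asynchronous uploads and downloads. A minimal sketch of how it is typically used on top of the same low-level client, assuming the usual com.amazonaws.services.s3.transfer imports (bucket, key, and file paths are placeholders):

  private static void uploadWithTransferManager(AmazonS3 s3) throws InterruptedException {
    // TransferManager wraps the low-level client and takes care of multipart uploads
    // and progress tracking.
    TransferManager transferManager = new TransferManager(s3);
    try {
      // upload() returns immediately; waitForCompletion() blocks until the transfer finishes.
      Upload upload =
          transferManager.upload("my-bucket", "backups/data.bin", new File("/tmp/data.bin"));
      upload.waitForCompletion();
    } finally {
      // Release the TransferManager's threads without shutting down the shared client.
      transferManager.shutdownNow(false);
    }
  }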
  private static void populateFakes3()
      throws IOException, InterruptedException, URISyntaxException {
    BasicAWSCredentials credentials = new BasicAWSCredentials("foo", "bar");
    s3client = new AmazonS3Client(credentials);
    s3client.setEndpoint("http://localhost:" + port);
    s3client.setS3ClientOptions(new S3ClientOptions().withPathStyleAccess(true));

    createBucket(s3client, BUCKET_NAME);
    createBucket(s3client, POSTPROCESS_BUCKET);
    createBucket(s3client, ERROR_BUCKET);

    // Create the directory structure:
    //   mybucket/NorthAmerica/USA
    //   mybucket/NorthAmerica/Canada
    //
    // Write 3 files each under mybucket, mybucket/NorthAmerica, mybucket/NorthAmerica/USA and
    // mybucket/NorthAmerica/Canada (12 .log files in total), plus 6 archive files under
    // mybucket/NorthAmerica.

    InputStream in = new ByteArrayInputStream("Hello World".getBytes());
    PutObjectRequest putObjectRequest =
        new PutObjectRequest(BUCKET_NAME, "file1.log", in, new ObjectMetadata());
    s3client.putObject(putObjectRequest);
    in = new ByteArrayInputStream("Hello World".getBytes());
    putObjectRequest = new PutObjectRequest(BUCKET_NAME, "file2.log", in, new ObjectMetadata());
    s3client.putObject(putObjectRequest);
    in = new ByteArrayInputStream("Hello World".getBytes());
    putObjectRequest = new PutObjectRequest(BUCKET_NAME, "file3.log", in, new ObjectMetadata());
    s3client.putObject(putObjectRequest);

    in = new ByteArrayInputStream("Hello World".getBytes());
    putObjectRequest =
        new PutObjectRequest(BUCKET_NAME, "NorthAmerica/file4.log", in, new ObjectMetadata());
    s3client.putObject(putObjectRequest);
    in = new ByteArrayInputStream("Hello World".getBytes());
    putObjectRequest =
        new PutObjectRequest(BUCKET_NAME, "NorthAmerica/file5.log", in, new ObjectMetadata());
    s3client.putObject(putObjectRequest);
    in = new ByteArrayInputStream("Hello World".getBytes());
    putObjectRequest =
        new PutObjectRequest(BUCKET_NAME, "NorthAmerica/file6.log", in, new ObjectMetadata());
    s3client.putObject(putObjectRequest);

    in = new ByteArrayInputStream("Hello World".getBytes());
    putObjectRequest =
        new PutObjectRequest(BUCKET_NAME, "NorthAmerica/USA/file7.log", in, new ObjectMetadata());
    s3client.putObject(putObjectRequest);
    in = new ByteArrayInputStream("Hello World".getBytes());
    putObjectRequest =
        new PutObjectRequest(BUCKET_NAME, "NorthAmerica/USA/file8.log", in, new ObjectMetadata());
    s3client.putObject(putObjectRequest);
    in = new ByteArrayInputStream("Hello World".getBytes());
    putObjectRequest =
        new PutObjectRequest(BUCKET_NAME, "NorthAmerica/USA/file9.log", in, new ObjectMetadata());
    s3client.putObject(putObjectRequest);

    in = new ByteArrayInputStream("Hello World".getBytes());
    putObjectRequest =
        new PutObjectRequest(
            BUCKET_NAME, "NorthAmerica/Canada/file10.log", in, new ObjectMetadata());
    s3client.putObject(putObjectRequest);
    in = new ByteArrayInputStream("Hello World".getBytes());
    putObjectRequest =
        new PutObjectRequest(
            BUCKET_NAME, "NorthAmerica/Canada/file11.log", in, new ObjectMetadata());
    s3client.putObject(putObjectRequest);
    in = new ByteArrayInputStream("Hello World".getBytes());
    putObjectRequest =
        new PutObjectRequest(
            BUCKET_NAME, "NorthAmerica/Canada/file12.log", in, new ObjectMetadata());
    s3client.putObject(putObjectRequest);

    putObjectRequest =
        new PutObjectRequest(
            BUCKET_NAME,
            "NorthAmerica/logArchive1.zip",
            new FileInputStream(new File(Resources.getResource("logArchive.zip").toURI())),
            new ObjectMetadata());
    s3client.putObject(putObjectRequest);
    putObjectRequest =
        new PutObjectRequest(
            BUCKET_NAME,
            "NorthAmerica/logArchive2.zip",
            new FileInputStream(new File(Resources.getResource("logArchive.zip").toURI())),
            new ObjectMetadata());
    s3client.putObject(putObjectRequest);

    putObjectRequest =
        new PutObjectRequest(
            BUCKET_NAME,
            "NorthAmerica/logArchive1.tar.gz",
            new FileInputStream(new File(Resources.getResource("logArchive.tar.gz").toURI())),
            new ObjectMetadata());
    s3client.putObject(putObjectRequest);
    putObjectRequest =
        new PutObjectRequest(
            BUCKET_NAME,
            "NorthAmerica/logArchive2.tar.gz",
            new FileInputStream(new File(Resources.getResource("logArchive.tar.gz").toURI())),
            new ObjectMetadata());
    s3client.putObject(putObjectRequest);

    putObjectRequest =
        new PutObjectRequest(
            BUCKET_NAME,
            "NorthAmerica/testAvro1.tar.gz",
            new FileInputStream(new File(Resources.getResource("testAvro.tar.gz").toURI())),
            new ObjectMetadata());
    s3client.putObject(putObjectRequest);

    putObjectRequest =
        new PutObjectRequest(
            BUCKET_NAME,
            "NorthAmerica/testAvro2.tar.gz",
            new FileInputStream(new File(Resources.getResource("testAvro.tar.gz").toURI())),
            new ObjectMetadata());
    s3client.putObject(putObjectRequest);

    int count = 0;
    if (s3client.doesBucketExist(BUCKET_NAME)) {
      for (S3ObjectSummary s : S3Objects.withPrefix(s3client, BUCKET_NAME, "")) {
        System.out.println(s.getKey());
        count++;
      }
    }
    Assert.assertEquals(18, count); // 12 .log files + 2 zip + 2 tar.gz + 2 Avro tar.gz archives
  }
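The layout described at the top of populateFakes3 can also be sanity-checked per prefix with the same S3Objects.withPrefix pattern used above; a small hypothetical helper for that kind of assertion:

  // Hypothetical helper: counts objects under a prefix and compares with the expected number.
  private static void assertPrefixCount(AmazonS3Client s3client, String prefix, int expected) {
    int count = 0;
    for (S3ObjectSummary s : S3Objects.withPrefix(s3client, BUCKET_NAME, prefix)) {
      count++;
    }
    Assert.assertEquals(expected, count);
  }

With the objects written above, assertPrefixCount(s3client, "NorthAmerica/USA/", 3) and assertPrefixCount(s3client, "NorthAmerica/Canada/", 3) would both pass, while the empty prefix yields the 18 objects asserted at the end of the method.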