コード例 #1
0
  /**
   * Lists objects from Amazon S3 in chronological order (lexicographical key order when two
   * objects share a timestamp) which are later than or equal to the timestamp of the previous
   * offset object.
   *
   * <p>Every object under the configured bucket/folder prefix is scanned; only the oldest
   * {@code fetchSize} matching objects are retained and returned.
   *
   * @param s3Client client used to list the objects
   * @param s3ConfigBean configuration supplying the bucket and the folder prefix to scan
   * @param pathMatcher glob patterns to match file name against
   * @param s3Offset current offset which provides the timestamp of the previous object
   * @param fetchSize number of objects to fetch in one go
   * @return the retained object summaries, oldest first
   * @throws AmazonClientException if the S3 client fails while listing
   */
  static List<S3ObjectSummary> listObjectsChronologically(
      AmazonS3Client s3Client,
      S3ConfigBean s3ConfigBean,
      PathMatcher pathMatcher,
      AmazonS3Source.S3Offset s3Offset,
      int fetchSize)
      throws AmazonClientException {

    // Oldest first; objects with the same modified time are ordered by key.
    Comparator<S3ObjectSummary> oldestFirst =
        new Comparator<S3ObjectSummary>() {
          @Override
          public int compare(S3ObjectSummary left, S3ObjectSummary right) {
            int byTime = left.getLastModified().compareTo(right.getLastModified());
            return byTime != 0 ? byTime : left.getKey().compareTo(right.getKey());
          }
        };

    // Bounded sorted set: holds the oldest "fetchSize" matches seen so far during the full scan.
    TreeSet<S3ObjectSummary> oldestMatches = new TreeSet<>(oldestFirst);

    String folder = s3ConfigBean.s3Config.folder;
    S3Objects candidates =
        S3Objects.withPrefix(s3Client, s3ConfigBean.s3Config.bucket, folder)
            .withBatchSize(BATCH_SIZE);

    for (S3ObjectSummary candidate : candidates) {
      // File name relative to the configured folder prefix.
      String fileName = candidate.getKey().substring(folder.length());

      // fileName can be empty: if the user manually creates a folder "myFolder/mySubFolder" in
      // bucket "myBucket" and uploads "myObject", the listing returns the folder entry
      // ("myFolder/mySubFolder") ahead of "myFolder/mySubFolder/myObject". All is good when the
      // pipeline is run but preview returns with no data, so the empty entry is ignored as it has
      // no data.
      if (fileName.isEmpty()) {
        continue;
      }

      if (pathMatcher.matches(Paths.get(fileName)) && isEligible(candidate, s3Offset)) {
        oldestMatches.add(candidate);
      }
      // Drop the newest entry once the set grows past the requested size.
      if (oldestMatches.size() > fetchSize) {
        oldestMatches.pollLast();
      }
    }

    return new ArrayList<>(oldestMatches);
  }
コード例 #2
0
 /**
  * Finds the summary of a single object by listing the bucket with the object key as prefix.
  *
  * @param s3Client client used to list the objects
  * @param bucket bucket to search
  * @param objectKey key the returned summary must equal exactly
  * @return the first listed summary whose key equals {@code objectKey}, or {@code null} if the
  *     listing contains no such key
  */
 static S3ObjectSummary getObjectSummary(
     AmazonS3Client s3Client, String bucket, String objectKey) {
   // A prefix listing may also return longer keys that merely start with objectKey, so each
   // candidate is compared for an exact match.
   for (S3ObjectSummary candidate : S3Objects.withPrefix(s3Client, bucket, objectKey)) {
     if (candidate.getKey().equals(objectKey)) {
       return candidate;
     }
   }
   return null;
 }
コード例 #3
0
  /**
   * Points {@code s3client} at the local endpoint, creates the three buckets and uploads the
   * fixture objects into {@code BUCKET_NAME}, then asserts that the bucket lists 18 objects.
   */
  private static void populateFakes3()
      throws IOException, InterruptedException, URISyntaxException {
    s3client = new AmazonS3Client(new BasicAWSCredentials("foo", "bar"));
    s3client.setEndpoint("http://localhost:" + port);
    s3client.setS3ClientOptions(new S3ClientOptions().withPathStyleAccess(true));

    createBucket(s3client, BUCKET_NAME);
    createBucket(s3client, POSTPROCESS_BUCKET);
    createBucket(s3client, ERROR_BUCKET);

    // Directory structure:
    //   mybucket/NorthAmerica/USA
    //   mybucket/NorthAmerica/Canada
    //
    // 3 text files each under myBucket, myBucket/NorthAmerica, mybucket/NorthAmerica/USA and
    // mybucket/NorthAmerica/Canada: 12 text files in total, uploaded in this order.
    String[] textKeys = {
      "file1.log",
      "file2.log",
      "file3.log",
      "NorthAmerica/file4.log",
      "NorthAmerica/file5.log",
      "NorthAmerica/file6.log",
      "NorthAmerica/USA/file7.log",
      "NorthAmerica/USA/file8.log",
      "NorthAmerica/USA/file9.log",
      "NorthAmerica/Canada/file10.log",
      "NorthAmerica/Canada/file11.log",
      "NorthAmerica/Canada/file12.log",
    };
    for (String key : textKeys) {
      InputStream content = new ByteArrayInputStream("Hello World".getBytes());
      s3client.putObject(new PutObjectRequest(BUCKET_NAME, key, content, new ObjectMetadata()));
    }

    // 6 archive objects under NorthAmerica: {object key, classpath resource to upload}.
    String[][] archiveUploads = {
      {"NorthAmerica/logArchive1.zip", "logArchive.zip"},
      {"NorthAmerica/logArchive2.zip", "logArchive.zip"},
      {"NorthAmerica/logArchive1.tar.gz", "logArchive.tar.gz"},
      {"NorthAmerica/logArchive2.tar.gz", "logArchive.tar.gz"},
      {"NorthAmerica/testAvro1.tar.gz", "testAvro.tar.gz"},
      {"NorthAmerica/testAvro2.tar.gz", "testAvro.tar.gz"},
    };
    for (String[] upload : archiveUploads) {
      String key = upload[0];
      File resourceFile = new File(Resources.getResource(upload[1]).toURI());
      s3client.putObject(
          new PutObjectRequest(
              BUCKET_NAME, key, new FileInputStream(resourceFile), new ObjectMetadata()));
    }

    // Sanity check: list everything in the bucket and count it.
    int listed = 0;
    if (s3client.doesBucketExist(BUCKET_NAME)) {
      for (S3ObjectSummary summary : S3Objects.withPrefix(s3client, BUCKET_NAME, "")) {
        System.out.println(summary.getKey());
        listed++;
      }
    }
    Assert.assertEquals(18, listed); // 12 text files + 6 archives
  }