/**
 * Calls {@code produce} 50 times on the source returned by {@code createTarGzipSource()},
 * passing each returned offset into the next call, and gathers every record emitted on
 * "lane". Afterwards checks the total record count and that the last offset contains the
 * expected fragment.
 */
@Test
public void testProduceTarGzipTextFile() throws Exception {
  AmazonS3Source source = createTarGzipSource();
  SourceRunner runner =
      new SourceRunner.Builder(AmazonS3DSource.class, source).addOutputLane("lane").build();
  runner.runInit();
  try {
    List<Record> collected = new ArrayList<>();
    String lastOffset = null;

    for (int batch = 0; batch < 50; batch++) {
      BatchMaker maker = SourceRunner.createTestBatchMaker("lane");
      lastOffset = source.produce(lastOffset, 1000, maker);
      // Every call is expected to hand back an offset, even after all data was read.
      Assert.assertNotNull(lastOffset);
      collected.addAll(SourceRunner.getOutput(maker).getRecords().get("lane"));
    }

    Assert.assertEquals(37044, collected.size());

    String expectedOffsetPart =
        "NorthAmerica/logArchive2.tar.gz::-1::9c91073f2c2b51ed80c0a33da1238214::";
    Assert.assertTrue(lastOffset.contains(expectedOffsetPart));
  } finally {
    // Always tear the stage down, whether or not an assertion failed.
    runner.runDestroy();
  }
}
/**
 * Calls {@code produce} 50 times on the source returned by {@code createTarGzipAvroSource()},
 * passing each returned offset into the next call, and gathers every record emitted on
 * "lane". Afterwards checks the total record count and that the last offset contains the
 * expected fragment.
 */
@Test
public void testProduceTarGzipAvroFile() throws Exception {
  AmazonS3Source source = createTarGzipAvroSource();
  SourceRunner runner =
      new SourceRunner.Builder(AmazonS3DSource.class, source).addOutputLane("lane").build();
  runner.runInit();
  try {
    List<Record> collected = new ArrayList<>();
    String lastOffset = null;

    for (int batch = 0; batch < 50; batch++) {
      BatchMaker maker = SourceRunner.createTestBatchMaker("lane");
      lastOffset = source.produce(lastOffset, 1000, maker);
      // Every call is expected to hand back an offset, even after all data was read.
      Assert.assertNotNull(lastOffset);
      collected.addAll(SourceRunner.getOutput(maker).getRecords().get("lane"));
    }

    Assert.assertEquals(48000, collected.size());

    String expectedOffsetPart =
        "NorthAmerica/testAvro2.tar.gz::-1::c17d97fdd6f2c6902efe059753cf41b6::";
    Assert.assertTrue(lastOffset.contains(expectedOffsetPart));
  } finally {
    // Always tear the stage down, whether or not an assertion failed.
    runner.runDestroy();
  }
}
/**
 * Calls {@code produce} 10 times on the source returned by {@code createSource()}, passing
 * each returned offset into the next call, and gathers every record emitted on "lane".
 * Object counts of {@code BUCKET_NAME} and {@code POSTPROCESS_BUCKET} are sampled before the
 * loop; afterwards the test expects 6 records, 6 fewer objects in {@code BUCKET_NAME} and 6
 * more objects in {@code POSTPROCESS_BUCKET}.
 */
@Test
public void testProduceFullFile() throws Exception {
  AmazonS3Source source = createSource();
  SourceRunner runner =
      new SourceRunner.Builder(AmazonS3DSource.class, source).addOutputLane("lane").build();
  runner.runInit();
  try {
    // Baseline counts, taken after init and before any batch is produced.
    int sourceObjectsBefore = getObjectCount(s3client, BUCKET_NAME);
    int postProcessObjectsBefore = getObjectCount(s3client, POSTPROCESS_BUCKET);

    List<Record> collected = new ArrayList<>();
    String lastOffset = null;

    for (int batch = 0; batch < 10; batch++) {
      BatchMaker maker = SourceRunner.createTestBatchMaker("lane");
      lastOffset = source.produce(lastOffset, 60000, maker);
      Assert.assertNotNull(lastOffset);
      collected.addAll(SourceRunner.getOutput(maker).getRecords().get("lane"));
    }

    Assert.assertEquals(6, collected.size());
    Assert.assertEquals(sourceObjectsBefore - 6, getObjectCount(s3client, BUCKET_NAME));
    Assert.assertEquals(
        postProcessObjectsBefore + 6, getObjectCount(s3client, POSTPROCESS_BUCKET));
  } finally {
    // Always tear the stage down, whether or not an assertion failed.
    runner.runDestroy();
  }
}