@Test(timeout = 30000)
public void testProduce() throws Exception {
  SourceRunner sourceRunner = new SourceRunner.Builder(ClusterHdfsDSource.class)
      .addOutputLane("lane")
      .setExecutionMode(ExecutionMode.CLUSTER_BATCH)
      .addConfiguration("hdfsUri", miniDFS.getURI().toString())
      .addConfiguration("hdfsDirLocations", Arrays.asList(dir.toUri().getPath()))
      .addConfiguration("recursive", false)
      .addConfiguration("hdfsConfigs", new HashMap<String, String>())
      .addConfiguration("dataFormat", DataFormat.TEXT)
      .addConfiguration("textMaxLineLen", 1024)
      .addConfiguration("produceSingleRecordPerMessage", false)
      .addConfiguration("regex", null)
      .addConfiguration("grokPatternDefinition", null)
      .addConfiguration("enableLog4jCustomLogFormat", false)
      .addConfiguration("customLogFormat", null)
      .addConfiguration("fieldPathsToGroupName", null)
      .addConfiguration("log4jCustomLogFormat", null)
      .addConfiguration("grokPattern", null)
      .addConfiguration("hdfsKerberos", false)
      .addConfiguration("hdfsConfDir", hadoopConfDir)
      .setResourcesDir(resourcesDir)
      .build();
  sourceRunner.runInit();

  // Each entry models one (offset, line) message; in TEXT mode every line
  // should surface as a record with a single /text field.
  List<Map.Entry> list = new ArrayList<>();
  list.add(new Pair(new LongWritable(1), new Text("aaa")));
  list.add(new Pair(new LongWritable(2), new Text("bbb")));
  list.add(new Pair(new LongWritable(3), new Text("ccc")));

  Thread th = createThreadForAddingBatch(sourceRunner, list);
  try {
    StageRunner.Output output = sourceRunner.runProduce(null, 5);
    // The new offset is the key of the last message consumed.
    Assert.assertEquals("3", output.getNewOffset());
    List<Record> records = output.getRecords().get("lane");
    Assert.assertEquals(3, records.size());
    for (int i = 0; i < records.size(); i++) {
      Assert.assertNotNull(records.get(i).get("/text"));
      LOG.info("Header " + records.get(i).getHeader().getSourceId());
      Assert.assertFalse(records.get(i).get("/text").getValueAsString().isEmpty());
      Assert.assertEquals(
          list.get(i).getValue().toString(),
          records.get(i).get("/text").getValueAsString());
    }
    sourceRunner.runDestroy();
  } finally {
    th.interrupt();
  }
}
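// The helper used by these tests is not shown in this excerpt. A minimal
// sketch of what createThreadForAddingBatch could look like, assuming the
// runner's stage can be cast to ClusterHdfsSource and that the source exposes
// a put(List<Map.Entry>) method that feeds the blocked runProduce() call
// (both are assumptions about code outside this excerpt):
private Thread createThreadForAddingBatch(final SourceRunner sourceRunner, final List<Map.Entry> list) {
  Thread sourceThread = new Thread() {
    @Override
    public void run() {
      try {
        // Hand the batch to the running cluster source from a side thread,
        // so runProduce() in the test thread can unblock and consume it.
        ClusterHdfsSource source = (ClusterHdfsSource) sourceRunner.getStage();
        source.put(list);
      } catch (Exception ex) {
        LOG.error("Error in batch-adding thread: " + ex, ex);
      }
    }
  };
  sourceThread.setDaemon(true);
  sourceThread.start();
  return sourceThread;
}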
@Test(timeout = 30000)
public void testProduceAvroData() throws Exception {
  SourceRunner sourceRunner = new SourceRunner.Builder(ClusterHdfsDSource.class)
      .addOutputLane("lane")
      .setExecutionMode(ExecutionMode.CLUSTER_BATCH)
      .addConfiguration("hdfsUri", miniDFS.getURI().toString())
      .addConfiguration("hdfsDirLocations", Arrays.asList(dir.toUri().getPath()))
      .addConfiguration("recursive", false)
      .addConfiguration("hdfsConfigs", new HashMap<String, String>())
      .addConfiguration("dataFormat", DataFormat.AVRO)
      // The CSV settings are unused in AVRO mode but still need values because
      // they are part of the stage's configuration set.
      .addConfiguration("csvFileFormat", CsvMode.CSV)
      .addConfiguration("csvHeader", CsvHeader.WITH_HEADER)
      .addConfiguration("csvMaxObjectLen", 4096)
      .addConfiguration("textMaxLineLen", 1024)
      .addConfiguration("produceSingleRecordPerMessage", false)
      .addConfiguration("regex", null)
      .addConfiguration("grokPatternDefinition", null)
      .addConfiguration("enableLog4jCustomLogFormat", false)
      .addConfiguration("customLogFormat", null)
      .addConfiguration("fieldPathsToGroupName", null)
      .addConfiguration("log4jCustomLogFormat", null)
      .addConfiguration("grokPattern", null)
      .addConfiguration("hdfsKerberos", false)
      .addConfiguration("hdfsConfDir", hadoopConfDir)
      .setResourcesDir(resourcesDir)
      .build();
  sourceRunner.runInit();

  List<Map.Entry> list = new ArrayList<>();
  list.add(new Pair("path::1::1",
      createAvroData("a", 30, ImmutableList.of("*****@*****.**", "*****@*****.**"))));
  list.add(new Pair("path::1::2",
      createAvroData("b", 40, ImmutableList.of("*****@*****.**", "*****@*****.**"))));

  Thread th = createThreadForAddingBatch(sourceRunner, list);
  try {
    StageRunner.Output output = sourceRunner.runProduce(null, 5);
    Assert.assertEquals("path::1::2", output.getNewOffset());
    List<Record> records = output.getRecords().get("lane");
    Assert.assertEquals(2, records.size());

    Record record = records.get(0);
    Assert.assertTrue(record.has("/name"));
    Assert.assertEquals("a", record.get("/name").getValueAsString());
    Assert.assertTrue(record.has("/age"));
    Assert.assertEquals(30, record.get("/age").getValueAsInteger());
    Assert.assertTrue(record.has("/emails"));
    List<Field> emails = record.get("/emails").getValueAsList();
    Assert.assertEquals(2, emails.size());
    Assert.assertEquals("*****@*****.**", emails.get(0).getValueAsString());
    Assert.assertEquals("*****@*****.**", emails.get(1).getValueAsString());

    record = records.get(1);
    Assert.assertTrue(record.has("/name"));
    Assert.assertEquals("b", record.get("/name").getValueAsString());
    Assert.assertTrue(record.has("/age"));
    Assert.assertEquals(40, record.get("/age").getValueAsInteger());
    Assert.assertTrue(record.has("/emails"));
    emails = record.get("/emails").getValueAsList();
    Assert.assertEquals(2, emails.size());
    Assert.assertEquals("*****@*****.**", emails.get(0).getValueAsString());
    Assert.assertEquals("*****@*****.**", emails.get(1).getValueAsString());

    // Release the runner's resources, matching the other tests in this class.
    sourceRunner.runDestroy();
  } finally {
    th.interrupt();
  }
}
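// A minimal sketch of the createAvroData helper used above, assuming it
// serializes one GenericRecord per call into an in-memory Avro data file and
// returns the bytes (the schema, return type, and field layout are assumptions
// inferred from the assertions above; the real helper lives outside this
// excerpt and would need the org.apache.avro imports):
private static byte[] createAvroData(String name, int age, List<String> emails) throws IOException {
  String schemaJson = "{\"type\":\"record\",\"name\":\"Employee\",\"fields\":["
      + "{\"name\":\"name\",\"type\":\"string\"},"
      + "{\"name\":\"age\",\"type\":\"int\"},"
      + "{\"name\":\"emails\",\"type\":{\"type\":\"array\",\"items\":\"string\"}}]}";
  Schema schema = new Schema.Parser().parse(schemaJson);
  GenericRecord record = new GenericData.Record(schema);
  record.put("name", name);
  record.put("age", age);
  record.put("emails", emails);
  ByteArrayOutputStream out = new ByteArrayOutputStream();
  DataFileWriter<GenericRecord> fileWriter =
      new DataFileWriter<>(new GenericDatumWriter<GenericRecord>(schema));
  fileWriter.create(schema, out);
  fileWriter.append(record);
  fileWriter.close();
  return out.toByteArray();
}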
@Test(timeout = 30000)
public void testProduceDelimitedWithHeader() throws Exception {
  SourceRunner sourceRunner = new SourceRunner.Builder(ClusterHdfsDSource.class)
      .addOutputLane("lane")
      .setExecutionMode(ExecutionMode.CLUSTER_BATCH)
      .addConfiguration("hdfsUri", miniDFS.getURI().toString())
      .addConfiguration("hdfsDirLocations", Arrays.asList(dir.toUri().getPath()))
      .addConfiguration("recursive", false)
      .addConfiguration("hdfsConfigs", new HashMap<String, String>())
      .addConfiguration("dataFormat", DataFormat.DELIMITED)
      .addConfiguration("csvFileFormat", CsvMode.CSV)
      .addConfiguration("csvHeader", CsvHeader.WITH_HEADER)
      .addConfiguration("csvMaxObjectLen", 4096)
      .addConfiguration("csvRecordType", CsvRecordType.LIST)
      .addConfiguration("textMaxLineLen", 1024)
      .addConfiguration("produceSingleRecordPerMessage", false)
      .addConfiguration("regex", null)
      .addConfiguration("grokPatternDefinition", null)
      .addConfiguration("enableLog4jCustomLogFormat", false)
      .addConfiguration("customLogFormat", null)
      .addConfiguration("fieldPathsToGroupName", null)
      .addConfiguration("log4jCustomLogFormat", null)
      .addConfiguration("grokPattern", null)
      .addConfiguration("hdfsKerberos", false)
      .addConfiguration("hdfsConfDir", hadoopConfDir)
      .setResourcesDir(resourcesDir)
      .build();
  sourceRunner.runInit();

  List<Map.Entry> list = new ArrayList<>();
  // The first entry carries the CSV header line (its value is null); the
  // second carries three data lines under the offset key "path::1".
  list.add(new Pair("HEADER_COL_1,HEADER_COL_2", null));
  list.add(new Pair("path::1", "a,b\nC,D\nc,d"));

  Thread th = createThreadForAddingBatch(sourceRunner, list);
  try {
    StageRunner.Output output = sourceRunner.runProduce(null, 5);
    Assert.assertEquals("path::1", output.getNewOffset());
    List<Record> records = output.getRecords().get("lane");
    Assert.assertEquals(3, records.size());

    // With csvRecordType LIST, each record is a list of {header, value} maps,
    // one per column.
    String[][] expected = {{"a", "b"}, {"C", "D"}, {"c", "d"}};
    for (int i = 0; i < expected.length; i++) {
      List<Field> columns = records.get(i).get().getValueAsList();
      Assert.assertEquals("HEADER_COL_1",
          columns.get(0).getValueAsMap().get("header").getValueAsString());
      Assert.assertEquals(expected[i][0],
          columns.get(0).getValueAsMap().get("value").getValueAsString());
      Assert.assertEquals("HEADER_COL_2",
          columns.get(1).getValueAsMap().get("header").getValueAsString());
      Assert.assertEquals(expected[i][1],
          columns.get(1).getValueAsMap().get("value").getValueAsString());
    }
    sourceRunner.runDestroy();
  } finally {
    th.interrupt();
  }
}
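// The class-level fixtures used throughout (miniDFS, dir, hadoopConfDir,
// resourcesDir) are initialized outside this excerpt. A minimal sketch of one
// possible setup, assuming hadoop-minicluster is on the test classpath (field
// names match the tests above; paths and layout are illustrative, not the
// exact original setup):
@BeforeClass
public static void setUpClass() throws Exception {
  File minidfsDir = new File("target/minidfs").getAbsoluteFile();
  Assert.assertTrue(minidfsDir.exists() || minidfsDir.mkdirs());
  Configuration conf = new HdfsConfiguration();
  conf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, minidfsDir.getPath());
  miniDFS = new MiniDFSCluster.Builder(conf).build();
  dir = new Path(miniDFS.getURI() + "/dir");
  miniDFS.getFileSystem().mkdirs(dir);
  // A resources dir containing a hadoop conf dir with the *-site.xml files
  // the stage expects; both locations are illustrative.
  resourcesDir = minidfsDir.getPath();
  hadoopConfDir = "hadoop-conf";
}

@AfterClass
public static void tearDownClass() {
  if (miniDFS != null) {
    miniDFS.shutdown();
    miniDFS = null;
  }
}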