/**
 * Feeds a batch without any records to the JDBC lookup stage and expects the output lane to stay
 * empty.
 */
@Test
public void testEmptyBatch() throws Exception {
  List<JdbcFieldColumnMapping> mappings =
      ImmutableList.of(new JdbcFieldColumnMapping("P_ID", "[3]"));

  JdbcLookupDProcessor lookupStage = new JdbcLookupDProcessor();
  lookupStage.hikariConfigBean = createConfigBean(h2ConnectionString, username, password);

  ProcessorRunner runner =
      new ProcessorRunner.Builder(JdbcLookupDProcessor.class, lookupStage)
          .addConfiguration("query", listQuery)
          .addConfiguration("columnMappings", mappings)
          .addConfiguration("maxClobSize", 1000)
          .addConfiguration("maxBlobSize", 1000)
          .addOutputLane("lane")
          .build();

  List<Record> noRecords = ImmutableList.of();
  runner.runInit();
  try {
    List<Record> produced = runner.runProcess(noRecords).getRecords().get("lane");
    Assert.assertEquals(0, produced.size());
  } finally {
    // Always release the stage, even when the assertion above fails.
    runner.runDestroy();
  }
}
/**
 * Calls {@code produce} on the tar-gzip Avro source 50 times with a batch size of 1000, then
 * checks the total number of records and the content of the last returned offset.
 */
@Test
public void testProduceTarGzipAvroFile() throws Exception {
  AmazonS3Source s3Source = createTarGzipAvroSource();
  SourceRunner sourceRunner =
      new SourceRunner.Builder(AmazonS3DSource.class, s3Source).addOutputLane("lane").build();
  sourceRunner.runInit();
  try {
    List<Record> collected = new ArrayList<>();
    String lastOffset = null;
    for (int batch = 0; batch < 50; batch++) {
      BatchMaker maker = SourceRunner.createTestBatchMaker("lane");
      lastOffset = s3Source.produce(lastOffset, 1000, maker);
      // Every call must hand back an offset to resume from.
      Assert.assertNotNull(lastOffset);
      collected.addAll(SourceRunner.getOutput(maker).getRecords().get("lane"));
    }

    Assert.assertEquals(48000, collected.size());
    boolean offsetMatches =
        lastOffset.contains(
            "NorthAmerica/testAvro2.tar.gz::-1::c17d97fdd6f2c6902efe059753cf41b6::");
    Assert.assertTrue(offsetMatches);
  } finally {
    sourceRunner.runDestroy();
  }
}
@Test public void testCategoryRegexInNonComplexType() throws StageException { FieldRenamerConfig renameConfig = new FieldRenamerConfig(); // Any field containing a non-word character should be in single quotes renameConfig.fromFieldExpression = "/'(.*)[#&@|](.*)'"; renameConfig.toFieldExpression = "/$1_$2"; FieldRenamerProcessorErrorHandler errorHandler = new FieldRenamerProcessorErrorHandler(); errorHandler.nonExistingFromFieldHandling = OnStagePreConditionFailure.CONTINUE; errorHandler.multipleFromFieldsMatching = OnStagePreConditionFailure.TO_ERROR; errorHandler.existingToFieldHandling = ExistingToFieldHandling.APPEND_NUMBERS; FieldRenamerProcessor processor = new FieldRenamerProcessor(ImmutableList.of(renameConfig), errorHandler); ProcessorRunner runner = new ProcessorRunner.Builder(FieldRenamerDProcessor.class, processor) .setOnRecordError(OnRecordError.TO_ERROR) .addOutputLane("a") .build(); runner.runInit(); try { Map<String, Field> map = new LinkedHashMap<>(); map.put("a#b", Field.create(Field.Type.STRING, "foo1")); map.put("a_b", Field.create(Field.Type.STRING, "foo2")); map.put("a&b", Field.create(Field.Type.STRING, "foo3")); map.put("a|b", Field.create(Field.Type.STRING, "foo4")); map.put("a@b", Field.create(Field.Type.STRING, "foo5")); Record record = RecordCreator.create("s", "s:1"); record.set(Field.create(Field.Type.MAP, map)); StageRunner.Output output = runner.runProcess(ImmutableList.of(record)); Assert.assertEquals(1, output.getRecords().get("a").size()); Record r = output.getRecords().get("a").get(0); Assert.assertFalse(r.has("/'a#b'")); Assert.assertFalse(r.has("/'a&b'")); Assert.assertFalse(r.has("/'a|b'")); Assert.assertFalse(r.has("/'a&b'")); Assert.assertTrue(r.has("/a_b")); Assert.assertEquals("foo2", r.get("/a_b").getValueAsString()); Assert.assertTrue(r.has("/a_b1")); Assert.assertEquals("foo1", r.get("/a_b1").getValueAsString()); Assert.assertTrue(r.has("/a_b2")); Assert.assertEquals("foo3", r.get("/a_b2").getValueAsString()); 
Assert.assertTrue(r.has("/a_b3")); Assert.assertEquals("foo4", r.get("/a_b3").getValueAsString()); Assert.assertTrue(r.has("/a_b4")); Assert.assertEquals("foo5", r.get("/a_b4").getValueAsString()); } finally { runner.runDestroy(); } }
/**
 * Calls {@code produce} on the S3 source ten times and checks that six records come out in total,
 * that the object count of {@code BUCKET_NAME} drops by six and that the object count of
 * {@code POSTPROCESS_BUCKET} rises by six.
 */
@Test
public void testProduceFullFile() throws Exception {
  AmazonS3Source s3Source = createSource();
  SourceRunner sourceRunner =
      new SourceRunner.Builder(AmazonS3DSource.class, s3Source).addOutputLane("lane").build();
  sourceRunner.runInit();
  try {
    // Snapshot both buckets before any batch is produced.
    int sourceBefore = getObjectCount(s3client, BUCKET_NAME);
    int postProcessBefore = getObjectCount(s3client, POSTPROCESS_BUCKET);

    List<Record> collected = new ArrayList<>();
    String lastOffset = null;
    for (int batch = 0; batch < 10; batch++) {
      BatchMaker maker = SourceRunner.createTestBatchMaker("lane");
      lastOffset = s3Source.produce(lastOffset, 60000, maker);
      Assert.assertNotNull(lastOffset);
      collected.addAll(SourceRunner.getOutput(maker).getRecords().get("lane"));
    }

    Assert.assertEquals(6, collected.size());

    int sourceAfter = getObjectCount(s3client, BUCKET_NAME);
    Assert.assertEquals(sourceBefore - 6, sourceAfter);

    int postProcessAfter = getObjectCount(s3client, POSTPROCESS_BUCKET);
    Assert.assertEquals(postProcessBefore + 6, postProcessAfter);
  } finally {
    sourceRunner.runDestroy();
  }
}
/**
 * Keeps a single path below the second element of the root {@code USA} list and checks that the
 * retained value is found under index 0 in the resulting record.
 */
@Test
public void testNestedKeep3() throws StageException {
  String keptPath = "/USA[1]/SantaMonica/cole/streets[0][1]/name";
  // Where the kept value is expected once everything else has been filtered out.
  String shiftedPath = "/USA[0]/SantaMonica/cole/streets[0][0]/name";

  Record nested = createNestedRecord();
  ProcessorRunner filterRunner =
      new ProcessorRunner.Builder(FieldFilterDProcessor.class)
          .addConfiguration("fields", ImmutableList.of(keptPath))
          .addConfiguration("filterOperation", FilterOperation.KEEP)
          .addOutputLane("a")
          .build();
  filterRunner.runInit();

  try {
    StageRunner.Output result = filterRunner.runProcess(ImmutableList.of(nested));
    Assert.assertEquals(1, result.getRecords().get("a").size());

    Record filtered = result.getRecords().get("a").get(0);
    Assert.assertTrue(filtered.has(shiftedPath));
    Assert.assertEquals("f", filtered.get(shiftedPath).getValueAsString());

    Assert.assertFalse(filtered.has("/USA[0]/SantaMonica/cole/streets[0][1]/name"));
    Assert.assertFalse(filtered.has("/USA[0]/SanFrancisco"));
    Assert.assertFalse(filtered.has("/USA[1]"));
  } finally {
    filterRunner.runDestroy();
  }
}
@Test public void testNestedKeep5() throws StageException { Record record = createNestedRecord(); /* * keep all entries of a list by specifying path just upto the list */ ProcessorRunner runner = new ProcessorRunner.Builder(FieldFilterDProcessor.class) .addConfiguration("fields", ImmutableList.of("/USA[0]/SanFrancisco/folsom/streets[0]")) .addConfiguration("filterOperation", FilterOperation.KEEP) .addOutputLane("a") .build(); runner.runInit(); try { StageRunner.Output output = runner.runProcess(ImmutableList.of(record)); Assert.assertEquals(1, output.getRecords().get("a").size()); Record resultRecord = output.getRecords().get("a").get(0); Assert.assertTrue(resultRecord.has("/USA[0]/SanFrancisco/folsom/streets[0]")); // Its a list and it is empty. use wild card [*] to preserve the contents of the list Assert.assertEquals( 2, resultRecord.get("/USA[0]/SanFrancisco/folsom/streets[0]").getValueAsList().size()); } finally { runner.runDestroy(); } }
/**
 * Keeps a whole map by giving only the path up to the map, and checks that the map and the four
 * street names below it are readable from the processor output.
 */
@Test
public void testNestedKeep6() throws StageException {
  Record record = createNestedRecord();
  /*
   * keep all entries of a map by specifying path just upto the map
   */
  ProcessorRunner runner =
      new ProcessorRunner.Builder(FieldFilterDProcessor.class)
          .addConfiguration("fields", ImmutableList.of("/USA[0]/SanFrancisco/noe"))
          .addConfiguration("filterOperation", FilterOperation.KEEP)
          .addOutputLane("a")
          .build();
  runner.runInit();

  try {
    StageRunner.Output output = runner.runProcess(ImmutableList.of(record));
    Assert.assertEquals(1, output.getRecords().get("a").size());
    Record resultRecord = output.getRecords().get("a").get(0);
    Assert.assertTrue(resultRecord.has("/USA[0]/SanFrancisco/noe"));

    // Assert on the processor output (not the input fixture), in (expected, actual) order.
    Assert.assertEquals(
        "a", resultRecord.get("/USA[0]/SanFrancisco/noe/streets[0][0]/name").getValueAsString());
    Assert.assertEquals(
        "b", resultRecord.get("/USA[0]/SanFrancisco/noe/streets[0][1]/name").getValueAsString());
    Assert.assertEquals(
        "c", resultRecord.get("/USA[0]/SanFrancisco/noe/streets[1][0]/name").getValueAsString());
    Assert.assertEquals(
        "d", resultRecord.get("/USA[0]/SanFrancisco/noe/streets[1][1]/name").getValueAsString());
  } finally {
    runner.runDestroy();
  }
}
/**
 * Removes every street name below {@code SanFrancisco} through a wildcard path and checks that
 * the {@code noe} names are gone while the {@code SantaMonica} names are still present.
 */
@Test
public void testWildCardRemove1() throws StageException {
  Record record = createNestedRecord();
  ProcessorRunner runner =
      new ProcessorRunner.Builder(FieldFilterDProcessor.class)
          .addConfiguration(
              "fields", ImmutableList.of("/USA[*]/SanFrancisco/*/streets[*][*]/name"))
          .addConfiguration("filterOperation", FilterOperation.REMOVE)
          .addOutputLane("a")
          .build();
  runner.runInit();

  try {
    StageRunner.Output output = runner.runProcess(ImmutableList.of(record));
    Assert.assertEquals(1, output.getRecords().get("a").size());
    Record resultRecord = output.getRecords().get("a").get(0);

    Assert.assertFalse(resultRecord.has("/USA[0]/SanFrancisco/noe/streets[0][0]/name"));
    Assert.assertFalse(resultRecord.has("/USA[0]/SanFrancisco/noe/streets[0][1]/name"));
    Assert.assertFalse(resultRecord.has("/USA[0]/SanFrancisco/noe/streets[1][0]/name"));
    Assert.assertFalse(resultRecord.has("/USA[0]/SanFrancisco/noe/streets[1][1]/name"));

    // Paths outside SanFrancisco do not match the pattern; arguments are (expected, actual).
    Assert.assertEquals(
        "e", resultRecord.get("/USA[1]/SantaMonica/cole/streets[0][0]/name").getValueAsString());
    Assert.assertEquals(
        "f", resultRecord.get("/USA[1]/SantaMonica/cole/streets[0][1]/name").getValueAsString());
  } finally {
    runner.runDestroy();
  }
}
/**
 * Configures KEEP for a field ({@code /city}) that the input record does not contain and expects
 * the resulting root map to be empty.
 */
@Test
public void testKeepNonExistingFiled() throws StageException {
  ProcessorRunner runner =
      new ProcessorRunner.Builder(FieldFilterDProcessor.class)
          .addConfiguration("fields", ImmutableList.of("/city"))
          .addConfiguration("filterOperation", FilterOperation.KEEP)
          .addOutputLane("a")
          .build();
  runner.runInit();

  try {
    Map<String, Field> map = new LinkedHashMap<>();
    map.put("name", Field.create("a"));
    map.put("age", Field.create("b"));
    map.put("streetAddress", Field.create("c"));
    Record record = RecordCreator.create("s", "s:1");
    record.set(Field.create(map));

    StageRunner.Output output = runner.runProcess(ImmutableList.of(record));
    Assert.assertEquals(1, output.getRecords().get("a").size());
    Field field = output.getRecords().get("a").get(0).get();
    Assert.assertTrue(field.getValue() instanceof Map);
    Map<String, Field> result = field.getValueAsMap();
    // assertEquals reports the actual size and the leftover fields if this ever fails.
    Assert.assertEquals(String.valueOf(result), 0, result.size());
  } finally {
    runner.runDestroy();
  }
}
/**
 * Calls {@code produce} on the tar-gzip text source 50 times with a batch size of 1000, then
 * checks the total number of records and the content of the last returned offset.
 */
@Test
public void testProduceTarGzipTextFile() throws Exception {
  AmazonS3Source s3Source = createTarGzipSource();
  SourceRunner sourceRunner =
      new SourceRunner.Builder(AmazonS3DSource.class, s3Source).addOutputLane("lane").build();
  sourceRunner.runInit();
  try {
    List<Record> collected = new ArrayList<>();
    String lastOffset = null;
    for (int batch = 0; batch < 50; batch++) {
      BatchMaker maker = SourceRunner.createTestBatchMaker("lane");
      lastOffset = s3Source.produce(lastOffset, 1000, maker);
      // Every call must hand back an offset to resume from.
      Assert.assertNotNull(lastOffset);
      collected.addAll(SourceRunner.getOutput(maker).getRecords().get("lane"));
    }

    Assert.assertEquals(37044, collected.size());
    boolean offsetMatches =
        lastOffset.contains(
            "NorthAmerica/logArchive2.tar.gz::-1::9c91073f2c2b51ed80c0a33da1238214::");
    Assert.assertTrue(offsetMatches);
  } finally {
    sourceRunner.runDestroy();
  }
}
@Test public void testMultipleRegexMatchingSameField() throws StageException { FieldRenamerConfig renameConfig1 = new FieldRenamerConfig(); renameConfig1.fromFieldExpression = "/sql(.*)"; renameConfig1.toFieldExpression = "/sqlRename$1"; FieldRenamerConfig renamerConfig2 = new FieldRenamerConfig(); renamerConfig2.fromFieldExpression = "/s(.*)"; renamerConfig2.toFieldExpression = "/sRename$1"; Map<String, Field> map = new LinkedHashMap<>(); map.put("sqlField", Field.create(Field.Type.STRING, "foo")); Record record = RecordCreator.create("s", "s:1"); record.set(Field.create(Field.Type.MAP, map)); try { FieldRenamerProcessorErrorHandler errorHandler = new FieldRenamerProcessorErrorHandler(); errorHandler.nonExistingFromFieldHandling = OnStagePreConditionFailure.CONTINUE; errorHandler.multipleFromFieldsMatching = OnStagePreConditionFailure.TO_ERROR; errorHandler.existingToFieldHandling = ExistingToFieldHandling.TO_ERROR; FieldRenamerProcessor processor = new FieldRenamerProcessor(ImmutableList.of(renameConfig1, renamerConfig2), errorHandler); // Test non-existent source with existing target field ProcessorRunner runner = new ProcessorRunner.Builder(FieldRenamerDProcessor.class, processor) .setOnRecordError(OnRecordError.STOP_PIPELINE) .addOutputLane("a") .build(); runner.runInit(); StageRunner.Output output = runner.runProcess(ImmutableList.of(record)); Assert.fail("Should throw error if multiple regex match the same field"); } catch (OnRecordErrorException e) { Assert.assertEquals(Errors.FIELD_RENAMER_03, e.getErrorCode()); } FieldRenamerProcessorErrorHandler errorHandler = new FieldRenamerProcessorErrorHandler(); errorHandler.nonExistingFromFieldHandling = OnStagePreConditionFailure.CONTINUE; errorHandler.multipleFromFieldsMatching = OnStagePreConditionFailure.CONTINUE; errorHandler.existingToFieldHandling = ExistingToFieldHandling.TO_ERROR; FieldRenamerProcessor processor = new FieldRenamerProcessor(ImmutableList.of(renameConfig1, renamerConfig2), errorHandler); 
ProcessorRunner runner = new ProcessorRunner.Builder(FieldRenamerDProcessor.class, processor) .setOnRecordError(OnRecordError.TO_ERROR) .addOutputLane("a") .build(); runner.runInit(); StageRunner.Output output = runner.runProcess(ImmutableList.of(record)); Assert.assertEquals(1, output.getRecords().get("a").size()); Record r = output.getRecords().get("a").get(0); Assert.assertTrue(r.has("/sqlField")); }
@Test public void testRenameMultipleListElementsWithConstIdxExpr() throws StageException { FieldRenamerConfig renameConfig1 = new FieldRenamerConfig(); renameConfig1.fromFieldExpression = "/listOfInts[0]"; renameConfig1.toFieldExpression = "/nonExisting0"; FieldRenamerConfig renameConfig2 = new FieldRenamerConfig(); renameConfig2.fromFieldExpression = "/listOfInts[1]"; renameConfig2.toFieldExpression = "/nonExisting1"; FieldRenamerConfig renameConfig3 = new FieldRenamerConfig(); renameConfig3.fromFieldExpression = "/listOfInts[2]"; renameConfig3.toFieldExpression = "/nonExisting2"; FieldRenamerProcessorErrorHandler errorHandler = new FieldRenamerProcessorErrorHandler(); errorHandler.nonExistingFromFieldHandling = OnStagePreConditionFailure.TO_ERROR; errorHandler.multipleFromFieldsMatching = OnStagePreConditionFailure.TO_ERROR; errorHandler.existingToFieldHandling = ExistingToFieldHandling.REPLACE; // Reverse order in configuration so as to preserve array indices FieldRenamerProcessor processor = new FieldRenamerProcessor( ImmutableList.of(renameConfig3, renameConfig2, renameConfig1), errorHandler); // Test non-existent source with existing target field ProcessorRunner runner = new ProcessorRunner.Builder(FieldRenamerDProcessor.class, processor) .addOutputLane("a") .build(); runner.runInit(); try { Map<String, Field> map = new LinkedHashMap<>(); map.put( "listOfInts", Field.create( ImmutableList.of( Field.create(Field.Type.INTEGER, 1), Field.create(Field.Type.INTEGER, 2), Field.create(Field.Type.INTEGER, 3)))); Record record = RecordCreator.create("s", "s:1"); record.set(Field.create(map)); StageRunner.Output output = runner.runProcess(ImmutableList.of(record)); Assert.assertEquals(1, output.getRecords().get("a").size()); Map<String, Field> result = output.getRecords().get("a").get(0).get().getValueAsMap(); Assert.assertTrue(result.containsKey("listOfInts")); Assert.assertTrue(result.get("listOfInts").getValueAsList().isEmpty()); 
Assert.assertTrue(result.containsKey("nonExisting0")); Assert.assertTrue(result.containsKey("nonExisting1")); Assert.assertTrue(result.containsKey("nonExisting2")); Assert.assertEquals(1, result.get("nonExisting0").getValueAsInteger()); Assert.assertEquals(2, result.get("nonExisting1").getValueAsInteger()); Assert.assertEquals(3, result.get("nonExisting2").getValueAsInteger()); } finally { runner.runDestroy(); } }
/**
 * Looks up the country name for one IP address supplied in three forms (an int, a string holding
 * that int, and the dotted string) and expects "United States" for each of them, with no error
 * records and six fields in the resulting map.
 */
@Test
public void testLookup() throws Exception {
  String ip = "128.101.101.101";

  // {input field, output field} for each of the three lookups, in configuration order.
  String[][] fieldPairs = {
    {"/ipAsInt", "/intIpCountry"},
    {"/ipAsIntString", "/intStringIpCountry"},
    {"/ipAsString", "/stringIpCountry"},
  };
  List<GeolocationFieldConfig> configs = new ArrayList<>();
  for (String[] pair : fieldPairs) {
    GeolocationFieldConfig fieldConfig = new GeolocationFieldConfig();
    fieldConfig.inputFieldName = pair[0];
    fieldConfig.outputFieldName = pair[1];
    fieldConfig.targetType = GeolocationField.COUNTRY_NAME;
    configs.add(fieldConfig);
  }

  ProcessorRunner geoRunner =
      new ProcessorRunner.Builder(GeolocationDProcessor.class)
          .addConfiguration("fieldTypeConverterConfigs", configs)
          .addConfiguration("geoIP2DBFile", databaseFile.getAbsolutePath())
          .addOutputLane("a")
          .build();
  geoRunner.runInit();

  try {
    Map<String, Field> input = new LinkedHashMap<>();
    input.put("ipAsInt", Field.create(GeolocationProcessor.ipAsStringToInt(ip)));
    input.put(
        "ipAsIntString", Field.create(String.valueOf(GeolocationProcessor.ipAsStringToInt(ip))));
    input.put("ipAsString", Field.create(ip));
    Record source = RecordCreator.create("s", "s:1");
    source.set(Field.create(input));

    StageRunner.Output processed = geoRunner.runProcess(ImmutableList.of(source));
    Assert.assertEquals(0, geoRunner.getErrorRecords().size());
    Assert.assertEquals(1, processed.getRecords().get("a").size());

    Field root = processed.getRecords().get("a").get(0).get();
    Assert.assertTrue(root.getValue() instanceof Map);
    Map<String, Field> looked = root.getValueAsMap();
    Assert.assertEquals(String.valueOf(looked), 6, looked.size());

    Object intStringCountry =
        Utils.checkNotNull(looked.get("intStringIpCountry"), "intStringIpCountry").getValue();
    Assert.assertEquals("United States", intStringCountry);

    Object intCountry = Utils.checkNotNull(looked.get("intIpCountry"), "intIpCountry").getValue();
    Assert.assertEquals("United States", intCountry);

    Object stringCountry =
        Utils.checkNotNull(looked.get("stringIpCountry"), "stringIpCountry").getValue();
    Assert.assertEquals("United States", stringCountry);
  } finally {
    geoRunner.runDestroy();
  }
}
/**
 * Runs the cluster HDFS source in {@code CLUSTER_BATCH} mode with the TEXT data format, feeds it
 * three key/value pairs through a helper thread and checks that three records come out with the
 * matching {@code /text} values and a new offset of {@code "3"}.
 *
 * <p>The runner is destroyed and the helper thread interrupted in {@code finally} blocks, so both
 * happen even when an assertion fails.
 */
@Test(timeout = 30000)
public void testProduce() throws Exception {
  SourceRunner sourceRunner =
      new SourceRunner.Builder(ClusterHdfsDSource.class)
          .addOutputLane("lane")
          .setExecutionMode(ExecutionMode.CLUSTER_BATCH)
          .addConfiguration("hdfsUri", miniDFS.getURI().toString())
          .addConfiguration("hdfsDirLocations", Arrays.asList(dir.toUri().getPath()))
          .addConfiguration("recursive", false)
          .addConfiguration("hdfsConfigs", new HashMap<String, String>())
          .addConfiguration("dataFormat", DataFormat.TEXT)
          .addConfiguration("textMaxLineLen", 1024)
          .addConfiguration("produceSingleRecordPerMessage", false)
          .addConfiguration("regex", null)
          .addConfiguration("grokPatternDefinition", null)
          .addConfiguration("enableLog4jCustomLogFormat", false)
          .addConfiguration("customLogFormat", null)
          .addConfiguration("fieldPathsToGroupName", null)
          .addConfiguration("log4jCustomLogFormat", null)
          .addConfiguration("grokPattern", null)
          .addConfiguration("hdfsKerberos", false)
          .addConfiguration("hdfsConfDir", hadoopConfDir)
          .setResourcesDir(resourcesDir)
          .build();
  sourceRunner.runInit();

  List<Map.Entry> list = new ArrayList<>();
  list.add(new Pair(new LongWritable(1), new Text("aaa")));
  list.add(new Pair(new LongWritable(2), new Text("bbb")));
  list.add(new Pair(new LongWritable(3), new Text("ccc")));

  Thread th = createThreadForAddingBatch(sourceRunner, list);
  try {
    StageRunner.Output output = sourceRunner.runProduce(null, 5);
    String newOffset = output.getNewOffset();
    Assert.assertEquals("3", newOffset);

    List<Record> records = output.getRecords().get("lane");
    Assert.assertEquals(3, records.size());
    for (int i = 0; i < records.size(); i++) {
      Assert.assertNotNull(records.get(i).get("/text"));
      LOG.info("Header " + records.get(i).getHeader().getSourceId());
      Assert.assertFalse(records.get(i).get("/text").getValueAsString().isEmpty());
      Assert.assertEquals(
          list.get(i).getValue().toString(), records.get(i).get("/text").getValueAsString());
    }
  } finally {
    // Same order as the success path: destroy the stage, then stop the helper thread.
    // The nested finally makes sure the thread is interrupted even if runDestroy() throws.
    try {
      sourceRunner.runDestroy();
    } finally {
      th.interrupt();
    }
  }
}
@Test public void testDifferentMatchesRegex() throws StageException { FieldRenamerConfig renameConfig = new FieldRenamerConfig(); // Any field containing a non-word character should be in single quotes renameConfig.fromFieldExpression = "/'(.*)(#)(.*)'"; renameConfig.toFieldExpression = "/$1hash$3"; FieldRenamerProcessorErrorHandler errorHandler = new FieldRenamerProcessorErrorHandler(); errorHandler.nonExistingFromFieldHandling = OnStagePreConditionFailure.TO_ERROR; errorHandler.multipleFromFieldsMatching = OnStagePreConditionFailure.TO_ERROR; errorHandler.existingToFieldHandling = ExistingToFieldHandling.TO_ERROR; FieldRenamerProcessor processor = new FieldRenamerProcessor(ImmutableList.of(renameConfig), errorHandler); ProcessorRunner runner = new ProcessorRunner.Builder(FieldRenamerDProcessor.class, processor) .setOnRecordError(OnRecordError.STOP_PIPELINE) .addOutputLane("a") .build(); runner.runInit(); Map<String, Field> map = new LinkedHashMap<>(); map.put("#abcd", Field.create("hashabcd")); Record record1 = RecordCreator.create("s", "s:1"); record1.set(Field.create(Field.Type.MAP, map)); map = new LinkedHashMap<>(); map.put("ab#cd", Field.create("abhashcd")); Record record2 = RecordCreator.create("s", "s:2"); record2.set(Field.create(Field.Type.MAP, map)); map = new LinkedHashMap<>(); map.put("abcd#", Field.create("abcdhash")); Record record3 = RecordCreator.create("s", "s:3"); record3.set(Field.create(Field.Type.MAP, map)); try { StageRunner.Output output = runner.runProcess(ImmutableList.of(record1, record2, record3)); Assert.assertEquals(3, output.getRecords().get("a").size()); for (Record record : output.getRecords().get("a")) { Map<String, Field> fieldMap = record.get().getValueAsMap(); for (Map.Entry<String, Field> fieldEntry : fieldMap.entrySet()) { Assert.assertEquals(fieldEntry.getKey(), fieldEntry.getValue().getValueAsString()); } } } finally { runner.runDestroy(); } }
@Test public void testRegexInComplexListType() throws StageException { FieldRenamerConfig renameConfig = new FieldRenamerConfig(); // Any field containing a non-word character should be in single quotes renameConfig.fromFieldExpression = "/(*)[(*)]/'SQL(#)(.*)'"; renameConfig.toFieldExpression = "/$1[$2]/SQL$4"; FieldRenamerProcessorErrorHandler errorHandler = new FieldRenamerProcessorErrorHandler(); errorHandler.nonExistingFromFieldHandling = OnStagePreConditionFailure.TO_ERROR; errorHandler.multipleFromFieldsMatching = OnStagePreConditionFailure.TO_ERROR; errorHandler.existingToFieldHandling = ExistingToFieldHandling.TO_ERROR; FieldRenamerProcessor processor = new FieldRenamerProcessor(ImmutableList.of(renameConfig), errorHandler); ProcessorRunner runner = new ProcessorRunner.Builder(FieldRenamerDProcessor.class, processor) .setOnRecordError(OnRecordError.TO_ERROR) .addOutputLane("a") .build(); runner.runInit(); try { Map<String, Field> innerMap1 = new LinkedHashMap<>(); innerMap1.put("SQL#1", Field.create(Field.Type.STRING, "foo1")); Map<String, Field> innerMap2 = new LinkedHashMap<>(); innerMap2.put("SQL#2", Field.create(Field.Type.STRING, "foo2")); List<Field> list = new LinkedList<>(); list.add(Field.create(innerMap1)); list.add(Field.create(innerMap2)); Map<String, Field> map = new HashMap<>(); map.put("list", Field.create(list)); Record record = RecordCreator.create("s", "s:1"); record.set(Field.create(map)); StageRunner.Output output = runner.runProcess(ImmutableList.of(record)); Assert.assertEquals(1, output.getRecords().get("a").size()); Record r = output.getRecords().get("a").get(0); Assert.assertFalse(r.getEscapedFieldPaths().contains("/list[0]/'SQL#1'")); Assert.assertFalse(r.getEscapedFieldPaths().contains("/list[1]/'SQL#2'")); Assert.assertTrue(r.getEscapedFieldPaths().contains("/list[0]/SQL1")); Assert.assertTrue(r.getEscapedFieldPaths().contains("/list[1]/SQL2")); } finally { runner.runDestroy(); } }
@Test public void testTargetFieldExistsAppendNumbers() throws StageException { FieldRenamerConfig renameConfig = new FieldRenamerConfig(); // Any field containing a non-word character should be in single quotes renameConfig.fromFieldExpression = "/field"; renameConfig.toFieldExpression = "/col"; FieldRenamerProcessorErrorHandler errorHandler = new FieldRenamerProcessorErrorHandler(); errorHandler.nonExistingFromFieldHandling = OnStagePreConditionFailure.CONTINUE; errorHandler.multipleFromFieldsMatching = OnStagePreConditionFailure.TO_ERROR; errorHandler.existingToFieldHandling = ExistingToFieldHandling.APPEND_NUMBERS; FieldRenamerProcessor processor = new FieldRenamerProcessor(ImmutableList.of(renameConfig), errorHandler); // Test non-existent source with existing target field ProcessorRunner runner = new ProcessorRunner.Builder(FieldRenamerDProcessor.class, processor) .addOutputLane("a") .build(); runner.runInit(); try { Map<String, Field> map = new LinkedHashMap<>(); map.put("field", Field.create(Field.Type.STRING, "field")); map.put("col", Field.create(Field.Type.STRING, "col")); Record record = RecordCreator.create("s", "s:1"); record.set(Field.create(Field.Type.MAP, map)); StageRunner.Output output = runner.runProcess(ImmutableList.of(record)); Assert.assertEquals(1, output.getRecords().get("a").size()); Record r = output.getRecords().get("a").get(0); Assert.assertFalse(r.has("/field")); Assert.assertTrue(r.has("/col")); Assert.assertTrue(r.has("/col1")); Assert.assertEquals("col", r.get("/col").getValueAsString()); Assert.assertEquals("field", r.get("/col1").getValueAsString()); } finally { runner.runDestroy(); } }
@Test public void testRenameMapField() throws StageException { FieldRenamerConfig renameConfig = new FieldRenamerConfig(); // Any field containing a non-word character should be in single quotes renameConfig.fromFieldExpression = "/first"; renameConfig.toFieldExpression = "/second"; FieldRenamerProcessorErrorHandler errorHandler = new FieldRenamerProcessorErrorHandler(); errorHandler.nonExistingFromFieldHandling = OnStagePreConditionFailure.CONTINUE; errorHandler.multipleFromFieldsMatching = OnStagePreConditionFailure.TO_ERROR; errorHandler.existingToFieldHandling = ExistingToFieldHandling.TO_ERROR; FieldRenamerProcessor processor = new FieldRenamerProcessor(ImmutableList.of(renameConfig), errorHandler); ProcessorRunner runner = new ProcessorRunner.Builder(FieldRenamerDProcessor.class, processor) .setOnRecordError(OnRecordError.TO_ERROR) .addOutputLane("a") .build(); runner.runInit(); Map<String, Field> renameableInnerMap = new HashMap<>(); renameableInnerMap.put("value", Field.create(Field.Type.STRING, "value")); Map<String, Field> map = new LinkedHashMap<>(); map.put("first", Field.create(renameableInnerMap)); Record record = RecordCreator.create("s", "s:1"); record.set(Field.create(Field.Type.MAP, map)); try { StageRunner.Output output = runner.runProcess(ImmutableList.of(record)); Assert.assertEquals(1, output.getRecords().get("a").size()); Record r = output.getRecords().get("a").get(0); Assert.assertFalse(r.has("/first")); Assert.assertFalse(r.has("/first/value")); Assert.assertTrue(r.has("/second")); Assert.assertTrue(r.has("/second/value")); } finally { runner.runDestroy(); } }
/**
 * Keeps two arbitrary leaf paths of the deeply nested record and checks that each retained value
 * is found at index {@code [0][0]} of its {@code streets} list in the result, while the other
 * paths are absent.
 */
@Test
public void testNestedKeep2() throws StageException {
  String keptNoe = "/USA[0]/SanFrancisco/noe/streets[1][1]/name";
  String keptCole = "/USA[1]/SantaMonica/cole/streets[0][1]/name";

  // Paths expected to be missing from the result, checked in this order.
  String[] absentPaths = {
    "/USA[1]/SantaMonica/cole/streets[0][1]/name",
    "/USA[0]/SanFrancisco/folsom/streets[0][0]/name",
    "/USA[0]/SanFrancisco/folsom/streets[0][1]/name",
    "/USA[0]/SanFrancisco/noe/streets[0][1]/name",
    "/USA[0]/SanFrancisco/noe/streets[1][0]/name",
    "/USA[0]/SanFrancisco/noe/streets[1][1]/name",
  };

  Record nested = createNestedRecord();
  ProcessorRunner filterRunner =
      new ProcessorRunner.Builder(FieldFilterDProcessor.class)
          .addConfiguration("fields", ImmutableList.of(keptNoe, keptCole))
          .addConfiguration("filterOperation", FilterOperation.KEEP)
          .addOutputLane("a")
          .build();
  filterRunner.runInit();

  try {
    StageRunner.Output result = filterRunner.runProcess(ImmutableList.of(nested));
    Assert.assertEquals(1, result.getRecords().get("a").size());
    Record filtered = result.getRecords().get("a").get(0);

    String noeInResult = "/USA[0]/SanFrancisco/noe/streets[0][0]/name";
    Assert.assertTrue(filtered.has(noeInResult));
    Assert.assertEquals("d", filtered.get(noeInResult).getValueAsString());

    String coleInResult = "/USA[1]/SantaMonica/cole/streets[0][0]/name";
    Assert.assertTrue(filtered.has(coleInResult));
    Assert.assertEquals("f", filtered.get(coleInResult).getValueAsString());

    for (String absent : absentPaths) {
      Assert.assertFalse(filtered.has(absent));
    }
  } finally {
    filterRunner.runDestroy();
  }
}
@Test public void testTargetFieldExistsReplace() throws StageException { // Standard overwrite condition. Source and target fields exist FieldRenamerConfig renameConfig = new FieldRenamerConfig(); renameConfig.fromFieldExpression = "/existing"; renameConfig.toFieldExpression = "/overwrite"; FieldRenamerProcessorErrorHandler errorHandler = new FieldRenamerProcessorErrorHandler(); errorHandler.nonExistingFromFieldHandling = OnStagePreConditionFailure.CONTINUE; errorHandler.multipleFromFieldsMatching = OnStagePreConditionFailure.TO_ERROR; errorHandler.existingToFieldHandling = ExistingToFieldHandling.REPLACE; FieldRenamerProcessor processor = new FieldRenamerProcessor(ImmutableList.of(renameConfig), errorHandler); // Test non-existent source with existing target field ProcessorRunner runner = new ProcessorRunner.Builder(FieldRenamerDProcessor.class, processor) .addOutputLane("a") .build(); runner.runInit(); try { Map<String, Field> map = new LinkedHashMap<>(); map.put("existing", Field.create(Field.Type.STRING, "foo")); map.put("overwrite", Field.create(Field.Type.STRING, "bar")); Record record = RecordCreator.create("s", "s:1"); record.set(Field.create(map)); StageRunner.Output output = runner.runProcess(ImmutableList.of(record)); Assert.assertEquals(1, output.getRecords().get("a").size()); Field field = output.getRecords().get("a").get(0).get(); Assert.assertTrue(field.getValue() instanceof Map); Map<String, Field> result = field.getValueAsMap(); Assert.assertEquals(String.valueOf(result), 1, result.size()); Assert.assertTrue(result.containsKey("overwrite")); Assert.assertTrue(!result.containsKey("existing")); Assert.assertEquals("foo", result.get("overwrite").getValue()); } finally { runner.runDestroy(); } }
@Test public void testSourceWithQuotedSubstring() throws StageException { // source should be processed as quoted string. FieldRenamerConfig renameConfig1 = new FieldRenamerConfig(); renameConfig1.fromFieldExpression = "/'attr|OrderNum'"; renameConfig1.toFieldExpression = "/theOrderNum"; FieldRenamerProcessorErrorHandler errorHandler = new FieldRenamerProcessorErrorHandler(); errorHandler.nonExistingFromFieldHandling = OnStagePreConditionFailure.CONTINUE; errorHandler.multipleFromFieldsMatching = OnStagePreConditionFailure.TO_ERROR; errorHandler.existingToFieldHandling = ExistingToFieldHandling.REPLACE; FieldRenamerProcessor processor = new FieldRenamerProcessor(ImmutableList.of(renameConfig1), errorHandler); // Test non-existent source with existing target field ProcessorRunner runner = new ProcessorRunner.Builder(FieldRenamerDProcessor.class, processor) .addOutputLane("a") .build(); runner.runInit(); try { Map<String, Field> map = new LinkedHashMap<>(); map.put("attr|OrderNum", Field.create(Field.Type.STRING, "foo")); Record record = RecordCreator.create("s", "s:1"); record.set(Field.create(map)); StageRunner.Output output = runner.runProcess(ImmutableList.of(record)); Assert.assertEquals(1, output.getRecords().get("a").size()); Field field = output.getRecords().get("a").get(0).get(); Assert.assertTrue(field.getValue() instanceof Map); Map<String, Field> result = field.getValueAsMap(); Assert.assertEquals(String.valueOf(result), 1, result.size()); Assert.assertFalse(result.containsKey("/'attr|OrderNum'")); Assert.assertFalse(result.containsKey("'attr|OrderNum'")); Assert.assertFalse(result.containsKey("attr|OrderNum")); Assert.assertTrue(result.containsKey("theOrderNum")); } finally { runner.runDestroy(); } }
/**
 * Exercises KEEP filtering with the {@code [*]} wildcard, first on an inner array
 * ({@code streets[0][*]}) and then on the outer array ({@code streets[*]}).
 *
 * <p>Value assertions are made against the processor output ({@code resultRecord}) so the
 * test actually verifies what was kept, with the expected value as the first argument.
 */
@Test
public void testWildCardKeep2() throws StageException {
  /*
   * Use wildcard in array within array
   */
  Record record = createNestedRecord();
  ProcessorRunner runner = new ProcessorRunner.Builder(FieldFilterDProcessor.class)
      .addConfiguration("fields", ImmutableList.of("/USA[0]/SanFrancisco/noe/streets[0][*]"))
      .addConfiguration("filterOperation", FilterOperation.KEEP)
      .addOutputLane("a")
      .build();
  runner.runInit();

  try {
    StageRunner.Output output = runner.runProcess(ImmutableList.of(record));
    Assert.assertEquals(1, output.getRecords().get("a").size());
    Record resultRecord = output.getRecords().get("a").get(0);

    Assert.assertTrue(resultRecord.has("/USA[0]/SanFrancisco/noe/streets[0]"));
    Assert.assertEquals(
        "a", resultRecord.get("/USA[0]/SanFrancisco/noe/streets[0][0]/name").getValueAsString());
    Assert.assertEquals(
        "b", resultRecord.get("/USA[0]/SanFrancisco/noe/streets[0][1]/name").getValueAsString());
  } finally {
    runner.runDestroy();
  }

  /*
   * Use wildcard in array
   */
  record = createNestedRecord();
  runner = new ProcessorRunner.Builder(FieldFilterDProcessor.class)
      .addConfiguration("fields", ImmutableList.of("/USA[0]/SanFrancisco/noe/streets[*]"))
      .addConfiguration("filterOperation", FilterOperation.KEEP)
      .addOutputLane("a")
      .build();
  runner.runInit();

  try {
    StageRunner.Output output = runner.runProcess(ImmutableList.of(record));
    Assert.assertEquals(1, output.getRecords().get("a").size());
    Record resultRecord = output.getRecords().get("a").get(0);

    Assert.assertTrue(resultRecord.has("/USA[0]/SanFrancisco/noe/streets"));
    Assert.assertEquals(
        "a", resultRecord.get("/USA[0]/SanFrancisco/noe/streets[0][0]/name").getValueAsString());
    Assert.assertEquals(
        "b", resultRecord.get("/USA[0]/SanFrancisco/noe/streets[0][1]/name").getValueAsString());
    Assert.assertEquals(
        "c", resultRecord.get("/USA[0]/SanFrancisco/noe/streets[1][0]/name").getValueAsString());
    Assert.assertEquals(
        "d", resultRecord.get("/USA[0]/SanFrancisco/noe/streets[1][1]/name").getValueAsString());
  } finally {
    runner.runDestroy();
  }
}
@Test public void testNonExistingSourceAndTargetFields() throws StageException { // If neither the source or target fields exist, then field renaming is a noop, and should // succeed FieldRenamerConfig renameConfig = new FieldRenamerConfig(); renameConfig.fromFieldExpression = "/nonExisting"; renameConfig.toFieldExpression = "/alsoNonExisting"; FieldRenamerProcessorErrorHandler errorHandler = new FieldRenamerProcessorErrorHandler(); errorHandler.nonExistingFromFieldHandling = OnStagePreConditionFailure.CONTINUE; errorHandler.multipleFromFieldsMatching = OnStagePreConditionFailure.TO_ERROR; errorHandler.existingToFieldHandling = ExistingToFieldHandling.TO_ERROR; FieldRenamerProcessor processor = new FieldRenamerProcessor(ImmutableList.of(renameConfig), errorHandler); ProcessorRunner runner = new ProcessorRunner.Builder(FieldRenamerDProcessor.class, processor) .addOutputLane("a") .build(); runner.runInit(); try { Map<String, Field> map = new LinkedHashMap<>(); map.put("name", Field.create(Field.Type.STRING, null)); Record record = RecordCreator.create("s", "s:1"); record.set(Field.create(map)); StageRunner.Output output = runner.runProcess(ImmutableList.of(record)); Assert.assertEquals(1, output.getRecords().get("a").size()); Field field = output.getRecords().get("a").get(0).get(); Assert.assertTrue(field.getValue() instanceof Map); Map<String, Field> result = field.getValueAsMap(); Assert.assertEquals(String.valueOf(result), 1, result.size()); Assert.assertTrue(result.containsKey("name")); Assert.assertEquals(null, result.get("name").getValue()); } finally { runner.runDestroy(); } }
@Test public void testTargetFieldExistsError() throws StageException { // If overwrite is set to false, overwriting should result in an error FieldRenamerConfig renameConfig = new FieldRenamerConfig(); renameConfig.fromFieldExpression = "/existing"; renameConfig.toFieldExpression = "/overwrite"; FieldRenamerProcessorErrorHandler errorHandler = new FieldRenamerProcessorErrorHandler(); errorHandler.nonExistingFromFieldHandling = OnStagePreConditionFailure.CONTINUE; errorHandler.multipleFromFieldsMatching = OnStagePreConditionFailure.TO_ERROR; errorHandler.existingToFieldHandling = ExistingToFieldHandling.TO_ERROR; FieldRenamerProcessor processor = new FieldRenamerProcessor(ImmutableList.of(renameConfig), errorHandler); // Test non-existent source with existing target field ProcessorRunner runner = new ProcessorRunner.Builder(FieldRenamerDProcessor.class, processor) .setOnRecordError(OnRecordError.TO_ERROR) .addOutputLane("a") .build(); runner.runInit(); try { Map<String, Field> map = new LinkedHashMap<>(); map.put("existing", Field.create(Field.Type.STRING, "foo")); map.put("overwrite", Field.create(Field.Type.STRING, "bar")); Record record = RecordCreator.create("s", "s:1"); record.set(Field.create(map)); StageRunner.Output output = runner.runProcess(ImmutableList.of(record)); Assert.assertEquals(0, output.getRecords().get("a").size()); Assert.assertEquals(1, runner.getErrorRecords().size()); } finally { runner.runDestroy(); } }
/**
 * Runs the JDBC lookup processor over a single map-rooted record using {@code mapQuery} and
 * checks that column {@code P_ID} is written to field {@code /p_id} with value 1.
 */
@Test
public void testSingleRecordMap() throws Exception {
  List<JdbcFieldColumnMapping> columnMappings =
      ImmutableList.of(new JdbcFieldColumnMapping("P_ID", "/p_id"));

  JdbcLookupDProcessor processor = new JdbcLookupDProcessor();
  processor.hikariConfigBean = createConfigBean(h2ConnectionString, username, password);

  ProcessorRunner processorRunner =
      new ProcessorRunner.Builder(JdbcLookupDProcessor.class, processor)
          .addConfiguration("query", mapQuery)
          .addConfiguration("columnMappings", columnMappings)
          .addConfiguration("maxClobSize", 1000)
          .addConfiguration("maxBlobSize", 1000)
          .addOutputLane("lane")
          .build();

  Record record = RecordCreator.create();
  LinkedHashMap<String, Field> fields = new LinkedHashMap<>();
  fields.put("first_name", Field.create("Adam"));
  fields.put("last_name", Field.create("Kunicki"));
  record.set(Field.create(fields));

  List<Record> singleRecord = ImmutableList.of(record);
  processorRunner.runInit();
  try {
    StageRunner.Output output = processorRunner.runProcess(singleRecord);
    Assert.assertEquals(1, output.getRecords().get("lane").size());

    record = output.getRecords().get("lane").get(0);

    // The looked-up column must have been added before its value is inspected.
    Assert.assertNotNull(record.get("/p_id"));
    Assert.assertEquals(1, record.get("/p_id").getValueAsInteger());
  } finally {
    processorRunner.runDestroy();
  }
}
/**
 * Renames {@code /a} to {@code /b/c/d} on a record that contains only {@code a}; the record is
 * expected in error with code {@code FIELD_RENAMER_04}.
 */
@Test
public void testUnreachableFields() throws Exception {
  FieldRenamerConfig rename = new FieldRenamerConfig();
  rename.fromFieldExpression = "/a";
  rename.toFieldExpression = "/b/c/d";

  FieldRenamerProcessorErrorHandler handler = new FieldRenamerProcessorErrorHandler();
  handler.nonExistingFromFieldHandling = OnStagePreConditionFailure.TO_ERROR;
  handler.multipleFromFieldsMatching = OnStagePreConditionFailure.TO_ERROR;
  handler.existingToFieldHandling = ExistingToFieldHandling.REPLACE;

  FieldRenamerProcessor renamer = new FieldRenamerProcessor(ImmutableList.of(rename), handler);
  ProcessorRunner runner =
      new ProcessorRunner.Builder(FieldRenamerDProcessor.class, renamer)
          .addOutputLane("a")
          .setOnRecordError(OnRecordError.TO_ERROR)
          .build();
  runner.runInit();

  try {
    // The record has no "b" field, so the target path /b/c/d has no parent to attach to.
    Map<String, Field> inputFields = new LinkedHashMap<>();
    inputFields.put("a", Field.create(123));
    Record input = RecordCreator.create("s", "s:1");
    input.set(Field.create(inputFields));

    StageRunner.Output output = runner.runProcess(ImmutableList.of(input));

    Assert.assertEquals(0, output.getRecords().get("a").size());
    Assert.assertEquals(1, runner.getErrorRecords().size());

    Record failed = runner.getErrorRecords().get(0);
    Assert.assertEquals(Errors.FIELD_RENAMER_04.name(), failed.getHeader().getErrorCode());
  } finally {
    runner.runDestroy();
  }
}
/**
 * KEEP on paths under the non-existing node {@code /USA[0]/SanFrancisco/cole}: the output
 * should retain {@code /USA[0]/SanFrancisco} as an empty map and nothing beneath it.
 */
@Test
public void testNestedKeep4() throws StageException {
  Record input = createNestedRecord();

  /*
   * Keep non-existing nested path "/USA[0]/SanFrancisco/cole".
   * Only objects up to /USA[0]/SanFrancisco should exist.
   */
  ProcessorRunner runner = new ProcessorRunner.Builder(FieldFilterDProcessor.class)
      .addConfiguration(
          "fields",
          ImmutableList.of(
              "/USA[0]/SanFrancisco/cole/streets[0][0]/name",
              "/USA[0]/SanFrancisco/cole/streets[1][0]/name"))
      .addConfiguration("filterOperation", FilterOperation.KEEP)
      .addOutputLane("a")
      .build();
  runner.runInit();

  try {
    StageRunner.Output output = runner.runProcess(ImmutableList.of(input));
    Assert.assertEquals(1, output.getRecords().get("a").size());
    Record resultRecord = output.getRecords().get("a").get(0);

    // The parent map survives but is empty.
    Assert.assertTrue(resultRecord.has("/USA[0]/SanFrancisco"));
    Assert.assertEquals(0, resultRecord.get("/USA[0]/SanFrancisco").getValueAsMap().size());

    // Everything else must have been filtered out.
    String[] absentPaths = {
      "/USA[1]/SantaMonica/cole/streets[0][0]/name",
      "/USA[1]/SantaMonica/cole/streets[0][1]/name",
      "/USA[0]/SanFrancisco/folsom/streets[0][0]/name",
      "/USA[0]/SanFrancisco/folsom/streets[0][1]/name"
    };
    for (String path : absentPaths) {
      Assert.assertFalse(resultRecord.has(path));
    }
  } finally {
    runner.runDestroy();
  }
}
@Test public void testNestedKeep1() throws StageException { /* * In a deep nested field path try to retain the second elements of an array within an array */ Record record = createNestedRecord(); // Keep only second elements of array within array "/USA[0]/SanFrancisco/noe/streets" ProcessorRunner runner = new ProcessorRunner.Builder(FieldFilterDProcessor.class) .addConfiguration( "fields", ImmutableList.of( "/USA[0]/SanFrancisco/noe/streets[0][1]/name", "/USA[0]/SanFrancisco/noe/streets[1][1]/name")) .addConfiguration("filterOperation", FilterOperation.KEEP) .addOutputLane("a") .build(); runner.runInit(); try { StageRunner.Output output = runner.runProcess(ImmutableList.of(record)); Assert.assertEquals(1, output.getRecords().get("a").size()); Record resultRecord = output.getRecords().get("a").get(0); // Note that since the first element was removed, the second element is the new first element Assert.assertTrue(resultRecord.has("/USA[0]/SanFrancisco/noe/streets[0][0]/name")); Assert.assertEquals( "b", resultRecord.get("/USA[0]/SanFrancisco/noe/streets[0][0]/name").getValueAsString()); // Note that since the first element was removed, the second element is the new first element Assert.assertTrue(resultRecord.has("/USA[0]/SanFrancisco/noe/streets[1][0]/name")); Assert.assertEquals( "d", resultRecord.get("/USA[0]/SanFrancisco/noe/streets[1][0]/name").getValueAsString()); Assert.assertFalse(resultRecord.has("/USA[0]/SanFrancisco/noe/streets[0][1]/name")); Assert.assertFalse(resultRecord.has("/USA[0]/SanFrancisco/noe/streets[1][1]/name")); Assert.assertFalse(resultRecord.has("/USA[1]/SantaMonica/cole/streets[0][0]/name")); Assert.assertFalse(resultRecord.has("/USA[1]/SantaMonica/cole/streets[0][1]/name")); Assert.assertFalse(resultRecord.has("/USA[0]/SanFrancisco/folsom/streets[0][0]/name")); Assert.assertFalse(resultRecord.has("/USA[0]/SanFrancisco/folsom/streets[0][1]/name")); } finally { runner.runDestroy(); } /* * In a deep nested field path try to retain the first 
elements of an array within an array */ record = createNestedRecord(); // Keep only first elements of array within array "/USA[0]/SanFrancisco/noe/streets" runner = new ProcessorRunner.Builder(FieldFilterDProcessor.class) .addConfiguration( "fields", ImmutableList.of( "/USA[0]/SanFrancisco/noe/streets[0][0]/name", "/USA[0]/SanFrancisco/noe/streets[1][0]/name")) .addConfiguration("filterOperation", FilterOperation.KEEP) .addOutputLane("a") .build(); runner.runInit(); try { StageRunner.Output output = runner.runProcess(ImmutableList.of(record)); Assert.assertEquals(1, output.getRecords().get("a").size()); Record resultRecord = output.getRecords().get("a").get(0); Assert.assertTrue(resultRecord.has("/USA[0]/SanFrancisco/noe/streets[0][0]/name")); Assert.assertEquals( "a", resultRecord.get("/USA[0]/SanFrancisco/noe/streets[0][0]/name").getValueAsString()); Assert.assertTrue(resultRecord.has("/USA[0]/SanFrancisco/noe/streets[1][0]/name")); Assert.assertEquals( "c", resultRecord.get("/USA[0]/SanFrancisco/noe/streets[1][0]/name").getValueAsString()); Assert.assertFalse(resultRecord.has("/USA[0]/SanFrancisco/noe/streets[0][1]/name")); Assert.assertFalse(resultRecord.has("/USA[0]/SanFrancisco/noe/streets[1][1]/name")); Assert.assertFalse(resultRecord.has("/USA[1]/SantaMonica/cole/streets[0][0]/name")); Assert.assertFalse(resultRecord.has("/USA[1]/SantaMonica/cole/streets[0][1]/name")); Assert.assertFalse(resultRecord.has("/USA[0]/SanFrancisco/folsom/streets[0][0]/name")); Assert.assertFalse(resultRecord.has("/USA[0]/SanFrancisco/folsom/streets[0][1]/name")); } finally { runner.runDestroy(); } }
/**
 * Exercises KEEP filtering with wildcards in both a map and an array
 * ({@code /USA[0]/SanFrancisco/&#42;/streets[*][1]/name}), and then with a map-only wildcard
 * ({@code /USA[0]/SanFrancisco/&#42;}) where the whole subtree is expected to be preserved.
 *
 * <p>Value assertions are made against the processor output ({@code resultRecord}) so the
 * test actually verifies what was kept, with the expected value as the first argument.
 */
@Test
public void testWildCardKeep3() throws StageException {
  /*
   * Use wildcard in map and array
   */
  Record record = createNestedRecord();
  ProcessorRunner runner = new ProcessorRunner.Builder(FieldFilterDProcessor.class)
      .addConfiguration(
          "fields", ImmutableList.of("/USA[0]/SanFrancisco/*/streets[*][1]/name"))
      .addConfiguration("filterOperation", FilterOperation.KEEP)
      .addOutputLane("a")
      .build();
  runner.runInit();

  try {
    StageRunner.Output output = runner.runProcess(ImmutableList.of(record));
    Assert.assertEquals(1, output.getRecords().get("a").size());
    Record resultRecord = output.getRecords().get("a").get(0);

    // Kept second elements are found at index 0 in the output.
    Assert.assertTrue(resultRecord.has("/USA[0]/SanFrancisco/noe/streets[0][0]/name"));
    Assert.assertEquals(
        "b", resultRecord.get("/USA[0]/SanFrancisco/noe/streets[0][0]/name").getValueAsString());
    Assert.assertFalse(resultRecord.has("/USA[0]/SanFrancisco/noe/streets[0][1]/name"));

    Assert.assertTrue(resultRecord.has("/USA[0]/SanFrancisco/noe/streets[1][0]/name"));
    Assert.assertEquals(
        "d", resultRecord.get("/USA[0]/SanFrancisco/noe/streets[1][0]/name").getValueAsString());
    Assert.assertFalse(resultRecord.has("/USA[0]/SanFrancisco/noe/streets[1][1]/name"));

    Assert.assertTrue(resultRecord.has("/USA[0]/SanFrancisco/folsom/streets[0][0]/name"));
    Assert.assertFalse(resultRecord.has("/USA[0]/SanFrancisco/folsom/streets[0][1]/name"));

    Assert.assertFalse(resultRecord.has("/USA[1]/SantaMonica/cole/streets[0][0]/name"));
    Assert.assertFalse(resultRecord.has("/USA[1]/SantaMonica/cole/streets[0][1]/name"));
  } finally {
    runner.runDestroy();
  }

  /*
   * Use wildcard in map. Make sure the entire tree of elements is preserved
   */
  record = createNestedRecord();
  runner = new ProcessorRunner.Builder(FieldFilterDProcessor.class)
      .addConfiguration("fields", ImmutableList.of("/USA[0]/SanFrancisco/*"))
      .addConfiguration("filterOperation", FilterOperation.KEEP)
      .addOutputLane("a")
      .build();
  runner.runInit();

  try {
    StageRunner.Output output = runner.runProcess(ImmutableList.of(record));
    Assert.assertEquals(1, output.getRecords().get("a").size());
    Record resultRecord = output.getRecords().get("a").get(0);

    Assert.assertTrue(resultRecord.has("/USA[0]/SanFrancisco/noe"));
    Assert.assertTrue(resultRecord.has("/USA[0]/SanFrancisco/folsom"));

    Assert.assertEquals(
        "a", resultRecord.get("/USA[0]/SanFrancisco/noe/streets[0][0]/name").getValueAsString());
    Assert.assertEquals(
        "b", resultRecord.get("/USA[0]/SanFrancisco/noe/streets[0][1]/name").getValueAsString());
    Assert.assertEquals(
        "c", resultRecord.get("/USA[0]/SanFrancisco/noe/streets[1][0]/name").getValueAsString());
    Assert.assertEquals(
        "d", resultRecord.get("/USA[0]/SanFrancisco/noe/streets[1][1]/name").getValueAsString());
    Assert.assertEquals(
        "g",
        resultRecord.get("/USA[0]/SanFrancisco/folsom/streets[0][0]/name").getValueAsString());
    Assert.assertEquals(
        "h",
        resultRecord.get("/USA[0]/SanFrancisco/folsom/streets[0][1]/name").getValueAsString());

    Assert.assertFalse(resultRecord.has("/USA[1]/SantaMonica/cole/streets[0][0]/name"));
    Assert.assertFalse(resultRecord.has("/USA[1]/SantaMonica/cole/streets[0][1]/name"));
  } finally {
    runner.runDestroy();
  }
}
/**
 * Feeds two Avro payloads to a {@code ClusterHdfsDSource} in {@code CLUSTER_BATCH} mode via a
 * helper thread and checks the offset and the {@code /name}, {@code /age} and {@code /emails}
 * fields of the two records produced.
 */
@Test(timeout = 30000)
public void testProduceAvroData() throws Exception {
  SourceRunner runner = new SourceRunner.Builder(ClusterHdfsDSource.class)
      .addOutputLane("lane")
      .setExecutionMode(ExecutionMode.CLUSTER_BATCH)
      .addConfiguration("hdfsUri", miniDFS.getURI().toString())
      .addConfiguration("hdfsDirLocations", Arrays.asList(dir.toUri().getPath()))
      .addConfiguration("recursive", false)
      .addConfiguration("hdfsConfigs", new HashMap<String, String>())
      .addConfiguration("dataFormat", DataFormat.AVRO)
      .addConfiguration("csvFileFormat", CsvMode.CSV)
      .addConfiguration("csvHeader", CsvHeader.WITH_HEADER)
      .addConfiguration("csvMaxObjectLen", 4096)
      .addConfiguration("textMaxLineLen", 1024)
      .addConfiguration("produceSingleRecordPerMessage", false)
      .addConfiguration("regex", null)
      .addConfiguration("grokPatternDefinition", null)
      .addConfiguration("enableLog4jCustomLogFormat", false)
      .addConfiguration("customLogFormat", null)
      .addConfiguration("fieldPathsToGroupName", null)
      .addConfiguration("log4jCustomLogFormat", null)
      .addConfiguration("grokPattern", null)
      .addConfiguration("hdfsKerberos", false)
      .addConfiguration("hdfsConfDir", hadoopConfDir)
      .setResourcesDir(resourcesDir)
      .build();
  runner.runInit();

  // Two (offset, payload) entries handed to the source by the helper thread.
  List<Map.Entry> batchEntries = new ArrayList<>();
  batchEntries.add(
      new Pair(
          "path::" + "1" + "::1",
          createAvroData("a", 30, ImmutableList.of("*****@*****.**", "*****@*****.**"))));
  batchEntries.add(
      new Pair(
          "path::" + "1" + "::2",
          createAvroData("b", 40, ImmutableList.of("*****@*****.**", "*****@*****.**"))));

  Thread feeder = createThreadForAddingBatch(runner, batchEntries);
  try {
    StageRunner.Output output = runner.runProduce(null, 5);

    // The new offset is the key of the last entry consumed.
    Assert.assertEquals("path::" + "1::2", output.getNewOffset());

    List<Record> produced = output.getRecords().get("lane");
    Assert.assertEquals(2, produced.size());

    Record first = produced.get(0);
    Assert.assertTrue(first.has("/name"));
    Assert.assertEquals("a", first.get("/name").getValueAsString());
    Assert.assertTrue(first.has("/age"));
    Assert.assertEquals(30, first.get("/age").getValueAsInteger());
    Assert.assertTrue(first.has("/emails"));
    Assert.assertTrue(first.get("/emails").getValueAsList() instanceof List);
    List<Field> firstEmails = first.get("/emails").getValueAsList();
    Assert.assertEquals(2, firstEmails.size());
    Assert.assertEquals("*****@*****.**", firstEmails.get(0).getValueAsString());
    Assert.assertEquals("*****@*****.**", firstEmails.get(1).getValueAsString());

    Record second = produced.get(1);
    Assert.assertTrue(second.has("/name"));
    Assert.assertEquals("b", second.get("/name").getValueAsString());
    Assert.assertTrue(second.has("/age"));
    Assert.assertEquals(40, second.get("/age").getValueAsInteger());
    Assert.assertTrue(second.has("/emails"));
    Assert.assertTrue(second.get("/emails").getValueAsList() instanceof List);
    List<Field> secondEmails = second.get("/emails").getValueAsList();
    Assert.assertEquals(2, secondEmails.size());
    Assert.assertEquals("*****@*****.**", secondEmails.get(0).getValueAsString());
    Assert.assertEquals("*****@*****.**", secondEmails.get(1).getValueAsString());
  } finally {
    // Stop the helper thread regardless of the outcome.
    feeder.interrupt();
  }
}