@Test public void testDifferentMatchesRegex() throws StageException { FieldRenamerConfig renameConfig = new FieldRenamerConfig(); // Any field containing a non-word character should be in single quotes renameConfig.fromFieldExpression = "/'(.*)(#)(.*)'"; renameConfig.toFieldExpression = "/$1hash$3"; FieldRenamerProcessorErrorHandler errorHandler = new FieldRenamerProcessorErrorHandler(); errorHandler.nonExistingFromFieldHandling = OnStagePreConditionFailure.TO_ERROR; errorHandler.multipleFromFieldsMatching = OnStagePreConditionFailure.TO_ERROR; errorHandler.existingToFieldHandling = ExistingToFieldHandling.TO_ERROR; FieldRenamerProcessor processor = new FieldRenamerProcessor(ImmutableList.of(renameConfig), errorHandler); ProcessorRunner runner = new ProcessorRunner.Builder(FieldRenamerDProcessor.class, processor) .setOnRecordError(OnRecordError.STOP_PIPELINE) .addOutputLane("a") .build(); runner.runInit(); Map<String, Field> map = new LinkedHashMap<>(); map.put("#abcd", Field.create("hashabcd")); Record record1 = RecordCreator.create("s", "s:1"); record1.set(Field.create(Field.Type.MAP, map)); map = new LinkedHashMap<>(); map.put("ab#cd", Field.create("abhashcd")); Record record2 = RecordCreator.create("s", "s:2"); record2.set(Field.create(Field.Type.MAP, map)); map = new LinkedHashMap<>(); map.put("abcd#", Field.create("abcdhash")); Record record3 = RecordCreator.create("s", "s:3"); record3.set(Field.create(Field.Type.MAP, map)); try { StageRunner.Output output = runner.runProcess(ImmutableList.of(record1, record2, record3)); Assert.assertEquals(3, output.getRecords().get("a").size()); for (Record record : output.getRecords().get("a")) { Map<String, Field> fieldMap = record.get().getValueAsMap(); for (Map.Entry<String, Field> fieldEntry : fieldMap.entrySet()) { Assert.assertEquals(fieldEntry.getKey(), fieldEntry.getValue().getValueAsString()); } } } finally { runner.runDestroy(); } }
@Test public void testCategoryRegexInNonComplexType() throws StageException { FieldRenamerConfig renameConfig = new FieldRenamerConfig(); // Any field containing a non-word character should be in single quotes renameConfig.fromFieldExpression = "/'(.*)[#&@|](.*)'"; renameConfig.toFieldExpression = "/$1_$2"; FieldRenamerProcessorErrorHandler errorHandler = new FieldRenamerProcessorErrorHandler(); errorHandler.nonExistingFromFieldHandling = OnStagePreConditionFailure.CONTINUE; errorHandler.multipleFromFieldsMatching = OnStagePreConditionFailure.TO_ERROR; errorHandler.existingToFieldHandling = ExistingToFieldHandling.APPEND_NUMBERS; FieldRenamerProcessor processor = new FieldRenamerProcessor(ImmutableList.of(renameConfig), errorHandler); ProcessorRunner runner = new ProcessorRunner.Builder(FieldRenamerDProcessor.class, processor) .setOnRecordError(OnRecordError.TO_ERROR) .addOutputLane("a") .build(); runner.runInit(); try { Map<String, Field> map = new LinkedHashMap<>(); map.put("a#b", Field.create(Field.Type.STRING, "foo1")); map.put("a_b", Field.create(Field.Type.STRING, "foo2")); map.put("a&b", Field.create(Field.Type.STRING, "foo3")); map.put("a|b", Field.create(Field.Type.STRING, "foo4")); map.put("a@b", Field.create(Field.Type.STRING, "foo5")); Record record = RecordCreator.create("s", "s:1"); record.set(Field.create(Field.Type.MAP, map)); StageRunner.Output output = runner.runProcess(ImmutableList.of(record)); Assert.assertEquals(1, output.getRecords().get("a").size()); Record r = output.getRecords().get("a").get(0); Assert.assertFalse(r.has("/'a#b'")); Assert.assertFalse(r.has("/'a&b'")); Assert.assertFalse(r.has("/'a|b'")); Assert.assertFalse(r.has("/'a&b'")); Assert.assertTrue(r.has("/a_b")); Assert.assertEquals("foo2", r.get("/a_b").getValueAsString()); Assert.assertTrue(r.has("/a_b1")); Assert.assertEquals("foo1", r.get("/a_b1").getValueAsString()); Assert.assertTrue(r.has("/a_b2")); Assert.assertEquals("foo3", r.get("/a_b2").getValueAsString()); 
Assert.assertTrue(r.has("/a_b3")); Assert.assertEquals("foo4", r.get("/a_b3").getValueAsString()); Assert.assertTrue(r.has("/a_b4")); Assert.assertEquals("foo5", r.get("/a_b4").getValueAsString()); } finally { runner.runDestroy(); } }
@Test public void testNewFieldWithTypedNull() throws Exception { // initial data in record is empty Record record = RecordCreator.create(); Map<String, Field> map = new HashMap<>(); record.set(Field.create(map)); Processor processor = new JythonProcessor( ProcessingMode.RECORD, "for record in records:\n" + " record.value['null_int'] = NULL_INTEGER\n" + " record.value['null_long'] = NULL_LONG\n" + " record.value['null_float'] = NULL_FLOAT\n" + " record.value['null_double'] = NULL_DOUBLE\n" + " record.value['null_date'] = NULL_DATE\n" + " record.value['null_datetime'] = NULL_DATETIME\n" + " record.value['null_boolean'] = NULL_BOOLEAN\n" + " record.value['null_decimal'] = NULL_DECIMAL\n" + " record.value['null_byteArray'] = NULL_BYTE_ARRAY\n" + " record.value['null_string'] = NULL_STRING\n" + " record.value['null_list'] = NULL_LIST\n" + " record.value['null_map'] = NULL_MAP\n" + " record.value['null_time'] = NULL_TIME\n" + " output.write(record)\n"); ScriptingProcessorTestUtil.verifyTypedFieldWithNullValue( JythonDProcessor.class, processor, record); }
@Test public void testGetFieldNull() throws Exception { // initial data in record Record record = RecordCreator.create(); Map<String, Field> map = new HashMap<>(); map.put("null_int", Field.create(Field.Type.INTEGER, null)); map.put("null_string", Field.create(Field.Type.STRING, null)); map.put("null_boolean", Field.create(Field.Type.BOOLEAN, null)); map.put("null_list", Field.create(Field.Type.LIST, null)); map.put("null_map", Field.create(Field.Type.MAP, null)); // original record has value in the field, so getFieldNull should return the value map.put("null_datetime", Field.createDatetime(new Date())); record.set(Field.create(map)); Processor processor = new JythonProcessor( ProcessingMode.RECORD, "for record in records:\n" + " if sdcFunctions.getFieldNull(record, '/null_int') == NULL_INTEGER:\n" + " record.value['null_int'] = 123 \n" + " if sdcFunctions.getFieldNull(record, '/null_string') == NULL_STRING:\n" + " record.value['null_string'] = 'test' \n" + " if sdcFunctions.getFieldNull(record, '/null_boolean') == NULL_BOOLEAN:\n" + " record.value['null_boolean'] = True \n" + " if sdcFunctions.getFieldNull(record, '/null_list') is NULL_LIST:\n" + " record.value['null_list'] = ['elem1', 'elem2'] \n" + " if sdcFunctions.getFieldNull(record, '/null_map') == NULL_MAP:\n" + " record.value['null_map'] = {'x': 'X', 'y': 'Y'} \n" + " if sdcFunctions.getFieldNull(record, '/null_datetime') == NULL_DATETIME:\n" + // this should be false " record.value['null_datetime'] = NULL_DATETIME \n" + " output.write(record);\n"); ScriptingProcessorTestUtil.verifyNullField(JythonProcessor.class, processor, record); }
/**
 * Writes a single new-partition metadata record (database {@code default}, table {@code tbl},
 * partition {@code dt=2016}) to a {@link HiveMetastoreTarget}.
 *
 * @return the event records reported by the runner after the write
 * @throws Exception if the runner fails to initialise or the write fails
 */
public List<Record> runNewPartitionRecord() throws Exception {
  HiveMetastoreTarget hiveTarget = new HiveMetastoreTargetBuilder().build();

  TargetRunner runner =
      new TargetRunner.Builder(HiveMetastoreTarget.class, hiveTarget)
          .setOnRecordError(OnRecordError.TO_ERROR)
          .build();
  runner.runInit();

  // Everything after a successful init runs inside the try so the runner is destroyed even
  // when an assertion or the write itself fails.
  try {
    // NOTE(review): this check runs before the write, so it cannot observe records the
    // write sends to error — confirm whether it was meant to follow runWrite().
    Assert.assertEquals("There should be no error records", 0, runner.getErrorRecords().size());

    LinkedHashMap<String, String> partitionVals = new LinkedHashMap<>();
    partitionVals.put("dt", "2016");

    Field newPartitionField =
        HiveMetastoreUtil.newPartitionMetadataFieldBuilder(
            "default", "tbl", partitionVals, "/user/hive/warehouse/tbl/dt=2016");
    Record record = RecordCreator.create();
    record.set(newPartitionField);

    runner.runWrite(ImmutableList.of(record));
    return runner.getEventRecords();
  } finally {
    runner.runDestroy();
  }
}
/**
 * Configures the field filter to KEEP only {@code /city}, a path that the input record does
 * not contain, and expects the output record to be an empty map.
 */
@Test
public void testKeepNonExistingFiled() throws StageException {
  ProcessorRunner runner =
      new ProcessorRunner.Builder(FieldFilterDProcessor.class)
          .addConfiguration("fields", ImmutableList.of("/city"))
          .addConfiguration("filterOperation", FilterOperation.KEEP)
          .addOutputLane("a")
          .build();
  runner.runInit();

  try {
    // None of these keys matches the single path configured to be kept.
    Map<String, Field> person = new LinkedHashMap<>();
    person.put("name", Field.create("a"));
    person.put("age", Field.create("b"));
    person.put("streetAddress", Field.create("c"));
    Record input = RecordCreator.create("s", "s:1");
    input.set(Field.create(person));

    StageRunner.Output produced = runner.runProcess(ImmutableList.of(input));
    Assert.assertEquals(1, produced.getRecords().get("a").size());

    Field root = produced.getRecords().get("a").get(0).get();
    Assert.assertTrue(root.getValue() instanceof Map);
    Map<String, Field> remaining = root.getValueAsMap();
    Assert.assertTrue(remaining.isEmpty());
  } finally {
    runner.runDestroy();
  }
}
@Test public void testMultipleRegexMatchingSameField() throws StageException { FieldRenamerConfig renameConfig1 = new FieldRenamerConfig(); renameConfig1.fromFieldExpression = "/sql(.*)"; renameConfig1.toFieldExpression = "/sqlRename$1"; FieldRenamerConfig renamerConfig2 = new FieldRenamerConfig(); renamerConfig2.fromFieldExpression = "/s(.*)"; renamerConfig2.toFieldExpression = "/sRename$1"; Map<String, Field> map = new LinkedHashMap<>(); map.put("sqlField", Field.create(Field.Type.STRING, "foo")); Record record = RecordCreator.create("s", "s:1"); record.set(Field.create(Field.Type.MAP, map)); try { FieldRenamerProcessorErrorHandler errorHandler = new FieldRenamerProcessorErrorHandler(); errorHandler.nonExistingFromFieldHandling = OnStagePreConditionFailure.CONTINUE; errorHandler.multipleFromFieldsMatching = OnStagePreConditionFailure.TO_ERROR; errorHandler.existingToFieldHandling = ExistingToFieldHandling.TO_ERROR; FieldRenamerProcessor processor = new FieldRenamerProcessor(ImmutableList.of(renameConfig1, renamerConfig2), errorHandler); // Test non-existent source with existing target field ProcessorRunner runner = new ProcessorRunner.Builder(FieldRenamerDProcessor.class, processor) .setOnRecordError(OnRecordError.STOP_PIPELINE) .addOutputLane("a") .build(); runner.runInit(); StageRunner.Output output = runner.runProcess(ImmutableList.of(record)); Assert.fail("Should throw error if multiple regex match the same field"); } catch (OnRecordErrorException e) { Assert.assertEquals(Errors.FIELD_RENAMER_03, e.getErrorCode()); } FieldRenamerProcessorErrorHandler errorHandler = new FieldRenamerProcessorErrorHandler(); errorHandler.nonExistingFromFieldHandling = OnStagePreConditionFailure.CONTINUE; errorHandler.multipleFromFieldsMatching = OnStagePreConditionFailure.CONTINUE; errorHandler.existingToFieldHandling = ExistingToFieldHandling.TO_ERROR; FieldRenamerProcessor processor = new FieldRenamerProcessor(ImmutableList.of(renameConfig1, renamerConfig2), errorHandler); 
ProcessorRunner runner = new ProcessorRunner.Builder(FieldRenamerDProcessor.class, processor) .setOnRecordError(OnRecordError.TO_ERROR) .addOutputLane("a") .build(); runner.runInit(); StageRunner.Output output = runner.runProcess(ImmutableList.of(record)); Assert.assertEquals(1, output.getRecords().get("a").size()); Record r = output.getRecords().get("a").get(0); Assert.assertTrue(r.has("/sqlField")); }
/**
 * Runs the JDBC lookup processor with an empty column-mapping list against three list-typed
 * records and expects each output record to carry, at index {@code [2]}, a map whose
 * {@code value} entry is 1, 2 and 3 respectively.
 */
@Test
public void testMissingColumnMappingList() throws Exception {
  List<JdbcFieldColumnMapping> columnMappings = ImmutableList.of();

  JdbcLookupDProcessor processor = new JdbcLookupDProcessor();
  processor.hikariConfigBean = createConfigBean(h2ConnectionString, username, password);

  ProcessorRunner processorRunner =
      new ProcessorRunner.Builder(JdbcLookupDProcessor.class, processor)
          .addConfiguration("query", listQuery)
          .addConfiguration("columnMappings", columnMappings)
          .addConfiguration("maxClobSize", 1000)
          .addConfiguration("maxBlobSize", 1000)
          .addOutputLane("lane")
          .build();

  Record record1 = RecordCreator.create();
  List<Field> fields1 = new ArrayList<>();
  fields1.add(Field.create("Adam"));
  fields1.add(Field.create("Kunicki"));
  record1.set(Field.create(fields1));

  Record record2 = RecordCreator.create();
  List<Field> fields2 = new ArrayList<>();
  fields2.add(Field.create("Jon"));
  fields2.add(Field.create("Natkins"));
  record2.set(Field.create(fields2));

  Record record3 = RecordCreator.create();
  List<Field> fields3 = new ArrayList<>();
  fields3.add(Field.create("Jon"));
  fields3.add(Field.create("Daulton"));
  record3.set(Field.create(fields3));

  List<Record> records = ImmutableList.of(record1, record2, record3);

  processorRunner.runInit();
  // Destroy the stage even if processing or an assertion fails, so whatever runInit()
  // acquired is released.
  try {
    List<Record> outputRecords = processorRunner.runProcess(records).getRecords().get("lane");

    Assert.assertEquals(
        1, outputRecords.get(0).get("[2]").getValueAsMap().get("value").getValueAsInteger());
    Assert.assertEquals(
        2, outputRecords.get(1).get("[2]").getValueAsMap().get("value").getValueAsInteger());
    Assert.assertEquals(
        3, outputRecords.get(2).get("[2]").getValueAsMap().get("value").getValueAsInteger());
  } finally {
    processorRunner.runDestroy();
  }
}
/**
 * Builds nine records whose root field is the string {@code TEST_STRING + i} for
 * {@code i} in 0..8; the same text, as bytes, is passed to the record creator together with
 * {@code MIME}.
 *
 * @return the nine records in ascending index order
 */
public static List<Record> createStringRecords() {
  final int total = 9;
  List<Record> result = new ArrayList<>(total);
  for (int index = 0; index < total; index++) {
    Record current =
        RecordCreator.create("s", "s:1", (TEST_STRING + index).getBytes(), MIME);
    current.set(Field.create((TEST_STRING + index)));
    result.add(current);
  }
  return result;
}
@Test public void testRenameMultipleListElementsWithConstIdxExpr() throws StageException { FieldRenamerConfig renameConfig1 = new FieldRenamerConfig(); renameConfig1.fromFieldExpression = "/listOfInts[0]"; renameConfig1.toFieldExpression = "/nonExisting0"; FieldRenamerConfig renameConfig2 = new FieldRenamerConfig(); renameConfig2.fromFieldExpression = "/listOfInts[1]"; renameConfig2.toFieldExpression = "/nonExisting1"; FieldRenamerConfig renameConfig3 = new FieldRenamerConfig(); renameConfig3.fromFieldExpression = "/listOfInts[2]"; renameConfig3.toFieldExpression = "/nonExisting2"; FieldRenamerProcessorErrorHandler errorHandler = new FieldRenamerProcessorErrorHandler(); errorHandler.nonExistingFromFieldHandling = OnStagePreConditionFailure.TO_ERROR; errorHandler.multipleFromFieldsMatching = OnStagePreConditionFailure.TO_ERROR; errorHandler.existingToFieldHandling = ExistingToFieldHandling.REPLACE; // Reverse order in configuration so as to preserve array indices FieldRenamerProcessor processor = new FieldRenamerProcessor( ImmutableList.of(renameConfig3, renameConfig2, renameConfig1), errorHandler); // Test non-existent source with existing target field ProcessorRunner runner = new ProcessorRunner.Builder(FieldRenamerDProcessor.class, processor) .addOutputLane("a") .build(); runner.runInit(); try { Map<String, Field> map = new LinkedHashMap<>(); map.put( "listOfInts", Field.create( ImmutableList.of( Field.create(Field.Type.INTEGER, 1), Field.create(Field.Type.INTEGER, 2), Field.create(Field.Type.INTEGER, 3)))); Record record = RecordCreator.create("s", "s:1"); record.set(Field.create(map)); StageRunner.Output output = runner.runProcess(ImmutableList.of(record)); Assert.assertEquals(1, output.getRecords().get("a").size()); Map<String, Field> result = output.getRecords().get("a").get(0).get().getValueAsMap(); Assert.assertTrue(result.containsKey("listOfInts")); Assert.assertTrue(result.get("listOfInts").getValueAsList().isEmpty()); 
Assert.assertTrue(result.containsKey("nonExisting0")); Assert.assertTrue(result.containsKey("nonExisting1")); Assert.assertTrue(result.containsKey("nonExisting2")); Assert.assertEquals(1, result.get("nonExisting0").getValueAsInteger()); Assert.assertEquals(2, result.get("nonExisting1").getValueAsInteger()); Assert.assertEquals(3, result.get("nonExisting2").getValueAsInteger()); } finally { runner.runDestroy(); } }
/**
 * Looks up the country name for one IP address supplied in three encodings (int, int rendered
 * as a string, dotted string) and expects "United States" for each, with no error records and
 * six fields in the output map.
 */
@Test
public void testLookup() throws Exception {
  String ip = "128.101.101.101";

  List<GeolocationFieldConfig> lookups = new ArrayList<>();

  GeolocationFieldConfig fromInt = new GeolocationFieldConfig();
  fromInt.inputFieldName = "/ipAsInt";
  fromInt.outputFieldName = "/intIpCountry";
  fromInt.targetType = GeolocationField.COUNTRY_NAME;
  lookups.add(fromInt);

  GeolocationFieldConfig fromIntString = new GeolocationFieldConfig();
  fromIntString.inputFieldName = "/ipAsIntString";
  fromIntString.outputFieldName = "/intStringIpCountry";
  fromIntString.targetType = GeolocationField.COUNTRY_NAME;
  lookups.add(fromIntString);

  GeolocationFieldConfig fromString = new GeolocationFieldConfig();
  fromString.inputFieldName = "/ipAsString";
  fromString.outputFieldName = "/stringIpCountry";
  fromString.targetType = GeolocationField.COUNTRY_NAME;
  lookups.add(fromString);

  ProcessorRunner runner =
      new ProcessorRunner.Builder(GeolocationDProcessor.class)
          .addConfiguration("fieldTypeConverterConfigs", lookups)
          .addConfiguration("geoIP2DBFile", databaseFile.getAbsolutePath())
          .addOutputLane("a")
          .build();
  runner.runInit();

  try {
    Map<String, Field> inputFields = new LinkedHashMap<>();
    inputFields.put("ipAsInt", Field.create(GeolocationProcessor.ipAsStringToInt(ip)));
    inputFields.put(
        "ipAsIntString",
        Field.create(String.valueOf(GeolocationProcessor.ipAsStringToInt(ip))));
    inputFields.put("ipAsString", Field.create(ip));
    Record input = RecordCreator.create("s", "s:1");
    input.set(Field.create(inputFields));

    StageRunner.Output produced = runner.runProcess(ImmutableList.of(input));
    Assert.assertEquals(0, runner.getErrorRecords().size());
    Assert.assertEquals(1, produced.getRecords().get("a").size());

    Field root = produced.getRecords().get("a").get(0).get();
    Assert.assertTrue(root.getValue() instanceof Map);
    Map<String, Field> result = root.getValueAsMap();

    // Three inputs plus three looked-up outputs.
    Assert.assertEquals(String.valueOf(result), 6, result.size());
    Assert.assertEquals(
        "United States",
        Utils.checkNotNull(result.get("intStringIpCountry"), "intStringIpCountry").getValue());
    Assert.assertEquals(
        "United States",
        Utils.checkNotNull(result.get("intIpCountry"), "intIpCountry").getValue());
    Assert.assertEquals(
        "United States",
        Utils.checkNotNull(result.get("stringIpCountry"), "stringIpCountry").getValue());
  } finally {
    runner.runDestroy();
  }
}
/**
 * Builds twenty map-typed records; record {@code i} (0..19) holds {@code name = "NAME" + i}
 * and {@code lastStatusChange = i}.
 *
 * @return the twenty records in ascending index order
 * @throws IOException declared by the signature; nothing in this body raises it
 */
public static List<Record> produce20Records() throws IOException {
  List<Record> produced = new ArrayList<>();
  int index = 0;
  while (index < 20) {
    Map<String, Field> fields = new HashMap<>();
    Record current = RecordCreator.create();
    fields.put("name", Field.create("NAME" + index));
    fields.put("lastStatusChange", Field.create(index));
    current.set(Field.create(fields));
    produced.add(current);
    index++;
  }
  return produced;
}
/**
 * Splits the string value at {@code fieldPath} on {@code separatorStr} into at most
 * {@code fieldPaths.length} pieces and stores piece {@code i} at {@code fieldPaths[i]}.
 *
 * <p>A missing or null source field (SPLITTER_01) or too few pieces (SPLITTER_02) is a
 * pre-condition failure: unless the stage is configured with
 * {@link OnStagePreConditionFailure#CONTINUE} the record is rejected. When continuing, target
 * paths without a matching piece receive a null STRING field.
 *
 * @param record the record to split in place
 * @param batchMaker receives the record once all target paths have been written
 * @throws StageException as {@link OnRecordErrorException} when the source value cannot be
 *     read as a string (SPLITTER_04), a target path cannot be set (SPLITTER_05), or a
 *     pre-condition failure is not configured to continue
 */
@Override
protected void process(Record record, SingleLaneBatchMaker batchMaker) throws StageException {
  Field source = record.get(fieldPath);
  String[] pieces = null;
  ErrorCode failure = null;

  if (source != null && source.getValue() != null) {
    String text;
    try {
      text = source.getValueAsString();
    } catch (IllegalArgumentException e) {
      throw new OnRecordErrorException(Errors.SPLITTER_04, fieldPath, source.getType().name());
    }
    // The limit keeps any surplus separators inside the last piece.
    pieces = text.split(separatorStr, fieldPaths.length);
    if (pieces.length < fieldPaths.length) {
      failure = Errors.SPLITTER_02;
    }
  } else {
    failure = Errors.SPLITTER_01;
  }

  // Reject the record unless the failure is configured to be tolerated.
  if (failure != null && onStagePreConditionFailure != OnStagePreConditionFailure.CONTINUE) {
    throw new OnRecordErrorException(failure, record.getHeader().getSourceId(), fieldPath);
  }

  for (int target = 0; target < fieldPaths.length; target++) {
    try {
      boolean hasPiece = pieces != null && pieces.length > target;
      Field value = hasPiece ? Field.create(pieces[target]) : Field.create(Field.Type.STRING, null);
      record.set(fieldPaths[target], value);
    } catch (IllegalArgumentException e) {
      throw new OnRecordErrorException(
          Errors.SPLITTER_05, fieldPath, record.getHeader().getSourceId(), e.toString());
    }
  }

  if (removeUnsplitValue) {
    record.delete(fieldPath);
  }
  batchMaker.addRecord(record);
}
@Test public void testNewFieldWithTypedNull() throws Exception { // initial data in record is empty Record record = RecordCreator.create(); Map<String, Field> map = new HashMap<>(); record.set(Field.create(map)); final String script = Resources.toString(Resources.getResource("AssignTypedNullField.groovy"), Charsets.UTF_8); Processor processor = new GroovyProcessor(ProcessingMode.BATCH, script); ScriptingProcessorTestUtil.verifyTypedFieldWithNullValue( GroovyProcessor.class, processor, record); }
@Test public void testRegexInComplexListType() throws StageException { FieldRenamerConfig renameConfig = new FieldRenamerConfig(); // Any field containing a non-word character should be in single quotes renameConfig.fromFieldExpression = "/(*)[(*)]/'SQL(#)(.*)'"; renameConfig.toFieldExpression = "/$1[$2]/SQL$4"; FieldRenamerProcessorErrorHandler errorHandler = new FieldRenamerProcessorErrorHandler(); errorHandler.nonExistingFromFieldHandling = OnStagePreConditionFailure.TO_ERROR; errorHandler.multipleFromFieldsMatching = OnStagePreConditionFailure.TO_ERROR; errorHandler.existingToFieldHandling = ExistingToFieldHandling.TO_ERROR; FieldRenamerProcessor processor = new FieldRenamerProcessor(ImmutableList.of(renameConfig), errorHandler); ProcessorRunner runner = new ProcessorRunner.Builder(FieldRenamerDProcessor.class, processor) .setOnRecordError(OnRecordError.TO_ERROR) .addOutputLane("a") .build(); runner.runInit(); try { Map<String, Field> innerMap1 = new LinkedHashMap<>(); innerMap1.put("SQL#1", Field.create(Field.Type.STRING, "foo1")); Map<String, Field> innerMap2 = new LinkedHashMap<>(); innerMap2.put("SQL#2", Field.create(Field.Type.STRING, "foo2")); List<Field> list = new LinkedList<>(); list.add(Field.create(innerMap1)); list.add(Field.create(innerMap2)); Map<String, Field> map = new HashMap<>(); map.put("list", Field.create(list)); Record record = RecordCreator.create("s", "s:1"); record.set(Field.create(map)); StageRunner.Output output = runner.runProcess(ImmutableList.of(record)); Assert.assertEquals(1, output.getRecords().get("a").size()); Record r = output.getRecords().get("a").get(0); Assert.assertFalse(r.getEscapedFieldPaths().contains("/list[0]/'SQL#1'")); Assert.assertFalse(r.getEscapedFieldPaths().contains("/list[1]/'SQL#2'")); Assert.assertTrue(r.getEscapedFieldPaths().contains("/list[0]/SQL1")); Assert.assertTrue(r.getEscapedFieldPaths().contains("/list[1]/SQL2")); } finally { runner.runDestroy(); } }
/**
 * Writes a single new-table schema record for {@code default.tbl} (string columns
 * {@code name} and {@code surname}, string partition {@code dt}) to a
 * {@link HiveMetastoreTarget} and checks the resulting table structure.
 *
 * @return the event records reported by the runner after the write
 * @throws Exception if the runner fails to initialise, the write fails, or the table
 *     structure check fails
 */
@SuppressWarnings("unchecked")
public List<Record> runNewTableRecord() throws Exception {
  HiveMetastoreTarget hiveTarget = new HiveMetastoreTargetBuilder().build();

  TargetRunner runner =
      new TargetRunner.Builder(HiveMetastoreTarget.class, hiveTarget)
          .setOnRecordError(OnRecordError.STOP_PIPELINE)
          .build();
  runner.runInit();

  // Everything after a successful init runs inside the try so the runner is destroyed even
  // when the write or one of the checks fails.
  try {
    LinkedHashMap<String, HiveTypeInfo> columns = new LinkedHashMap<>();
    columns.put("name", HiveType.STRING.getSupport().generateHiveTypeInfoFromResultSet("STRING"));
    columns.put(
        "surname", HiveType.STRING.getSupport().generateHiveTypeInfoFromResultSet("STRING"));

    LinkedHashMap<String, HiveTypeInfo> partitions = new LinkedHashMap<>();
    partitions.put("dt", HiveType.STRING.getSupport().generateHiveTypeInfoFromResultSet("STRING"));

    Field newTableField =
        HiveMetastoreUtil.newSchemaMetadataFieldBuilder(
            "default",
            "tbl",
            columns,
            partitions,
            true,
            BaseHiveIT.getDefaultWareHouseDir(),
            HiveMetastoreUtil.generateAvroSchema(columns, "tbl"));

    Record record = RecordCreator.create();
    record.set(newTableField);
    Assert.assertTrue(HiveMetastoreUtil.isSchemaChangeRecord(record));

    runner.runWrite(ImmutableList.of(record));

    assertTableStructure(
        "default.tbl",
        ImmutablePair.of("tbl.name", Types.VARCHAR),
        ImmutablePair.of("tbl.surname", Types.VARCHAR),
        ImmutablePair.of("tbl.dt", Types.VARCHAR));

    return runner.getEventRecords();
  } finally {
    runner.runDestroy();
  }
}
@Test public void testRenameMapField() throws StageException { FieldRenamerConfig renameConfig = new FieldRenamerConfig(); // Any field containing a non-word character should be in single quotes renameConfig.fromFieldExpression = "/first"; renameConfig.toFieldExpression = "/second"; FieldRenamerProcessorErrorHandler errorHandler = new FieldRenamerProcessorErrorHandler(); errorHandler.nonExistingFromFieldHandling = OnStagePreConditionFailure.CONTINUE; errorHandler.multipleFromFieldsMatching = OnStagePreConditionFailure.TO_ERROR; errorHandler.existingToFieldHandling = ExistingToFieldHandling.TO_ERROR; FieldRenamerProcessor processor = new FieldRenamerProcessor(ImmutableList.of(renameConfig), errorHandler); ProcessorRunner runner = new ProcessorRunner.Builder(FieldRenamerDProcessor.class, processor) .setOnRecordError(OnRecordError.TO_ERROR) .addOutputLane("a") .build(); runner.runInit(); Map<String, Field> renameableInnerMap = new HashMap<>(); renameableInnerMap.put("value", Field.create(Field.Type.STRING, "value")); Map<String, Field> map = new LinkedHashMap<>(); map.put("first", Field.create(renameableInnerMap)); Record record = RecordCreator.create("s", "s:1"); record.set(Field.create(Field.Type.MAP, map)); try { StageRunner.Output output = runner.runProcess(ImmutableList.of(record)); Assert.assertEquals(1, output.getRecords().get("a").size()); Record r = output.getRecords().get("a").get(0); Assert.assertFalse(r.has("/first")); Assert.assertFalse(r.has("/first/value")); Assert.assertTrue(r.has("/second")); Assert.assertTrue(r.has("/second/value")); } finally { runner.runDestroy(); } }
@Test public void testTargetFieldExistsAppendNumbers() throws StageException { FieldRenamerConfig renameConfig = new FieldRenamerConfig(); // Any field containing a non-word character should be in single quotes renameConfig.fromFieldExpression = "/field"; renameConfig.toFieldExpression = "/col"; FieldRenamerProcessorErrorHandler errorHandler = new FieldRenamerProcessorErrorHandler(); errorHandler.nonExistingFromFieldHandling = OnStagePreConditionFailure.CONTINUE; errorHandler.multipleFromFieldsMatching = OnStagePreConditionFailure.TO_ERROR; errorHandler.existingToFieldHandling = ExistingToFieldHandling.APPEND_NUMBERS; FieldRenamerProcessor processor = new FieldRenamerProcessor(ImmutableList.of(renameConfig), errorHandler); // Test non-existent source with existing target field ProcessorRunner runner = new ProcessorRunner.Builder(FieldRenamerDProcessor.class, processor) .addOutputLane("a") .build(); runner.runInit(); try { Map<String, Field> map = new LinkedHashMap<>(); map.put("field", Field.create(Field.Type.STRING, "field")); map.put("col", Field.create(Field.Type.STRING, "col")); Record record = RecordCreator.create("s", "s:1"); record.set(Field.create(Field.Type.MAP, map)); StageRunner.Output output = runner.runProcess(ImmutableList.of(record)); Assert.assertEquals(1, output.getRecords().get("a").size()); Record r = output.getRecords().get("a").get(0); Assert.assertFalse(r.has("/field")); Assert.assertTrue(r.has("/col")); Assert.assertTrue(r.has("/col1")); Assert.assertEquals("col", r.get("/col").getValueAsString()); Assert.assertEquals("field", r.get("/col1").getValueAsString()); } finally { runner.runDestroy(); } }
@Test public void testGetFieldNull() throws Exception { // initial data in record Record record = RecordCreator.create(); Map<String, Field> map = new HashMap<>(); map.put("null_int", Field.create(Field.Type.INTEGER, null)); map.put("null_string", Field.create(Field.Type.STRING, null)); map.put("null_boolean", Field.create(Field.Type.BOOLEAN, null)); map.put("null_list", Field.create(Field.Type.LIST, null)); map.put("null_map", Field.create(Field.Type.MAP, null)); // original record has value in the field, so getFieldNull should return the value map.put("null_datetime", Field.createDatetime(new Date())); record.set(Field.create(map)); final String script = Resources.toString(Resources.getResource("GetFieldNullScript.groovy"), Charsets.UTF_8); Processor processor = new GroovyProcessor(ProcessingMode.BATCH, script); ScriptingProcessorTestUtil.verifyNullField(GroovyProcessor.class, processor, record); }
@Test public void testTargetFieldExistsReplace() throws StageException { // Standard overwrite condition. Source and target fields exist FieldRenamerConfig renameConfig = new FieldRenamerConfig(); renameConfig.fromFieldExpression = "/existing"; renameConfig.toFieldExpression = "/overwrite"; FieldRenamerProcessorErrorHandler errorHandler = new FieldRenamerProcessorErrorHandler(); errorHandler.nonExistingFromFieldHandling = OnStagePreConditionFailure.CONTINUE; errorHandler.multipleFromFieldsMatching = OnStagePreConditionFailure.TO_ERROR; errorHandler.existingToFieldHandling = ExistingToFieldHandling.REPLACE; FieldRenamerProcessor processor = new FieldRenamerProcessor(ImmutableList.of(renameConfig), errorHandler); // Test non-existent source with existing target field ProcessorRunner runner = new ProcessorRunner.Builder(FieldRenamerDProcessor.class, processor) .addOutputLane("a") .build(); runner.runInit(); try { Map<String, Field> map = new LinkedHashMap<>(); map.put("existing", Field.create(Field.Type.STRING, "foo")); map.put("overwrite", Field.create(Field.Type.STRING, "bar")); Record record = RecordCreator.create("s", "s:1"); record.set(Field.create(map)); StageRunner.Output output = runner.runProcess(ImmutableList.of(record)); Assert.assertEquals(1, output.getRecords().get("a").size()); Field field = output.getRecords().get("a").get(0).get(); Assert.assertTrue(field.getValue() instanceof Map); Map<String, Field> result = field.getValueAsMap(); Assert.assertEquals(String.valueOf(result), 1, result.size()); Assert.assertTrue(result.containsKey("overwrite")); Assert.assertTrue(!result.containsKey("existing")); Assert.assertEquals("foo", result.get("overwrite").getValue()); } finally { runner.runDestroy(); } }
@Test public void testSourceWithQuotedSubstring() throws StageException { // source should be processed as quoted string. FieldRenamerConfig renameConfig1 = new FieldRenamerConfig(); renameConfig1.fromFieldExpression = "/'attr|OrderNum'"; renameConfig1.toFieldExpression = "/theOrderNum"; FieldRenamerProcessorErrorHandler errorHandler = new FieldRenamerProcessorErrorHandler(); errorHandler.nonExistingFromFieldHandling = OnStagePreConditionFailure.CONTINUE; errorHandler.multipleFromFieldsMatching = OnStagePreConditionFailure.TO_ERROR; errorHandler.existingToFieldHandling = ExistingToFieldHandling.REPLACE; FieldRenamerProcessor processor = new FieldRenamerProcessor(ImmutableList.of(renameConfig1), errorHandler); // Test non-existent source with existing target field ProcessorRunner runner = new ProcessorRunner.Builder(FieldRenamerDProcessor.class, processor) .addOutputLane("a") .build(); runner.runInit(); try { Map<String, Field> map = new LinkedHashMap<>(); map.put("attr|OrderNum", Field.create(Field.Type.STRING, "foo")); Record record = RecordCreator.create("s", "s:1"); record.set(Field.create(map)); StageRunner.Output output = runner.runProcess(ImmutableList.of(record)); Assert.assertEquals(1, output.getRecords().get("a").size()); Field field = output.getRecords().get("a").get(0).get(); Assert.assertTrue(field.getValue() instanceof Map); Map<String, Field> result = field.getValueAsMap(); Assert.assertEquals(String.valueOf(result), 1, result.size()); Assert.assertFalse(result.containsKey("/'attr|OrderNum'")); Assert.assertFalse(result.containsKey("'attr|OrderNum'")); Assert.assertFalse(result.containsKey("attr|OrderNum")); Assert.assertTrue(result.containsKey("theOrderNum")); } finally { runner.runDestroy(); } }
/**
 * Reads the classpath resource {@code testFlumeTarget.csv} line by line and turns every line
 * into one record: the line is split on ',' and each column becomes a map field with a single
 * {@code value} entry, collected into a list that is set as the record's root.
 *
 * @return one record per line of the resource, in file order
 * @throws IOException if the resource cannot be opened or read
 */
public static List<Record> createCsvRecords() throws IOException {
  List<Record> records = new ArrayList<>();
  String csvPath =
      FlumeTestUtil.class.getClassLoader().getResource("testFlumeTarget.csv").getFile();

  // try-with-resources closes the reader (and the underlying file) on every exit path; it
  // was previously left open.
  try (BufferedReader bufferedReader = new BufferedReader(new FileReader(csvPath))) {
    String line;
    while ((line = bufferedReader.readLine()) != null) {
      String[] columns = line.split(",");
      List<Field> list = new ArrayList<>();
      for (String column : columns) {
        Map<String, Field> map = new LinkedHashMap<>();
        map.put("value", Field.create(column));
        list.add(Field.create(map));
      }
      Record record = RecordCreator.create("s", "s:1", null, null);
      record.set(Field.create(list));
      records.add(record);
    }
  }
  return records;
}
@Test public void testNonExistingSourceAndTargetFields() throws StageException { // If neither the source or target fields exist, then field renaming is a noop, and should // succeed FieldRenamerConfig renameConfig = new FieldRenamerConfig(); renameConfig.fromFieldExpression = "/nonExisting"; renameConfig.toFieldExpression = "/alsoNonExisting"; FieldRenamerProcessorErrorHandler errorHandler = new FieldRenamerProcessorErrorHandler(); errorHandler.nonExistingFromFieldHandling = OnStagePreConditionFailure.CONTINUE; errorHandler.multipleFromFieldsMatching = OnStagePreConditionFailure.TO_ERROR; errorHandler.existingToFieldHandling = ExistingToFieldHandling.TO_ERROR; FieldRenamerProcessor processor = new FieldRenamerProcessor(ImmutableList.of(renameConfig), errorHandler); ProcessorRunner runner = new ProcessorRunner.Builder(FieldRenamerDProcessor.class, processor) .addOutputLane("a") .build(); runner.runInit(); try { Map<String, Field> map = new LinkedHashMap<>(); map.put("name", Field.create(Field.Type.STRING, null)); Record record = RecordCreator.create("s", "s:1"); record.set(Field.create(map)); StageRunner.Output output = runner.runProcess(ImmutableList.of(record)); Assert.assertEquals(1, output.getRecords().get("a").size()); Field field = output.getRecords().get("a").get(0).get(); Assert.assertTrue(field.getValue() instanceof Map); Map<String, Field> result = field.getValueAsMap(); Assert.assertEquals(String.valueOf(result), 1, result.size()); Assert.assertTrue(result.containsKey("name")); Assert.assertEquals(null, result.get("name").getValue()); } finally { runner.runDestroy(); } }
@Test public void testTargetFieldExistsError() throws StageException { // If overwrite is set to false, overwriting should result in an error FieldRenamerConfig renameConfig = new FieldRenamerConfig(); renameConfig.fromFieldExpression = "/existing"; renameConfig.toFieldExpression = "/overwrite"; FieldRenamerProcessorErrorHandler errorHandler = new FieldRenamerProcessorErrorHandler(); errorHandler.nonExistingFromFieldHandling = OnStagePreConditionFailure.CONTINUE; errorHandler.multipleFromFieldsMatching = OnStagePreConditionFailure.TO_ERROR; errorHandler.existingToFieldHandling = ExistingToFieldHandling.TO_ERROR; FieldRenamerProcessor processor = new FieldRenamerProcessor(ImmutableList.of(renameConfig), errorHandler); // Test non-existent source with existing target field ProcessorRunner runner = new ProcessorRunner.Builder(FieldRenamerDProcessor.class, processor) .setOnRecordError(OnRecordError.TO_ERROR) .addOutputLane("a") .build(); runner.runInit(); try { Map<String, Field> map = new LinkedHashMap<>(); map.put("existing", Field.create(Field.Type.STRING, "foo")); map.put("overwrite", Field.create(Field.Type.STRING, "bar")); Record record = RecordCreator.create("s", "s:1"); record.set(Field.create(map)); StageRunner.Output output = runner.runProcess(ImmutableList.of(record)); Assert.assertEquals(0, output.getRecords().get("a").size()); Assert.assertEquals(1, runner.getErrorRecords().size()); } finally { runner.runDestroy(); } }
@Test public void testChangeFieldToTypedNull() throws Exception { // initial data in record Record record = RecordCreator.create(); Map<String, Field> map = new HashMap<>(); map.put("null_int", Field.create("this is string field")); map.put("null_string", Field.create(123L)); map.put("null_date", Field.create(true)); map.put("null_decimal", Field.createDate(null)); map.put("null_short", Field.create((short) 1000)); map.put("null_char", Field.create('c')); // add a list field List<Field> list1 = new LinkedList<>(); list1.add(Field.create("dummy field list")); map.put("null_list", Field.create(list1)); record.set(Field.create(map)); final String script = Resources.toString(Resources.getResource("AssignTypedNullField.groovy"), Charsets.UTF_8); Processor processor = new GroovyProcessor(ProcessingMode.BATCH, script); ScriptingProcessorTestUtil.verifyTypedFieldWithNullValue( GroovyProcessor.class, processor, record); }
/**
 * Renaming {@code /a} to {@code /b/c/d} on a record that only contains {@code a} must send
 * the record to error with code {@code FIELD_RENAMER_04}.
 */
@Test
public void testUnreachableFields() throws Exception {
  FieldRenamerConfig config = new FieldRenamerConfig();
  config.toFieldExpression = "/b/c/d";
  config.fromFieldExpression = "/a";

  FieldRenamerProcessorErrorHandler handler = new FieldRenamerProcessorErrorHandler();
  handler.existingToFieldHandling = ExistingToFieldHandling.REPLACE;
  handler.multipleFromFieldsMatching = OnStagePreConditionFailure.TO_ERROR;
  handler.nonExistingFromFieldHandling = OnStagePreConditionFailure.TO_ERROR;

  FieldRenamerProcessor renamer = new FieldRenamerProcessor(ImmutableList.of(config), handler);

  ProcessorRunner runner = new ProcessorRunner.Builder(FieldRenamerDProcessor.class, renamer)
      .addOutputLane("a")
      .setOnRecordError(OnRecordError.TO_ERROR)
      .build();
  runner.runInit();

  try {
    // The record has no "b" field at all, so the target path /b/c/d has no parent to attach to.
    Map<String, Field> input = new LinkedHashMap<>();
    input.put("a", Field.create(123));
    Record inputRecord = RecordCreator.create("s", "s:1");
    inputRecord.set(Field.create(input));

    StageRunner.Output output = runner.runProcess(ImmutableList.of(inputRecord));

    Assert.assertEquals(0, output.getRecords().get("a").size());
    Assert.assertEquals(1, runner.getErrorRecords().size());

    Record failed = runner.getErrorRecords().get(0);
    String expectedCode = Errors.FIELD_RENAMER_04.name();
    Assert.assertEquals(expectedCode, failed.getHeader().getErrorCode());
  } finally {
    runner.runDestroy();
  }
}
/**
 * Runs the JDBC lookup with {@code mapQuery} over a single record and checks that the
 * {@code P_ID} column is mapped into {@code /p_id} with integer value 1.
 */
@Test
public void testSingleRecordMap() throws Exception {
  List<JdbcFieldColumnMapping> mappings =
      ImmutableList.of(new JdbcFieldColumnMapping("P_ID", "/p_id"));

  JdbcLookupDProcessor lookup = new JdbcLookupDProcessor();
  lookup.hikariConfigBean = createConfigBean(h2ConnectionString, username, password);

  ProcessorRunner runner = new ProcessorRunner.Builder(JdbcLookupDProcessor.class, lookup)
      .addConfiguration("query", mapQuery)
      .addConfiguration("columnMappings", mappings)
      .addConfiguration("maxClobSize", 1000)
      .addConfiguration("maxBlobSize", 1000)
      .addOutputLane("lane")
      .build();

  // Input record: first_name / last_name only; /p_id is expected to come from the lookup.
  Record input = RecordCreator.create();
  LinkedHashMap<String, Field> person = new LinkedHashMap<>();
  person.put("first_name", Field.create("Adam"));
  person.put("last_name", Field.create("Kunicki"));
  input.set(Field.create(person));
  List<Record> batch = ImmutableList.of(input);

  runner.runInit();
  try {
    StageRunner.Output output = runner.runProcess(batch);
    Assert.assertEquals(1, output.getRecords().get("lane").size());

    Record enriched = output.getRecords().get("lane").get(0);
    Assert.assertNotEquals(null, enriched.get("/p_id"));
    Assert.assertEquals(1, enriched.get("/p_id").getValueAsInteger());
  } finally {
    runner.runDestroy();
  }
}
/**
 * Parses a Kafka message payload into records.
 *
 * <p>The parser obtained from {@code parserFactory} is read until it returns {@code null}.
 * An {@link IOException} or {@code DataParserException} is passed to
 * {@code handleException}; records parsed before the failure stay in the result unless
 * that handler throws.
 *
 * <p>When {@code produceSingleRecordPerMessage} is set and at least one record was parsed,
 * the root fields of all parsed records are collected into a list field, which replaces the
 * root of the first record; that first record is then the only one returned.
 *
 * @param messageId identifier handed to the parser factory and to the exception handler
 * @param payload raw message bytes
 * @return the parsed records; empty when nothing could be parsed
 * @throws StageException declared for {@code handleException}, whose behavior is not visible
 *     here
 */
protected List<Record> processKafkaMessage(String messageId, byte[] payload)
    throws StageException {
  List<Record> records = new ArrayList<>();
  try (DataParser parser = parserFactory.getParser(messageId, payload)) {
    Record record = parser.parse();
    while (record != null) {
      records.add(record);
      record = parser.parse();
    }
  } catch (IOException | DataParserException ex) {
    handleException(messageId, ex);
  }
  // Only collapse when there is something to collapse: with an empty payload or a handled
  // parse failure the list is empty and records.get(0) would throw IndexOutOfBoundsException.
  if (produceSingleRecordPerMessage && !records.isEmpty()) {
    List<Field> list = new ArrayList<>();
    for (Record record : records) {
      list.add(record.get());
    }
    Record record = records.get(0);
    record.set(Field.create(list));
    records.clear();
    records.add(record);
  }
  return records;
}
/**
 * Parses a message into records according to {@code dataFormat}.
 *
 * <p>For {@code DataFormat.AVRO} the message is cast to {@code byte[]} and at most one record
 * is read from the parser. For every other format the message is converted with
 * {@link String#valueOf(Object)} and the parser is read until it returns {@code null}.
 * An {@link IOException} or {@code DataParserException} is logged at debug level and passed
 * to {@code handleException}; records parsed before the failure stay in the result unless
 * that handler throws.
 *
 * <p>When {@code produceSingleRecordPerMessage} is set and at least one record was parsed,
 * the root fields of all parsed records are collected into a list field, which replaces the
 * root of the first record; that first record is then the only one returned.
 *
 * @param messageId identifier handed to the parser factory and to the exception handler
 * @param message the message; must be a {@code byte[]} when the data format is AVRO
 * @return the parsed records; empty when nothing could be parsed
 * @throws StageException declared for {@code handleException}, whose behavior is not visible
 *     here
 */
protected List<Record> processMessage(String messageId, Object message) throws StageException {
  List<Record> records = new ArrayList<>();
  if (dataFormat == DataFormat.AVRO) {
    try (DataParser parser = parserFactory.getParser(messageId, (byte[]) message)) {
      Record record = parser.parse();
      if (record != null) {
        records.add(record);
      }
    } catch (IOException | DataParserException ex) {
      LOG.debug("Got exception: '{}'", ex, ex);
      handleException(messageId, ex);
    }
  } else {
    try (DataParser parser = parserFactory.getParser(messageId, String.valueOf(message))) {
      Record record = parser.parse();
      while (record != null) {
        records.add(record);
        record = parser.parse();
      }
    } catch (IOException | DataParserException ex) {
      LOG.debug("Got exception: '{}'", ex, ex);
      handleException(messageId, ex);
    }
  }
  // Only collapse when there is something to collapse: with an empty message or a handled
  // parse failure the list is empty and records.get(0) would throw IndexOutOfBoundsException.
  if (produceSingleRecordPerMessage && !records.isEmpty()) {
    List<Field> list = new ArrayList<>();
    for (Record record : records) {
      list.add(record.get());
    }
    Record record = records.get(0);
    record.set(Field.create(list));
    records.clear();
    records.add(record);
  }
  return records;
}
@Test public void testChangeFieldToTypedNull() throws Exception { // initial data in record Record record = RecordCreator.create(); Map<String, Field> map = new HashMap<>(); map.put("null_int", Field.create("this is string field")); map.put("null_string", Field.create(123L)); map.put("null_date", Field.create(true)); map.put("null_decimal", Field.createDate(null)); map.put("null_time", Field.createTime(new Date())); // add list field List<Field> list1 = new LinkedList<>(); list1.add(Field.create("dummy field list")); map.put("null_list", Field.create(list1)); // add map field Map<String, Field> map1 = new HashMap<>(); map1.put("dummy", Field.create("dummy field map")); map.put("null_map", Field.create(map1)); record.set(Field.create(map)); Processor processor = new JythonProcessor( ProcessingMode.RECORD, "for record in records:\n" + " record.value['null_int'] = NULL_INTEGER\n" + " record.value['null_date'] = NULL_DATE\n" + " record.value['null_decimal'] = NULL_DECIMAL\n" + " record.value['null_string'] = NULL_STRING\n" + " record.value['null_time'] = NULL_TIME\n" + " record.value['null_list'] = NULL_LIST\n" + " record.value['null_map'] = NULL_MAP\n" + " output.write(record)\n"); ScriptingProcessorTestUtil.verifyTypedFieldWithNullValue( JythonDProcessor.class, processor, record); }