コード例 #1
0
  /**
   * Renames quoted field names containing {@code #} at the start, in the middle, and at the end
   * (one record each) and checks that every output field's name equals the string value stored in
   * that field.
   */
  @Test
  public void testDifferentMatchesRegex() throws StageException {
    // Any field containing a non-word character should be in single quotes
    FieldRenamerConfig hashRename = new FieldRenamerConfig();
    hashRename.fromFieldExpression = "/'(.*)(#)(.*)'";
    hashRename.toFieldExpression = "/$1hash$3";

    // Every pre-condition failure is configured as an error.
    FieldRenamerProcessorErrorHandler handler = new FieldRenamerProcessorErrorHandler();
    handler.nonExistingFromFieldHandling = OnStagePreConditionFailure.TO_ERROR;
    handler.multipleFromFieldsMatching = OnStagePreConditionFailure.TO_ERROR;
    handler.existingToFieldHandling = ExistingToFieldHandling.TO_ERROR;

    ProcessorRunner runner =
        new ProcessorRunner.Builder(
                FieldRenamerDProcessor.class,
                new FieldRenamerProcessor(ImmutableList.of(hashRename), handler))
            .setOnRecordError(OnRecordError.STOP_PIPELINE)
            .addOutputLane("a")
            .build();
    runner.runInit();

    // '#' at the start of the name
    Map<String, Field> leadingHash = new LinkedHashMap<>();
    leadingHash.put("#abcd", Field.create("hashabcd"));
    Record leadingRecord = RecordCreator.create("s", "s:1");
    leadingRecord.set(Field.create(Field.Type.MAP, leadingHash));

    // '#' in the middle of the name
    Map<String, Field> middleHash = new LinkedHashMap<>();
    middleHash.put("ab#cd", Field.create("abhashcd"));
    Record middleRecord = RecordCreator.create("s", "s:2");
    middleRecord.set(Field.create(Field.Type.MAP, middleHash));

    // '#' at the end of the name
    Map<String, Field> trailingHash = new LinkedHashMap<>();
    trailingHash.put("abcd#", Field.create("abcdhash"));
    Record trailingRecord = RecordCreator.create("s", "s:3");
    trailingRecord.set(Field.create(Field.Type.MAP, trailingHash));

    try {
      StageRunner.Output result =
          runner.runProcess(ImmutableList.of(leadingRecord, middleRecord, trailingRecord));
      Assert.assertEquals(3, result.getRecords().get("a").size());
      for (Record renamed : result.getRecords().get("a")) {
        // The expected new name was stored up front as the field's own value.
        for (Map.Entry<String, Field> entry : renamed.get().getValueAsMap().entrySet()) {
          Assert.assertEquals(entry.getKey(), entry.getValue().getValueAsString());
        }
      }
    } finally {
      runner.runDestroy();
    }
  }
コード例 #2
0
  /**
   * Renames fields whose quoted names contain one of {@code # & @ |} so that the matched character
   * becomes {@code _}. All four sources map onto the already existing {@code a_b}, and the handler
   * is configured with {@code APPEND_NUMBERS}, so the test expects {@code a_b1} .. {@code a_b4}
   * next to the untouched {@code a_b}.
   */
  @Test
  public void testCategoryRegexInNonComplexType() throws StageException {
    FieldRenamerConfig renameConfig = new FieldRenamerConfig();
    // Any field containing a non-word character should be in single quotes
    renameConfig.fromFieldExpression = "/'(.*)[#&@|](.*)'";
    renameConfig.toFieldExpression = "/$1_$2";

    FieldRenamerProcessorErrorHandler errorHandler = new FieldRenamerProcessorErrorHandler();
    errorHandler.nonExistingFromFieldHandling = OnStagePreConditionFailure.CONTINUE;
    errorHandler.multipleFromFieldsMatching = OnStagePreConditionFailure.TO_ERROR;
    errorHandler.existingToFieldHandling = ExistingToFieldHandling.APPEND_NUMBERS;

    FieldRenamerProcessor processor =
        new FieldRenamerProcessor(ImmutableList.of(renameConfig), errorHandler);

    ProcessorRunner runner =
        new ProcessorRunner.Builder(FieldRenamerDProcessor.class, processor)
            .setOnRecordError(OnRecordError.TO_ERROR)
            .addOutputLane("a")
            .build();
    runner.runInit();

    try {
      // Insertion order matters for the expected numeric suffixes asserted below.
      Map<String, Field> map = new LinkedHashMap<>();
      map.put("a#b", Field.create(Field.Type.STRING, "foo1"));
      map.put("a_b", Field.create(Field.Type.STRING, "foo2"));
      map.put("a&b", Field.create(Field.Type.STRING, "foo3"));
      map.put("a|b", Field.create(Field.Type.STRING, "foo4"));
      map.put("a@b", Field.create(Field.Type.STRING, "foo5"));

      Record record = RecordCreator.create("s", "s:1");
      record.set(Field.create(Field.Type.MAP, map));

      StageRunner.Output output = runner.runProcess(ImmutableList.of(record));

      Assert.assertEquals(1, output.getRecords().get("a").size());
      Record r = output.getRecords().get("a").get(0);

      // None of the four source fields may survive the rename. The '@' variant is checked
      // explicitly here; asserting '&' twice would leave it uncovered.
      Assert.assertFalse(r.has("/'a#b'"));
      Assert.assertFalse(r.has("/'a&b'"));
      Assert.assertFalse(r.has("/'a|b'"));
      Assert.assertFalse(r.has("/'a@b'"));

      // The pre-existing target keeps its own value.
      Assert.assertTrue(r.has("/a_b"));
      Assert.assertEquals("foo2", r.get("/a_b").getValueAsString());

      Assert.assertTrue(r.has("/a_b1"));
      Assert.assertEquals("foo1", r.get("/a_b1").getValueAsString());

      Assert.assertTrue(r.has("/a_b2"));
      Assert.assertEquals("foo3", r.get("/a_b2").getValueAsString());

      Assert.assertTrue(r.has("/a_b3"));
      Assert.assertEquals("foo4", r.get("/a_b3").getValueAsString());

      Assert.assertTrue(r.has("/a_b4"));
      Assert.assertEquals("foo5", r.get("/a_b4").getValueAsString());
    } finally {
      runner.runDestroy();
    }
  }
コード例 #3
0
  /**
   * Runs a Jython script that assigns each {@code NULL_*} constant to a new field of an initially
   * empty map record, then hands the result to
   * {@code ScriptingProcessorTestUtil.verifyTypedFieldWithNullValue} for verification.
   */
  @Test
  public void testNewFieldWithTypedNull() throws Exception {
    // One assignment per typed-null constant, then the record is written out.
    final String script =
        "for record in records:\n"
            + "  record.value['null_int'] = NULL_INTEGER\n"
            + "  record.value['null_long'] = NULL_LONG\n"
            + "  record.value['null_float'] = NULL_FLOAT\n"
            + "  record.value['null_double'] = NULL_DOUBLE\n"
            + "  record.value['null_date'] = NULL_DATE\n"
            + "  record.value['null_datetime'] = NULL_DATETIME\n"
            + "  record.value['null_boolean'] = NULL_BOOLEAN\n"
            + "  record.value['null_decimal'] = NULL_DECIMAL\n"
            + "  record.value['null_byteArray'] = NULL_BYTE_ARRAY\n"
            + "  record.value['null_string'] = NULL_STRING\n"
            + "  record.value['null_list'] = NULL_LIST\n"
            + "  record.value['null_map'] = NULL_MAP\n"
            + "  record.value['null_time'] = NULL_TIME\n"
            + "  output.write(record)\n";

    // The input record starts out as an empty map.
    Map<String, Field> emptyRoot = new HashMap<>();
    Record input = RecordCreator.create();
    input.set(Field.create(emptyRoot));

    Processor jython = new JythonProcessor(ProcessingMode.RECORD, script);
    ScriptingProcessorTestUtil.verifyTypedFieldWithNullValue(JythonDProcessor.class, jython, input);
  }
コード例 #4
0
  /**
   * Builds a record with five typed-null fields plus one populated datetime field, runs a Jython
   * script that replaces each field whose {@code sdcFunctions.getFieldNull} result matches the
   * corresponding {@code NULL_*} constant, and delegates verification to
   * {@code ScriptingProcessorTestUtil.verifyNullField}.
   */
  @Test
  public void testGetFieldNull() throws Exception {
    // Typed nulls that the script is expected to detect and overwrite.
    Map<String, Field> root = new HashMap<>();
    root.put("null_int", Field.create(Field.Type.INTEGER, null));
    root.put("null_string", Field.create(Field.Type.STRING, null));
    root.put("null_boolean", Field.create(Field.Type.BOOLEAN, null));
    root.put("null_list", Field.create(Field.Type.LIST, null));
    root.put("null_map", Field.create(Field.Type.MAP, null));
    // original record has value in the field, so getFieldNull should return the value
    root.put("null_datetime", Field.createDatetime(new Date()));

    Record input = RecordCreator.create();
    input.set(Field.create(root));

    // The final comparison (null_datetime) should be false because that field holds a value.
    final String script =
        "for record in records:\n"
            + "  if sdcFunctions.getFieldNull(record, '/null_int') == NULL_INTEGER:\n"
            + "      record.value['null_int'] = 123 \n"
            + "  if sdcFunctions.getFieldNull(record, '/null_string') == NULL_STRING:\n"
            + "      record.value['null_string'] = 'test' \n"
            + "  if sdcFunctions.getFieldNull(record, '/null_boolean') == NULL_BOOLEAN:\n"
            + "      record.value['null_boolean'] = True \n"
            + "  if sdcFunctions.getFieldNull(record, '/null_list') is NULL_LIST:\n"
            + "      record.value['null_list'] = ['elem1', 'elem2'] \n"
            + "  if sdcFunctions.getFieldNull(record, '/null_map') == NULL_MAP:\n"
            + "      record.value['null_map'] = {'x': 'X', 'y': 'Y'} \n"
            + "  if sdcFunctions.getFieldNull(record, '/null_datetime') == NULL_DATETIME:\n"
            + "      record.value['null_datetime'] = NULL_DATETIME \n"
            + "  output.write(record);\n";

    Processor jython = new JythonProcessor(ProcessingMode.RECORD, script);
    // NOTE(review): the sibling typed-null test passes JythonDProcessor.class to the test util;
    // confirm whether JythonProcessor.class is intended here.
    ScriptingProcessorTestUtil.verifyNullField(JythonProcessor.class, jython, input);
  }
コード例 #5
0
  /**
   * Writes one new-partition metadata record ({@code default.tbl}, partition {@code dt=2016}) to a
   * freshly built {@link HiveMetastoreTarget} and returns the event records the runner collected.
   *
   * <p>All work after {@code runInit()} happens inside {@code try/finally}, so the runner is
   * destroyed even when building the record or writing it fails.
   *
   * @return the event records reported by the runner after the write
   * @throws Exception if initializing, writing to, or destroying the target fails
   */
  public List<Record> runNewPartitionRecord() throws Exception {
    HiveMetastoreTarget hiveTarget = new HiveMetastoreTargetBuilder().build();

    TargetRunner runner =
        new TargetRunner.Builder(HiveMetastoreTarget.class, hiveTarget)
            .setOnRecordError(OnRecordError.TO_ERROR)
            .build();
    runner.runInit();

    try {
      // NOTE(review): this check runs before anything is written; confirm whether it was meant
      // to follow runWrite() instead.
      Assert.assertEquals("There should be no error records", 0, runner.getErrorRecords().size());
      LinkedHashMap<String, String> partitionVals = new LinkedHashMap<>();
      partitionVals.put("dt", "2016");

      Field newPartitionField =
          HiveMetastoreUtil.newPartitionMetadataFieldBuilder(
              "default", "tbl", partitionVals, "/user/hive/warehouse/tbl/dt=2016");
      Record record = RecordCreator.create();
      record.set(newPartitionField);
      runner.runWrite(ImmutableList.of(record));

      return runner.getEventRecords();
    } finally {
      runner.runDestroy();
    }
  }
コード例 #6
0
  /**
   * Configures the field filter to KEEP only {@code /city}, which the input record does not
   * contain, and expects the output record to be an empty map.
   */
  @Test
  public void testKeepNonExistingFiled() throws StageException {
    ProcessorRunner runner =
        new ProcessorRunner.Builder(FieldFilterDProcessor.class)
            .addConfiguration("fields", ImmutableList.of("/city"))
            .addConfiguration("filterOperation", FilterOperation.KEEP)
            .addOutputLane("a")
            .build();
    runner.runInit();

    try {
      Map<String, Field> map = new LinkedHashMap<>();
      map.put("name", Field.create("a"));
      map.put("age", Field.create("b"));
      map.put("streetAddress", Field.create("c"));
      Record record = RecordCreator.create("s", "s:1");
      record.set(Field.create(map));

      StageRunner.Output output = runner.runProcess(ImmutableList.of(record));
      Assert.assertEquals(1, output.getRecords().get("a").size());
      Field field = output.getRecords().get("a").get(0).get();
      Assert.assertTrue(field.getValue() instanceof Map);
      Map<String, Field> result = field.getValueAsMap();
      // assertEquals with the map as message shows the leftover fields when the check fails.
      Assert.assertEquals(String.valueOf(result), 0, result.size());
    } finally {
      runner.runDestroy();
    }
  }
コード例 #7
0
  /**
   * Applies two rename expressions ({@code /sql(.*)} and {@code /s(.*)}) that both match the single
   * field {@code sqlField}.
   *
   * <ul>
   *   <li>With {@code multipleFromFieldsMatching = TO_ERROR} and {@code STOP_PIPELINE}, processing
   *       must throw an {@link OnRecordErrorException} carrying {@code FIELD_RENAMER_03}.
   *   <li>With {@code multipleFromFieldsMatching = CONTINUE}, the record must be emitted with
   *       {@code /sqlField} still present.
   * </ul>
   *
   * <p>Each runner is destroyed in a {@code finally} block so that a failing phase does not leave
   * an initialized stage behind.
   */
  @Test
  public void testMultipleRegexMatchingSameField() throws StageException {
    FieldRenamerConfig renameConfig1 = new FieldRenamerConfig();
    renameConfig1.fromFieldExpression = "/sql(.*)";
    renameConfig1.toFieldExpression = "/sqlRename$1";

    FieldRenamerConfig renamerConfig2 = new FieldRenamerConfig();
    renamerConfig2.fromFieldExpression = "/s(.*)";
    renamerConfig2.toFieldExpression = "/sRename$1";

    Map<String, Field> map = new LinkedHashMap<>();
    map.put("sqlField", Field.create(Field.Type.STRING, "foo"));
    Record record = RecordCreator.create("s", "s:1");
    record.set(Field.create(Field.Type.MAP, map));

    // Phase 1: a field matched by several expressions is treated as an error.
    FieldRenamerProcessorErrorHandler strictHandler = new FieldRenamerProcessorErrorHandler();
    strictHandler.nonExistingFromFieldHandling = OnStagePreConditionFailure.CONTINUE;
    strictHandler.multipleFromFieldsMatching = OnStagePreConditionFailure.TO_ERROR;
    strictHandler.existingToFieldHandling = ExistingToFieldHandling.TO_ERROR;

    FieldRenamerProcessor strictProcessor =
        new FieldRenamerProcessor(ImmutableList.of(renameConfig1, renamerConfig2), strictHandler);

    ProcessorRunner strictRunner =
        new ProcessorRunner.Builder(FieldRenamerDProcessor.class, strictProcessor)
            .setOnRecordError(OnRecordError.STOP_PIPELINE)
            .addOutputLane("a")
            .build();
    strictRunner.runInit();

    try {
      strictRunner.runProcess(ImmutableList.of(record));
      Assert.fail("Should throw error if multiple regex match the same field");
    } catch (OnRecordErrorException e) {
      Assert.assertEquals(Errors.FIELD_RENAMER_03, e.getErrorCode());
    } finally {
      strictRunner.runDestroy();
    }

    // Phase 2: the same situation is tolerated and the field is left in place.
    FieldRenamerProcessorErrorHandler errorHandler = new FieldRenamerProcessorErrorHandler();
    errorHandler.nonExistingFromFieldHandling = OnStagePreConditionFailure.CONTINUE;
    errorHandler.multipleFromFieldsMatching = OnStagePreConditionFailure.CONTINUE;
    errorHandler.existingToFieldHandling = ExistingToFieldHandling.TO_ERROR;

    FieldRenamerProcessor processor =
        new FieldRenamerProcessor(ImmutableList.of(renameConfig1, renamerConfig2), errorHandler);

    ProcessorRunner runner =
        new ProcessorRunner.Builder(FieldRenamerDProcessor.class, processor)
            .setOnRecordError(OnRecordError.TO_ERROR)
            .addOutputLane("a")
            .build();
    runner.runInit();

    try {
      StageRunner.Output output = runner.runProcess(ImmutableList.of(record));
      Assert.assertEquals(1, output.getRecords().get("a").size());
      Record r = output.getRecords().get("a").get(0);
      Assert.assertTrue(r.has("/sqlField"));
    } finally {
      runner.runDestroy();
    }
  }
コード例 #8
0
  /**
   * Runs the JDBC lookup processor with an empty column-mapping list over three list-rooted
   * records and checks that each output record carries a map at index {@code [2]} whose
   * {@code value} entry is 1, 2 and 3 respectively.
   *
   * <p>The runner is destroyed in a {@code finally} block so the stage is released even when
   * processing or an assertion fails.
   */
  @Test
  public void testMissingColumnMappingList() throws Exception {
    List<JdbcFieldColumnMapping> columnMappings = ImmutableList.of();

    JdbcLookupDProcessor processor = new JdbcLookupDProcessor();
    processor.hikariConfigBean = createConfigBean(h2ConnectionString, username, password);

    ProcessorRunner processorRunner =
        new ProcessorRunner.Builder(JdbcLookupDProcessor.class, processor)
            .addConfiguration("query", listQuery)
            .addConfiguration("columnMappings", columnMappings)
            .addConfiguration("maxClobSize", 1000)
            .addConfiguration("maxBlobSize", 1000)
            .addOutputLane("lane")
            .build();

    Record record1 = RecordCreator.create();
    List<Field> fields1 = new ArrayList<>();
    fields1.add(Field.create("Adam"));
    fields1.add(Field.create("Kunicki"));
    record1.set(Field.create(fields1));

    Record record2 = RecordCreator.create();
    List<Field> fields2 = new ArrayList<>();
    fields2.add(Field.create("Jon"));
    fields2.add(Field.create("Natkins"));
    record2.set(Field.create(fields2));

    Record record3 = RecordCreator.create();
    List<Field> fields3 = new ArrayList<>();
    fields3.add(Field.create("Jon"));
    fields3.add(Field.create("Daulton"));
    record3.set(Field.create(fields3));

    List<Record> records = ImmutableList.of(record1, record2, record3);
    processorRunner.runInit();
    try {
      List<Record> outputRecords = processorRunner.runProcess(records).getRecords().get("lane");

      // With no explicit mappings, the looked-up column is expected at list index 2.
      Assert.assertEquals(
          1, outputRecords.get(0).get("[2]").getValueAsMap().get("value").getValueAsInteger());
      Assert.assertEquals(
          2, outputRecords.get(1).get("[2]").getValueAsMap().get("value").getValueAsInteger());
      Assert.assertEquals(
          3, outputRecords.get(2).get("[2]").getValueAsMap().get("value").getValueAsInteger());
    } finally {
      processorRunner.runDestroy();
    }
  }
コード例 #9
0
 /**
  * Creates nine records whose root field is the string {@code TEST_STRING + i} for
  * {@code i = 0..8}. The same string, encoded as UTF-8, is passed to {@code RecordCreator.create}
  * as the raw bytes together with {@code MIME}.
  *
  * <p>The charset is given explicitly so the raw bytes do not depend on the platform default.
  *
  * @return a new mutable list holding the nine records in index order
  */
 public static List<Record> createStringRecords() {
   final int recordCount = 9;
   List<Record> records = new ArrayList<>(recordCount);
   for (int i = 0; i < recordCount; i++) {
     String value = TEST_STRING + i;
     byte[] raw = value.getBytes(java.nio.charset.StandardCharsets.UTF_8);
     Record r = RecordCreator.create("s", "s:1", raw, MIME);
     r.set(Field.create(value));
     records.add(r);
   }
   return records;
 }
コード例 #10
0
  /**
   * Renames the three elements of {@code /listOfInts} (addressed by constant index) to the
   * top-level fields {@code nonExisting0..2} and expects the source list to end up empty while the
   * new fields hold 1, 2 and 3.
   */
  @Test
  public void testRenameMultipleListElementsWithConstIdxExpr() throws StageException {
    FieldRenamerConfig firstElement = new FieldRenamerConfig();
    firstElement.fromFieldExpression = "/listOfInts[0]";
    firstElement.toFieldExpression = "/nonExisting0";

    FieldRenamerConfig secondElement = new FieldRenamerConfig();
    secondElement.fromFieldExpression = "/listOfInts[1]";
    secondElement.toFieldExpression = "/nonExisting1";

    FieldRenamerConfig thirdElement = new FieldRenamerConfig();
    thirdElement.fromFieldExpression = "/listOfInts[2]";
    thirdElement.toFieldExpression = "/nonExisting2";

    FieldRenamerProcessorErrorHandler handler = new FieldRenamerProcessorErrorHandler();
    handler.nonExistingFromFieldHandling = OnStagePreConditionFailure.TO_ERROR;
    handler.multipleFromFieldsMatching = OnStagePreConditionFailure.TO_ERROR;
    handler.existingToFieldHandling = ExistingToFieldHandling.REPLACE;

    // Reverse order in configuration so as to preserve array indices
    ProcessorRunner runner =
        new ProcessorRunner.Builder(
                FieldRenamerDProcessor.class,
                new FieldRenamerProcessor(
                    ImmutableList.of(thirdElement, secondElement, firstElement), handler))
            .addOutputLane("a")
            .build();
    runner.runInit();

    try {
      Field ints =
          Field.create(
              ImmutableList.of(
                  Field.create(Field.Type.INTEGER, 1),
                  Field.create(Field.Type.INTEGER, 2),
                  Field.create(Field.Type.INTEGER, 3)));
      Map<String, Field> root = new LinkedHashMap<>();
      root.put("listOfInts", ints);

      Record input = RecordCreator.create("s", "s:1");
      input.set(Field.create(root));

      StageRunner.Output processed = runner.runProcess(ImmutableList.of(input));
      Assert.assertEquals(1, processed.getRecords().get("a").size());

      Map<String, Field> renamed = processed.getRecords().get("a").get(0).get().getValueAsMap();

      // The list itself stays, but all of its elements have been moved out.
      Assert.assertTrue(renamed.containsKey("listOfInts"));
      Assert.assertTrue(renamed.get("listOfInts").getValueAsList().isEmpty());

      Assert.assertTrue(renamed.containsKey("nonExisting0"));
      Assert.assertTrue(renamed.containsKey("nonExisting1"));
      Assert.assertTrue(renamed.containsKey("nonExisting2"));
      Assert.assertEquals(1, renamed.get("nonExisting0").getValueAsInteger());
      Assert.assertEquals(2, renamed.get("nonExisting1").getValueAsInteger());
      Assert.assertEquals(3, renamed.get("nonExisting2").getValueAsInteger());
    } finally {
      runner.runDestroy();
    }
  }
コード例 #11
0
  /**
   * Looks up the country name for one IP address supplied in three representations (value from
   * {@code ipAsStringToInt}, that value as a string, and the dotted string) and expects
   * "United States" for each, with no error records and six fields in the output map.
   */
  @Test
  public void testLookup() throws Exception {
    final String address = "128.101.101.101";

    GeolocationFieldConfig fromInt = new GeolocationFieldConfig();
    fromInt.inputFieldName = "/ipAsInt";
    fromInt.outputFieldName = "/intIpCountry";
    fromInt.targetType = GeolocationField.COUNTRY_NAME;

    GeolocationFieldConfig fromIntString = new GeolocationFieldConfig();
    fromIntString.inputFieldName = "/ipAsIntString";
    fromIntString.outputFieldName = "/intStringIpCountry";
    fromIntString.targetType = GeolocationField.COUNTRY_NAME;

    GeolocationFieldConfig fromString = new GeolocationFieldConfig();
    fromString.inputFieldName = "/ipAsString";
    fromString.outputFieldName = "/stringIpCountry";
    fromString.targetType = GeolocationField.COUNTRY_NAME;

    List<GeolocationFieldConfig> lookups = new ArrayList<>();
    lookups.add(fromInt);
    lookups.add(fromIntString);
    lookups.add(fromString);

    ProcessorRunner runner =
        new ProcessorRunner.Builder(GeolocationDProcessor.class)
            .addConfiguration("fieldTypeConverterConfigs", lookups)
            .addConfiguration("geoIP2DBFile", databaseFile.getAbsolutePath())
            .addOutputLane("a")
            .build();
    runner.runInit();
    try {
      // Three input fields, one per supported representation of the address.
      Map<String, Field> root = new LinkedHashMap<>();
      root.put("ipAsInt", Field.create(GeolocationProcessor.ipAsStringToInt(address)));
      root.put(
          "ipAsIntString",
          Field.create(String.valueOf(GeolocationProcessor.ipAsStringToInt(address))));
      root.put("ipAsString", Field.create(address));

      Record input = RecordCreator.create("s", "s:1");
      input.set(Field.create(root));

      StageRunner.Output processed = runner.runProcess(ImmutableList.of(input));
      Assert.assertEquals(0, runner.getErrorRecords().size());
      Assert.assertEquals(1, processed.getRecords().get("a").size());

      Field outRoot = processed.getRecords().get("a").get(0).get();
      Assert.assertTrue(outRoot.getValue() instanceof Map);

      // Three inputs plus three looked-up outputs.
      Map<String, Field> resolved = outRoot.getValueAsMap();
      Assert.assertEquals(String.valueOf(resolved), 6, resolved.size());
      Assert.assertEquals(
          "United States",
          Utils.checkNotNull(resolved.get("intStringIpCountry"), "intStringIpCountry").getValue());
      Assert.assertEquals(
          "United States",
          Utils.checkNotNull(resolved.get("intIpCountry"), "intIpCountry").getValue());
      Assert.assertEquals(
          "United States",
          Utils.checkNotNull(resolved.get("stringIpCountry"), "stringIpCountry").getValue());
    } finally {
      runner.runDestroy();
    }
  }
コード例 #12
0
 /**
  * Builds twenty map-rooted records. Record {@code i} (0..19) has {@code name = "NAME" + i} and
  * {@code lastStatusChange = i}.
  *
  * @return a new mutable list with the records in index order
  */
 public static List<Record> produce20Records() throws IOException {
   List<Record> produced = new ArrayList<>();
   int index = 0;
   while (index < 20) {
     Map<String, Field> root = new HashMap<>();
     root.put("name", Field.create("NAME" + index));
     root.put("lastStatusChange", Field.create(index));

     Record next = RecordCreator.create();
     next.set(Field.create(root));
     produced.add(next);
     index++;
   }
   return produced;
 }
コード例 #13
0
 /**
  * Splits the string value found at {@code fieldPath} on {@code separatorStr} into at most
  * {@code fieldPaths.length} parts and stores part {@code i} at {@code fieldPaths[i]}.
  *
  * <p>A missing or null source field is flagged as {@code SPLITTER_01}; fewer parts than target
  * paths is flagged as {@code SPLITTER_02}. A flagged record is rejected with an
  * {@link OnRecordErrorException} unless {@code onStagePreConditionFailure} is {@code CONTINUE},
  * in which case targets without a corresponding part receive a null STRING field.
  *
  * @param record the record to split; modified in place
  * @param batchMaker receives the record once all target fields are set
  * @throws StageException if the source value cannot be read as a string ({@code SPLITTER_04}),
  *     a target field cannot be set ({@code SPLITTER_05}), or a flagged pre-condition failure is
  *     not configured to continue
  */
 @Override
 protected void process(Record record, SingleLaneBatchMaker batchMaker) throws StageException {
   Field source = record.get(fieldPath);
   String[] parts = null;
   ErrorCode failure = null;

   if (source != null && source.getValue() != null) {
     String text;
     try {
       text = source.getValueAsString();
     } catch (IllegalArgumentException e) {
       throw new OnRecordErrorException(Errors.SPLITTER_04, fieldPath, source.getType().name());
     }
     // The limit keeps any remaining separators inside the last part.
     parts = text.split(separatorStr, fieldPaths.length);
     if (parts.length < fieldPaths.length) {
       failure = Errors.SPLITTER_02;
     }
   } else {
     failure = Errors.SPLITTER_01;
   }

   // Reject the record unless the failure is configured to be tolerated.
   if (failure != null && onStagePreConditionFailure != OnStagePreConditionFailure.CONTINUE) {
     throw new OnRecordErrorException(failure, record.getHeader().getSourceId(), fieldPath);
   }

   for (int idx = 0; idx < fieldPaths.length; idx++) {
     boolean hasPart = parts != null && idx < parts.length;
     try {
       record.set(
           fieldPaths[idx],
           hasPart ? Field.create(parts[idx]) : Field.create(Field.Type.STRING, null));
     } catch (IllegalArgumentException e) {
       throw new OnRecordErrorException(
           Errors.SPLITTER_05, fieldPath, record.getHeader().getSourceId(), e.toString());
     }
   }

   if (removeUnsplitValue) {
     record.delete(fieldPath);
   }
   batchMaker.addRecord(record);
 }
コード例 #14
0
  /**
   * Runs the {@code AssignTypedNullField.groovy} resource script in BATCH mode against an
   * initially empty map record and delegates verification to
   * {@code ScriptingProcessorTestUtil.verifyTypedFieldWithNullValue}.
   */
  @Test
  public void testNewFieldWithTypedNull() throws Exception {
    final String groovyScript =
        Resources.toString(Resources.getResource("AssignTypedNullField.groovy"), Charsets.UTF_8);

    // The input record starts out as an empty map.
    Map<String, Field> emptyRoot = new HashMap<>();
    Record input = RecordCreator.create();
    input.set(Field.create(emptyRoot));

    Processor groovy = new GroovyProcessor(ProcessingMode.BATCH, groovyScript);
    ScriptingProcessorTestUtil.verifyTypedFieldWithNullValue(GroovyProcessor.class, groovy, input);
  }
コード例 #15
0
  /**
   * Renames {@code SQL#<n>} keys that live inside maps nested in a list, using capture groups for
   * the list name and index, and expects {@code /list[i]/SQL<n>} paths in the output in place of
   * the quoted originals.
   */
  @Test
  public void testRegexInComplexListType() throws StageException {
    // Any field containing a non-word character should be in single quotes
    FieldRenamerConfig nestedRename = new FieldRenamerConfig();
    nestedRename.fromFieldExpression = "/(*)[(*)]/'SQL(#)(.*)'";
    nestedRename.toFieldExpression = "/$1[$2]/SQL$4";

    FieldRenamerProcessorErrorHandler handler = new FieldRenamerProcessorErrorHandler();
    handler.nonExistingFromFieldHandling = OnStagePreConditionFailure.TO_ERROR;
    handler.multipleFromFieldsMatching = OnStagePreConditionFailure.TO_ERROR;
    handler.existingToFieldHandling = ExistingToFieldHandling.TO_ERROR;

    ProcessorRunner runner =
        new ProcessorRunner.Builder(
                FieldRenamerDProcessor.class,
                new FieldRenamerProcessor(ImmutableList.of(nestedRename), handler))
            .setOnRecordError(OnRecordError.TO_ERROR)
            .addOutputLane("a")
            .build();
    runner.runInit();

    try {
      // Two single-entry maps become the two elements of /list.
      Map<String, Field> firstElement = new LinkedHashMap<>();
      firstElement.put("SQL#1", Field.create(Field.Type.STRING, "foo1"));
      Map<String, Field> secondElement = new LinkedHashMap<>();
      secondElement.put("SQL#2", Field.create(Field.Type.STRING, "foo2"));

      List<Field> elements = new LinkedList<>();
      elements.add(Field.create(firstElement));
      elements.add(Field.create(secondElement));

      Map<String, Field> root = new HashMap<>();
      root.put("list", Field.create(elements));

      Record input = RecordCreator.create("s", "s:1");
      input.set(Field.create(root));

      StageRunner.Output processed = runner.runProcess(ImmutableList.of(input));
      Assert.assertEquals(1, processed.getRecords().get("a").size());

      Record renamed = processed.getRecords().get("a").get(0);
      // The quoted source paths are gone ...
      Assert.assertFalse(renamed.getEscapedFieldPaths().contains("/list[0]/'SQL#1'"));
      Assert.assertFalse(renamed.getEscapedFieldPaths().contains("/list[1]/'SQL#2'"));
      // ... and the renamed paths are present.
      Assert.assertTrue(renamed.getEscapedFieldPaths().contains("/list[0]/SQL1"));
      Assert.assertTrue(renamed.getEscapedFieldPaths().contains("/list[1]/SQL2"));
    } finally {
      runner.runDestroy();
    }
  }
コード例 #16
0
  /**
   * Writes one schema-change record describing table {@code default.tbl} (columns {@code name} and
   * {@code surname}, partition {@code dt}, all STRING) to a freshly built
   * {@link HiveMetastoreTarget}, asserts the resulting table structure, and returns the event
   * records the runner collected.
   *
   * <p>All work after {@code runInit()} happens inside {@code try/finally}, so the runner is
   * destroyed even when building the record, writing it, or an assertion fails.
   *
   * @return the event records reported by the runner after the write
   * @throws Exception if initializing, writing to, or destroying the target fails
   */
  @SuppressWarnings("unchecked")
  public List<Record> runNewTableRecord() throws Exception {
    HiveMetastoreTarget hiveTarget = new HiveMetastoreTargetBuilder().build();

    TargetRunner runner =
        new TargetRunner.Builder(HiveMetastoreTarget.class, hiveTarget)
            .setOnRecordError(OnRecordError.STOP_PIPELINE)
            .build();
    runner.runInit();

    try {
      LinkedHashMap<String, HiveTypeInfo> columns = new LinkedHashMap<>();
      columns.put("name", HiveType.STRING.getSupport().generateHiveTypeInfoFromResultSet("STRING"));
      columns.put(
          "surname", HiveType.STRING.getSupport().generateHiveTypeInfoFromResultSet("STRING"));

      LinkedHashMap<String, HiveTypeInfo> partitions = new LinkedHashMap<>();
      partitions.put(
          "dt", HiveType.STRING.getSupport().generateHiveTypeInfoFromResultSet("STRING"));

      Field newTableField =
          HiveMetastoreUtil.newSchemaMetadataFieldBuilder(
              "default",
              "tbl",
              columns,
              partitions,
              true,
              BaseHiveIT.getDefaultWareHouseDir(),
              HiveMetastoreUtil.generateAvroSchema(columns, "tbl"));

      Record record = RecordCreator.create();
      record.set(newTableField);
      Assert.assertTrue(HiveMetastoreUtil.isSchemaChangeRecord(record));

      runner.runWrite(ImmutableList.of(record));

      assertTableStructure(
          "default.tbl",
          ImmutablePair.of("tbl.name", Types.VARCHAR),
          ImmutablePair.of("tbl.surname", Types.VARCHAR),
          ImmutablePair.of("tbl.dt", Types.VARCHAR));

      return runner.getEventRecords();
    } finally {
      runner.runDestroy();
    }
  }
コード例 #17
0
  /**
   * Renames the map-valued field {@code /first} to {@code /second} and expects the nested
   * {@code value} entry to be reachable under the new name only.
   */
  @Test
  public void testRenameMapField() throws StageException {
    FieldRenamerConfig mapRename = new FieldRenamerConfig();
    mapRename.fromFieldExpression = "/first";
    mapRename.toFieldExpression = "/second";

    FieldRenamerProcessorErrorHandler handler = new FieldRenamerProcessorErrorHandler();
    handler.nonExistingFromFieldHandling = OnStagePreConditionFailure.CONTINUE;
    handler.multipleFromFieldsMatching = OnStagePreConditionFailure.TO_ERROR;
    handler.existingToFieldHandling = ExistingToFieldHandling.TO_ERROR;

    ProcessorRunner runner =
        new ProcessorRunner.Builder(
                FieldRenamerDProcessor.class,
                new FieldRenamerProcessor(ImmutableList.of(mapRename), handler))
            .setOnRecordError(OnRecordError.TO_ERROR)
            .addOutputLane("a")
            .build();
    runner.runInit();

    // /first is itself a map with a single nested entry.
    Map<String, Field> nested = new HashMap<>();
    nested.put("value", Field.create(Field.Type.STRING, "value"));

    Map<String, Field> root = new LinkedHashMap<>();
    root.put("first", Field.create(nested));

    Record input = RecordCreator.create("s", "s:1");
    input.set(Field.create(Field.Type.MAP, root));

    try {
      StageRunner.Output processed = runner.runProcess(ImmutableList.of(input));
      Assert.assertEquals(1, processed.getRecords().get("a").size());

      Record renamed = processed.getRecords().get("a").get(0);
      // Old paths are gone, including the nested one.
      Assert.assertFalse(renamed.has("/first"));
      Assert.assertFalse(renamed.has("/first/value"));
      // New paths exist, including the nested one.
      Assert.assertTrue(renamed.has("/second"));
      Assert.assertTrue(renamed.has("/second/value"));
    } finally {
      runner.runDestroy();
    }
  }
コード例 #18
0
  /**
   * Renames {@code /field} to {@code /col} while {@code /col} already exists and the handler is
   * set to {@code APPEND_NUMBERS}; expects the existing {@code /col} to keep its value and the
   * renamed field to appear as {@code /col1}.
   */
  @Test
  public void testTargetFieldExistsAppendNumbers() throws StageException {
    FieldRenamerConfig toExistingTarget = new FieldRenamerConfig();
    toExistingTarget.fromFieldExpression = "/field";
    toExistingTarget.toFieldExpression = "/col";

    FieldRenamerProcessorErrorHandler handler = new FieldRenamerProcessorErrorHandler();
    handler.nonExistingFromFieldHandling = OnStagePreConditionFailure.CONTINUE;
    handler.multipleFromFieldsMatching = OnStagePreConditionFailure.TO_ERROR;
    handler.existingToFieldHandling = ExistingToFieldHandling.APPEND_NUMBERS;

    ProcessorRunner runner =
        new ProcessorRunner.Builder(
                FieldRenamerDProcessor.class,
                new FieldRenamerProcessor(ImmutableList.of(toExistingTarget), handler))
            .addOutputLane("a")
            .build();
    runner.runInit();

    try {
      // Both the source and the target field exist in the input.
      Map<String, Field> root = new LinkedHashMap<>();
      root.put("field", Field.create(Field.Type.STRING, "field"));
      root.put("col", Field.create(Field.Type.STRING, "col"));

      Record input = RecordCreator.create("s", "s:1");
      input.set(Field.create(Field.Type.MAP, root));

      StageRunner.Output processed = runner.runProcess(ImmutableList.of(input));
      Assert.assertEquals(1, processed.getRecords().get("a").size());

      Record renamed = processed.getRecords().get("a").get(0);
      Assert.assertFalse(renamed.has("/field"));
      Assert.assertTrue(renamed.has("/col"));
      Assert.assertTrue(renamed.has("/col1"));
      // Each field's value records which input field it came from.
      Assert.assertEquals("col", renamed.get("/col").getValueAsString());
      Assert.assertEquals("field", renamed.get("/col1").getValueAsString());
    } finally {
      runner.runDestroy();
    }
  }
コード例 #19
0
  /**
   * Builds a record with five typed-null fields plus one populated datetime field, runs the
   * {@code GetFieldNullScript.groovy} resource script in BATCH mode, and delegates verification
   * to {@code ScriptingProcessorTestUtil.verifyNullField}.
   */
  @Test
  public void testGetFieldNull() throws Exception {
    final String groovyScript =
        Resources.toString(Resources.getResource("GetFieldNullScript.groovy"), Charsets.UTF_8);

    // Typed nulls for the script to inspect.
    Map<String, Field> root = new HashMap<>();
    root.put("null_int", Field.create(Field.Type.INTEGER, null));
    root.put("null_string", Field.create(Field.Type.STRING, null));
    root.put("null_boolean", Field.create(Field.Type.BOOLEAN, null));
    root.put("null_list", Field.create(Field.Type.LIST, null));
    root.put("null_map", Field.create(Field.Type.MAP, null));
    // original record has value in the field, so getFieldNull should return the value
    root.put("null_datetime", Field.createDatetime(new Date()));

    Record input = RecordCreator.create();
    input.set(Field.create(root));

    Processor groovy = new GroovyProcessor(ProcessingMode.BATCH, groovyScript);
    ScriptingProcessorTestUtil.verifyNullField(GroovyProcessor.class, groovy, input);
  }
コード例 #20
0
  /**
   * Renames {@code /existing} to {@code /overwrite} while both fields exist and the handler is set
   * to {@code REPLACE}; expects a single remaining field {@code overwrite} holding the source
   * field's value.
   */
  @Test
  public void testTargetFieldExistsReplace() throws StageException {
    // Standard overwrite condition. Source and target fields exist
    FieldRenamerConfig overwriteRename = new FieldRenamerConfig();
    overwriteRename.fromFieldExpression = "/existing";
    overwriteRename.toFieldExpression = "/overwrite";

    FieldRenamerProcessorErrorHandler handler = new FieldRenamerProcessorErrorHandler();
    handler.nonExistingFromFieldHandling = OnStagePreConditionFailure.CONTINUE;
    handler.multipleFromFieldsMatching = OnStagePreConditionFailure.TO_ERROR;
    handler.existingToFieldHandling = ExistingToFieldHandling.REPLACE;

    ProcessorRunner runner =
        new ProcessorRunner.Builder(
                FieldRenamerDProcessor.class,
                new FieldRenamerProcessor(ImmutableList.of(overwriteRename), handler))
            .addOutputLane("a")
            .build();
    runner.runInit();

    try {
      Map<String, Field> root = new LinkedHashMap<>();
      root.put("existing", Field.create(Field.Type.STRING, "foo"));
      root.put("overwrite", Field.create(Field.Type.STRING, "bar"));

      Record input = RecordCreator.create("s", "s:1");
      input.set(Field.create(root));

      StageRunner.Output processed = runner.runProcess(ImmutableList.of(input));
      Assert.assertEquals(1, processed.getRecords().get("a").size());

      Field outRoot = processed.getRecords().get("a").get(0).get();
      Assert.assertTrue(outRoot.getValue() instanceof Map);

      // Only the target remains, and it now carries the source's value.
      Map<String, Field> remaining = outRoot.getValueAsMap();
      Assert.assertEquals(String.valueOf(remaining), 1, remaining.size());
      Assert.assertTrue(remaining.containsKey("overwrite"));
      Assert.assertFalse(remaining.containsKey("existing"));
      Assert.assertEquals("foo", remaining.get("overwrite").getValue());
    } finally {
      runner.runDestroy();
    }
  }
コード例 #21
0
  /**
   * Renames a field whose name contains a {@code |} character, addressed through the
   * single-quoted expression {@code /'attr|OrderNum'}, to {@code /theOrderNum}. The output map
   * must hold only {@code theOrderNum}.
   */
  @Test
  public void testSourceWithQuotedSubstring() throws StageException {
    FieldRenamerProcessorErrorHandler handler = new FieldRenamerProcessorErrorHandler();
    handler.nonExistingFromFieldHandling = OnStagePreConditionFailure.CONTINUE;
    handler.multipleFromFieldsMatching = OnStagePreConditionFailure.TO_ERROR;
    handler.existingToFieldHandling = ExistingToFieldHandling.REPLACE;

    // The source expression should be processed as a quoted string.
    FieldRenamerConfig quotedRename = new FieldRenamerConfig();
    quotedRename.fromFieldExpression = "/'attr|OrderNum'";
    quotedRename.toFieldExpression = "/theOrderNum";

    ProcessorRunner runner =
        new ProcessorRunner.Builder(
                FieldRenamerDProcessor.class,
                new FieldRenamerProcessor(ImmutableList.of(quotedRename), handler))
            .addOutputLane("a")
            .build();
    runner.runInit();

    try {
      Map<String, Field> input = new LinkedHashMap<>();
      input.put("attr|OrderNum", Field.create(Field.Type.STRING, "foo"));
      Record inRecord = RecordCreator.create("s", "s:1");
      inRecord.set(Field.create(input));

      StageRunner.Output output = runner.runProcess(ImmutableList.of(inRecord));
      Assert.assertEquals(1, output.getRecords().get("a").size());

      Field root = output.getRecords().get("a").get(0).get();
      Assert.assertTrue(root.getValue() instanceof Map);

      Map<String, Field> renamed = root.getValueAsMap();
      Assert.assertEquals(String.valueOf(renamed), 1, renamed.size());
      // None of the spellings of the old name may remain as a key.
      Assert.assertFalse(renamed.containsKey("/'attr|OrderNum'"));
      Assert.assertFalse(renamed.containsKey("'attr|OrderNum'"));
      Assert.assertFalse(renamed.containsKey("attr|OrderNum"));
      Assert.assertTrue(renamed.containsKey("theOrderNum"));
    } finally {
      runner.runDestroy();
    }
  }
コード例 #22
0
 /**
  * Reads the classpath resource {@code testFlumeTarget.csv} and turns every line into a record.
  *
  * <p>Each line is split on commas. Every column becomes a map field with a single
  * {@code "value"} entry, and the record's root field is the list of those map fields.
  *
  * @return one record per line of the CSV resource, in file order
  * @throws IOException if the resource file cannot be opened or read
  */
 public static List<Record> createCsvRecords() throws IOException {
   List<Record> records = new ArrayList<>();
   String csvPath =
       FlumeTestUtil.class.getClassLoader().getResource("testFlumeTarget.csv").getFile();
   // try-with-resources closes the reader (and the underlying file) even if a read fails;
   // previously the reader was never closed.
   try (BufferedReader bufferedReader = new BufferedReader(new FileReader(csvPath))) {
     String line;
     while ((line = bufferedReader.readLine()) != null) {
       String[] columns = line.split(",");
       List<Field> list = new ArrayList<>();
       for (String column : columns) {
         // Wrap each column in its own single-entry map keyed by "value".
         Map<String, Field> map = new LinkedHashMap<>();
         map.put("value", Field.create(column));
         list.add(Field.create(map));
       }
       Record record = RecordCreator.create("s", "s:1", null, null);
       record.set(Field.create(list));
       records.add(record);
     }
   }
   return records;
 }
コード例 #23
0
  /**
   * Configures a rename from {@code /nonExisting} to {@code /alsoNonExisting} on a record that
   * contains neither field. With the missing-source policy set to {@code CONTINUE} the rename is
   * a no-op and the record must come out unchanged.
   */
  @Test
  public void testNonExistingSourceAndTargetFields() throws StageException {
    FieldRenamerProcessorErrorHandler handler = new FieldRenamerProcessorErrorHandler();
    handler.nonExistingFromFieldHandling = OnStagePreConditionFailure.CONTINUE;
    handler.multipleFromFieldsMatching = OnStagePreConditionFailure.TO_ERROR;
    handler.existingToFieldHandling = ExistingToFieldHandling.TO_ERROR;

    // Neither side of this rename is present in the input record.
    FieldRenamerConfig rename = new FieldRenamerConfig();
    rename.fromFieldExpression = "/nonExisting";
    rename.toFieldExpression = "/alsoNonExisting";

    ProcessorRunner runner =
        new ProcessorRunner.Builder(
                FieldRenamerDProcessor.class,
                new FieldRenamerProcessor(ImmutableList.of(rename), handler))
            .addOutputLane("a")
            .build();
    runner.runInit();

    try {
      Map<String, Field> input = new LinkedHashMap<>();
      input.put("name", Field.create(Field.Type.STRING, null));
      Record inRecord = RecordCreator.create("s", "s:1");
      inRecord.set(Field.create(input));

      StageRunner.Output output = runner.runProcess(ImmutableList.of(inRecord));
      Assert.assertEquals(1, output.getRecords().get("a").size());

      Field root = output.getRecords().get("a").get(0).get();
      Assert.assertTrue(root.getValue() instanceof Map);

      // The single original field is still there, still holding null.
      Map<String, Field> untouched = root.getValueAsMap();
      Assert.assertEquals(String.valueOf(untouched), 1, untouched.size());
      Assert.assertTrue(untouched.containsKey("name"));
      Assert.assertNull(untouched.get("name").getValue());
    } finally {
      runner.runDestroy();
    }
  }
コード例 #24
0
  /**
   * Renames {@code /existing} to {@code /overwrite} on a record that already contains both
   * fields, with the existing-target policy set to {@code TO_ERROR}. Nothing may reach the output
   * lane and exactly one error record must be reported by the runner.
   */
  @Test
  public void testTargetFieldExistsError() throws StageException {
    // Overwriting is not allowed here, so hitting an existing target should produce an error.
    FieldRenamerProcessorErrorHandler handler = new FieldRenamerProcessorErrorHandler();
    handler.nonExistingFromFieldHandling = OnStagePreConditionFailure.CONTINUE;
    handler.multipleFromFieldsMatching = OnStagePreConditionFailure.TO_ERROR;
    handler.existingToFieldHandling = ExistingToFieldHandling.TO_ERROR;

    FieldRenamerConfig rename = new FieldRenamerConfig();
    rename.fromFieldExpression = "/existing";
    rename.toFieldExpression = "/overwrite";

    ProcessorRunner runner =
        new ProcessorRunner.Builder(
                FieldRenamerDProcessor.class,
                new FieldRenamerProcessor(ImmutableList.of(rename), handler))
            .setOnRecordError(OnRecordError.TO_ERROR)
            .addOutputLane("a")
            .build();
    runner.runInit();

    try {
      // Both the source and the target field are present in the input.
      Map<String, Field> input = new LinkedHashMap<>();
      input.put("existing", Field.create(Field.Type.STRING, "foo"));
      input.put("overwrite", Field.create(Field.Type.STRING, "bar"));
      Record inRecord = RecordCreator.create("s", "s:1");
      inRecord.set(Field.create(input));

      StageRunner.Output output = runner.runProcess(ImmutableList.of(inRecord));

      Assert.assertEquals(0, output.getRecords().get("a").size());
      Assert.assertEquals(1, runner.getErrorRecords().size());
    } finally {
      runner.runDestroy();
    }
  }
コード例 #25
0
  /**
   * Runs the Groovy script {@code AssignTypedNullField.groovy} over a record whose fields start
   * out with values of assorted types, then delegates the checks to
   * {@code ScriptingProcessorTestUtil.verifyTypedFieldWithNullValue}.
   */
  @Test
  public void testChangeFieldToTypedNull() throws Exception {
    // Initial contents of the record.
    Map<String, Field> rootFields = new HashMap<>();
    rootFields.put("null_int", Field.create("this is string field"));
    rootFields.put("null_string", Field.create(123L));
    rootFields.put("null_date", Field.create(true));
    rootFields.put("null_decimal", Field.createDate(null));
    rootFields.put("null_short", Field.create((short) 1000));
    rootFields.put("null_char", Field.create('c'));

    // A list-typed field with a single string element.
    List<Field> listElements = new LinkedList<>();
    listElements.add(Field.create("dummy field list"));
    rootFields.put("null_list", Field.create(listElements));

    Record input = RecordCreator.create();
    input.set(Field.create(rootFields));

    // The script under test is loaded from the test resources as UTF-8 text.
    final String groovySource =
        Resources.toString(Resources.getResource("AssignTypedNullField.groovy"), Charsets.UTF_8);
    Processor groovyProcessor = new GroovyProcessor(ProcessingMode.BATCH, groovySource);
    ScriptingProcessorTestUtil.verifyTypedFieldWithNullValue(
        GroovyProcessor.class, groovyProcessor, input);
  }
コード例 #26
0
  /**
   * Renames {@code /a} to {@code /b/c/d} on a record that contains only {@code a}, so the
   * intermediate path of the target is absent. The record must be reported as an error with
   * code {@code FIELD_RENAMER_04} and nothing may reach the output lane.
   */
  @Test
  public void testUnreachableFields() throws Exception {
    FieldRenamerProcessorErrorHandler handler = new FieldRenamerProcessorErrorHandler();
    handler.nonExistingFromFieldHandling = OnStagePreConditionFailure.TO_ERROR;
    handler.multipleFromFieldsMatching = OnStagePreConditionFailure.TO_ERROR;
    handler.existingToFieldHandling = ExistingToFieldHandling.REPLACE;

    FieldRenamerConfig rename = new FieldRenamerConfig();
    rename.fromFieldExpression = "/a";
    rename.toFieldExpression = "/b/c/d";

    ProcessorRunner runner =
        new ProcessorRunner.Builder(
                FieldRenamerDProcessor.class,
                new FieldRenamerProcessor(ImmutableList.of(rename), handler))
            .addOutputLane("a")
            .setOnRecordError(OnRecordError.TO_ERROR)
            .build();
    runner.runInit();

    try {
      // The input has the source field only; "/b" and "/b/c" do not exist.
      Map<String, Field> input = new LinkedHashMap<>();
      input.put("a", Field.create(123));
      Record inRecord = RecordCreator.create("s", "s:1");
      inRecord.set(Field.create(input));

      StageRunner.Output output = runner.runProcess(ImmutableList.of(inRecord));

      Assert.assertEquals(0, output.getRecords().get("a").size());
      Assert.assertEquals(1, runner.getErrorRecords().size());

      Record failed = runner.getErrorRecords().get(0);
      Assert.assertEquals(Errors.FIELD_RENAMER_04.name(), failed.getHeader().getErrorCode());
    } finally {
      runner.runDestroy();
    }
  }
コード例 #27
0
  /**
   * Sends one record through a JDBC lookup processor configured with {@code mapQuery} and a
   * single column mapping ({@code P_ID} to {@code /p_id}), then checks that the output record has
   * a {@code /p_id} field whose integer value is 1.
   */
  @Test
  public void testSingleRecordMap() throws Exception {
    // Input record with a first and last name.
    LinkedHashMap<String, Field> fields = new LinkedHashMap<>();
    fields.put("first_name", Field.create("Adam"));
    fields.put("last_name", Field.create("Kunicki"));
    Record input = RecordCreator.create();
    input.set(Field.create(fields));
    List<Record> singleRecord = ImmutableList.of(input);

    List<JdbcFieldColumnMapping> columnMappings =
        ImmutableList.of(new JdbcFieldColumnMapping("P_ID", "/p_id"));

    JdbcLookupDProcessor processor = new JdbcLookupDProcessor();
    processor.hikariConfigBean = createConfigBean(h2ConnectionString, username, password);

    ProcessorRunner.Builder builder =
        new ProcessorRunner.Builder(JdbcLookupDProcessor.class, processor);
    ProcessorRunner processorRunner =
        builder
            .addConfiguration("query", mapQuery)
            .addConfiguration("columnMappings", columnMappings)
            .addConfiguration("maxClobSize", 1000)
            .addConfiguration("maxBlobSize", 1000)
            .addOutputLane("lane")
            .build();

    processorRunner.runInit();
    try {
      StageRunner.Output output = processorRunner.runProcess(singleRecord);
      Assert.assertEquals(1, output.getRecords().get("lane").size());

      // The lookup result must have been written to /p_id.
      Record enriched = output.getRecords().get("lane").get(0);
      Assert.assertNotNull(enriched.get("/p_id"));
      Assert.assertEquals(1, enriched.get("/p_id").getValueAsInteger());
    } finally {
      processorRunner.runDestroy();
    }
  }
コード例 #28
0
 /**
  * Parses one Kafka message payload into records.
  *
  * <p>All records the parser yields are collected. An {@link IOException} or
  * {@code DataParserException} raised while parsing is passed to {@code handleException};
  * if that call returns normally, whatever was parsed before the failure is kept.
  *
  * <p>When {@code produceSingleRecordPerMessage} is set and at least one record was parsed, the
  * root fields of all parsed records are gathered into a list field, that list becomes the root
  * of the first record, and only that record is returned.
  *
  * @param messageId identifier passed to the parser factory and to the exception handler
  * @param payload raw message bytes to parse
  * @return the parsed records; empty when nothing could be parsed
  * @throws StageException declared for failures escalated by the callee methods
  */
 protected List<Record> processKafkaMessage(String messageId, byte[] payload)
     throws StageException {
   List<Record> records = new ArrayList<>();
   try (DataParser parser = parserFactory.getParser(messageId, payload)) {
     Record record = parser.parse();
     while (record != null) {
       records.add(record);
       record = parser.parse();
     }
   } catch (IOException | DataParserException ex) {
     handleException(messageId, ex);
   }
   // Merge only when there is something to merge: with no parsed records there is no first
   // record to reuse, and records.get(0) would throw IndexOutOfBoundsException.
   if (produceSingleRecordPerMessage && !records.isEmpty()) {
     List<Field> list = new ArrayList<>();
     for (Record record : records) {
       list.add(record.get());
     }
     // Reuse the first record as the carrier of the combined list field.
     Record record = records.get(0);
     record.set(Field.create(list));
     records.clear();
     records.add(record);
   }
   return records;
 }
コード例 #29
0
 /**
  * Parses one message into records according to the configured {@code dataFormat}.
  *
  * <p>For {@code DataFormat.AVRO} the message is cast to {@code byte[]} and at most one record
  * is read from the parser. For every other format the message is converted with
  * {@link String#valueOf(Object)} and the parser is drained until it returns {@code null}.
  * An {@link IOException} or {@code DataParserException} raised while parsing is logged at debug
  * level and passed to {@code handleException}.
  *
  * <p>When {@code produceSingleRecordPerMessage} is set and at least one record was parsed, the
  * root fields of all parsed records are gathered into a list field, that list becomes the root
  * of the first record, and only that record is returned.
  *
  * @param messageId identifier passed to the parser factory and to the exception handler
  * @param message the message body; must be a {@code byte[]} when the data format is AVRO
  * @return the parsed records; empty when nothing could be parsed
  * @throws StageException declared for failures escalated by the callee methods
  */
 protected List<Record> processMessage(String messageId, Object message) throws StageException {
   List<Record> records = new ArrayList<>();
   if (dataFormat == DataFormat.AVRO) {
     try (DataParser parser = parserFactory.getParser(messageId, (byte[]) message)) {
       Record record = parser.parse();
       if (record != null) {
         records.add(record);
       }
     } catch (IOException | DataParserException ex) {
       LOG.debug("Got exception: '{}'", ex, ex);
       handleException(messageId, ex);
     }
   } else {
     try (DataParser parser = parserFactory.getParser(messageId, String.valueOf(message))) {
       Record record = parser.parse();
       while (record != null) {
         records.add(record);
         record = parser.parse();
       }
     } catch (IOException | DataParserException ex) {
       LOG.debug("Got exception: '{}'", ex, ex);
       handleException(messageId, ex);
     }
   }
   // Merge only when there is something to merge: with no parsed records there is no first
   // record to reuse, and records.get(0) would throw IndexOutOfBoundsException.
   if (produceSingleRecordPerMessage && !records.isEmpty()) {
     List<Field> list = new ArrayList<>();
     for (Record record : records) {
       list.add(record.get());
     }
     // Reuse the first record as the carrier of the combined list field.
     Record record = records.get(0);
     record.set(Field.create(list));
     records.clear();
     records.add(record);
   }
   return records;
 }
コード例 #30
0
  /**
   * Runs an inline Jython script that assigns the typed-null constants ({@code NULL_INTEGER},
   * {@code NULL_DATE}, ...) to the fields of a record that start out with values of assorted
   * types, then delegates the checks to
   * {@code ScriptingProcessorTestUtil.verifyTypedFieldWithNullValue}.
   */
  @Test
  public void testChangeFieldToTypedNull() throws Exception {
    // Initial contents of the record.
    Map<String, Field> rootFields = new HashMap<>();
    rootFields.put("null_int", Field.create("this is string field"));
    rootFields.put("null_string", Field.create(123L));
    rootFields.put("null_date", Field.create(true));
    rootFields.put("null_decimal", Field.createDate(null));
    rootFields.put("null_time", Field.createTime(new Date()));

    // A list-typed field with a single string element.
    List<Field> listElements = new LinkedList<>();
    listElements.add(Field.create("dummy field list"));
    rootFields.put("null_list", Field.create(listElements));

    // A map-typed field with a single string entry.
    Map<String, Field> nestedEntries = new HashMap<>();
    nestedEntries.put("dummy", Field.create("dummy field map"));
    rootFields.put("null_map", Field.create(nestedEntries));

    Record input = RecordCreator.create();
    input.set(Field.create(rootFields));

    // Script under test: overwrite every field with the matching typed-null constant.
    String script =
        "for record in records:\n"
            + "  record.value['null_int'] = NULL_INTEGER\n"
            + "  record.value['null_date'] = NULL_DATE\n"
            + "  record.value['null_decimal'] = NULL_DECIMAL\n"
            + "  record.value['null_string'] = NULL_STRING\n"
            + "  record.value['null_time'] = NULL_TIME\n"
            + "  record.value['null_list'] = NULL_LIST\n"
            + "  record.value['null_map'] = NULL_MAP\n"
            + "  output.write(record)\n";
    Processor jythonProcessor = new JythonProcessor(ProcessingMode.RECORD, script);
    ScriptingProcessorTestUtil.verifyTypedFieldWithNullValue(
        JythonDProcessor.class, jythonProcessor, input);
  }