@Test
  public void testNestedKeep6() throws StageException {
    Record record = createNestedRecord();
    /*
     * Keep all entries of a map by specifying the path just up to the map.
     */
    ProcessorRunner runner =
        new ProcessorRunner.Builder(FieldFilterDProcessor.class)
            .addConfiguration("fields", ImmutableList.of("/USA[0]/SanFrancisco/noe"))
            .addConfiguration("filterOperation", FilterOperation.KEEP)
            .addOutputLane("a")
            .build();
    runner.runInit();

    try {
      StageRunner.Output output = runner.runProcess(ImmutableList.of(record));
      Assert.assertEquals(1, output.getRecords().get("a").size());

      Record resultRecord = output.getRecords().get("a").get(0);
      Assert.assertTrue(resultRecord.has("/USA[0]/SanFrancisco/noe"));
      // Check the record produced by the processor. The input record was already verified by
      // createNestedRecord(), so asserting on it would not exercise the filter at all.
      Assert.assertEquals(
          "a",
          resultRecord.get("/USA[0]/SanFrancisco/noe/streets[0][0]/name").getValueAsString());
      Assert.assertEquals(
          "b",
          resultRecord.get("/USA[0]/SanFrancisco/noe/streets[0][1]/name").getValueAsString());
      Assert.assertEquals(
          "c",
          resultRecord.get("/USA[0]/SanFrancisco/noe/streets[1][0]/name").getValueAsString());
      Assert.assertEquals(
          "d",
          resultRecord.get("/USA[0]/SanFrancisco/noe/streets[1][1]/name").getValueAsString());
    } finally {
      runner.runDestroy();
    }
  }
  /**
   * Renames top-level fields whose names contain one of {@code # & @ |} to the same name with an
   * underscore in its place, and checks that colliding target names get numeric suffixes when
   * {@code APPEND_NUMBERS} is configured.
   */
  @Test
  public void testCategoryRegexInNonComplexType() throws StageException {
    FieldRenamerConfig renameConfig = new FieldRenamerConfig();
    // Any field containing a non-word character should be in single quotes
    renameConfig.fromFieldExpression = "/'(.*)[#&@|](.*)'";
    renameConfig.toFieldExpression = "/$1_$2";

    FieldRenamerProcessorErrorHandler errorHandler = new FieldRenamerProcessorErrorHandler();
    errorHandler.nonExistingFromFieldHandling = OnStagePreConditionFailure.CONTINUE;
    errorHandler.multipleFromFieldsMatching = OnStagePreConditionFailure.TO_ERROR;
    errorHandler.existingToFieldHandling = ExistingToFieldHandling.APPEND_NUMBERS;

    FieldRenamerProcessor processor =
        new FieldRenamerProcessor(ImmutableList.of(renameConfig), errorHandler);

    ProcessorRunner runner =
        new ProcessorRunner.Builder(FieldRenamerDProcessor.class, processor)
            .setOnRecordError(OnRecordError.TO_ERROR)
            .addOutputLane("a")
            .build();
    runner.runInit();

    try {
      // Insertion order matters: it determines which numeric suffix each renamed field receives.
      Map<String, Field> map = new LinkedHashMap<>();
      map.put("a#b", Field.create(Field.Type.STRING, "foo1"));
      map.put("a_b", Field.create(Field.Type.STRING, "foo2"));
      map.put("a&b", Field.create(Field.Type.STRING, "foo3"));
      map.put("a|b", Field.create(Field.Type.STRING, "foo4"));
      map.put("a@b", Field.create(Field.Type.STRING, "foo5"));

      Record record = RecordCreator.create("s", "s:1");
      record.set(Field.create(Field.Type.MAP, map));

      StageRunner.Output output = runner.runProcess(ImmutableList.of(record));

      Assert.assertEquals(1, output.getRecords().get("a").size());
      Record r = output.getRecords().get("a").get(0);
      // None of the four source fields may survive the rename.
      Assert.assertFalse(r.has("/'a#b'"));
      Assert.assertFalse(r.has("/'a&b'"));
      Assert.assertFalse(r.has("/'a|b'"));
      Assert.assertFalse(r.has("/'a@b'"));

      // The pre-existing target keeps its name and value.
      Assert.assertTrue(r.has("/a_b"));
      Assert.assertEquals("foo2", r.get("/a_b").getValueAsString());

      Assert.assertTrue(r.has("/a_b1"));
      Assert.assertEquals("foo1", r.get("/a_b1").getValueAsString());

      Assert.assertTrue(r.has("/a_b2"));
      Assert.assertEquals("foo3", r.get("/a_b2").getValueAsString());

      Assert.assertTrue(r.has("/a_b3"));
      Assert.assertEquals("foo4", r.get("/a_b3").getValueAsString());

      Assert.assertTrue(r.has("/a_b4"));
      Assert.assertEquals("foo5", r.get("/a_b4").getValueAsString());
    } finally {
      runner.runDestroy();
    }
  }
  /**
   * Removes every street name under {@code /USA[*]/SanFrancisco} with a wildcard path and checks
   * that names outside that subtree are left in place.
   */
  @Test
  public void testWildCardRemove1() throws StageException {

    Record record = createNestedRecord();

    ProcessorRunner runner =
        new ProcessorRunner.Builder(FieldFilterDProcessor.class)
            .addConfiguration(
                "fields", ImmutableList.of("/USA[*]/SanFrancisco/*/streets[*][*]/name"))
            .addConfiguration("filterOperation", FilterOperation.REMOVE)
            .addOutputLane("a")
            .build();
    runner.runInit();

    try {
      StageRunner.Output output = runner.runProcess(ImmutableList.of(record));
      Assert.assertEquals(1, output.getRecords().get("a").size());

      Record resultRecord = output.getRecords().get("a").get(0);
      Assert.assertFalse(resultRecord.has("/USA[0]/SanFrancisco/noe/streets[0][0]/name"));
      Assert.assertFalse(resultRecord.has("/USA[0]/SanFrancisco/noe/streets[0][1]/name"));
      Assert.assertFalse(resultRecord.has("/USA[0]/SanFrancisco/noe/streets[1][0]/name"));
      Assert.assertFalse(resultRecord.has("/USA[0]/SanFrancisco/noe/streets[1][1]/name"));

      // The SantaMonica branch does not match the configured path and must be untouched.
      // Expected value goes first so that failure messages read correctly.
      Assert.assertEquals(
          "e", resultRecord.get("/USA[1]/SantaMonica/cole/streets[0][0]/name").getValueAsString());
      Assert.assertEquals(
          "f", resultRecord.get("/USA[1]/SantaMonica/cole/streets[0][1]/name").getValueAsString());
    } finally {
      runner.runDestroy();
    }
  }
 /**
  * Asserts that two record lists have the same size and then compares the root fields of the
  * records position by position using {@code checkField}.
  *
  * @param expectedRecords records holding the expected root fields
  * @param actualRecords records holding the root fields under test
  * @throws Exception declared by the signature; raised by the comparison helper, if at all
  */
 static void checkRecords(List<Record> expectedRecords, List<Record> actualRecords)
     throws Exception {
   Assert.assertEquals(
       "Record Size Does not match.", expectedRecords.size(), actualRecords.size());
   int position = 0;
   for (Record actual : actualRecords) {
     Record expected = expectedRecords.get(position);
     position++;
     // The empty string is the path of the root field.
     checkField("", expected.get(), actual.get());
   }
 }
  /**
   * Keeps a list by giving only the path that leads up to it and checks that the list is still
   * present in the output with both of its entries.
   */
  @Test
  public void testNestedKeep5() throws StageException {
    Record input = createNestedRecord();

    ProcessorRunner.Builder builder = new ProcessorRunner.Builder(FieldFilterDProcessor.class);
    builder.addConfiguration("fields", ImmutableList.of("/USA[0]/SanFrancisco/folsom/streets[0]"));
    builder.addConfiguration("filterOperation", FilterOperation.KEEP);
    builder.addOutputLane("a");
    ProcessorRunner runner = builder.build();
    runner.runInit();

    try {
      StageRunner.Output output = runner.runProcess(ImmutableList.of(input));
      Assert.assertEquals(1, output.getRecords().get("a").size());

      Record resultRecord = output.getRecords().get("a").get(0);
      Assert.assertTrue(resultRecord.has("/USA[0]/SanFrancisco/folsom/streets[0]"));
      // The kept list is expected to still carry both of its original entries.
      int keptEntries =
          resultRecord.get("/USA[0]/SanFrancisco/folsom/streets[0]").getValueAsList().size();
      Assert.assertEquals(2, keptEntries);
    } finally {
      runner.runDestroy();
    }
  }
  /**
   * Keeps a single name that sits under the second element of the root list and checks that, after
   * filtering, it is reachable as the first element at every list level.
   */
  @Test
  public void testNestedKeep3() throws StageException {
    Record input = createNestedRecord();

    ProcessorRunner.Builder builder = new ProcessorRunner.Builder(FieldFilterDProcessor.class);
    builder.addConfiguration(
        "fields", ImmutableList.of("/USA[1]/SantaMonica/cole/streets[0][1]/name"));
    builder.addConfiguration("filterOperation", FilterOperation.KEEP);
    builder.addOutputLane("a");
    ProcessorRunner runner = builder.build();
    runner.runInit();

    try {
      StageRunner.Output output = runner.runProcess(ImmutableList.of(input));
      Assert.assertEquals(1, output.getRecords().get("a").size());

      Record resultRecord = output.getRecords().get("a").get(0);

      // The surviving entry has moved to index 0 of both the root list and the inner list.
      Assert.assertTrue(resultRecord.has("/USA[0]/SantaMonica/cole/streets[0][0]/name"));
      String keptName =
          resultRecord.get("/USA[0]/SantaMonica/cole/streets[0][0]/name").getValueAsString();
      Assert.assertEquals("f", keptName);

      // Nothing else may remain: no second inner entry, no SanFrancisco, no second root entry.
      Assert.assertFalse(resultRecord.has("/USA[0]/SantaMonica/cole/streets[0][1]/name"));
      Assert.assertFalse(resultRecord.has("/USA[0]/SanFrancisco"));
      Assert.assertFalse(resultRecord.has("/USA[1]"));
    } finally {
      runner.runDestroy();
    }
  }
// Example no. 7
// 0
 /**
  * Prints the root field of a record as one CSV record and returns the resulting text.
  *
  * @param r record whose root field is converted with {@code CsvUtil.fieldToCsv}
  * @param csvFormat format handed to the {@link CSVPrinter}
  * @return everything the printer wrote
  * @throws IOException if printing, flushing or closing the printer fails
  */
 public static String csvRecordToString(Record r, CSVFormat csvFormat) throws IOException {
   StringWriter stringWriter = new StringWriter();
   // try-with-resources closes the printer even when printRecord or flush throws.
   try (CSVPrinter csvPrinter = new CSVPrinter(stringWriter, csvFormat)) {
     csvPrinter.printRecord(CsvUtil.fieldToCsv(r.get()));
     csvPrinter.flush();
   }
   return stringWriter.toString();
 }
 /**
  * Converts a whole record into a protobuf message described by {@code desc}.
  *
  * <p>This is a convenience entry point: it forwards to the overload that also takes a field and
  * a field path, passing the record's root field and the empty path.
  *
  * @param record record to serialize
  * @param desc protobuf descriptor of the target message
  * @param messageTypeToExtensionMap protobuf extension map
  * @param defaultValueMap protobuf default field values
  * @return the serialized message
  * @throws DataGeneratorException propagated from the delegated conversion
  */
 public static DynamicMessage sdcFieldToProtobufMsg(
     Record record,
     Descriptors.Descriptor desc,
     Map<String, Set<Descriptors.FieldDescriptor>> messageTypeToExtensionMap,
     Map<String, Object> defaultValueMap)
     throws DataGeneratorException {
   // Conversion starts at the root field, whose path is the empty string.
   final String rootFieldPath = "";
   DynamicMessage message =
       sdcFieldToProtobufMsg(
           record, record.get(), rootFieldPath, desc, messageTypeToExtensionMap, defaultValueMap);
   return message;
 }
  /**
   * Renames {@code /field} to {@code /col} on a record that already has {@code /col} and checks
   * that, with {@code APPEND_NUMBERS}, the existing field is kept and the renamed one lands in
   * {@code /col1}.
   */
  @Test
  public void testTargetFieldExistsAppendNumbers() throws StageException {
    FieldRenamerConfig renameConfig = new FieldRenamerConfig();
    renameConfig.fromFieldExpression = "/field";
    renameConfig.toFieldExpression = "/col";

    FieldRenamerProcessorErrorHandler errorHandler = new FieldRenamerProcessorErrorHandler();
    errorHandler.nonExistingFromFieldHandling = OnStagePreConditionFailure.CONTINUE;
    errorHandler.multipleFromFieldsMatching = OnStagePreConditionFailure.TO_ERROR;
    errorHandler.existingToFieldHandling = ExistingToFieldHandling.APPEND_NUMBERS;

    FieldRenamerProcessor processor =
        new FieldRenamerProcessor(ImmutableList.of(renameConfig), errorHandler);

    ProcessorRunner.Builder builder =
        new ProcessorRunner.Builder(FieldRenamerDProcessor.class, processor);
    builder.addOutputLane("a");
    ProcessorRunner runner = builder.build();
    runner.runInit();

    try {
      // Both the source field and the target field are present in the input.
      Map<String, Field> rootFields = new LinkedHashMap<>();
      rootFields.put("field", Field.create(Field.Type.STRING, "field"));
      rootFields.put("col", Field.create(Field.Type.STRING, "col"));

      Record input = RecordCreator.create("s", "s:1");
      input.set(Field.create(Field.Type.MAP, rootFields));

      StageRunner.Output output = runner.runProcess(ImmutableList.of(input));

      Assert.assertEquals(1, output.getRecords().get("a").size());
      Record renamed = output.getRecords().get("a").get(0);
      Assert.assertFalse(renamed.has("/field"));
      Assert.assertTrue(renamed.has("/col"));
      Assert.assertTrue(renamed.has("/col1"));
      Assert.assertEquals("col", renamed.get("/col").getValueAsString());
      Assert.assertEquals("field", renamed.get("/col1").getValueAsString());
    } finally {
      runner.runDestroy();
    }
  }
  /**
   * Keeps two unrelated leaf paths of a deeply nested record and checks where the survivors end up
   * and that every other street name is gone.
   */
  @Test
  public void testNestedKeep2() throws StageException {
    Record input = createNestedRecord();

    ProcessorRunner.Builder builder = new ProcessorRunner.Builder(FieldFilterDProcessor.class);
    builder.addConfiguration(
        "fields",
        ImmutableList.of(
            "/USA[0]/SanFrancisco/noe/streets[1][1]/name",
            "/USA[1]/SantaMonica/cole/streets[0][1]/name"));
    builder.addConfiguration("filterOperation", FilterOperation.KEEP);
    builder.addOutputLane("a");
    ProcessorRunner runner = builder.build();
    runner.runInit();

    try {
      StageRunner.Output output = runner.runProcess(ImmutableList.of(input));
      Assert.assertEquals(1, output.getRecords().get("a").size());

      Record resultRecord = output.getRecords().get("a").get(0);

      // Both survivors are expected at index [0][0] of their respective street lists.
      Assert.assertTrue(resultRecord.has("/USA[0]/SanFrancisco/noe/streets[0][0]/name"));
      String keptNoeName =
          resultRecord.get("/USA[0]/SanFrancisco/noe/streets[0][0]/name").getValueAsString();
      Assert.assertEquals("d", keptNoeName);

      Assert.assertTrue(resultRecord.has("/USA[1]/SantaMonica/cole/streets[0][0]/name"));
      String keptColeName =
          resultRecord.get("/USA[1]/SantaMonica/cole/streets[0][0]/name").getValueAsString();
      Assert.assertEquals("f", keptColeName);

      Assert.assertFalse(resultRecord.has("/USA[1]/SantaMonica/cole/streets[0][1]/name"));
      Assert.assertFalse(resultRecord.has("/USA[0]/SanFrancisco/folsom/streets[0][0]/name"));
      Assert.assertFalse(resultRecord.has("/USA[0]/SanFrancisco/folsom/streets[0][1]/name"));
      Assert.assertFalse(resultRecord.has("/USA[0]/SanFrancisco/noe/streets[0][1]/name"));
      Assert.assertFalse(resultRecord.has("/USA[0]/SanFrancisco/noe/streets[1][0]/name"));
      Assert.assertFalse(resultRecord.has("/USA[0]/SanFrancisco/noe/streets[1][1]/name"));
    } finally {
      runner.runDestroy();
    }
  }
  /**
   * Keeps list entries selected with a {@code [*]} wildcard, first on the inner list and then on
   * the outer list, and checks the names that remain in the processor output.
   */
  @Test
  public void testWildCardKeep2() throws StageException {
    /*
     * Use wild card in array within array
     */
    Record record = createNestedRecord();
    ProcessorRunner runner =
        new ProcessorRunner.Builder(FieldFilterDProcessor.class)
            .addConfiguration("fields", ImmutableList.of("/USA[0]/SanFrancisco/noe/streets[0][*]"))
            .addConfiguration("filterOperation", FilterOperation.KEEP)
            .addOutputLane("a")
            .build();
    runner.runInit();

    try {
      StageRunner.Output output = runner.runProcess(ImmutableList.of(record));
      Assert.assertEquals(1, output.getRecords().get("a").size());

      Record resultRecord = output.getRecords().get("a").get(0);
      Assert.assertTrue(resultRecord.has("/USA[0]/SanFrancisco/noe/streets[0]"));
      // Assert on the processor output; the input record is already verified when it is built.
      Assert.assertEquals(
          "a",
          resultRecord.get("/USA[0]/SanFrancisco/noe/streets[0][0]/name").getValueAsString());
      Assert.assertEquals(
          "b",
          resultRecord.get("/USA[0]/SanFrancisco/noe/streets[0][1]/name").getValueAsString());

    } finally {
      runner.runDestroy();
    }

    /*
     * Use wild card in array
     */
    record = createNestedRecord();
    runner =
        new ProcessorRunner.Builder(FieldFilterDProcessor.class)
            .addConfiguration("fields", ImmutableList.of("/USA[0]/SanFrancisco/noe/streets[*]"))
            .addConfiguration("filterOperation", FilterOperation.KEEP)
            .addOutputLane("a")
            .build();
    runner.runInit();

    try {
      StageRunner.Output output = runner.runProcess(ImmutableList.of(record));
      Assert.assertEquals(1, output.getRecords().get("a").size());

      Record resultRecord = output.getRecords().get("a").get(0);
      Assert.assertTrue(resultRecord.has("/USA[0]/SanFrancisco/noe/streets"));
      Assert.assertEquals(
          "a",
          resultRecord.get("/USA[0]/SanFrancisco/noe/streets[0][0]/name").getValueAsString());
      Assert.assertEquals(
          "b",
          resultRecord.get("/USA[0]/SanFrancisco/noe/streets[0][1]/name").getValueAsString());
      Assert.assertEquals(
          "c",
          resultRecord.get("/USA[0]/SanFrancisco/noe/streets[1][0]/name").getValueAsString());
      Assert.assertEquals(
          "d",
          resultRecord.get("/USA[0]/SanFrancisco/noe/streets[1][1]/name").getValueAsString());

    } finally {
      runner.runDestroy();
    }
  }
  /**
   * Renames field names containing {@code #} at the start, in the middle and at the end, replacing
   * the character with the word {@code hash}. Each input value spells the name its field is
   * expected to have after the rename, so the check is simply key equals value.
   */
  @Test
  public void testDifferentMatchesRegex() throws StageException {
    // One single-field record per position of '#'.
    Map<String, Field> leadingHash = new LinkedHashMap<>();
    leadingHash.put("#abcd", Field.create("hashabcd"));
    Record record1 = RecordCreator.create("s", "s:1");
    record1.set(Field.create(Field.Type.MAP, leadingHash));

    Map<String, Field> middleHash = new LinkedHashMap<>();
    middleHash.put("ab#cd", Field.create("abhashcd"));
    Record record2 = RecordCreator.create("s", "s:2");
    record2.set(Field.create(Field.Type.MAP, middleHash));

    Map<String, Field> trailingHash = new LinkedHashMap<>();
    trailingHash.put("abcd#", Field.create("abcdhash"));
    Record record3 = RecordCreator.create("s", "s:3");
    record3.set(Field.create(Field.Type.MAP, trailingHash));

    FieldRenamerConfig renameConfig = new FieldRenamerConfig();
    // Field names with non-word characters are wrapped in single quotes in the expression.
    renameConfig.fromFieldExpression = "/'(.*)(#)(.*)'";
    renameConfig.toFieldExpression = "/$1hash$3";

    FieldRenamerProcessorErrorHandler errorHandler = new FieldRenamerProcessorErrorHandler();
    errorHandler.nonExistingFromFieldHandling = OnStagePreConditionFailure.TO_ERROR;
    errorHandler.multipleFromFieldsMatching = OnStagePreConditionFailure.TO_ERROR;
    errorHandler.existingToFieldHandling = ExistingToFieldHandling.TO_ERROR;

    FieldRenamerProcessor processor =
        new FieldRenamerProcessor(ImmutableList.of(renameConfig), errorHandler);

    ProcessorRunner.Builder builder =
        new ProcessorRunner.Builder(FieldRenamerDProcessor.class, processor);
    builder.setOnRecordError(OnRecordError.STOP_PIPELINE);
    builder.addOutputLane("a");
    ProcessorRunner runner = builder.build();
    runner.runInit();

    try {
      StageRunner.Output output = runner.runProcess(ImmutableList.of(record1, record2, record3));
      Assert.assertEquals(3, output.getRecords().get("a").size());
      for (Record renamed : output.getRecords().get("a")) {
        Map<String, Field> renamedFields = renamed.get().getValueAsMap();
        for (Map.Entry<String, Field> entry : renamedFields.entrySet()) {
          Assert.assertEquals(entry.getKey(), entry.getValue().getValueAsString());
        }
      }
    } finally {
      runner.runDestroy();
    }
  }
// Example no. 13
// 0
  /**
   * Runs one record through the JDBC lookup processor and checks that the looked-up {@code P_ID}
   * column is written to {@code /p_id} with the value 1.
   */
  @Test
  public void testSingleRecordMap() throws Exception {
    List<JdbcFieldColumnMapping> columnMappings =
        ImmutableList.of(new JdbcFieldColumnMapping("P_ID", "/p_id"));

    JdbcLookupDProcessor processor = new JdbcLookupDProcessor();
    processor.hikariConfigBean = createConfigBean(h2ConnectionString, username, password);

    ProcessorRunner processorRunner =
        new ProcessorRunner.Builder(JdbcLookupDProcessor.class, processor)
            .addConfiguration("query", mapQuery)
            .addConfiguration("columnMappings", columnMappings)
            .addConfiguration("maxClobSize", 1000)
            .addConfiguration("maxBlobSize", 1000)
            .addOutputLane("lane")
            .build();

    Record record = RecordCreator.create();
    // Declared type intentionally left as LinkedHashMap so the same Field.create overload is used.
    LinkedHashMap<String, Field> fields = new LinkedHashMap<>();
    fields.put("first_name", Field.create("Adam"));
    fields.put("last_name", Field.create("Kunicki"));
    record.set(Field.create(fields));

    List<Record> singleRecord = ImmutableList.of(record);
    processorRunner.runInit();
    try {
      StageRunner.Output output = processorRunner.runProcess(singleRecord);
      Assert.assertEquals(1, output.getRecords().get("lane").size());

      record = output.getRecords().get("lane").get(0);

      // assertNotNull states the intent directly and yields a clearer failure message.
      Assert.assertNotNull(record.get("/p_id"));
      Assert.assertEquals(1, record.get("/p_id").getValueAsInteger());
    } finally {
      processorRunner.runDestroy();
    }
  }
 /**
  * Inserts one row per record by filling {@code insertTemplate} with the string form of each
  * non-root field value and executing all statements as a single batch.
  *
  * @param insertTemplate {@link String#format} template of the insert statement
  * @param records records whose field values fill the template, in escaped-field-path order
  * @throws SQLException if creating the statement or adding/executing the batch fails
  */
 protected static void insertRows(String insertTemplate, List<Record> records)
     throws SQLException {
   try (Statement batchStatement = connection.createStatement()) {
     for (Record row : records) {
       List<String> columnValues = new ArrayList<>();
       for (String path : row.getEscapedFieldPaths()) {
         if (path.equals("")) {
           // The empty path is the root field; it is not a column value.
           continue;
         }
         columnValues.add(getStringRepOfFieldValueForInsert(row.get(path)));
       }
       String insertSql = String.format(insertTemplate, columnValues.toArray());
       batchStatement.addBatch(insertSql);
     }
     batchStatement.executeBatch();
   }
 }
 /**
  * Splits the string value at {@code fieldPath} on {@code separatorStr} into at most
  * {@code fieldPaths.length} pieces and writes each piece to the matching target path.
  *
  * <p>A missing or null source field ({@code SPLITTER_01}) or too few pieces
  * ({@code SPLITTER_02}) is a precondition failure: unless the stage is configured with
  * {@code OnStagePreConditionFailure.CONTINUE}, the record is rejected. When processing continues,
  * targets without a piece are set to a null STRING field.
  *
  * @param record record to split; modified in place
  * @param batchMaker receives the record once the targets have been written
  * @throws StageException as {@code OnRecordErrorException} when the source value cannot be read
  *     as a string, a target cannot be set, or a precondition fails without {@code CONTINUE}
  */
 @Override
 protected void process(Record record, SingleLaneBatchMaker batchMaker) throws StageException {
   Field field = record.get(fieldPath);
   String[] splits = null;
   ErrorCode error = null;
   if (field != null && field.getValue() != null) {
     String str;
     try {
       str = field.getValueAsString();
     } catch (IllegalArgumentException e) {
       throw new OnRecordErrorException(Errors.SPLITTER_04, fieldPath, field.getType().name());
     }
     splits = str.split(separatorStr, fieldPaths.length);
     if (splits.length < fieldPaths.length) {
       error = Errors.SPLITTER_02;
     }
   } else {
     error = Errors.SPLITTER_01;
   }

   // Reject the record on a precondition failure unless the stage is told to continue.
   if (error != null && onStagePreConditionFailure != OnStagePreConditionFailure.CONTINUE) {
     throw new OnRecordErrorException(error, record.getHeader().getSourceId(), fieldPath);
   }

   for (int i = 0; i < fieldPaths.length; i++) {
     try {
       boolean hasPiece = splits != null && i < splits.length;
       Field target = hasPiece ? Field.create(splits[i]) : Field.create(Field.Type.STRING, null);
       record.set(fieldPaths[i], target);
     } catch (IllegalArgumentException e) {
       throw new OnRecordErrorException(
           Errors.SPLITTER_05, fieldPath, record.getHeader().getSourceId(), e.toString());
     }
   }
   if (removeUnsplitValue) {
     record.delete(fieldPath);
   }
   batchMaker.addRecord(record);
 }
// Example no. 16
// 0
 /**
  * Parses a Kafka message payload into records.
  *
  * <p>Parser failures are passed to {@code handleException}; any records parsed before the
  * failure are kept. When {@code produceSingleRecordPerMessage} is set and at least one record
  * was parsed, the root fields of all parsed records are collected into a list that becomes the
  * root field of the first record, and only that record is returned.
  *
  * @param messageId identifier handed to the parser factory and to the exception handler
  * @param payload raw message bytes
  * @return the parsed records; empty when nothing could be parsed
  * @throws StageException if {@code handleException} decides to propagate a parser failure
  */
 protected List<Record> processKafkaMessage(String messageId, byte[] payload)
     throws StageException {
   List<Record> records = new ArrayList<>();
   try (DataParser parser = parserFactory.getParser(messageId, payload)) {
     Record record = parser.parse();
     while (record != null) {
       records.add(record);
       record = parser.parse();
     }
   } catch (IOException | DataParserException ex) {
     handleException(messageId, ex);
   }
   // Guard against an empty result: records.get(0) would otherwise throw
   // IndexOutOfBoundsException for an empty or unparsable message.
   if (produceSingleRecordPerMessage && !records.isEmpty()) {
     List<Field> list = new ArrayList<>();
     for (Record record : records) {
       list.add(record.get());
     }
     Record record = records.get(0);
     record.set(Field.create(list));
     records.clear();
     records.add(record);
   }
   return records;
 }
 /**
  * Parses a message into records.
  *
  * <p>For {@code DataFormat.AVRO} the message is cast to {@code byte[]} and at most one record is
  * read; for every other format the message is converted with {@code String.valueOf} and the
  * parser is drained. Parser failures are logged at debug level and passed to
  * {@code handleException}. When {@code produceSingleRecordPerMessage} is set and at least one
  * record was parsed, the root fields of all parsed records are collected into a list that becomes
  * the root field of the first record, and only that record is returned.
  *
  * @param messageId identifier handed to the parser factory and to the exception handler
  * @param message message body; must be a {@code byte[]} when the data format is AVRO
  * @return the parsed records; empty when nothing could be parsed
  * @throws StageException if {@code handleException} decides to propagate a parser failure
  */
 protected List<Record> processMessage(String messageId, Object message) throws StageException {
   List<Record> records = new ArrayList<>();
   if (dataFormat == DataFormat.AVRO) {
     try (DataParser parser = parserFactory.getParser(messageId, (byte[]) message)) {
       Record record = parser.parse();
       if (record != null) {
         records.add(record);
       }
     } catch (IOException | DataParserException ex) {
       LOG.debug("Got exception: '{}'", ex, ex);
       handleException(messageId, ex);
     }
   } else {
     try (DataParser parser = parserFactory.getParser(messageId, String.valueOf(message))) {
       Record record = parser.parse();
       while (record != null) {
         records.add(record);
         record = parser.parse();
       }
     } catch (IOException | DataParserException ex) {
       LOG.debug("Got exception: '{}'", ex, ex);
       handleException(messageId, ex);
     }
   }
   // Guard against an empty result: records.get(0) would otherwise throw
   // IndexOutOfBoundsException for an empty or unparsable message.
   if (produceSingleRecordPerMessage && !records.isEmpty()) {
     List<Field> list = new ArrayList<>();
     for (Record record : records) {
       list.add(record.get());
     }
     Record record = records.get(0);
     record.set(Field.create(list));
     records.clear();
     records.add(record);
   }
   return records;
 }
  /**
   * Asks to keep paths under the non-existing {@code /USA[0]/SanFrancisco/cole}. Only the objects
   * up to {@code /USA[0]/SanFrancisco} are expected to remain, with that map left empty.
   */
  @Test
  public void testNestedKeep4() throws StageException {
    Record input = createNestedRecord();

    ProcessorRunner.Builder builder = new ProcessorRunner.Builder(FieldFilterDProcessor.class);
    builder.addConfiguration(
        "fields",
        ImmutableList.of(
            "/USA[0]/SanFrancisco/cole/streets[0][0]/name",
            "/USA[0]/SanFrancisco/cole/streets[1][0]/name"));
    builder.addConfiguration("filterOperation", FilterOperation.KEEP);
    builder.addOutputLane("a");
    ProcessorRunner runner = builder.build();
    runner.runInit();

    try {
      StageRunner.Output output = runner.runProcess(ImmutableList.of(input));
      Assert.assertEquals(1, output.getRecords().get("a").size());

      Record resultRecord = output.getRecords().get("a").get(0);
      Assert.assertTrue(resultRecord.has("/USA[0]/SanFrancisco"));
      int remainingEntries = resultRecord.get("/USA[0]/SanFrancisco").getValueAsMap().size();
      Assert.assertEquals(0, remainingEntries);

      Assert.assertFalse(resultRecord.has("/USA[1]/SantaMonica/cole/streets[0][0]/name"));
      Assert.assertFalse(resultRecord.has("/USA[1]/SantaMonica/cole/streets[0][1]/name"));
      Assert.assertFalse(resultRecord.has("/USA[0]/SanFrancisco/folsom/streets[0][0]/name"));
      Assert.assertFalse(resultRecord.has("/USA[0]/SanFrancisco/folsom/streets[0][1]/name"));
    } finally {
      runner.runDestroy();
    }
  }
  /**
   * Keeps one element of each inner list of {@code /USA[0]/SanFrancisco/noe/streets}: first the
   * second elements, then, on a fresh record, the first elements. In both runs the kept element is
   * expected at index 0 of its inner list and all other street names must be gone.
   */
  @Test
  public void testNestedKeep1() throws StageException {
    // Scenario 1: keep only the second element of each inner list.
    Record input = createNestedRecord();

    ProcessorRunner.Builder keepSecondBuilder =
        new ProcessorRunner.Builder(FieldFilterDProcessor.class);
    keepSecondBuilder.addConfiguration(
        "fields",
        ImmutableList.of(
            "/USA[0]/SanFrancisco/noe/streets[0][1]/name",
            "/USA[0]/SanFrancisco/noe/streets[1][1]/name"));
    keepSecondBuilder.addConfiguration("filterOperation", FilterOperation.KEEP);
    keepSecondBuilder.addOutputLane("a");
    ProcessorRunner runner = keepSecondBuilder.build();
    runner.runInit();

    try {
      StageRunner.Output output = runner.runProcess(ImmutableList.of(input));
      Assert.assertEquals(1, output.getRecords().get("a").size());

      Record resultRecord = output.getRecords().get("a").get(0);
      // With the first element removed, the former second element now sits at index 0.
      Assert.assertTrue(resultRecord.has("/USA[0]/SanFrancisco/noe/streets[0][0]/name"));
      String firstListName =
          resultRecord.get("/USA[0]/SanFrancisco/noe/streets[0][0]/name").getValueAsString();
      Assert.assertEquals("b", firstListName);

      // The same shift applies to the second inner list.
      Assert.assertTrue(resultRecord.has("/USA[0]/SanFrancisco/noe/streets[1][0]/name"));
      String secondListName =
          resultRecord.get("/USA[0]/SanFrancisco/noe/streets[1][0]/name").getValueAsString();
      Assert.assertEquals("d", secondListName);

      Assert.assertFalse(resultRecord.has("/USA[0]/SanFrancisco/noe/streets[0][1]/name"));
      Assert.assertFalse(resultRecord.has("/USA[0]/SanFrancisco/noe/streets[1][1]/name"));
      Assert.assertFalse(resultRecord.has("/USA[1]/SantaMonica/cole/streets[0][0]/name"));
      Assert.assertFalse(resultRecord.has("/USA[1]/SantaMonica/cole/streets[0][1]/name"));
      Assert.assertFalse(resultRecord.has("/USA[0]/SanFrancisco/folsom/streets[0][0]/name"));
      Assert.assertFalse(resultRecord.has("/USA[0]/SanFrancisco/folsom/streets[0][1]/name"));
    } finally {
      runner.runDestroy();
    }

    // Scenario 2: keep only the first element of each inner list, starting from a fresh record.
    input = createNestedRecord();

    ProcessorRunner.Builder keepFirstBuilder =
        new ProcessorRunner.Builder(FieldFilterDProcessor.class);
    keepFirstBuilder.addConfiguration(
        "fields",
        ImmutableList.of(
            "/USA[0]/SanFrancisco/noe/streets[0][0]/name",
            "/USA[0]/SanFrancisco/noe/streets[1][0]/name"));
    keepFirstBuilder.addConfiguration("filterOperation", FilterOperation.KEEP);
    keepFirstBuilder.addOutputLane("a");
    runner = keepFirstBuilder.build();
    runner.runInit();

    try {
      StageRunner.Output output = runner.runProcess(ImmutableList.of(input));
      Assert.assertEquals(1, output.getRecords().get("a").size());

      Record resultRecord = output.getRecords().get("a").get(0);

      Assert.assertTrue(resultRecord.has("/USA[0]/SanFrancisco/noe/streets[0][0]/name"));
      String firstListName =
          resultRecord.get("/USA[0]/SanFrancisco/noe/streets[0][0]/name").getValueAsString();
      Assert.assertEquals("a", firstListName);

      Assert.assertTrue(resultRecord.has("/USA[0]/SanFrancisco/noe/streets[1][0]/name"));
      String secondListName =
          resultRecord.get("/USA[0]/SanFrancisco/noe/streets[1][0]/name").getValueAsString();
      Assert.assertEquals("c", secondListName);

      Assert.assertFalse(resultRecord.has("/USA[0]/SanFrancisco/noe/streets[0][1]/name"));
      Assert.assertFalse(resultRecord.has("/USA[0]/SanFrancisco/noe/streets[1][1]/name"));
      Assert.assertFalse(resultRecord.has("/USA[1]/SantaMonica/cole/streets[0][0]/name"));
      Assert.assertFalse(resultRecord.has("/USA[1]/SantaMonica/cole/streets[0][1]/name"));
      Assert.assertFalse(resultRecord.has("/USA[0]/SanFrancisco/folsom/streets[0][0]/name"));
      Assert.assertFalse(resultRecord.has("/USA[0]/SanFrancisco/folsom/streets[0][1]/name"));
    } finally {
      runner.runDestroy();
    }
  }
  /**
   * Builds the deeply nested fixture record shared by the filter tests and sanity-checks it.
   *
   * <p>Layout (street names in parentheses):
   *
   * <pre>
   * /USA[0]/SanFrancisco/noe/streets    = [[a, b], [c, d]]
   * /USA[0]/SanFrancisco/folsom/streets = [[g, h]]
   * /USA[1]/SantaMonica/cole/streets    = [[e, f]]
   * </pre>
   *
   * Every leaf is a map with a single {@code name} entry.
   *
   * @return a new record with source id {@code s:1} holding the structure above
   */
  private Record createNestedRecord() {
    Field first = streetNameList("a", "b");
    Field second = streetNameList("c", "d");
    Field third = streetNameList("e", "f");
    Field fourth = streetNameList("g", "h");

    Map<String, Field> noe = new HashMap<>();
    noe.put("streets", Field.create(ImmutableList.of(first, second)));

    Map<String, Field> folsom = new HashMap<>();
    folsom.put("streets", Field.create(ImmutableList.of(fourth)));

    Map<String, Field> cole = new HashMap<>();
    cole.put("streets", Field.create(ImmutableList.of(third)));

    Map<String, Field> sanFranciscoArea = new HashMap<>();
    sanFranciscoArea.put("noe", Field.create(noe));
    sanFranciscoArea.put("folsom", Field.create(folsom));

    Map<String, Field> santaMonicaArea = new HashMap<>();
    santaMonicaArea.put("cole", Field.create(cole));

    Map<String, Field> firstRootEntry = new HashMap<>();
    firstRootEntry.put("SanFrancisco", Field.create(sanFranciscoArea));

    Map<String, Field> secondRootEntry = new HashMap<>();
    secondRootEntry.put("SantaMonica", Field.create(santaMonicaArea));

    Map<String, Field> map = new LinkedHashMap<>();
    map.put(
        "USA",
        Field.create(
            Field.Type.LIST,
            ImmutableList.of(Field.create(firstRootEntry), Field.create(secondRootEntry))));

    Record record = RecordCreator.create("s", "s:1");
    record.set(Field.create(map));

    // Sanity-check the fixture. Expected value goes first so failure messages read correctly.
    Assert.assertEquals(
        "a", record.get("/USA[0]/SanFrancisco/noe/streets[0][0]/name").getValueAsString());
    Assert.assertEquals(
        "b", record.get("/USA[0]/SanFrancisco/noe/streets[0][1]/name").getValueAsString());
    Assert.assertEquals(
        "c", record.get("/USA[0]/SanFrancisco/noe/streets[1][0]/name").getValueAsString());
    Assert.assertEquals(
        "d", record.get("/USA[0]/SanFrancisco/noe/streets[1][1]/name").getValueAsString());
    Assert.assertEquals(
        "e", record.get("/USA[1]/SantaMonica/cole/streets[0][0]/name").getValueAsString());
    Assert.assertEquals(
        "f", record.get("/USA[1]/SantaMonica/cole/streets[0][1]/name").getValueAsString());
    Assert.assertEquals(
        "g", record.get("/USA[0]/SanFrancisco/folsom/streets[0][0]/name").getValueAsString());
    Assert.assertEquals(
        "h", record.get("/USA[0]/SanFrancisco/folsom/streets[0][1]/name").getValueAsString());

    return record;
  }

  /** Creates a LIST field of two maps, each holding one {@code name} entry with the given value. */
  private static Field streetNameList(String firstName, String secondName) {
    Map<String, Field> firstEntry = new HashMap<>();
    firstEntry.put("name", Field.create(firstName));
    Map<String, Field> secondEntry = new HashMap<>();
    secondEntry.put("name", Field.create(secondName));
    return Field.create(
        Field.Type.LIST, ImmutableList.of(Field.create(firstEntry), Field.create(secondEntry)));
  }
  /**
   * Exercises the KEEP filter operation with wildcard field paths against the nested record
   * built by {@code createNestedRecord()}.
   *
   * <p>Two scenarios are run, each on a fresh record and a fresh runner:
   *
   * <ol>
   *   <li>a wildcard in both a map segment and a list index
   *       ({@code /USA[0]/SanFrancisco/*}{@code /streets[*][1]/name});
   *   <li>a wildcard in a map segment only ({@code /USA[0]/SanFrancisco/*}), where everything
   *       beneath the matched entries must survive.
   * </ol>
   */
  @Test
  public void testWildCardKeep3() throws StageException {

    /*
     * Use wildcard in map and array
     */
    Record record = createNestedRecord();
    ProcessorRunner runner =
        new ProcessorRunner.Builder(FieldFilterDProcessor.class)
            .addConfiguration(
                "fields", ImmutableList.of("/USA[0]/SanFrancisco/*/streets[*][1]/name"))
            .addConfiguration("filterOperation", FilterOperation.KEEP)
            .addOutputLane("a")
            .build();
    runner.runInit();

    try {
      StageRunner.Output output = runner.runProcess(ImmutableList.of(record));
      Assert.assertEquals(1, output.getRecords().get("a").size());

      Record resultRecord = output.getRecords().get("a").get(0);
      // Only the element at index 1 of each inner list is kept, so the assertions below expect
      // the survivor ("b", "d") to be addressable at index 0 and nothing to remain at index 1.
      Assert.assertTrue(resultRecord.has("/USA[0]/SanFrancisco/noe/streets[0][0]/name"));
      Assert.assertEquals(
          "b", resultRecord.get("/USA[0]/SanFrancisco/noe/streets[0][0]/name").getValueAsString());
      Assert.assertFalse(resultRecord.has("/USA[0]/SanFrancisco/noe/streets[0][1]/name"));
      Assert.assertTrue(resultRecord.has("/USA[0]/SanFrancisco/noe/streets[1][0]/name"));
      Assert.assertEquals(
          "d", resultRecord.get("/USA[0]/SanFrancisco/noe/streets[1][0]/name").getValueAsString());
      Assert.assertFalse(resultRecord.has("/USA[0]/SanFrancisco/noe/streets[1][1]/name"));
      Assert.assertTrue(resultRecord.has("/USA[0]/SanFrancisco/folsom/streets[0][0]/name"));
      Assert.assertFalse(resultRecord.has("/USA[0]/SanFrancisco/folsom/streets[0][1]/name"));

      Assert.assertFalse(resultRecord.has("/USA[1]/SantaMonica/cole/streets[0][0]/name"));
      Assert.assertFalse(resultRecord.has("/USA[1]/SantaMonica/cole/streets[0][1]/name"));
    } finally {
      runner.runDestroy();
    }

    /*
     * Use wildcard in map. Make sure the entire tree of elements is preserved
     */
    record = createNestedRecord();
    runner =
        new ProcessorRunner.Builder(FieldFilterDProcessor.class)
            .addConfiguration("fields", ImmutableList.of("/USA[0]/SanFrancisco/*"))
            .addConfiguration("filterOperation", FilterOperation.KEEP)
            .addOutputLane("a")
            .build();
    runner.runInit();

    try {
      StageRunner.Output output = runner.runProcess(ImmutableList.of(record));
      Assert.assertEquals(1, output.getRecords().get("a").size());

      Record resultRecord = output.getRecords().get("a").get(0);
      Assert.assertTrue(resultRecord.has("/USA[0]/SanFrancisco/noe"));
      Assert.assertTrue(resultRecord.has("/USA[0]/SanFrancisco/folsom"));

      // Check the processor's output record (not the input record) so that the test actually
      // verifies that the subtree under each kept map entry survived the filter.
      Assert.assertEquals(
          "a", resultRecord.get("/USA[0]/SanFrancisco/noe/streets[0][0]/name").getValueAsString());
      Assert.assertEquals(
          "b", resultRecord.get("/USA[0]/SanFrancisco/noe/streets[0][1]/name").getValueAsString());
      Assert.assertEquals(
          "c", resultRecord.get("/USA[0]/SanFrancisco/noe/streets[1][0]/name").getValueAsString());
      Assert.assertEquals(
          "d", resultRecord.get("/USA[0]/SanFrancisco/noe/streets[1][1]/name").getValueAsString());
      Assert.assertEquals(
          "g",
          resultRecord.get("/USA[0]/SanFrancisco/folsom/streets[0][0]/name").getValueAsString());
      Assert.assertEquals(
          "h",
          resultRecord.get("/USA[0]/SanFrancisco/folsom/streets[0][1]/name").getValueAsString());

      Assert.assertFalse(resultRecord.has("/USA[1]/SantaMonica/cole/streets[0][0]/name"));
      Assert.assertFalse(resultRecord.has("/USA[1]/SantaMonica/cole/streets[0][1]/name"));
    } finally {
      runner.runDestroy();
    }
  }
  /**
   * Runs the cluster HDFS source in {@code DELIMITED} / {@code WITH_HEADER} mode over a batch
   * containing three CSV lines and checks that three records are produced, each a list of
   * cells carrying a {@code header} and a {@code value} entry.
   *
   * <p>The batch is fed by the thread returned from {@code createThreadForAddingBatch}; the
   * first entry (header text as key, {@code null} value) is presumably how the header line is
   * handed to the source - confirm against that helper.
   */
  @Test(timeout = 30000)
  public void testProduceDelimitedWithHeader() throws Exception {
    SourceRunner sourceRunner =
        new SourceRunner.Builder(ClusterHdfsDSource.class)
            .addOutputLane("lane")
            .setExecutionMode(ExecutionMode.CLUSTER_BATCH)
            .addConfiguration("hdfsUri", miniDFS.getURI().toString())
            .addConfiguration("hdfsDirLocations", Arrays.asList(dir.toUri().getPath()))
            .addConfiguration("recursive", false)
            .addConfiguration("hdfsConfigs", new HashMap<String, String>())
            .addConfiguration("dataFormat", DataFormat.DELIMITED)
            .addConfiguration("csvFileFormat", CsvMode.CSV)
            .addConfiguration("csvHeader", CsvHeader.WITH_HEADER)
            .addConfiguration("csvMaxObjectLen", 4096)
            .addConfiguration("csvRecordType", CsvRecordType.LIST)
            .addConfiguration("textMaxLineLen", 1024)
            .addConfiguration("produceSingleRecordPerMessage", false)
            .addConfiguration("regex", null)
            .addConfiguration("grokPatternDefinition", null)
            .addConfiguration("enableLog4jCustomLogFormat", false)
            .addConfiguration("customLogFormat", null)
            .addConfiguration("fieldPathsToGroupName", null)
            .addConfiguration("log4jCustomLogFormat", null)
            .addConfiguration("grokPattern", null)
            .addConfiguration("hdfsKerberos", false)
            .addConfiguration("hdfsConfDir", hadoopConfDir)
            .setResourcesDir(resourcesDir)
            .build();
    sourceRunner.runInit();

    List<Map.Entry> list = new ArrayList<>();
    list.add(new Pair("HEADER_COL_1,HEADER_COL_2", null));
    list.add(new Pair("path::" + "1", "a,b\nC,D\nc,d"));

    Thread th = createThreadForAddingBatch(sourceRunner, list);
    try {
      StageRunner.Output output = sourceRunner.runProduce(null, 5);

      String newOffset = output.getNewOffset();
      Assert.assertEquals("path::" + "1", newOffset);
      List<Record> records = output.getRecords().get("lane");
      Assert.assertEquals(3, records.size());

      assertDelimitedCell(records.get(0), 0, "HEADER_COL_1", "a");
      assertDelimitedCell(records.get(0), 1, "HEADER_COL_2", "b");

      assertDelimitedCell(records.get(1), 0, "HEADER_COL_1", "C");
      assertDelimitedCell(records.get(1), 1, "HEADER_COL_2", "D");

      assertDelimitedCell(records.get(2), 0, "HEADER_COL_1", "c");
      assertDelimitedCell(records.get(2), 1, "HEADER_COL_2", "d");
    } finally {
      // Destroy the stage even when an assertion above fails, and always stop the feeder
      // thread afterwards (same destroy-then-interrupt order as the successful path had).
      try {
        sourceRunner.runDestroy();
      } finally {
        th.interrupt();
      }
    }
  }

  /**
   * Asserts the {@code value} and {@code header} entries of one cell of a list-typed
   * delimited record.
   *
   * @param record record whose root field is a list of cells
   * @param column zero-based index of the cell within the root list
   * @param expectedHeader expected string under the cell's {@code header} key
   * @param expectedValue expected string under the cell's {@code value} key
   */
  private static void assertDelimitedCell(
      Record record, int column, String expectedHeader, String expectedValue) {
    Field cell = record.get().getValueAsList().get(column);
    Assert.assertEquals(expectedValue, cell.getValueAsMap().get("value").getValueAsString());
    Assert.assertEquals(expectedHeader, cell.getValueAsMap().get("header").getValueAsString());
  }
  /**
   * Runs the cluster HDFS source in {@code AVRO} mode over a batch of two payloads built by
   * {@code createAvroData} and checks the resulting offset and the {@code /name},
   * {@code /age} and {@code /emails} fields of both produced records.
   */
  @Test(timeout = 30000)
  public void testProduceAvroData() throws Exception {
    SourceRunner sourceRunner =
        new SourceRunner.Builder(ClusterHdfsDSource.class)
            .addOutputLane("lane")
            .setExecutionMode(ExecutionMode.CLUSTER_BATCH)
            .addConfiguration("hdfsUri", miniDFS.getURI().toString())
            .addConfiguration("hdfsDirLocations", Arrays.asList(dir.toUri().getPath()))
            .addConfiguration("recursive", false)
            .addConfiguration("hdfsConfigs", new HashMap<String, String>())
            .addConfiguration("dataFormat", DataFormat.AVRO)
            .addConfiguration("csvFileFormat", CsvMode.CSV)
            .addConfiguration("csvHeader", CsvHeader.WITH_HEADER)
            .addConfiguration("csvMaxObjectLen", 4096)
            .addConfiguration("textMaxLineLen", 1024)
            .addConfiguration("produceSingleRecordPerMessage", false)
            .addConfiguration("regex", null)
            .addConfiguration("grokPatternDefinition", null)
            .addConfiguration("enableLog4jCustomLogFormat", false)
            .addConfiguration("customLogFormat", null)
            .addConfiguration("fieldPathsToGroupName", null)
            .addConfiguration("log4jCustomLogFormat", null)
            .addConfiguration("grokPattern", null)
            .addConfiguration("hdfsKerberos", false)
            .addConfiguration("hdfsConfDir", hadoopConfDir)
            .setResourcesDir(resourcesDir)
            .build();
    sourceRunner.runInit();

    List<Map.Entry> list = new ArrayList<>();
    list.add(
        new Pair(
            "path::" + "1" + "::1",
            createAvroData("a", 30, ImmutableList.of("*****@*****.**", "*****@*****.**"))));
    list.add(
        new Pair(
            "path::" + "1" + "::2",
            createAvroData("b", 40, ImmutableList.of("*****@*****.**", "*****@*****.**"))));

    Thread th = createThreadForAddingBatch(sourceRunner, list);
    try {
      StageRunner.Output output = sourceRunner.runProduce(null, 5);
      String newOffset = output.getNewOffset();
      Assert.assertEquals("path::" + "1::2", newOffset);
      List<Record> records = output.getRecords().get("lane");
      Assert.assertEquals(2, records.size());

      assertAvroPersonRecord(records.get(0), "a", 30);
      assertAvroPersonRecord(records.get(1), "b", 40);
    } finally {
      // The runner was initialised above, so release it regardless of the test outcome, then
      // stop the feeder thread.
      try {
        sourceRunner.runDestroy();
      } finally {
        th.interrupt();
      }
    }
  }

  /**
   * Asserts that a record produced from the Avro test payload has the expected {@code /name}
   * and {@code /age} and a two-element {@code /emails} list holding the masked addresses used
   * by the test fixture.
   *
   * @param record record to inspect
   * @param expectedName expected string value of {@code /name}
   * @param expectedAge expected integer value of {@code /age}
   */
  private static void assertAvroPersonRecord(Record record, String expectedName, int expectedAge) {
    Assert.assertTrue(record.has("/name"));
    Assert.assertEquals(expectedName, record.get("/name").getValueAsString());
    Assert.assertTrue(record.has("/age"));
    Assert.assertEquals(expectedAge, record.get("/age").getValueAsInteger());
    Assert.assertTrue(record.has("/emails"));
    List<Field> emails = record.get("/emails").getValueAsList();
    // An instanceof check on a List-typed value can only fail for null; say so directly.
    Assert.assertNotNull(emails);
    Assert.assertEquals(2, emails.size());
    Assert.assertEquals("*****@*****.**", emails.get(0).getValueAsString());
    Assert.assertEquals("*****@*****.**", emails.get(1).getValueAsString());
  }