/**
 * Splits the string value stored at {@code fieldPath} around {@code separatorStr} into at most
 * {@code fieldPaths.length} pieces and writes piece <i>i</i> to {@code fieldPaths[i]}.
 *
 * <p>Pre-condition failures are recorded as an error code rather than thrown immediately:
 * {@code SPLITTER_01} when the source field is absent or holds a null value, and
 * {@code SPLITTER_02} when the split yields fewer pieces than there are target paths. If such a
 * failure exists and {@code onStagePreConditionFailure} is not {@code CONTINUE}, the record is
 * rejected. Otherwise every target path is still written; targets with no corresponding piece
 * receive a null-valued STRING field.
 *
 * @param record the record being processed; modified in place
 * @param batchMaker receives the record once all target fields have been written
 * @throws StageException as an {@code OnRecordErrorException} when the source value cannot be
 *     read as a string ({@code SPLITTER_04}), when writing a target field is rejected with an
 *     {@code IllegalArgumentException} ({@code SPLITTER_05}), or when a pre-condition failure
 *     must not be ignored
 */
@Override
protected void process(Record record, SingleLaneBatchMaker batchMaker) throws StageException {
  String[] pieces = null;
  ErrorCode failure = null;

  Field source = record.get(fieldPath);
  if (source != null && source.getValue() != null) {
    String text;
    try {
      text = source.getValueAsString();
    } catch (IllegalArgumentException e) {
      throw new OnRecordErrorException(Errors.SPLITTER_04, fieldPath, source.getType().name());
    }
    // The limit caps the number of pieces; the last piece keeps any remaining separators.
    pieces = text.split(separatorStr, fieldPaths.length);
    if (pieces.length < fieldPaths.length) {
      failure = Errors.SPLITTER_02;
    }
  } else {
    failure = Errors.SPLITTER_01;
  }

  // Fail fast unless the stage is configured to carry on despite a pre-condition failure.
  if (failure != null && onStagePreConditionFailure != OnStagePreConditionFailure.CONTINUE) {
    throw new OnRecordErrorException(failure, record.getHeader().getSourceId(), fieldPath);
  }

  for (int idx = 0; idx < fieldPaths.length; idx++) {
    boolean hasPiece = pieces != null && idx < pieces.length;
    try {
      // Targets beyond the available pieces are still created, holding a null string.
      Field value = hasPiece ? Field.create(pieces[idx]) : Field.create(Field.Type.STRING, null);
      record.set(fieldPaths[idx], value);
    } catch (IllegalArgumentException e) {
      throw new OnRecordErrorException(
          Errors.SPLITTER_05, fieldPath, record.getHeader().getSourceId(), e.toString());
    }
  }

  if (removeUnsplitValue) {
    record.delete(fieldPath);
  }
  batchMaker.addRecord(record);
}
@Override protected void process(Record record, SingleLaneBatchMaker batchMaker) throws StageException { Set<String> fieldPaths = record.getFieldPaths(); List<String> list; switch (filterOperation) { case REMOVE: list = new ArrayList<>(); for (String field : fields) { List<String> matchingFieldPaths = FieldRegexUtil.getMatchingFieldPaths(field, fieldPaths); list.addAll(matchingFieldPaths); } break; case KEEP: // Algorithm: // - Get all possible field paths in the record // // - Remove arguments fields which must be retained, its parent fields and the child fields // from above set // (Account for presence of wild card characters while doing so) The remaining set of // fields is what must be // removed from the record. // // - Sort this set before deleting fields. Last element of a list must be removed first. Set<String> fieldsToRemove = new HashSet<>(); // List all the possible field paths in this record fieldsToRemove.addAll(fieldPaths); for (String field : fields) { // Keep parent fields // get the parent fieldPaths for each of the fields to keep List<String> parentFieldPaths = getParentFields(field); // remove parent paths from the fieldsToRemove set // Note that parent names could contain wild card characters for (String parentField : parentFieldPaths) { List<String> matchingFieldPaths = FieldRegexUtil.getMatchingFieldPaths(parentField, fieldPaths); fieldsToRemove.removeAll(matchingFieldPaths); } // Keep the field itself // remove the field path itself from the fieldsToRemove set // Consider wild card characters List<String> matchingFieldPaths = FieldRegexUtil.getMatchingFieldPaths(field, fieldPaths); fieldsToRemove.removeAll(matchingFieldPaths); // Keep the children of the field // For each of the fieldPaths that match the argument field path, generate all the child // paths List<String> childFieldsToRemove = new ArrayList<>(); for (String matchingFieldPath : matchingFieldPaths) { for (String fieldToRemove : fieldsToRemove) { if 
(fieldToRemove.startsWith(matchingFieldPath)) { childFieldsToRemove.add(fieldToRemove); } } } fieldsToRemove.removeAll(childFieldsToRemove); } list = new ArrayList<>(fieldsToRemove); break; default: throw new IllegalStateException( Utils.format("Unexpected Filter Operation '{}'", filterOperation.name())); } Collections.sort(list); for (int i = list.size() - 1; i >= 0; i--) { record.delete(list.get(i)); } batchMaker.addRecord(record); }