private static List<Map.Entry<String, Object>> orderMapEntries( RecordDataSchema schema, DataMap map) { List<Map.Entry<String, Object>> output = new ArrayList<Map.Entry<String, Object>>(map.size()); List<RecordDataSchema.Field> fields = schema.getFields(); // collect fields in the record schema in the order the fields are declared for (RecordDataSchema.Field field : fields) { String fieldName = field.getName(); Object found = map.get(fieldName); if (found != null) { output.add(new AbstractMap.SimpleImmutableEntry<String, Object>(fieldName, found)); } } // collect fields that are in the DataMap that is not in the record schema. List<Map.Entry<String, Object>> uncollected = new ArrayList<Map.Entry<String, Object>>(map.size() - output.size()); for (Map.Entry<String, Object> e : map.entrySet()) { if (schema.contains(e.getKey()) == false) { uncollected.add(e); } } Collections.sort( uncollected, new Comparator<Map.Entry<String, Object>>() { @Override public int compare(Map.Entry<String, Object> o1, Map.Entry<String, Object> o2) { return o1.getKey().compareTo(o2.getKey()); } }); output.addAll(uncollected); return output; }
/**
 * Records the schema that applies to the value following {@code key}, then forwards the key to
 * the superclass.
 *
 * <p>The pending schema is derived from the current schema: the field's type for a record, the
 * member type for a union, the values schema for a map. It is {@code null} when there is no
 * current schema, when the current schema is of any other type, or when a record has no field
 * named {@code key}.
 *
 * @param key the key being emitted
 * @throws IOException declared by this method's signature; presumably raised by
 *     {@code super.key(key)} — its implementation is outside this block
 */
@Override
public void key(String key) throws IOException {
  DataSchema childSchema = null;
  if (_currentSchema != null) {
    switch (_currentSchema.getType()) {
      case RECORD:
        {
          RecordDataSchema.Field declared = ((RecordDataSchema) _currentSchema).getField(key);
          if (declared != null) {
            childSchema = declared.getType();
          }
          break;
        }
      case UNION:
        childSchema = ((UnionDataSchema) _currentSchema).getType(key);
        break;
      case MAP:
        childSchema = ((MapDataSchema) _currentSchema).getValues();
        break;
      default:
        // Other schema types provide no schema for a keyed child.
        break;
    }
  }
  _pendingSchema = childSchema;
  super.key(key);
}
private void recordType( DataSchema schema, Map<String, NamedDataSchema> foundTypes, List<NamedDataSchema> typeOrder) { if (schema instanceof NamedDataSchema) { NamedDataSchema namedDataSchema = (NamedDataSchema) schema; if (!foundTypes.containsKey(namedDataSchema.getFullName())) { foundTypes.put(namedDataSchema.getFullName(), namedDataSchema); if (schema instanceof RecordDataSchema) // recurse into record, record any contained types. { RecordDataSchema recordDataSchema = (RecordDataSchema) schema; for (RecordDataSchema.Field field : recordDataSchema.getFields()) { recordType(field.getType(), foundTypes, typeOrder); } } typeOrder.add(namedDataSchema); } } else if (schema instanceof ArrayDataSchema) { ArrayDataSchema arraySchema = (ArrayDataSchema) schema; recordType(arraySchema.getItems(), foundTypes, typeOrder); } else if (schema instanceof MapDataSchema) { MapDataSchema mapSchema = (MapDataSchema) schema; recordType(mapSchema.getValues(), foundTypes, typeOrder); } else if (schema instanceof UnionDataSchema) { UnionDataSchema unionSchema = (UnionDataSchema) schema; for (DataSchema type : unionSchema.getTypes()) { recordType(type, foundTypes, typeOrder); } } }
@Override public void validate(ValidatorContext context) { DataElement element = context.dataElement(); DataSchema schema = element.getSchema(); if (schema != null) { validateSchema(context, schema); } DataElement parentElement = element.getParent(); if (parentElement != null) { DataSchema parentSchema = parentElement.getSchema(); // check if the value belongs to a field in a record // if it belongs to a field, check if the field has // validators. if (parentSchema != null && parentSchema.getType() == DataSchema.Type.RECORD) { Object name = element.getName(); if (name.getClass() == String.class) { RecordDataSchema recordDataSchema = (RecordDataSchema) parentSchema; RecordDataSchema.Field field = recordDataSchema.getField((String) name); if (field != null) { getAndInvokeValidatorList(context, field); } } } } }
/**
 * Builds a {@code FieldInfo} for the named field of a record template.
 *
 * <p>The field is looked up in the template's schema. Its Java class is taken from the return
 * type of the template's no-argument public getter, whose name is produced by {@code methodName}
 * from the prefix {@code "is"} (when the field's dereferenced type is BOOLEAN) or {@code "get"}
 * (otherwise) and the field name.
 *
 * @param recordTemplate the template whose schema and getter are inspected
 * @param fieldName name of the field in the template's schema
 * @return the field paired with the return type of its getter
 * @throws IllegalArgumentException if the schema has no field named {@code fieldName}
 * @throws IllegalStateException if the template class has no such getter method
 */
public static FieldInfo fieldInfo(RecordTemplate recordTemplate, String fieldName) {
  RecordDataSchema schema = recordTemplate.schema();
  RecordDataSchema.Field field = schema.getField(fieldName);
  if (field == null) {
    // Fail with a descriptive error instead of a NullPointerException on field.getType() below.
    throw new IllegalArgumentException(
        "Cannot find field " + fieldName + " in schema of " + recordTemplate.getClass().getName());
  }

  boolean isBoolean = field.getType().getDereferencedType() == DataSchema.Type.BOOLEAN;
  String getterName = methodName(isBoolean ? "is" : "get", fieldName);
  try {
    Method method = recordTemplate.getClass().getMethod(getterName);
    Class<?> fieldClass = method.getReturnType();
    return new FieldInfo(field, fieldClass);
  } catch (NoSuchMethodException e) {
    throw new IllegalStateException("Cannot find method " + getterName, e);
  }
}
/**
 * Record template for the record schema {@code Foo}, which declares a single string field
 * {@code bar}.
 */
public static class FooRecord extends RecordTemplate {
  /** Schema of the record, parsed from its JSON definition. */
  public static final RecordDataSchema SCHEMA =
      (RecordDataSchema)
          DataTemplateUtil.parseSchema(
              "{ \"type\" : \"record\", \"name\" : \"Foo\", \"fields\" : [ { \"name\" : \"bar\", \"type\" : \"string\" } ] }");

  /** The {@code bar} field of {@link #SCHEMA}. */
  public static final RecordDataSchema.Field FIELD_bar = SCHEMA.getField("bar");

  /** Creates a record backed by a new, empty {@code DataMap}. */
  public FooRecord() {
    this(new DataMap());
  }

  /**
   * Creates a record backed by the given map.
   *
   * @param map the underlying data of the record
   */
  public FooRecord(DataMap map) {
    super(map, SCHEMA);
  }

  /**
   * Stores {@code value} in the {@code bar} field.
   *
   * @param value the new value of the field
   */
  public void setBar(String value) {
    putDirect(FIELD_bar, String.class, value);
  }

  /**
   * Reads the {@code bar} field.
   *
   * @param mode passed through to {@code obtainDirect}
   * @return whatever {@code obtainDirect} yields for the field under the given mode
   */
  public String getBar(GetMode mode) {
    return obtainDirect(FIELD_bar, String.class, mode);
  }

  /** Removes the {@code bar} field from the record. */
  public void removeBar() {
    remove(FIELD_bar);
  }
}
private Object translate(Object value, DataSchema dataSchema, Schema avroSchema) { AvroOverride avroOverride = getAvroOverride(dataSchema); if (avroOverride != null) { return avroOverride .getCustomDataTranslator() .dataToAvroGeneric(this, value, dataSchema, avroSchema); } DataSchema dereferencedDataSchema = dataSchema.getDereferencedDataSchema(); DataSchema.Type type = dereferencedDataSchema.getType(); Object result; switch (type) { case NULL: if (value != Data.NULL) { appendMessage("value must be null for null schema"); result = BAD_RESULT; break; } result = null; break; case BOOLEAN: result = ((Boolean) value).booleanValue(); break; case INT: result = ((Number) value).intValue(); break; case LONG: result = ((Number) value).longValue(); break; case FLOAT: result = ((Number) value).floatValue(); break; case DOUBLE: result = ((Number) value).doubleValue(); break; case STRING: result = new Utf8((String) value); break; case BYTES: result = ByteBuffer.wrap(translateBytes(value)); break; case ENUM: String enumValue = value.toString(); EnumDataSchema enumDataSchema = (EnumDataSchema) dereferencedDataSchema; if (enumDataSchema.getSymbols().contains(enumValue) == false) { appendMessage( "enum value %1$s not one of %2$s", enumValue, enumDataSchema.getSymbols()); result = BAD_RESULT; break; } result = _avroAdapter.createEnumSymbol(avroSchema, enumValue); break; case FIXED: byte[] bytes = translateBytes(value); FixedDataSchema fixedDataSchema = (FixedDataSchema) dereferencedDataSchema; if (fixedDataSchema.getSize() != bytes.length) { appendMessage( "ByteString size %1$d != FixedDataSchema size %2$d", bytes.length, fixedDataSchema.getSize()); result = null; break; } GenericData.Fixed fixed = new GenericData.Fixed(avroSchema); fixed.bytes(bytes); result = fixed; break; case MAP: DataMap map = (DataMap) value; DataSchema valueDataSchema = ((MapDataSchema) dereferencedDataSchema).getValues(); Schema valueAvroSchema = avroSchema.getValueType(); Map<String, Object> avroMap = new 
HashMap<String, Object>(map.size()); for (Map.Entry<String, Object> entry : map.entrySet()) { String key = entry.getKey(); _path.addLast(key); Object entryAvroValue = translate(entry.getValue(), valueDataSchema, valueAvroSchema); _path.removeLast(); avroMap.put(key, entryAvroValue); } result = avroMap; break; case ARRAY: DataList list = (DataList) value; DataSchema elementDataSchema = ((ArrayDataSchema) dereferencedDataSchema).getItems(); Schema elementAvroSchema = avroSchema.getElementType(); GenericData.Array<Object> avroList = new GenericData.Array<Object>(list.size(), avroSchema); for (int i = 0; i < list.size(); i++) { _path.addLast(i); Object entryAvroValue = translate(list.get(i), elementDataSchema, elementAvroSchema); _path.removeLast(); avroList.add(entryAvroValue); } result = avroList; break; case RECORD: map = (DataMap) value; RecordDataSchema recordDataSchema = (RecordDataSchema) dereferencedDataSchema; GenericData.Record avroRecord = new GenericData.Record(avroSchema); for (RecordDataSchema.Field field : recordDataSchema.getFields()) { String fieldName = field.getName(); DataSchema fieldDataSchema = field.getType(); Schema.Field avroField = avroSchema.getField(fieldName); if (avroField == null) { // field present in input but there is no field for it in Avro schema. // TODO: Whether and how to indicate this condition to clients. 
continue; } _path.addLast(fieldName); Schema fieldAvroSchema = avroField.schema(); Object fieldValue = map.get(fieldName); boolean isOptional = field.getOptional(); if (isOptional) { if (fieldDataSchema.getDereferencedType() != DataSchema.Type.UNION) { if (fieldValue == null) { fieldValue = Data.NULL; fieldDataSchema = DataSchemaConstants.NULL_DATA_SCHEMA; } Map.Entry<String, Schema> fieldAvroEntry = findUnionMember(fieldDataSchema, fieldAvroSchema); if (fieldAvroEntry == null) { _path.removeLast(); continue; } fieldAvroSchema = fieldAvroEntry.getValue(); } else { // already a union if (fieldValue == null) { // field is not present fieldValue = Data.NULL; fieldDataSchema = DataSchemaConstants.NULL_DATA_SCHEMA; } } } else { if (fieldValue == null) { appendMessage("required field is absent"); _path.removeLast(); continue; } } Object fieldAvroValue = translate(fieldValue, fieldDataSchema, fieldAvroSchema); avroRecord.put(fieldName, fieldAvroValue); _path.removeLast(); } result = avroRecord; break; case UNION: UnionDataSchema unionDataSchema = (UnionDataSchema) dereferencedDataSchema; String key; Object memberValue; if (value == Data.NULL) { key = DataSchemaConstants.NULL_TYPE; memberValue = Data.NULL; } else { map = (DataMap) value; Map.Entry<String, Object> entry = map.entrySet().iterator().next(); key = entry.getKey(); memberValue = entry.getValue(); } DataSchema memberDataSchema = unionDataSchema.getType(key); Map.Entry<String, Schema> memberAvroEntry = findUnionMember(memberDataSchema, avroSchema); if (memberAvroEntry == null) { result = BAD_RESULT; break; } Schema memberAvroSchema = memberAvroEntry.getValue(); _path.addLast(memberAvroEntry.getKey()); Object memberAvroValue = translate(memberValue, memberDataSchema, memberAvroSchema); _path.removeLast(); result = memberAvroValue; break; default: appendMessage("schema type unknown %1$s", dereferencedDataSchema.getType()); result = BAD_RESULT; break; } return result; }
private Object translate(Object value, DataSchema dataSchema, Schema avroSchema) { AvroOverride avroOverride = getAvroOverride(dataSchema); if (avroOverride != null) { return avroOverride .getCustomDataTranslator() .avroGenericToData(this, value, avroSchema, dataSchema); } DataSchema dereferencedDataSchema = dataSchema.getDereferencedDataSchema(); DataSchema.Type type = dereferencedDataSchema.getType(); Object result; switch (type) { case NULL: if (value != null) { appendMessage("value must be null for null schema"); result = BAD_RESULT; break; } result = Data.NULL; break; case BOOLEAN: result = ((Boolean) value).booleanValue(); break; case INT: result = ((Number) value).intValue(); break; case LONG: result = ((Number) value).longValue(); break; case FLOAT: result = ((Number) value).floatValue(); break; case DOUBLE: result = ((Number) value).doubleValue(); break; case STRING: result = value.toString(); break; case BYTES: ByteBuffer byteBuffer = (ByteBuffer) value; ByteString byteString = ByteString.copy(byteBuffer); byteBuffer.rewind(); result = byteString; break; case ENUM: String enumValue = value.toString(); EnumDataSchema enumDataSchema = (EnumDataSchema) dereferencedDataSchema; if (enumDataSchema.getSymbols().contains(enumValue) == false) { appendMessage( "enum value %1$s not one of %2$s", enumValue, enumDataSchema.getSymbols()); result = BAD_RESULT; break; } result = enumValue; break; case FIXED: GenericFixed fixed = (GenericFixed) value; byte[] fixedBytes = fixed.bytes(); FixedDataSchema fixedDataSchema = (FixedDataSchema) dereferencedDataSchema; if (fixedDataSchema.getSize() != fixedBytes.length) { appendMessage( "GenericFixed size %1$d != FixedDataSchema size %2$d", fixedBytes.length, fixedDataSchema.getSize()); result = BAD_RESULT; break; } byteString = ByteString.copy(fixedBytes); result = byteString; break; case MAP: @SuppressWarnings("unchecked") Map<?, Object> map = (Map<?, Object>) value; DataSchema valueDataSchema = ((MapDataSchema) 
dereferencedDataSchema).getValues(); Schema valueAvroSchema = avroSchema.getValueType(); DataMap dataMap = new DataMap(map.size()); for (Map.Entry<?, Object> entry : map.entrySet()) { String key = entry.getKey().toString(); _path.addLast(key); Object entryValue = translate(entry.getValue(), valueDataSchema, valueAvroSchema); _path.removeLast(); dataMap.put(key, entryValue); } result = dataMap; break; case ARRAY: GenericArray<?> list = (GenericArray<?>) value; DataSchema elementDataSchema = ((ArrayDataSchema) dereferencedDataSchema).getItems(); Schema elementAvroSchema = avroSchema.getElementType(); DataList dataList = new DataList(list.size()); for (int i = 0; i < list.size(); i++) { _path.addLast(i); Object entryValue = translate(list.get(i), elementDataSchema, elementAvroSchema); _path.removeLast(); dataList.add(entryValue); } result = dataList; break; case RECORD: GenericRecord record = (GenericRecord) value; RecordDataSchema recordDataSchema = (RecordDataSchema) dereferencedDataSchema; dataMap = new DataMap(avroSchema.getFields().size()); for (RecordDataSchema.Field field : recordDataSchema.getFields()) { String fieldName = field.getName(); Object fieldValue = record.get(fieldName); // fieldValue could be null if the Avro schema does not contain the named field or // the field is present with a null value. In either case we do not add a value // to the translated DataMap. We do not consider optional/required/default here // either (i.e. it is not an error if a required field is missing); the user can // later call ValidateDataAgainstSchema with various // settings for RequiredMode to obtain the desired behaviour. if (fieldValue == null) { continue; } boolean isOptional = field.getOptional(); DataSchema fieldDataSchema = field.getType(); Schema fieldAvroSchema = avroSchema.getField(fieldName).schema(); if (isOptional && (fieldDataSchema.getDereferencedType() != DataSchema.Type.UNION)) { // Avro schema should be union with 2 types: null and the field's type. 
Map.Entry<String, Schema> fieldAvroEntry = findUnionMember(fieldDataSchema, fieldAvroSchema); if (fieldAvroEntry == null) { continue; } fieldAvroSchema = fieldAvroEntry.getValue(); } _path.addLast(fieldName); dataMap.put(fieldName, translate(fieldValue, fieldDataSchema, fieldAvroSchema)); _path.removeLast(); } result = dataMap; break; case UNION: UnionDataSchema unionDataSchema = (UnionDataSchema) dereferencedDataSchema; Map.Entry<DataSchema, Schema> memberSchemas = findUnionMemberSchema(value, unionDataSchema, avroSchema); if (memberSchemas == null) { result = BAD_RESULT; break; } if (value == null) { // schema must be "null" schema result = Data.NULL; } else { DataSchema memberDataSchema = memberSchemas.getKey(); Schema memberAvroSchema = memberSchemas.getValue(); String key = memberDataSchema.getUnionMemberKey(); dataMap = new DataMap(1); _path.addLast(key); dataMap.put(key, translate(value, memberDataSchema, memberAvroSchema)); _path.removeLast(); result = dataMap; } break; default: appendMessage("schema type unknown %1$s", dereferencedDataSchema.getType()); result = BAD_RESULT; break; } return result; }
/**
 * Compares two versions of a record schema and reports compatibility findings.
 *
 * <p>After checking the names, the fields of both versions are classified as added, removed, or
 * changed between required and optional. One message is appended for each non-empty category.
 * Finally the types of the fields present in both versions are checked, with the field name
 * pushed on {@code _path} for the duration of each check.
 *
 * @param older the earlier version of the record schema
 * @param newer the later version of the record schema
 */
private void checkRecord(RecordDataSchema older, RecordDataSchema newer) {
  checkName(older, newer);

  List<RecordDataSchema.Field> sharedFields =
      new ArrayList<RecordDataSchema.Field>(newer.getFields().size());
  List<String> addedRequired = new CheckerArrayList<String>();
  List<String> addedOptional = new CheckerArrayList<String>();
  List<String> becameOptional = new CheckerArrayList<String>();
  List<String> becameRequired = new CheckerArrayList<String>();
  List<String> removedRequired = new CheckerArrayList<String>();
  List<String> removedOptional = new CheckerArrayList<String>();

  // Classify every field of the newer schema as added or shared with the older schema.
  for (RecordDataSchema.Field current : newer.getFields()) {
    String name = current.getName();
    RecordDataSchema.Field previous = older.getField(name);
    if (previous == null) {
      if (current.getOptional()) {
        addedOptional.add(name);
      } else {
        addedRequired.add(name);
      }
      continue;
    }

    sharedFields.add(current);
    boolean optionalNow = current.getOptional();
    if (optionalNow != previous.getOptional()) {
      if (optionalNow) {
        becameOptional.add(name);
      } else {
        becameRequired.add(name);
      }
    }
  }

  // Fields of the older schema that the newer schema no longer has.
  for (RecordDataSchema.Field previous : older.getFields()) {
    String name = previous.getName();
    if (newer.getField(name) != null) {
      continue;
    }
    if (previous.getOptional()) {
      removedOptional.add(name);
    } else {
      removedRequired.add(name);
    }
  }

  if (!addedRequired.isEmpty()) {
    appendMessage(
        CompatibilityMessage.Impact.BREAKS_NEW_READER,
        "new record added required fields %s",
        addedRequired);
  }
  if (!removedRequired.isEmpty()) {
    appendMessage(
        CompatibilityMessage.Impact.BREAKS_OLD_READER,
        "new record removed required fields %s",
        removedRequired);
  }
  if (!becameRequired.isEmpty()) {
    appendMessage(
        CompatibilityMessage.Impact.BREAKS_NEW_READER,
        "new record changed optional fields to required fields %s",
        becameRequired);
  }
  if (!becameOptional.isEmpty()) {
    appendMessage(
        CompatibilityMessage.Impact.BREAKS_OLD_READER,
        "new record changed required fields to optional fields %s",
        becameOptional);
  }
  if (!addedOptional.isEmpty()) {
    appendMessage(
        CompatibilityMessage.Impact.OLD_READER_IGNORES_DATA,
        "new record added optional fields %s",
        addedOptional);
  }
  if (!removedOptional.isEmpty()) {
    appendMessage(
        CompatibilityMessage.Impact.NEW_READER_IGNORES_DATA,
        "new record removed optional fields %s",
        removedOptional);
  }

  // Check the types of the fields that exist in both versions.
  for (RecordDataSchema.Field current : sharedFields) {
    String name = current.getName();
    _path.addLast(name);
    RecordDataSchema.Field previous = older.getField(name);
    assert (previous != null);
    check(previous.getType(), current.getType());
    _path.removeLast();
  }
}