Example #1
0
  private void checkFixed(FixedDataSchema older, FixedDataSchema newer) {
    checkName(older, newer);

    _path.addLast(DataSchemaConstants.SIZE_KEY);

    int olderSize = older.getSize();
    int newerSize = newer.getSize();
    if (olderSize != newerSize) {
      appendMessage(
          CompatibilityMessage.Impact.BREAKS_NEW_AND_OLD_READERS,
          "fixed size changed from %d to %d",
          olderSize,
          newerSize);
    }

    _path.removeLast();
  }
Example #2
0
    private Object translate(Object value, DataSchema dataSchema, Schema avroSchema) {
      AvroOverride avroOverride = getAvroOverride(dataSchema);
      if (avroOverride != null) {
        return avroOverride
            .getCustomDataTranslator()
            .dataToAvroGeneric(this, value, dataSchema, avroSchema);
      }

      DataSchema dereferencedDataSchema = dataSchema.getDereferencedDataSchema();
      DataSchema.Type type = dereferencedDataSchema.getType();
      Object result;
      switch (type) {
        case NULL:
          if (value != Data.NULL) {
            appendMessage("value must be null for null schema");
            result = BAD_RESULT;
            break;
          }
          result = null;
          break;
        case BOOLEAN:
          result = ((Boolean) value).booleanValue();
          break;
        case INT:
          result = ((Number) value).intValue();
          break;
        case LONG:
          result = ((Number) value).longValue();
          break;
        case FLOAT:
          result = ((Number) value).floatValue();
          break;
        case DOUBLE:
          result = ((Number) value).doubleValue();
          break;
        case STRING:
          result = new Utf8((String) value);
          break;
        case BYTES:
          result = ByteBuffer.wrap(translateBytes(value));
          break;
        case ENUM:
          String enumValue = value.toString();
          EnumDataSchema enumDataSchema = (EnumDataSchema) dereferencedDataSchema;
          if (enumDataSchema.getSymbols().contains(enumValue) == false) {
            appendMessage(
                "enum value %1$s not one of %2$s", enumValue, enumDataSchema.getSymbols());
            result = BAD_RESULT;
            break;
          }
          result = _avroAdapter.createEnumSymbol(avroSchema, enumValue);
          break;
        case FIXED:
          byte[] bytes = translateBytes(value);
          FixedDataSchema fixedDataSchema = (FixedDataSchema) dereferencedDataSchema;
          if (fixedDataSchema.getSize() != bytes.length) {
            appendMessage(
                "ByteString size %1$d != FixedDataSchema size %2$d",
                bytes.length, fixedDataSchema.getSize());
            result = null;
            break;
          }
          GenericData.Fixed fixed = new GenericData.Fixed(avroSchema);
          fixed.bytes(bytes);
          result = fixed;
          break;
        case MAP:
          DataMap map = (DataMap) value;
          DataSchema valueDataSchema = ((MapDataSchema) dereferencedDataSchema).getValues();
          Schema valueAvroSchema = avroSchema.getValueType();
          Map<String, Object> avroMap = new HashMap<String, Object>(map.size());
          for (Map.Entry<String, Object> entry : map.entrySet()) {
            String key = entry.getKey();
            _path.addLast(key);
            Object entryAvroValue = translate(entry.getValue(), valueDataSchema, valueAvroSchema);
            _path.removeLast();
            avroMap.put(key, entryAvroValue);
          }
          result = avroMap;
          break;
        case ARRAY:
          DataList list = (DataList) value;
          DataSchema elementDataSchema = ((ArrayDataSchema) dereferencedDataSchema).getItems();
          Schema elementAvroSchema = avroSchema.getElementType();
          GenericData.Array<Object> avroList =
              new GenericData.Array<Object>(list.size(), avroSchema);
          for (int i = 0; i < list.size(); i++) {
            _path.addLast(i);
            Object entryAvroValue = translate(list.get(i), elementDataSchema, elementAvroSchema);
            _path.removeLast();
            avroList.add(entryAvroValue);
          }
          result = avroList;
          break;
        case RECORD:
          map = (DataMap) value;
          RecordDataSchema recordDataSchema = (RecordDataSchema) dereferencedDataSchema;
          GenericData.Record avroRecord = new GenericData.Record(avroSchema);
          for (RecordDataSchema.Field field : recordDataSchema.getFields()) {
            String fieldName = field.getName();
            DataSchema fieldDataSchema = field.getType();
            Schema.Field avroField = avroSchema.getField(fieldName);
            if (avroField == null) {
              // field present in input but there is no field for it in Avro schema.
              // TODO: Whether and how to indicate this condition to clients.
              continue;
            }
            _path.addLast(fieldName);
            Schema fieldAvroSchema = avroField.schema();
            Object fieldValue = map.get(fieldName);
            boolean isOptional = field.getOptional();
            if (isOptional) {
              if (fieldDataSchema.getDereferencedType() != DataSchema.Type.UNION) {
                if (fieldValue == null) {
                  fieldValue = Data.NULL;
                  fieldDataSchema = DataSchemaConstants.NULL_DATA_SCHEMA;
                }
                Map.Entry<String, Schema> fieldAvroEntry =
                    findUnionMember(fieldDataSchema, fieldAvroSchema);
                if (fieldAvroEntry == null) {
                  _path.removeLast();
                  continue;
                }
                fieldAvroSchema = fieldAvroEntry.getValue();
              } else {
                // already a union
                if (fieldValue == null) {
                  // field is not present
                  fieldValue = Data.NULL;
                  fieldDataSchema = DataSchemaConstants.NULL_DATA_SCHEMA;
                }
              }
            } else {
              if (fieldValue == null) {
                appendMessage("required field is absent");
                _path.removeLast();
                continue;
              }
            }
            Object fieldAvroValue = translate(fieldValue, fieldDataSchema, fieldAvroSchema);
            avroRecord.put(fieldName, fieldAvroValue);
            _path.removeLast();
          }
          result = avroRecord;
          break;
        case UNION:
          UnionDataSchema unionDataSchema = (UnionDataSchema) dereferencedDataSchema;
          String key;
          Object memberValue;
          if (value == Data.NULL) {
            key = DataSchemaConstants.NULL_TYPE;
            memberValue = Data.NULL;
          } else {
            map = (DataMap) value;
            Map.Entry<String, Object> entry = map.entrySet().iterator().next();
            key = entry.getKey();
            memberValue = entry.getValue();
          }
          DataSchema memberDataSchema = unionDataSchema.getType(key);
          Map.Entry<String, Schema> memberAvroEntry = findUnionMember(memberDataSchema, avroSchema);
          if (memberAvroEntry == null) {
            result = BAD_RESULT;
            break;
          }
          Schema memberAvroSchema = memberAvroEntry.getValue();
          _path.addLast(memberAvroEntry.getKey());
          Object memberAvroValue = translate(memberValue, memberDataSchema, memberAvroSchema);
          _path.removeLast();
          result = memberAvroValue;
          break;
        default:
          appendMessage("schema type unknown %1$s", dereferencedDataSchema.getType());
          result = BAD_RESULT;
          break;
      }
      return result;
    }
Example #3
0
    private Object translate(Object value, DataSchema dataSchema, Schema avroSchema) {
      AvroOverride avroOverride = getAvroOverride(dataSchema);
      if (avroOverride != null) {
        return avroOverride
            .getCustomDataTranslator()
            .avroGenericToData(this, value, avroSchema, dataSchema);
      }

      DataSchema dereferencedDataSchema = dataSchema.getDereferencedDataSchema();
      DataSchema.Type type = dereferencedDataSchema.getType();
      Object result;
      switch (type) {
        case NULL:
          if (value != null) {
            appendMessage("value must be null for null schema");
            result = BAD_RESULT;
            break;
          }
          result = Data.NULL;
          break;
        case BOOLEAN:
          result = ((Boolean) value).booleanValue();
          break;
        case INT:
          result = ((Number) value).intValue();
          break;
        case LONG:
          result = ((Number) value).longValue();
          break;
        case FLOAT:
          result = ((Number) value).floatValue();
          break;
        case DOUBLE:
          result = ((Number) value).doubleValue();
          break;
        case STRING:
          result = value.toString();
          break;
        case BYTES:
          ByteBuffer byteBuffer = (ByteBuffer) value;
          ByteString byteString = ByteString.copy(byteBuffer);
          byteBuffer.rewind();
          result = byteString;
          break;
        case ENUM:
          String enumValue = value.toString();
          EnumDataSchema enumDataSchema = (EnumDataSchema) dereferencedDataSchema;
          if (enumDataSchema.getSymbols().contains(enumValue) == false) {
            appendMessage(
                "enum value %1$s not one of %2$s", enumValue, enumDataSchema.getSymbols());
            result = BAD_RESULT;
            break;
          }
          result = enumValue;
          break;
        case FIXED:
          GenericFixed fixed = (GenericFixed) value;
          byte[] fixedBytes = fixed.bytes();
          FixedDataSchema fixedDataSchema = (FixedDataSchema) dereferencedDataSchema;
          if (fixedDataSchema.getSize() != fixedBytes.length) {
            appendMessage(
                "GenericFixed size %1$d != FixedDataSchema size %2$d",
                fixedBytes.length, fixedDataSchema.getSize());
            result = BAD_RESULT;
            break;
          }
          byteString = ByteString.copy(fixedBytes);
          result = byteString;
          break;
        case MAP:
          @SuppressWarnings("unchecked")
          Map<?, Object> map = (Map<?, Object>) value;
          DataSchema valueDataSchema = ((MapDataSchema) dereferencedDataSchema).getValues();
          Schema valueAvroSchema = avroSchema.getValueType();
          DataMap dataMap = new DataMap(map.size());
          for (Map.Entry<?, Object> entry : map.entrySet()) {
            String key = entry.getKey().toString();
            _path.addLast(key);
            Object entryValue = translate(entry.getValue(), valueDataSchema, valueAvroSchema);
            _path.removeLast();
            dataMap.put(key, entryValue);
          }
          result = dataMap;
          break;
        case ARRAY:
          GenericArray<?> list = (GenericArray<?>) value;
          DataSchema elementDataSchema = ((ArrayDataSchema) dereferencedDataSchema).getItems();
          Schema elementAvroSchema = avroSchema.getElementType();
          DataList dataList = new DataList(list.size());
          for (int i = 0; i < list.size(); i++) {
            _path.addLast(i);
            Object entryValue = translate(list.get(i), elementDataSchema, elementAvroSchema);
            _path.removeLast();
            dataList.add(entryValue);
          }
          result = dataList;
          break;
        case RECORD:
          GenericRecord record = (GenericRecord) value;
          RecordDataSchema recordDataSchema = (RecordDataSchema) dereferencedDataSchema;
          dataMap = new DataMap(avroSchema.getFields().size());
          for (RecordDataSchema.Field field : recordDataSchema.getFields()) {
            String fieldName = field.getName();
            Object fieldValue = record.get(fieldName);
            // fieldValue could be null if the Avro schema does not contain the named field or
            // the field is present with a null value. In either case we do not add a value
            // to the translated DataMap. We do not consider optional/required/default here
            // either (i.e. it is not an error if a required field is missing); the user can
            // later call ValidateDataAgainstSchema with various
            // settings for RequiredMode to obtain the desired behaviour.
            if (fieldValue == null) {
              continue;
            }
            boolean isOptional = field.getOptional();
            DataSchema fieldDataSchema = field.getType();
            Schema fieldAvroSchema = avroSchema.getField(fieldName).schema();
            if (isOptional && (fieldDataSchema.getDereferencedType() != DataSchema.Type.UNION)) {
              // Avro schema should be union with 2 types: null and the field's type.
              Map.Entry<String, Schema> fieldAvroEntry =
                  findUnionMember(fieldDataSchema, fieldAvroSchema);
              if (fieldAvroEntry == null) {
                continue;
              }
              fieldAvroSchema = fieldAvroEntry.getValue();
            }
            _path.addLast(fieldName);
            dataMap.put(fieldName, translate(fieldValue, fieldDataSchema, fieldAvroSchema));
            _path.removeLast();
          }
          result = dataMap;
          break;
        case UNION:
          UnionDataSchema unionDataSchema = (UnionDataSchema) dereferencedDataSchema;
          Map.Entry<DataSchema, Schema> memberSchemas =
              findUnionMemberSchema(value, unionDataSchema, avroSchema);
          if (memberSchemas == null) {
            result = BAD_RESULT;
            break;
          }
          if (value == null) {
            // schema must be "null" schema
            result = Data.NULL;
          } else {
            DataSchema memberDataSchema = memberSchemas.getKey();
            Schema memberAvroSchema = memberSchemas.getValue();
            String key = memberDataSchema.getUnionMemberKey();
            dataMap = new DataMap(1);
            _path.addLast(key);
            dataMap.put(key, translate(value, memberDataSchema, memberAvroSchema));
            _path.removeLast();
            result = dataMap;
          }
          break;
        default:
          appendMessage("schema type unknown %1$s", dereferencedDataSchema.getType());
          result = BAD_RESULT;
          break;
      }
      return result;
    }