/**
 * Returns the entries of a {@code DataMap} in an order driven by a record schema.
 *
 * <p>Entries for fields of {@code schema} come first, in the order the schema lists its fields;
 * a field for which {@code map.get} returns {@code null} is skipped. Entries whose keys the
 * schema does not contain follow, sorted by key with {@link String#compareTo(String)}.
 *
 * @param schema record schema whose field order determines the leading entries
 * @param map the map whose entries are to be ordered
 * @return a newly allocated list holding the ordered entries
 */
private static List<Map.Entry<String, Object>> orderMapEntries(
     RecordDataSchema schema, DataMap map) {
   List<Map.Entry<String, Object>> ordered =
       new ArrayList<Map.Entry<String, Object>>(map.size());

   // Pass 1: walk the schema's fields in declaration order and pick up the values present.
   for (RecordDataSchema.Field declared : schema.getFields()) {
     String name = declared.getName();
     Object value = map.get(name);
     if (value == null) {
       continue;
     }
     ordered.add(new AbstractMap.SimpleImmutableEntry<String, Object>(name, value));
   }

   // Pass 2: gather the map entries whose keys are unknown to the schema.
   List<Map.Entry<String, Object>> extras =
       new ArrayList<Map.Entry<String, Object>>(map.size() - ordered.size());
   for (Map.Entry<String, Object> candidate : map.entrySet()) {
     if (!schema.contains(candidate.getKey())) {
       extras.add(candidate);
     }
   }

   // Unknown keys have no declaration order, so they are appended sorted by key.
   Comparator<Map.Entry<String, Object>> byKey =
       new Comparator<Map.Entry<String, Object>>() {
         @Override
         public int compare(Map.Entry<String, Object> left, Map.Entry<String, Object> right) {
           return left.getKey().compareTo(right.getKey());
         }
       };
   Collections.sort(extras, byKey);

   ordered.addAll(extras);
   return ordered;
  }
Пример #2
0
    /**
     * Translates a Data value into its Avro generic representation.
     *
     * <p>If {@code getAvroOverride} yields an override for {@code dataSchema}, the whole
     * translation is delegated to that override's custom translator. Otherwise the value is
     * converted according to the type of the dereferenced data schema; maps, arrays, records and
     * unions are translated recursively. While a nested value is being translated, its key, index
     * or field name is pushed onto {@code _path} and popped again afterwards.
     *
     * <p>Problems detected here are recorded with {@code appendMessage} and the affected value is
     * reported as {@code BAD_RESULT}. Values whose Java type does not match the schema are not
     * checked up front and surface as the cast's {@link ClassCastException}.
     *
     * @param value the Data value to translate ({@code Data.NULL} represents null)
     * @param dataSchema the data schema describing {@code value}
     * @param avroSchema the Avro schema the result has to conform to
     * @return the Avro representation, {@code null} for a null-schema value, or
     *     {@code BAD_RESULT} when the value could not be translated
     */
    private Object translate(Object value, DataSchema dataSchema, Schema avroSchema) {
      // A schema carrying an Avro override is handled entirely by its custom translator.
      AvroOverride avroOverride = getAvroOverride(dataSchema);
      if (avroOverride != null) {
        return avroOverride
            .getCustomDataTranslator()
            .dataToAvroGeneric(this, value, dataSchema, avroSchema);
      }

      DataSchema dereferencedDataSchema = dataSchema.getDereferencedDataSchema();
      DataSchema.Type type = dereferencedDataSchema.getType();
      Object result;
      switch (type) {
        case NULL:
          if (value != Data.NULL) {
            appendMessage("value must be null for null schema");
            result = BAD_RESULT;
            break;
          }
          result = null;
          break;
        case BOOLEAN:
          result = ((Boolean) value).booleanValue();
          break;
        case INT:
          result = ((Number) value).intValue();
          break;
        case LONG:
          result = ((Number) value).longValue();
          break;
        case FLOAT:
          result = ((Number) value).floatValue();
          break;
        case DOUBLE:
          result = ((Number) value).doubleValue();
          break;
        case STRING:
          result = new Utf8((String) value);
          break;
        case BYTES:
          result = ByteBuffer.wrap(translateBytes(value));
          break;
        case ENUM:
          String enumValue = value.toString();
          EnumDataSchema enumDataSchema = (EnumDataSchema) dereferencedDataSchema;
          if (enumDataSchema.getSymbols().contains(enumValue) == false) {
            appendMessage(
                "enum value %1$s not one of %2$s", enumValue, enumDataSchema.getSymbols());
            result = BAD_RESULT;
            break;
          }
          result = _avroAdapter.createEnumSymbol(avroSchema, enumValue);
          break;
        case FIXED:
          byte[] bytes = translateBytes(value);
          FixedDataSchema fixedDataSchema = (FixedDataSchema) dereferencedDataSchema;
          if (fixedDataSchema.getSize() != bytes.length) {
            appendMessage(
                "ByteString size %1$d != FixedDataSchema size %2$d",
                bytes.length, fixedDataSchema.getSize());
            // Signal the failure with BAD_RESULT, exactly like every other failed branch.
            result = BAD_RESULT;
            break;
          }
          GenericData.Fixed fixed = new GenericData.Fixed(avroSchema);
          fixed.bytes(bytes);
          result = fixed;
          break;
        case MAP:
          DataMap map = (DataMap) value;
          DataSchema valueDataSchema = ((MapDataSchema) dereferencedDataSchema).getValues();
          Schema valueAvroSchema = avroSchema.getValueType();
          Map<String, Object> avroMap = new HashMap<String, Object>(map.size());
          for (Map.Entry<String, Object> entry : map.entrySet()) {
            String key = entry.getKey();
            // Keep the map key on the path only while its value is being translated.
            _path.addLast(key);
            Object entryAvroValue = translate(entry.getValue(), valueDataSchema, valueAvroSchema);
            _path.removeLast();
            avroMap.put(key, entryAvroValue);
          }
          result = avroMap;
          break;
        case ARRAY:
          DataList list = (DataList) value;
          DataSchema elementDataSchema = ((ArrayDataSchema) dereferencedDataSchema).getItems();
          Schema elementAvroSchema = avroSchema.getElementType();
          GenericData.Array<Object> avroList =
              new GenericData.Array<Object>(list.size(), avroSchema);
          for (int i = 0; i < list.size(); i++) {
            // Keep the element index on the path only while the element is being translated.
            _path.addLast(i);
            Object entryAvroValue = translate(list.get(i), elementDataSchema, elementAvroSchema);
            _path.removeLast();
            avroList.add(entryAvroValue);
          }
          result = avroList;
          break;
        case RECORD:
          map = (DataMap) value;
          RecordDataSchema recordDataSchema = (RecordDataSchema) dereferencedDataSchema;
          GenericData.Record avroRecord = new GenericData.Record(avroSchema);
          for (RecordDataSchema.Field field : recordDataSchema.getFields()) {
            String fieldName = field.getName();
            DataSchema fieldDataSchema = field.getType();
            Schema.Field avroField = avroSchema.getField(fieldName);
            if (avroField == null) {
              // The data schema declares this field but the Avro schema has no field for it.
              // TODO: Whether and how to indicate this condition to clients.
              continue;
            }
            // From here on every exit from this iteration must pop the field name again.
            _path.addLast(fieldName);
            Schema fieldAvroSchema = avroField.schema();
            Object fieldValue = map.get(fieldName);
            boolean isOptional = field.getOptional();
            if (isOptional) {
              if (fieldDataSchema.getDereferencedType() != DataSchema.Type.UNION) {
                // Optional non-union field: an absent value is translated as a null value, and
                // the Avro schema to translate against is the member that findUnionMember
                // selects from the field's Avro schema (presumably a union of null and the
                // field's type -- confirm against findUnionMember).
                if (fieldValue == null) {
                  fieldValue = Data.NULL;
                  fieldDataSchema = DataSchemaConstants.NULL_DATA_SCHEMA;
                }
                Map.Entry<String, Schema> fieldAvroEntry =
                    findUnionMember(fieldDataSchema, fieldAvroSchema);
                if (fieldAvroEntry == null) {
                  _path.removeLast();
                  continue;
                }
                fieldAvroSchema = fieldAvroEntry.getValue();
              } else {
                // already a union
                if (fieldValue == null) {
                  // field is not present
                  fieldValue = Data.NULL;
                  fieldDataSchema = DataSchemaConstants.NULL_DATA_SCHEMA;
                }
              }
            } else {
              if (fieldValue == null) {
                // Required field without a value: report it and leave the Avro field unset.
                appendMessage("required field is absent");
                _path.removeLast();
                continue;
              }
            }
            Object fieldAvroValue = translate(fieldValue, fieldDataSchema, fieldAvroSchema);
            avroRecord.put(fieldName, fieldAvroValue);
            _path.removeLast();
          }
          result = avroRecord;
          break;
        case UNION:
          UnionDataSchema unionDataSchema = (UnionDataSchema) dereferencedDataSchema;
          String key;
          Object memberValue;
          if (value == Data.NULL) {
            key = DataSchemaConstants.NULL_TYPE;
            memberValue = Data.NULL;
          } else {
            // A non-null union value is read as a DataMap whose first entry names the member
            // (key) and carries the member's value.
            map = (DataMap) value;
            Map.Entry<String, Object> entry = map.entrySet().iterator().next();
            key = entry.getKey();
            memberValue = entry.getValue();
          }
          DataSchema memberDataSchema = unionDataSchema.getType(key);
          Map.Entry<String, Schema> memberAvroEntry = findUnionMember(memberDataSchema, avroSchema);
          if (memberAvroEntry == null) {
            result = BAD_RESULT;
            break;
          }
          Schema memberAvroSchema = memberAvroEntry.getValue();
          _path.addLast(memberAvroEntry.getKey());
          Object memberAvroValue = translate(memberValue, memberDataSchema, memberAvroSchema);
          _path.removeLast();
          result = memberAvroValue;
          break;
        default:
          appendMessage("schema type unknown %1$s", dereferencedDataSchema.getType());
          result = BAD_RESULT;
          break;
      }
      return result;
    }
Пример #3
0
    /**
     * Translates an Avro generic value into its Data representation.
     *
     * <p>If {@code getAvroOverride} yields an override for {@code dataSchema}, the translation is
     * delegated to that override's custom translator. Otherwise the conversion is chosen by the
     * type of the dereferenced data schema, recursing into maps, arrays, records and unions.
     * While a nested value is translated, its key, index or field name sits on {@code _path}.
     *
     * <p>Detected problems are recorded with {@code appendMessage} and reported by returning
     * {@code BAD_RESULT}.
     *
     * @param value the Avro value to translate; {@code null} represents an Avro null
     * @param dataSchema the data schema the result has to conform to
     * @param avroSchema the Avro schema describing {@code value}
     * @return the translated Data value, {@code Data.NULL} for null, or {@code BAD_RESULT}
     */
    private Object translate(Object value, DataSchema dataSchema, Schema avroSchema) {
      final AvroOverride override = getAvroOverride(dataSchema);
      if (override != null) {
        return override
            .getCustomDataTranslator()
            .avroGenericToData(this, value, avroSchema, dataSchema);
      }

      final DataSchema resolvedSchema = dataSchema.getDereferencedDataSchema();
      final DataSchema.Type schemaType = resolvedSchema.getType();

      // Every branch returns its own result; nothing runs after the switch.
      switch (schemaType) {
        case NULL:
          if (value == null) {
            return Data.NULL;
          }
          appendMessage("value must be null for null schema");
          return BAD_RESULT;

        case BOOLEAN:
          return ((Boolean) value).booleanValue();

        case INT:
          return ((Number) value).intValue();

        case LONG:
          return ((Number) value).longValue();

        case FLOAT:
          return ((Number) value).floatValue();

        case DOUBLE:
          return ((Number) value).doubleValue();

        case STRING:
          return value.toString();

        case BYTES: {
          ByteBuffer source = (ByteBuffer) value;
          ByteString copied = ByteString.copy(source);
          // Reset the caller's buffer position after the copy.
          source.rewind();
          return copied;
        }

        case ENUM: {
          String symbol = value.toString();
          EnumDataSchema enumSchema = (EnumDataSchema) resolvedSchema;
          if (enumSchema.getSymbols().contains(symbol)) {
            return symbol;
          }
          appendMessage("enum value %1$s not one of %2$s", symbol, enumSchema.getSymbols());
          return BAD_RESULT;
        }

        case FIXED: {
          GenericFixed avroFixed = (GenericFixed) value;
          byte[] content = avroFixed.bytes();
          FixedDataSchema fixedSchema = (FixedDataSchema) resolvedSchema;
          if (fixedSchema.getSize() != content.length) {
            appendMessage(
                "GenericFixed size %1$d != FixedDataSchema size %2$d",
                content.length, fixedSchema.getSize());
            return BAD_RESULT;
          }
          return ByteString.copy(content);
        }

        case MAP: {
          @SuppressWarnings("unchecked")
          Map<?, Object> avroMap = (Map<?, Object>) value;
          DataSchema valuesDataSchema = ((MapDataSchema) resolvedSchema).getValues();
          Schema valuesAvroSchema = avroSchema.getValueType();
          DataMap translatedMap = new DataMap(avroMap.size());
          for (Map.Entry<?, Object> avroEntry : avroMap.entrySet()) {
            // Avro map keys are turned into plain strings.
            String name = avroEntry.getKey().toString();
            _path.addLast(name);
            Object translatedValue =
                translate(avroEntry.getValue(), valuesDataSchema, valuesAvroSchema);
            _path.removeLast();
            translatedMap.put(name, translatedValue);
          }
          return translatedMap;
        }

        case ARRAY: {
          GenericArray<?> avroArray = (GenericArray<?>) value;
          DataSchema itemsDataSchema = ((ArrayDataSchema) resolvedSchema).getItems();
          Schema itemsAvroSchema = avroSchema.getElementType();
          DataList translatedList = new DataList(avroArray.size());
          for (int index = 0; index < avroArray.size(); index++) {
            _path.addLast(index);
            Object translatedItem =
                translate(avroArray.get(index), itemsDataSchema, itemsAvroSchema);
            _path.removeLast();
            translatedList.add(translatedItem);
          }
          return translatedList;
        }

        case RECORD: {
          GenericRecord avroRecord = (GenericRecord) value;
          RecordDataSchema recordSchema = (RecordDataSchema) resolvedSchema;
          DataMap translatedRecord = new DataMap(avroSchema.getFields().size());
          for (RecordDataSchema.Field declared : recordSchema.getFields()) {
            String name = declared.getName();
            Object avroValue = avroRecord.get(name);
            // A null here means the Avro schema lacks the field or the field holds null; in
            // both cases nothing is added to the result. Optional/required/default are not
            // enforced at this point (a missing required field is not an error); callers can
            // run ValidateDataAgainstSchema with a suitable RequiredMode afterwards.
            if (avroValue == null) {
              continue;
            }
            boolean optional = declared.getOptional();
            DataSchema declaredType = declared.getType();
            Schema targetAvroSchema = avroSchema.getField(name).schema();
            if (optional && (declaredType.getDereferencedType() != DataSchema.Type.UNION)) {
              // The Avro schema should be a two-member union (null and the field's type);
              // translate against the member that matches the field's type.
              Map.Entry<String, Schema> member = findUnionMember(declaredType, targetAvroSchema);
              if (member == null) {
                continue;
              }
              targetAvroSchema = member.getValue();
            }
            _path.addLast(name);
            translatedRecord.put(name, translate(avroValue, declaredType, targetAvroSchema));
            _path.removeLast();
          }
          return translatedRecord;
        }

        case UNION: {
          UnionDataSchema unionSchema = (UnionDataSchema) resolvedSchema;
          Map.Entry<DataSchema, Schema> selected =
              findUnionMemberSchema(value, unionSchema, avroSchema);
          if (selected == null) {
            return BAD_RESULT;
          }
          if (value == null) {
            // schema must be "null" schema
            return Data.NULL;
          }
          DataSchema memberDataSchema = selected.getKey();
          Schema memberAvroSchema = selected.getValue();
          String memberKey = memberDataSchema.getUnionMemberKey();
          // The union value becomes a single-entry map keyed by the member key.
          DataMap wrapper = new DataMap(1);
          _path.addLast(memberKey);
          wrapper.put(memberKey, translate(value, memberDataSchema, memberAvroSchema));
          _path.removeLast();
          return wrapper;
        }

        default:
          appendMessage("schema type unknown %1$s", resolvedSchema.getType());
          return BAD_RESULT;
      }
    }
Пример #4
0
  /**
   * Compares the field sets of two versions of a record schema.
   *
   * <p>After {@code checkName}, fields are classified as added, removed, or changed between
   * required and optional, and one message per non-empty category is emitted through
   * {@code appendMessage} with the matching {@code CompatibilityMessage.Impact}. Finally, for
   * every field present in both versions, the older and newer field types are passed to
   * {@code check} while the field name sits on {@code _path}.
   *
   * @param older the earlier version of the record schema
   * @param newer the later version of the record schema
   */
  private void checkRecord(RecordDataSchema older, RecordDataSchema newer) {
    checkName(older, newer);

    List<RecordDataSchema.Field> sharedFields =
        new ArrayList<RecordDataSchema.Field>(newer.getFields().size());
    List<String> addedRequired = new CheckerArrayList<String>();
    List<String> addedOptional = new CheckerArrayList<String>();
    List<String> becameOptional = new CheckerArrayList<String>();
    List<String> becameRequired = new CheckerArrayList<String>();
    List<String> removedRequired = new CheckerArrayList<String>();
    List<String> removedOptional = new CheckerArrayList<String>();

    // Classify every field of the newer schema: brand new, or shared with the older schema.
    for (RecordDataSchema.Field current : newer.getFields()) {
      String name = current.getName();
      RecordDataSchema.Field previous = older.getField(name);
      if (previous != null) {
        sharedFields.add(current);
        boolean optionalNow = current.getOptional();
        if (optionalNow != previous.getOptional()) {
          if (optionalNow) {
            becameOptional.add(name);
          } else {
            becameRequired.add(name);
          }
        }
      } else if (current.getOptional()) {
        addedOptional.add(name);
      } else {
        addedRequired.add(name);
      }
    }

    // Fields known only to the older schema have been removed.
    for (RecordDataSchema.Field previous : older.getFields()) {
      String name = previous.getName();
      if (newer.getField(name) != null) {
        continue;
      }
      if (previous.getOptional()) {
        removedOptional.add(name);
      } else {
        removedRequired.add(name);
      }
    }

    if (!addedRequired.isEmpty()) {
      appendMessage(
          CompatibilityMessage.Impact.BREAKS_NEW_READER,
          "new record added required fields %s",
          addedRequired);
    }
    if (!removedRequired.isEmpty()) {
      appendMessage(
          CompatibilityMessage.Impact.BREAKS_OLD_READER,
          "new record removed required fields %s",
          removedRequired);
    }
    if (!becameRequired.isEmpty()) {
      appendMessage(
          CompatibilityMessage.Impact.BREAKS_NEW_READER,
          "new record changed optional fields to required fields %s",
          becameRequired);
    }
    if (!becameOptional.isEmpty()) {
      appendMessage(
          CompatibilityMessage.Impact.BREAKS_OLD_READER,
          "new record changed required fields to optional fields %s",
          becameOptional);
    }
    if (!addedOptional.isEmpty()) {
      appendMessage(
          CompatibilityMessage.Impact.OLD_READER_IGNORES_DATA,
          "new record added optional fields %s",
          addedOptional);
    }
    if (!removedOptional.isEmpty()) {
      appendMessage(
          CompatibilityMessage.Impact.NEW_READER_IGNORES_DATA,
          "new record removed optional fields %s",
          removedOptional);
    }

    // Compare the types of the fields that exist in both versions.
    for (RecordDataSchema.Field current : sharedFields) {
      String name = current.getName();
      _path.addLast(name);

      RecordDataSchema.Field previous = older.getField(name);
      assert (previous != null);
      check(previous.getType(), current.getType());

      _path.removeLast();
    }
  }