示例#1
0
  /**
   * Returns true if the types of two avro schemas are equal. This ignores things like custom field
   * properties that the equals() implementation of Schema checks.
   *
   * <p>Comparison rules, by schema type:
   *
   * <ul>
   *   <li>ENUM and FIXED schemas are compared with {@code Schema.equals}.
   *   <li>ARRAY and MAP schemas are equal when their element / value schemas are equal.
   *   <li>UNION schemas are equal when they have the same number of branches and the branches in
   *       the same position are equal.
   *   <li>RECORD schemas are equal when they have the same number of fields and every field of
   *       the first schema has a field of the same name and equal type in the second schema.
   *       Record names themselves are not compared.
   *   <li>For every other type, matching in type is enough.
   * </ul>
   *
   * <p>Self-referential (recursive) record schemas are supported: a pair of records that is
   * already being compared further up the call stack is treated as equal instead of being
   * descended into again, so the comparison terminates.
   *
   * @param schema1 The first schema to compare
   * @param schema2 The second schema to compare
   * @return True if the types are equal, otherwise false.
   */
  public static boolean avroSchemaTypesEqual(Schema schema1, Schema schema2) {
    return avroSchemaTypesEqualImpl(
        schema1, schema2, new java.util.IdentityHashMap<Schema, java.util.Set<Schema>>());
  }

  /**
   * Recursive worker for {@link #avroSchemaTypesEqual(Schema, Schema)}.
   *
   * @param schema1 The first schema to compare
   * @param schema2 The second schema to compare
   * @param inProgress for each record schema on the left-hand side, the right-hand record schemas
   *     it is currently being compared with (both tracked by object identity); used to break
   *     cycles in recursive schemas
   * @return True if the types are equal, otherwise false.
   */
  private static boolean avroSchemaTypesEqualImpl(
      Schema schema1, Schema schema2, java.util.Map<Schema, java.util.Set<Schema>> inProgress) {
    Schema.Type type = schema1.getType();
    if (type != schema2.getType()) {
      // if the types aren't equal, no need to go further. Return false
      return false;
    }

    switch (type) {
      case ENUM:
      case FIXED:
        // Enum and Fixed types schemas should be equal using the Schema.equals method.
        return schema1.equals(schema2);

      case ARRAY:
        // Array element schemas should be equal.
        return avroSchemaTypesEqualImpl(
            schema1.getElementType(), schema2.getElementType(), inProgress);

      case MAP:
        // Map value schemas should be equal.
        return avroSchemaTypesEqualImpl(
            schema1.getValueType(), schema2.getValueType(), inProgress);

      case UNION:
        {
          // Compare union branches in the same position.
          java.util.List<Schema> branches1 = schema1.getTypes();
          java.util.List<Schema> branches2 = schema2.getTypes();
          if (branches1.size() != branches2.size()) {
            return false;
          }
          for (int i = 0; i < branches1.size(); i++) {
            if (!avroSchemaTypesEqualImpl(branches1.get(i), branches2.get(i), inProgress)) {
              return false;
            }
          }
          return true;
        }

      case RECORD:
        {
          java.util.Set<Schema> partners = inProgress.get(schema1);
          if (partners == null) {
            partners =
                java.util.Collections.newSetFromMap(
                    new java.util.IdentityHashMap<Schema, Boolean>());
            inProgress.put(schema1, partners);
          }
          if (!partners.add(schema2)) {
            // This exact pair is already being compared by an enclosing call, i.e. the schemas
            // are recursive. Assume equality here; any real mismatch is reported by the
            // enclosing comparison of the same pair.
            return true;
          }
          try {
            // Compare record fields that match in name.
            if (schema1.getFields().size() != schema2.getFields().size()) {
              return false;
            }
            for (Field field1 : schema1.getFields()) {
              Field field2 = schema2.getField(field1.name());
              if (field2 == null) {
                return false;
              }
              if (!avroSchemaTypesEqualImpl(field1.schema(), field2.schema(), inProgress)) {
                return false;
              }
            }
            return true;
          } finally {
            // Only pairs on the current call path count as "in progress".
            partners.remove(schema2);
          }
        }

      default:
        // All other types are primitive, so them matching in type is enough.
        return true;
    }
  }
示例#2
0
  /**
   * Called by {@link #containsRecursiveRecord(Schema)} and it recursively checks whether the input
   * schema contains recursive records.
   *
   * <p>Records are identified by their full (namespace-qualified) name, so two distinct records
   * that merely share a simple name in different namespaces are not mistaken for a recursion.
   *
   * @param s the schema to inspect
   * @param definedRecordNames full names of the records currently being traversed, i.e. the
   *     records enclosing {@code s}; a record's name is added while its fields are checked and
   *     removed again afterwards. When this method returns true, names added along the way are
   *     left in the set.
   * @return true if a record is reachable from within itself, otherwise false
   */
  protected static boolean containsRecursiveRecord(Schema s, Set<String> definedRecordNames) {
    Schema.Type type = s.getType();

    /* if it is a record, check itself and all fields */
    if (type == Schema.Type.RECORD) {
      String fullName = s.getFullName();

      /* add its own name into defined record set; failing to add means it is already open */
      if (!definedRecordNames.add(fullName)) {
        return true;
      }

      /* check all fields */
      for (Field field : s.getFields()) {
        if (containsRecursiveRecord(field.schema(), definedRecordNames)) {
          return true;
        }
      }

      /* remove its own name from the name set */
      definedRecordNames.remove(fullName);
      return false;
    }

    /* if it is an array, check its element type */
    if (type == Schema.Type.ARRAY) {
      return containsRecursiveRecord(s.getElementType(), definedRecordNames);
    }

    /* if it is a map, check its value type */
    if (type == Schema.Type.MAP) {
      return containsRecursiveRecord(s.getValueType(), definedRecordNames);
    }

    /* if it is a union, check all possible types */
    if (type == Schema.Type.UNION) {
      for (Schema branch : s.getTypes()) {
        if (containsRecursiveRecord(branch, definedRecordNames)) {
          return true;
        }
      }
      return false;
    }

    /* return false for other cases */
    return false;
  }
示例#3
0
  /**
   * Determines whether the input schema contains generic unions, i.e. whether any union reachable
   * from {@code s} is rejected by {@code isAcceptableUnion}.
   *
   * <p>Recursive record schemas are handled: each record is inspected at most once, so the
   * traversal terminates even when a record refers back to itself.
   *
   * @param s the schema to inspect
   * @return true if a union that is not acceptable is found, otherwise false
   */
  public static boolean containsGenericUnion(Schema s) {
    java.util.Set<Schema> visitedRecords =
        java.util.Collections.newSetFromMap(new java.util.IdentityHashMap<Schema, Boolean>());
    return containsGenericUnionVisiting(s, visitedRecords);
  }

  /**
   * Recursive worker for {@link #containsGenericUnion(Schema)}.
   *
   * @param s the schema to inspect
   * @param visitedRecords record schemas (by object identity) whose fields have already been, or
   *     are currently being, inspected
   * @return true if a union that is not acceptable is found, otherwise false
   */
  private static boolean containsGenericUnionVisiting(
      Schema s, java.util.Set<Schema> visitedRecords) {
    Schema.Type type = s.getType();

    /* if it is a record, check all fields */
    if (type == Schema.Type.RECORD) {
      if (!visitedRecords.add(s)) {
        /* Already inspected (or being inspected higher up); nothing new can be found here. */
        return false;
      }
      for (Field field : s.getFields()) {
        if (containsGenericUnionVisiting(field.schema(), visitedRecords)) {
          return true;
        }
      }
      return false;
    }

    /* if it is an array, check its element type */
    if (type == Schema.Type.ARRAY) {
      return containsGenericUnionVisiting(s.getElementType(), visitedRecords);
    }

    /* if it is a map, check its value type */
    if (type == Schema.Type.MAP) {
      return containsGenericUnionVisiting(s.getValueType(), visitedRecords);
    }

    /* if it is a union, check all possible types and itself */
    if (type == Schema.Type.UNION) {
      List<Schema> types = s.getTypes();
      for (Schema branch : types) {
        if (containsGenericUnionVisiting(branch, visitedRecords)) {
          return true;
        }
      }
      /* check whether itself is acceptable (null-union) */
      return !isAcceptableUnion(s);
    }

    /* return false for other cases */
    return false;
  }
示例#4
0
 /**
  * Writes the given Avro datum into the given record, using the given Avro schema.
  *
  * <p>The datum is walked recursively according to its schema and every leaf value is put into
  * {@code outputRecord} under a slash-separated path:
  *
  * <ul>
  *   <li>a record field is written under {@code prefix + "/" + fieldName};
  *   <li>a map entry is written under {@code prefix + "/" + key};
  *   <li>array elements and the resolved branch of a union reuse {@code prefix} unchanged;
  *   <li>enums and strings are written via {@code toString()}, fixed and bytes as {@code byte[]},
  *       numeric and boolean values as-is;
  *   <li>a null-typed value writes nothing.
  * </ul>
  *
  * @param datum the Avro value to flatten
  * @param schema the Avro schema describing {@code datum}
  * @param outputRecord the record receiving the extracted values
  * @param prefix the path under which values of {@code datum} are written
  * @throws MorphlineRuntimeException if the schema type is not one of the handled types
  */
 private void extractTree(Object datum, Schema schema, Record outputRecord, String prefix) {
   switch (schema.getType()) {
     case RECORD:
       {
         IndexedRecord rec = (IndexedRecord) datum;
         for (Field f : schema.getFields()) {
           String childPath = prefix + "/" + f.name();
           extractTree(rec.get(f.pos()), f.schema(), outputRecord, childPath);
         }
         break;
       }
     case ENUM:
       {
         outputRecord.put(prefix, ((GenericEnumSymbol) datum).toString());
         break;
       }
     case ARRAY:
       {
         // Every element lands under the same path as the array itself.
         for (Object element : (Collection<?>) datum) {
           extractTree(element, schema.getElementType(), outputRecord, prefix);
         }
         break;
       }
     case MAP:
       {
         Map<?, ?> entries = (Map<?, ?>) datum;
         for (Map.Entry<?, ?> e : entries.entrySet()) {
           String childPath = prefix + "/" + e.getKey().toString();
           extractTree(e.getValue(), schema.getValueType(), outputRecord, childPath);
         }
         break;
       }
     case UNION:
       {
         // The branch's type name is deliberately not appended to the path.
         int branch = GenericData.get().resolveUnion(schema, datum);
         extractTree(datum, schema.getTypes().get(branch), outputRecord, prefix);
         break;
       }
     case FIXED:
       {
         outputRecord.put(prefix, ((GenericFixed) datum).bytes());
         break;
       }
     case BYTES:
       {
         // Read through a duplicate so the caller's buffer position is left untouched.
         ByteBuffer view = ((ByteBuffer) datum).duplicate();
         byte[] copy = new byte[view.remaining()];
         view.get(copy);
         outputRecord.put(prefix, copy);
         break;
       }
     case STRING:
       {
         outputRecord.put(prefix, datum.toString());
         break;
       }
     case INT:
     case LONG:
     case FLOAT:
     case DOUBLE:
     case BOOLEAN:
       {
         // Numeric and boolean values are stored unchanged.
         outputRecord.put(prefix, datum);
         break;
       }
     case NULL:
       {
         // Nothing to write for a null value.
         break;
       }
     default:
       throw new MorphlineRuntimeException("Unknown Avro schema type: " + schema.getType());
   }
 }
示例#5
0
    /**
     * Translates a Data value into its Avro generic representation.
     *
     * <p>If {@code getAvroOverride} yields an override for {@code dataSchema}, translation is
     * delegated entirely to that override's custom translator. Otherwise the value is converted
     * according to the dereferenced data schema type, recursing into maps, arrays, records and
     * unions. While recursing, the current location (map key, array index, field name or union
     * member key) is pushed onto {@code _path} and popped again afterwards.
     *
     * <p>Problems detected here are reported through {@code appendMessage} and yield
     * {@code BAD_RESULT}; they do not throw.
     *
     * @param value the Data value to translate
     * @param dataSchema the data schema describing {@code value}
     * @param avroSchema the Avro schema the result should conform to
     * @return the Avro generic value, or {@code BAD_RESULT} if the value could not be translated
     */
    private Object translate(Object value, DataSchema dataSchema, Schema avroSchema) {
      AvroOverride avroOverride = getAvroOverride(dataSchema);
      if (avroOverride != null) {
        return avroOverride
            .getCustomDataTranslator()
            .dataToAvroGeneric(this, value, dataSchema, avroSchema);
      }

      DataSchema dereferencedDataSchema = dataSchema.getDereferencedDataSchema();
      DataSchema.Type type = dereferencedDataSchema.getType();
      Object result;
      switch (type) {
        case NULL:
          if (value != Data.NULL) {
            appendMessage("value must be null for null schema");
            result = BAD_RESULT;
            break;
          }
          result = null;
          break;
        case BOOLEAN:
          result = ((Boolean) value).booleanValue();
          break;
        case INT:
          result = ((Number) value).intValue();
          break;
        case LONG:
          result = ((Number) value).longValue();
          break;
        case FLOAT:
          result = ((Number) value).floatValue();
          break;
        case DOUBLE:
          result = ((Number) value).doubleValue();
          break;
        case STRING:
          result = new Utf8((String) value);
          break;
        case BYTES:
          result = ByteBuffer.wrap(translateBytes(value));
          break;
        case ENUM:
          String enumValue = value.toString();
          EnumDataSchema enumDataSchema = (EnumDataSchema) dereferencedDataSchema;
          if (!enumDataSchema.getSymbols().contains(enumValue)) {
            appendMessage(
                "enum value %1$s not one of %2$s", enumValue, enumDataSchema.getSymbols());
            result = BAD_RESULT;
            break;
          }
          result = _avroAdapter.createEnumSymbol(avroSchema, enumValue);
          break;
        case FIXED:
          byte[] bytes = translateBytes(value);
          FixedDataSchema fixedDataSchema = (FixedDataSchema) dereferencedDataSchema;
          if (fixedDataSchema.getSize() != bytes.length) {
            appendMessage(
                "ByteString size %1$d != FixedDataSchema size %2$d",
                bytes.length, fixedDataSchema.getSize());
            // Report the failure the same way as every other error branch.
            result = BAD_RESULT;
            break;
          }
          GenericData.Fixed fixed = new GenericData.Fixed(avroSchema);
          fixed.bytes(bytes);
          result = fixed;
          break;
        case MAP:
          DataMap map = (DataMap) value;
          DataSchema valueDataSchema = ((MapDataSchema) dereferencedDataSchema).getValues();
          Schema valueAvroSchema = avroSchema.getValueType();
          Map<String, Object> avroMap = new HashMap<String, Object>(map.size());
          for (Map.Entry<String, Object> entry : map.entrySet()) {
            String key = entry.getKey();
            _path.addLast(key);
            Object entryAvroValue = translate(entry.getValue(), valueDataSchema, valueAvroSchema);
            _path.removeLast();
            avroMap.put(key, entryAvroValue);
          }
          result = avroMap;
          break;
        case ARRAY:
          DataList list = (DataList) value;
          DataSchema elementDataSchema = ((ArrayDataSchema) dereferencedDataSchema).getItems();
          Schema elementAvroSchema = avroSchema.getElementType();
          GenericData.Array<Object> avroList =
              new GenericData.Array<Object>(list.size(), avroSchema);
          for (int i = 0; i < list.size(); i++) {
            _path.addLast(i);
            Object entryAvroValue = translate(list.get(i), elementDataSchema, elementAvroSchema);
            _path.removeLast();
            avroList.add(entryAvroValue);
          }
          result = avroList;
          break;
        case RECORD:
          map = (DataMap) value;
          RecordDataSchema recordDataSchema = (RecordDataSchema) dereferencedDataSchema;
          GenericData.Record avroRecord = new GenericData.Record(avroSchema);
          for (RecordDataSchema.Field field : recordDataSchema.getFields()) {
            String fieldName = field.getName();
            DataSchema fieldDataSchema = field.getType();
            Schema.Field avroField = avroSchema.getField(fieldName);
            if (avroField == null) {
              // field present in input but there is no field for it in Avro schema.
              // TODO: Whether and how to indicate this condition to clients.
              continue;
            }
            _path.addLast(fieldName);
            Schema fieldAvroSchema = avroField.schema();
            Object fieldValue = map.get(fieldName);
            boolean isOptional = field.getOptional();
            if (isOptional) {
              if (fieldDataSchema.getDereferencedType() != DataSchema.Type.UNION) {
                // Optional non-union field: the Avro field schema is looked up as a union, so
                // pick the member matching either the field's type or null when absent.
                if (fieldValue == null) {
                  fieldValue = Data.NULL;
                  fieldDataSchema = DataSchemaConstants.NULL_DATA_SCHEMA;
                }
                Map.Entry<String, Schema> fieldAvroEntry =
                    findUnionMember(fieldDataSchema, fieldAvroSchema);
                if (fieldAvroEntry == null) {
                  _path.removeLast();
                  continue;
                }
                fieldAvroSchema = fieldAvroEntry.getValue();
              } else {
                // already a union
                if (fieldValue == null) {
                  // field is not present
                  fieldValue = Data.NULL;
                  fieldDataSchema = DataSchemaConstants.NULL_DATA_SCHEMA;
                }
              }
            } else {
              if (fieldValue == null) {
                appendMessage("required field is absent");
                _path.removeLast();
                continue;
              }
            }
            Object fieldAvroValue = translate(fieldValue, fieldDataSchema, fieldAvroSchema);
            avroRecord.put(fieldName, fieldAvroValue);
            _path.removeLast();
          }
          result = avroRecord;
          break;
        case UNION:
          UnionDataSchema unionDataSchema = (UnionDataSchema) dereferencedDataSchema;
          String key;
          Object memberValue;
          if (value == Data.NULL) {
            key = DataSchemaConstants.NULL_TYPE;
            memberValue = Data.NULL;
          } else {
            // A non-null union value is a map whose first entry is (member key -> member value).
            map = (DataMap) value;
            Map.Entry<String, Object> entry = map.entrySet().iterator().next();
            key = entry.getKey();
            memberValue = entry.getValue();
          }
          DataSchema memberDataSchema = unionDataSchema.getType(key);
          Map.Entry<String, Schema> memberAvroEntry = findUnionMember(memberDataSchema, avroSchema);
          if (memberAvroEntry == null) {
            result = BAD_RESULT;
            break;
          }
          Schema memberAvroSchema = memberAvroEntry.getValue();
          _path.addLast(memberAvroEntry.getKey());
          Object memberAvroValue = translate(memberValue, memberDataSchema, memberAvroSchema);
          _path.removeLast();
          result = memberAvroValue;
          break;
        default:
          appendMessage("schema type unknown %1$s", dereferencedDataSchema.getType());
          result = BAD_RESULT;
          break;
      }
      return result;
    }
示例#6
0
    /**
     * Translates an Avro generic value into its Data representation.
     *
     * <p>If {@code getAvroOverride} yields an override for {@code dataSchema}, translation is
     * delegated entirely to that override's custom translator. Otherwise the value is converted
     * according to the dereferenced data schema type, recursing into maps, arrays, records and
     * unions. While recursing, the current location (map key, array index, field name or union
     * member key) is pushed onto {@code _path} and popped again afterwards.
     *
     * <p>Problems detected here are reported through {@code appendMessage} and yield
     * {@code BAD_RESULT}; they do not throw.
     *
     * @param value the Avro generic value to translate
     * @param dataSchema the data schema the result should conform to
     * @param avroSchema the Avro schema describing {@code value}
     * @return the Data value, or {@code BAD_RESULT} if the value could not be translated
     */
    private Object translate(Object value, DataSchema dataSchema, Schema avroSchema) {
      AvroOverride avroOverride = getAvroOverride(dataSchema);
      if (avroOverride != null) {
        return avroOverride
            .getCustomDataTranslator()
            .avroGenericToData(this, value, avroSchema, dataSchema);
      }

      DataSchema dereferencedDataSchema = dataSchema.getDereferencedDataSchema();
      DataSchema.Type type = dereferencedDataSchema.getType();
      Object result;
      switch (type) {
        case NULL:
          if (value != null) {
            appendMessage("value must be null for null schema");
            result = BAD_RESULT;
            break;
          }
          result = Data.NULL;
          break;
        case BOOLEAN:
          result = ((Boolean) value).booleanValue();
          break;
        case INT:
          result = ((Number) value).intValue();
          break;
        case LONG:
          result = ((Number) value).longValue();
          break;
        case FLOAT:
          result = ((Number) value).floatValue();
          break;
        case DOUBLE:
          result = ((Number) value).doubleValue();
          break;
        case STRING:
          result = value.toString();
          break;
        case BYTES:
          ByteBuffer byteBuffer = (ByteBuffer) value;
          // Restore the caller's original position after copying. rewind() would reset it to 0,
          // which is wrong for a buffer whose content does not start at index 0.
          int originalPosition = byteBuffer.position();
          ByteString byteString = ByteString.copy(byteBuffer);
          byteBuffer.position(originalPosition);
          result = byteString;
          break;
        case ENUM:
          String enumValue = value.toString();
          EnumDataSchema enumDataSchema = (EnumDataSchema) dereferencedDataSchema;
          if (!enumDataSchema.getSymbols().contains(enumValue)) {
            appendMessage(
                "enum value %1$s not one of %2$s", enumValue, enumDataSchema.getSymbols());
            result = BAD_RESULT;
            break;
          }
          result = enumValue;
          break;
        case FIXED:
          GenericFixed fixed = (GenericFixed) value;
          byte[] fixedBytes = fixed.bytes();
          FixedDataSchema fixedDataSchema = (FixedDataSchema) dereferencedDataSchema;
          if (fixedDataSchema.getSize() != fixedBytes.length) {
            appendMessage(
                "GenericFixed size %1$d != FixedDataSchema size %2$d",
                fixedBytes.length, fixedDataSchema.getSize());
            result = BAD_RESULT;
            break;
          }
          byteString = ByteString.copy(fixedBytes);
          result = byteString;
          break;
        case MAP:
          @SuppressWarnings("unchecked")
          Map<?, Object> map = (Map<?, Object>) value;
          DataSchema valueDataSchema = ((MapDataSchema) dereferencedDataSchema).getValues();
          Schema valueAvroSchema = avroSchema.getValueType();
          DataMap dataMap = new DataMap(map.size());
          for (Map.Entry<?, Object> entry : map.entrySet()) {
            String key = entry.getKey().toString();
            _path.addLast(key);
            Object entryValue = translate(entry.getValue(), valueDataSchema, valueAvroSchema);
            _path.removeLast();
            dataMap.put(key, entryValue);
          }
          result = dataMap;
          break;
        case ARRAY:
          GenericArray<?> list = (GenericArray<?>) value;
          DataSchema elementDataSchema = ((ArrayDataSchema) dereferencedDataSchema).getItems();
          Schema elementAvroSchema = avroSchema.getElementType();
          DataList dataList = new DataList(list.size());
          for (int i = 0; i < list.size(); i++) {
            _path.addLast(i);
            Object entryValue = translate(list.get(i), elementDataSchema, elementAvroSchema);
            _path.removeLast();
            dataList.add(entryValue);
          }
          result = dataList;
          break;
        case RECORD:
          GenericRecord record = (GenericRecord) value;
          RecordDataSchema recordDataSchema = (RecordDataSchema) dereferencedDataSchema;
          dataMap = new DataMap(avroSchema.getFields().size());
          for (RecordDataSchema.Field field : recordDataSchema.getFields()) {
            String fieldName = field.getName();
            Object fieldValue = record.get(fieldName);
            // fieldValue could be null if the Avro schema does not contain the named field or
            // the field is present with a null value. In either case we do not add a value
            // to the translated DataMap. We do not consider optional/required/default here
            // either (i.e. it is not an error if a required field is missing); the user can
            // later call ValidateDataAgainstSchema with various
            // settings for RequiredMode to obtain the desired behaviour.
            if (fieldValue == null) {
              continue;
            }
            boolean isOptional = field.getOptional();
            DataSchema fieldDataSchema = field.getType();
            Schema fieldAvroSchema = avroSchema.getField(fieldName).schema();
            if (isOptional && (fieldDataSchema.getDereferencedType() != DataSchema.Type.UNION)) {
              // Avro schema should be union with 2 types: null and the field's type.
              Map.Entry<String, Schema> fieldAvroEntry =
                  findUnionMember(fieldDataSchema, fieldAvroSchema);
              if (fieldAvroEntry == null) {
                continue;
              }
              fieldAvroSchema = fieldAvroEntry.getValue();
            }
            _path.addLast(fieldName);
            dataMap.put(fieldName, translate(fieldValue, fieldDataSchema, fieldAvroSchema));
            _path.removeLast();
          }
          result = dataMap;
          break;
        case UNION:
          UnionDataSchema unionDataSchema = (UnionDataSchema) dereferencedDataSchema;
          Map.Entry<DataSchema, Schema> memberSchemas =
              findUnionMemberSchema(value, unionDataSchema, avroSchema);
          if (memberSchemas == null) {
            result = BAD_RESULT;
            break;
          }
          if (value == null) {
            // schema must be "null" schema
            result = Data.NULL;
          } else {
            // A non-null union value becomes a single-entry map: member key -> member value.
            DataSchema memberDataSchema = memberSchemas.getKey();
            Schema memberAvroSchema = memberSchemas.getValue();
            String key = memberDataSchema.getUnionMemberKey();
            dataMap = new DataMap(1);
            _path.addLast(key);
            dataMap.put(key, translate(value, memberDataSchema, memberAvroSchema));
            _path.removeLast();
            result = dataMap;
          }
          break;
        default:
          appendMessage("schema type unknown %1$s", dereferencedDataSchema.getType());
          result = BAD_RESULT;
          break;
      }
      return result;
    }