/**
 * Applies the items of an Avro array to the array stored under {@code fieldName} in
 * {@code record}.
 *
 * <p>When the field does not currently hold an array, a new empty one (built with the schema of
 * {@code array}) is set on the record first. The first item of {@code array} then decides how
 * all items are handled: records are turned into new common records and appended; UUID fixed
 * items remove the first stored entry whose record carries that UUID (also removing the UUID
 * from {@code records}); other fixed items are appended as common fixed values; anything else
 * is appended as a plain common value.
 */
private void processArrayField(CommonRecord record, GenericArray array, String fieldName) {
   // Resolve the list to modify: the one behind the existing field, or a new one that is
   // installed on the record right away.
   final List<CommonValue> target;
   final CommonValue existing = record.getField(fieldName);
   if (existing == null || !existing.isArray()) {
     target = new LinkedList<CommonValue>();
     record.setField(
         fieldName,
         commonFactory.createCommonValue(
             commonFactory.createCommonArray(array.getSchema(), target)));
   } else {
     target = existing.getArray().getList();
   }

   if (array.isEmpty()) {
     return;
   }

   // The first item is used as the representative for the whole array.
   final Object probe = array.get(0);

   if (AvroGenericUtils.isRecord(probe)) {
     // Append a freshly built common record for every incoming record.
     for (GenericRecord incoming : (GenericArray<GenericRecord>) array) {
       CommonRecord converted = createCommonRecord(incoming);
       updateRecord(converted, incoming);
       target.add(commonFactory.createCommonValue(converted));
     }
     return;
   }

   if (!AvroGenericUtils.isFixed(probe)) {
     // Neither record nor fixed: append the items as they are.
     for (Object plainItem : array) {
       target.add(commonFactory.createCommonValue(plainItem));
     }
     return;
   }

   final GenericArray<GenericFixed> fixedValues = (GenericArray<GenericFixed>) array;

   if (!AvroGenericUtils.isUuid(probe)) {
     // Ordinary fixed items are appended as common fixed values.
     for (GenericFixed fixedItem : fixedValues) {
       target.add(
           commonFactory.createCommonValue(
               commonFactory.createCommonFixed(fixedItem.getSchema(), fixedItem.bytes())));
     }
     return;
   }

   // UUID items name stored records to drop; only the first match per UUID is removed.
   for (GenericFixed fixedItem : fixedValues) {
     UUID uuidToDrop = AvroGenericUtils.createUuidFromFixed(fixedItem);
     for (Iterator<CommonValue> cursor = target.iterator(); cursor.hasNext(); ) {
       CommonRecord candidate = cursor.next().getRecord();
       if (candidate.getUuid().equals(uuidToDrop)) {
         cursor.remove();
         records.remove(uuidToDrop);
         break;
       }
     }
   }
 }
Exemple #2
0
 /**
  * Flattens the given Avro datum into the given record, guided by the given Avro schema.
  *
  * <p>Container types are walked recursively: every record field extends the prefix with
  * {@code "/" + fieldName} and every map entry with {@code "/" + key}, while array elements and
  * the resolved union branch keep the prefix unchanged. Leaf values are stored in
  * {@code outputRecord} under the prefix accumulated so far; a NULL-typed value stores nothing.
  */
 private void extractTree(Object datum, Schema schema, Record outputRecord, String prefix) {
   switch (schema.getType()) {
     case RECORD:
       {
         IndexedRecord container = (IndexedRecord) datum;
         for (Field child : schema.getFields()) {
           extractTree(
               container.get(child.pos()),
               child.schema(),
               outputRecord,
               prefix + "/" + child.name());
         }
         break;
       }
     case MAP:
       {
         Map<CharSequence, ?> entries = (Map<CharSequence, ?>) datum;
         for (Map.Entry<CharSequence, ?> mapping : entries.entrySet()) {
           extractTree(
               mapping.getValue(),
               schema.getValueType(),
               outputRecord,
               prefix + "/" + mapping.getKey().toString());
         }
         break;
       }
     case ARRAY:
       // All elements of an array land under the same prefix.
       for (Object element : (Collection<?>) datum) {
         extractTree(element, schema.getElementType(), outputRecord, prefix);
       }
       break;
     case UNION:
       {
         // Descend into whichever branch of the union the datum actually matches.
         int branch = GenericData.get().resolveUnion(schema, datum);
         extractTree(datum, schema.getTypes().get(branch), outputRecord, prefix);
         break;
       }
     case ENUM:
       outputRecord.put(prefix, ((GenericEnumSymbol) datum).toString());
       break;
     case FIXED:
       outputRecord.put(prefix, ((GenericFixed) datum).bytes());
       break;
     case BYTES:
       {
         ByteBuffer source = (ByteBuffer) datum;
         byte[] copy = new byte[source.remaining()];
         // Read through a duplicate so the position of the caller's buffer is left untouched.
         source.duplicate().get(copy);
         outputRecord.put(prefix, copy);
         break;
       }
     case STRING:
       outputRecord.put(prefix, datum.toString());
       break;
     case INT:
     case LONG:
     case FLOAT:
     case DOUBLE:
     case BOOLEAN:
       // These values are stored as they are.
       outputRecord.put(prefix, datum);
       break;
     case NULL:
       break;
     default:
       throw new MorphlineRuntimeException("Unknown Avro schema type: " + schema.getType());
   }
 }
Exemple #3
0
    /**
     * Translates an Avro generic value into its Data counterpart for the given pair of schemas.
     *
     * <p>If {@code getAvroOverride(dataSchema)} yields an override, translation is delegated
     * entirely to its custom translator. Otherwise the type of the dereferenced data schema
     * selects the conversion. MAP, ARRAY, RECORD and UNION recurse into their members, pushing
     * the member key or index onto {@code _path} for the duration of each nested call.
     *
     * @param value the Avro value to translate
     * @param dataSchema the data schema that drives the translation
     * @param avroSchema the Avro schema corresponding to {@code value}
     * @return the translated value, or {@code BAD_RESULT} after a message has been recorded
     *     through {@code appendMessage} when the value does not fit the schema
     */
    private Object translate(Object value, DataSchema dataSchema, Schema avroSchema) {
      AvroOverride avroOverride = getAvroOverride(dataSchema);
      if (avroOverride != null) {
        return avroOverride
            .getCustomDataTranslator()
            .avroGenericToData(this, value, avroSchema, dataSchema);
      }

      DataSchema dereferencedDataSchema = dataSchema.getDereferencedDataSchema();
      DataSchema.Type type = dereferencedDataSchema.getType();
      Object result;
      switch (type) {
        case NULL:
          if (value != null) {
            appendMessage("value must be null for null schema");
            result = BAD_RESULT;
            break;
          }
          result = Data.NULL;
          break;
        case BOOLEAN:
          result = ((Boolean) value).booleanValue();
          break;
        case INT:
          result = ((Number) value).intValue();
          break;
        case LONG:
          result = ((Number) value).longValue();
          break;
        case FLOAT:
          result = ((Number) value).floatValue();
          break;
        case DOUBLE:
          result = ((Number) value).doubleValue();
          break;
        case STRING:
          result = value.toString();
          break;
        case BYTES:
          ByteBuffer byteBuffer = (ByteBuffer) value;
          // Copying may advance the buffer's position. Put back the position the caller handed
          // us instead of rewinding to 0, which would alter buffers that start at a non-zero
          // position.
          int originalPosition = byteBuffer.position();
          result = ByteString.copy(byteBuffer);
          byteBuffer.position(originalPosition);
          break;
        case ENUM:
          String enumValue = value.toString();
          EnumDataSchema enumDataSchema = (EnumDataSchema) dereferencedDataSchema;
          if (!enumDataSchema.getSymbols().contains(enumValue)) {
            appendMessage(
                "enum value %1$s not one of %2$s", enumValue, enumDataSchema.getSymbols());
            result = BAD_RESULT;
            break;
          }
          result = enumValue;
          break;
        case FIXED:
          GenericFixed fixed = (GenericFixed) value;
          byte[] fixedBytes = fixed.bytes();
          FixedDataSchema fixedDataSchema = (FixedDataSchema) dereferencedDataSchema;
          // The Avro fixed must have exactly the size declared by the data schema.
          if (fixedDataSchema.getSize() != fixedBytes.length) {
            appendMessage(
                "GenericFixed size %1$d != FixedDataSchema size %2$d",
                fixedBytes.length, fixedDataSchema.getSize());
            result = BAD_RESULT;
            break;
          }
          result = ByteString.copy(fixedBytes);
          break;
        case MAP:
          @SuppressWarnings("unchecked")
          Map<?, Object> map = (Map<?, Object>) value;
          DataSchema valueDataSchema = ((MapDataSchema) dereferencedDataSchema).getValues();
          Schema valueAvroSchema = avroSchema.getValueType();
          DataMap dataMap = new DataMap(map.size());
          for (Map.Entry<?, Object> entry : map.entrySet()) {
            // Keys are stringified; the key is on _path while its value is translated.
            String key = entry.getKey().toString();
            _path.addLast(key);
            Object entryValue = translate(entry.getValue(), valueDataSchema, valueAvroSchema);
            _path.removeLast();
            dataMap.put(key, entryValue);
          }
          result = dataMap;
          break;
        case ARRAY:
          GenericArray<?> list = (GenericArray<?>) value;
          DataSchema elementDataSchema = ((ArrayDataSchema) dereferencedDataSchema).getItems();
          Schema elementAvroSchema = avroSchema.getElementType();
          DataList dataList = new DataList(list.size());
          for (int i = 0; i < list.size(); i++) {
            // The element index is on _path while the element is translated.
            _path.addLast(i);
            Object entryValue = translate(list.get(i), elementDataSchema, elementAvroSchema);
            _path.removeLast();
            dataList.add(entryValue);
          }
          result = dataList;
          break;
        case RECORD:
          GenericRecord record = (GenericRecord) value;
          RecordDataSchema recordDataSchema = (RecordDataSchema) dereferencedDataSchema;
          dataMap = new DataMap(avroSchema.getFields().size());
          for (RecordDataSchema.Field field : recordDataSchema.getFields()) {
            String fieldName = field.getName();
            Object fieldValue = record.get(fieldName);
            // fieldValue could be null if the Avro schema does not contain the named field or
            // the field is present with a null value. In either case we do not add a value
            // to the translated DataMap. We do not consider optional/required/default here
            // either (i.e. it is not an error if a required field is missing); the user can
            // later call ValidateDataAgainstSchema with various
            // settings for RequiredMode to obtain the desired behaviour.
            if (fieldValue == null) {
              continue;
            }
            boolean isOptional = field.getOptional();
            DataSchema fieldDataSchema = field.getType();
            Schema fieldAvroSchema = avroSchema.getField(fieldName).schema();
            if (isOptional && (fieldDataSchema.getDereferencedType() != DataSchema.Type.UNION)) {
              // Avro schema should be union with 2 types: null and the field's type.
              Map.Entry<String, Schema> fieldAvroEntry =
                  findUnionMember(fieldDataSchema, fieldAvroSchema);
              if (fieldAvroEntry == null) {
                // No matching union member was found; the field is left out of the result.
                continue;
              }
              fieldAvroSchema = fieldAvroEntry.getValue();
            }
            _path.addLast(fieldName);
            dataMap.put(fieldName, translate(fieldValue, fieldDataSchema, fieldAvroSchema));
            _path.removeLast();
          }
          result = dataMap;
          break;
        case UNION:
          UnionDataSchema unionDataSchema = (UnionDataSchema) dereferencedDataSchema;
          Map.Entry<DataSchema, Schema> memberSchemas =
              findUnionMemberSchema(value, unionDataSchema, avroSchema);
          if (memberSchemas == null) {
            result = BAD_RESULT;
            break;
          }
          if (value == null) {
            // schema must be "null" schema
            result = Data.NULL;
          } else {
            // A non-null union value becomes a single-entry map keyed by the member key.
            DataSchema memberDataSchema = memberSchemas.getKey();
            Schema memberAvroSchema = memberSchemas.getValue();
            String key = memberDataSchema.getUnionMemberKey();
            dataMap = new DataMap(1);
            _path.addLast(key);
            dataMap.put(key, translate(value, memberDataSchema, memberAvroSchema));
            _path.removeLast();
            result = dataMap;
          }
          break;
        default:
          appendMessage("schema type unknown %1$s", dereferencedDataSchema.getType());
          result = BAD_RESULT;
          break;
      }
      return result;
    }
 /**
  * Sets the field {@code fieldName} of {@code record} to a common value wrapping a common fixed
  * that is created from the schema and bytes of the given Avro fixed.
  *
  * @param record the record whose field is set
  * @param fixed the Avro fixed supplying the schema and the bytes
  * @param fieldName the name of the field to set
  */
 private void processFixedField(CommonRecord record, GenericFixed fixed, String fieldName) {
   record.setField(
       fieldName,
       commonFactory.createCommonValue(
           commonFactory.createCommonFixed(fixed.getSchema(), fixed.bytes())));
 }