private void processArrayField(CommonRecord record, GenericArray array, String fieldName) { List<CommonValue> currentArray; CommonValue arrayValue = record.getField(fieldName); if (arrayValue != null && arrayValue.isArray()) { currentArray = arrayValue.getArray().getList(); } else { currentArray = new LinkedList<CommonValue>(); record.setField( fieldName, commonFactory.createCommonValue( commonFactory.createCommonArray(array.getSchema(), currentArray))); } if (!array.isEmpty()) { Object rawItem = array.get(0); if (AvroGenericUtils.isRecord(rawItem)) { GenericArray<GenericRecord> recordItems = (GenericArray<GenericRecord>) array; // Adding new records for (GenericRecord item : recordItems) { CommonRecord newRecord = createCommonRecord(item); updateRecord(newRecord, item); currentArray.add(commonFactory.createCommonValue(newRecord)); } } else if (AvroGenericUtils.isFixed(rawItem)) { GenericArray<GenericFixed> fixedItems = (GenericArray<GenericFixed>) array; if (AvroGenericUtils.isUuid(rawItem)) { // Removing items with given uuids for (GenericFixed item : fixedItems) { UUID currentUuid = AvroGenericUtils.createUuidFromFixed(item); Iterator<CommonValue> valueIt = currentArray.iterator(); while (valueIt.hasNext()) { CommonRecord currentRecord = valueIt.next().getRecord(); if (currentRecord.getUuid().equals(currentUuid)) { valueIt.remove(); records.remove(currentUuid); break; } } } } else { for (GenericFixed item : fixedItems) { currentArray.add( commonFactory.createCommonValue( commonFactory.createCommonFixed(item.getSchema(), item.bytes()))); } } } else { // Adding new primitive items for (Object item : array) { currentArray.add(commonFactory.createCommonValue(item)); } } } }
@Override public synchronized void onDeltaReceived(int index, GenericRecord data, boolean fullResync) { GenericFixed uuidFixed = (GenericFixed) data.get(UUID); UUID uuid = AvroGenericUtils.createUuidFromFixed(uuidFixed); if (LOG.isDebugEnabled()) { LOG.debug("Processing delta with uuid {}", uuidFixed.toString()); } CommonRecord currentRecord = null; if (!fullResync && records.containsKey(uuid)) { currentRecord = records.get(uuid); } else { records.clear(); currentRecord = createCommonRecord(data); rootRecord = currentRecord; } updateRecord(currentRecord, data); }
/** Writes the given Avro datum into the given record, using the given Avro schema */ private void extractTree(Object datum, Schema schema, Record outputRecord, String prefix) { // RECORD, ENUM, ARRAY, MAP, UNION, FIXED, STRING, BYTES, INT, LONG, FLOAT, // DOUBLE, BOOLEAN, NULL switch (schema.getType()) { case RECORD: { IndexedRecord avroRecord = (IndexedRecord) datum; String prefix2 = prefix + "/"; for (Field field : schema.getFields()) { extractTree( avroRecord.get(field.pos()), field.schema(), outputRecord, prefix2 + field.name()); } break; } case ENUM: { GenericEnumSymbol symbol = (GenericEnumSymbol) datum; outputRecord.put(prefix, symbol.toString()); break; } case ARRAY: { Iterator iter = ((Collection) datum).iterator(); while (iter.hasNext()) { extractTree(iter.next(), schema.getElementType(), outputRecord, prefix); } break; } case MAP: { Map<CharSequence, ?> map = (Map<CharSequence, ?>) datum; for (Map.Entry<CharSequence, ?> entry : map.entrySet()) { extractTree( entry.getValue(), schema.getValueType(), outputRecord, prefix + "/" + entry.getKey().toString()); } break; } case UNION: { int index = GenericData.get().resolveUnion(schema, datum); // String typeName = schema.getTypes().get(index).getName(); // String prefix2 = prefix + "/" + typeName; String prefix2 = prefix; extractTree(datum, schema.getTypes().get(index), outputRecord, prefix2); break; } case FIXED: { GenericFixed fixed = (GenericFixed) datum; outputRecord.put(prefix, fixed.bytes()); // outputRecord.put(prefix, utf8toString(fixed.bytes())); break; } case BYTES: { ByteBuffer buf = (ByteBuffer) datum; int pos = buf.position(); byte[] bytes = new byte[buf.remaining()]; buf.get(bytes); buf.position(pos); // undo relative read outputRecord.put(prefix, bytes); // outputRecord.put(prefix, utf8toString(bytes)); break; } case STRING: { outputRecord.put(prefix, datum.toString()); break; } case INT: { outputRecord.put(prefix, datum); break; } case LONG: { outputRecord.put(prefix, datum); break; } case FLOAT: { outputRecord.put(prefix, datum); break; } case DOUBLE: { outputRecord.put(prefix, datum); break; } case BOOLEAN: { outputRecord.put(prefix, datum); break; } case NULL: { break; } default: throw new MorphlineRuntimeException("Unknown Avro schema type: " + schema.getType()); } }
private Object translate(Object value, DataSchema dataSchema, Schema avroSchema) { AvroOverride avroOverride = getAvroOverride(dataSchema); if (avroOverride != null) { return avroOverride .getCustomDataTranslator() .avroGenericToData(this, value, avroSchema, dataSchema); } DataSchema dereferencedDataSchema = dataSchema.getDereferencedDataSchema(); DataSchema.Type type = dereferencedDataSchema.getType(); Object result; switch (type) { case NULL: if (value != null) { appendMessage("value must be null for null schema"); result = BAD_RESULT; break; } result = Data.NULL; break; case BOOLEAN: result = ((Boolean) value).booleanValue(); break; case INT: result = ((Number) value).intValue(); break; case LONG: result = ((Number) value).longValue(); break; case FLOAT: result = ((Number) value).floatValue(); break; case DOUBLE: result = ((Number) value).doubleValue(); break; case STRING: result = value.toString(); break; case BYTES: ByteBuffer byteBuffer = (ByteBuffer) value; ByteString byteString = ByteString.copy(byteBuffer); byteBuffer.rewind(); result = byteString; break; case ENUM: String enumValue = value.toString(); EnumDataSchema enumDataSchema = (EnumDataSchema) dereferencedDataSchema; if (enumDataSchema.getSymbols().contains(enumValue) == false) { appendMessage( "enum value %1$s not one of %2$s", enumValue, enumDataSchema.getSymbols()); result = BAD_RESULT; break; } result = enumValue; break; case FIXED: GenericFixed fixed = (GenericFixed) value; byte[] fixedBytes = fixed.bytes(); FixedDataSchema fixedDataSchema = (FixedDataSchema) dereferencedDataSchema; if (fixedDataSchema.getSize() != fixedBytes.length) { appendMessage( "GenericFixed size %1$d != FixedDataSchema size %2$d", fixedBytes.length, fixedDataSchema.getSize()); result = BAD_RESULT; break; } byteString = ByteString.copy(fixedBytes); result = byteString; break; case MAP: @SuppressWarnings("unchecked") Map<?, Object> map = (Map<?, Object>) value; DataSchema valueDataSchema = ((MapDataSchema) dereferencedDataSchema).getValues(); Schema valueAvroSchema = avroSchema.getValueType(); DataMap dataMap = new DataMap(map.size()); for (Map.Entry<?, Object> entry : map.entrySet()) { String key = entry.getKey().toString(); _path.addLast(key); Object entryValue = translate(entry.getValue(), valueDataSchema, valueAvroSchema); _path.removeLast(); dataMap.put(key, entryValue); } result = dataMap; break; case ARRAY: GenericArray<?> list = (GenericArray<?>) value; DataSchema elementDataSchema = ((ArrayDataSchema) dereferencedDataSchema).getItems(); Schema elementAvroSchema = avroSchema.getElementType(); DataList dataList = new DataList(list.size()); for (int i = 0; i < list.size(); i++) { _path.addLast(i); Object entryValue = translate(list.get(i), elementDataSchema, elementAvroSchema); _path.removeLast(); dataList.add(entryValue); } result = dataList; break; case RECORD: GenericRecord record = (GenericRecord) value; RecordDataSchema recordDataSchema = (RecordDataSchema) dereferencedDataSchema; dataMap = new DataMap(avroSchema.getFields().size()); for (RecordDataSchema.Field field : recordDataSchema.getFields()) { String fieldName = field.getName(); Object fieldValue = record.get(fieldName); // fieldValue could be null if the Avro schema does not contain the named field or // the field is present with a null value. In either case we do not add a value // to the translated DataMap. We do not consider optional/required/default here // either (i.e. it is not an error if a required field is missing); the user can // later call ValidateDataAgainstSchema with various // settings for RequiredMode to obtain the desired behaviour. if (fieldValue == null) { continue; } boolean isOptional = field.getOptional(); DataSchema fieldDataSchema = field.getType(); Schema fieldAvroSchema = avroSchema.getField(fieldName).schema(); if (isOptional && (fieldDataSchema.getDereferencedType() != DataSchema.Type.UNION)) { // Avro schema should be union with 2 types: null and the field's type. Map.Entry<String, Schema> fieldAvroEntry = findUnionMember(fieldDataSchema, fieldAvroSchema); if (fieldAvroEntry == null) { continue; } fieldAvroSchema = fieldAvroEntry.getValue(); } _path.addLast(fieldName); dataMap.put(fieldName, translate(fieldValue, fieldDataSchema, fieldAvroSchema)); _path.removeLast(); } result = dataMap; break; case UNION: UnionDataSchema unionDataSchema = (UnionDataSchema) dereferencedDataSchema; Map.Entry<DataSchema, Schema> memberSchemas = findUnionMemberSchema(value, unionDataSchema, avroSchema); if (memberSchemas == null) { result = BAD_RESULT; break; } if (value == null) { // schema must be "null" schema result = Data.NULL; } else { DataSchema memberDataSchema = memberSchemas.getKey(); Schema memberAvroSchema = memberSchemas.getValue(); String key = memberDataSchema.getUnionMemberKey(); dataMap = new DataMap(1); _path.addLast(key); dataMap.put(key, translate(value, memberDataSchema, memberAvroSchema)); _path.removeLast(); result = dataMap; } break; default: appendMessage("schema type unknown %1$s", dereferencedDataSchema.getType()); result = BAD_RESULT; break; } return result; }
private void processFixedField(CommonRecord record, GenericFixed fixed, String fieldName) { record.setField( fieldName, commonFactory.createCommonValue( commonFactory.createCommonFixed(fixed.getSchema(), fixed.bytes()))); }