/** * Get a map of field names to default values for an Avro schema. * * @param avroRecordSchema The schema to get the map of field names to values. * @return The map. */ public static Map<String, Object> getDefaultValueMap(Schema avroRecordSchema) { List<Field> defaultFields = new ArrayList<Field>(); for (Field f : avroRecordSchema.getFields()) { if (f.defaultValue() != null) { // Need to create a new Field here or we will get // org.apache.avro.AvroRuntimeException: Field already used: // schemaVersion defaultFields.add(new Field(f.name(), f.schema(), f.doc(), f.defaultValue(), f.order())); } } Schema defaultSchema = Schema.createRecord(defaultFields); Schema emptyRecordSchema = Schema.createRecord(new ArrayList<Field>()); DatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>(emptyRecordSchema); DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>(emptyRecordSchema, defaultSchema); GenericRecord emptyRecord = new GenericData.Record(emptyRecordSchema); GenericRecord defaultRecord = AvroUtils.readAvroEntity(AvroUtils.writeAvroEntity(emptyRecord, writer), reader); Map<String, Object> defaultValueMap = new HashMap<String, Object>(); for (Field f : defaultFields) { defaultValueMap.put(f.name(), defaultRecord.get(f.name())); } return defaultValueMap; }
/**
 * Computes a hash code for the next datum of the given schema, reading it from
 * {@code data.decoder}.
 *
 * <p>Records and arrays combine their members' hashes with the usual {@code 31 * h + x} scheme,
 * starting from 1. Record fields whose order is {@code IGNORE} are skipped in the decoder and do
 * not contribute to the result.
 *
 * @param data   holder of the decoder positioned at the datum to hash
 * @param schema schema describing the datum at the decoder's current position
 * @return the hash code of the datum
 * @throws IOException if reading from the decoder fails
 * @throws AvroRuntimeException for map schemas and for unrecognized schema types
 */
private static int hashCode(HashData data, Schema schema) throws IOException {
  Decoder in = data.decoder;
  switch (schema.getType()) {
    case RECORD: {
      int result = 1;
      for (Field member : schema.getFields()) {
        if (member.order() != Field.Order.IGNORE) {
          result = result * 31 + hashCode(data, member.schema());
        } else {
          // Ignored fields still have to be consumed so the decoder stays aligned.
          GenericDatumReader.skip(member.schema(), in);
        }
      }
      return result;
    }
    case ENUM:
    case INT: {
      return in.readInt();
    }
    case FLOAT: {
      float value = in.readFloat();
      return Float.floatToIntBits(value);
    }
    case LONG: {
      long value = in.readLong();
      return (int) (value ^ (value >>> 32));
    }
    case DOUBLE: {
      long bits = Double.doubleToLongBits(in.readDouble());
      return (int) (bits ^ (bits >>> 32));
    }
    case ARRAY: {
      Schema itemSchema = schema.getElementType();
      int result = 1;
      // Arrays arrive in blocks; a block count of zero terminates the array.
      long blockCount = in.readArrayStart();
      while (blockCount != 0) {
        for (long n = 0; n < blockCount; n++) {
          result = result * 31 + hashCode(data, itemSchema);
        }
        blockCount = in.arrayNext();
      }
      return result;
    }
    case MAP: {
      throw new AvroRuntimeException("Can't hashCode maps!");
    }
    case UNION: {
      // The encoded int selects which branch of the union follows.
      int branchIndex = in.readInt();
      return hashCode(data, schema.getTypes().get(branchIndex));
    }
    case FIXED: {
      return hashBytes(1, data, schema.getFixedSize(), false);
    }
    case STRING: {
      int length = in.readInt();
      return hashBytes(0, data, length, false);
    }
    case BYTES: {
      int length = in.readInt();
      return hashBytes(1, data, length, true);
    }
    case BOOLEAN: {
      if (in.readBoolean()) {
        return 1231;
      }
      return 1237;
    }
    case NULL: {
      return 0;
    }
    default: {
      throw new AvroRuntimeException("Unexpected schema to hashCode!");
    }
  }
}
/** * If equal, return the number of bytes consumed. If greater than, return GT, if less than, return * LT. */ private static int compare(Decoders d, Schema schema) throws IOException { Decoder d1 = d.d1; Decoder d2 = d.d2; switch (schema.getType()) { case RECORD: { for (Field field : schema.getFields()) { if (field.order() == Field.Order.IGNORE) { GenericDatumReader.skip(field.schema(), d1); GenericDatumReader.skip(field.schema(), d2); continue; } int c = compare(d, field.schema()); if (c != 0) return (field.order() != Field.Order.DESCENDING) ? c : -c; } return 0; } case ENUM: case INT: { int i1 = d1.readInt(); int i2 = d2.readInt(); return i1 == i2 ? 0 : (i1 > i2 ? 1 : -1); } case LONG: { long l1 = d1.readLong(); long l2 = d2.readLong(); return l1 == l2 ? 0 : (l1 > l2 ? 1 : -1); } case ARRAY: { long i = 0; // position in array long r1 = 0, r2 = 0; // remaining in current block long l1 = 0, l2 = 0; // total array length while (true) { if (r1 == 0) { // refill blocks(s) r1 = d1.readLong(); if (r1 < 0) { r1 = -r1; d1.readLong(); } l1 += r1; } if (r2 == 0) { r2 = d2.readLong(); if (r2 < 0) { r2 = -r2; d2.readLong(); } l2 += r2; } if (r1 == 0 || r2 == 0) // empty block: done return (l1 == l2) ? 0 : ((l1 > l2) ? 
1 : -1); long l = Math.min(l1, l2); while (i < l) { // compare to end of block int c = compare(d, schema.getElementType()); if (c != 0) return c; i++; r1--; r2--; } } } case MAP: throw new AvroRuntimeException("Can't compare maps!"); case UNION: { int i1 = d1.readInt(); int i2 = d2.readInt(); if (i1 == i2) { return compare(d, schema.getTypes().get(i1)); } else { return i1 - i2; } } case FIXED: { int size = schema.getFixedSize(); int c = compareBytes(d.d1.getBuf(), d.d1.getPos(), size, d.d2.getBuf(), d.d2.getPos(), size); d.d1.skipFixed(size); d.d2.skipFixed(size); return c; } case STRING: case BYTES: { int l1 = d1.readInt(); int l2 = d2.readInt(); int c = compareBytes(d.d1.getBuf(), d.d1.getPos(), l1, d.d2.getBuf(), d.d2.getPos(), l2); d.d1.skipFixed(l1); d.d2.skipFixed(l2); return c; } case FLOAT: { float f1 = d1.readFloat(); float f2 = d2.readFloat(); return (f1 == f2) ? 0 : ((f1 > f2) ? 1 : -1); } case DOUBLE: { double f1 = d1.readDouble(); double f2 = d2.readDouble(); return (f1 == f2) ? 0 : ((f1 > f2) ? 1 : -1); } case BOOLEAN: boolean b1 = d1.readBoolean(); boolean b2 = d2.readBoolean(); return (b1 == b2) ? 0 : (b1 ? 1 : -1); case NULL: return 0; default: throw new AvroRuntimeException("Unexpected schema to compare!"); } }