/**
 * Get a map of field names to default values for an Avro schema.
 *
 * @param avroRecordSchema The schema from which to extract field names and default values.
 * @return The map of field names to default values.
 */
public static Map<String, Object> getDefaultValueMap(Schema avroRecordSchema) {
  List<Field> defaultFields = new ArrayList<Field>();
  for (Field f : avroRecordSchema.getFields()) {
    if (f.defaultValue() != null) {
      // Need to create a new Field here, otherwise Avro throws
      // org.apache.avro.AvroRuntimeException: Field already used: schemaVersion
      defaultFields.add(new Field(f.name(), f.schema(), f.doc(), f.defaultValue(), f.order()));
    }
  }

  Schema defaultSchema = Schema.createRecord(defaultFields);
  Schema emptyRecordSchema = Schema.createRecord(new ArrayList<Field>());
  DatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>(emptyRecordSchema);
  DatumReader<GenericRecord> reader =
      new GenericDatumReader<GenericRecord>(emptyRecordSchema, defaultSchema);

  // Write an empty record and read it back with the default-bearing schema so that
  // Avro schema resolution fills in the default values.
  GenericRecord emptyRecord = new GenericData.Record(emptyRecordSchema);
  GenericRecord defaultRecord =
      AvroUtils.readAvroEntity(AvroUtils.writeAvroEntity(emptyRecord, writer), reader);

  Map<String, Object> defaultValueMap = new HashMap<String, Object>();
  for (Field f : defaultFields) {
    defaultValueMap.put(f.name(), defaultRecord.get(f.name()));
  }
  return defaultValueMap;
}
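// Usage sketch for getDefaultValueMap, assuming Avro on the classpath; the schema
// JSON below is hypothetical.
Schema schemaWithDefault = new Schema.Parser().parse(
    "{\"type\":\"record\",\"name\":\"Example\",\"fields\":"
        + "[{\"name\":\"count\",\"type\":\"int\",\"default\":0}]}");
Map<String, Object> defaults = getDefaultValueMap(schemaWithDefault);
// defaults is expected to map "count" to its declared default value 0.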
@Test
public void testDeepCopy() {
  // Set all non-default fields in an Interop instance:
  Interop.Builder interopBuilder = Interop.newBuilder();
  interopBuilder.setArrayField(Arrays.asList(new Double[] {1.1, 1.2, 1.3, 1.4}));
  interopBuilder.setBoolField(true);
  interopBuilder.setBytesField(ByteBuffer.wrap(new byte[] {1, 2, 3, 4}));
  interopBuilder.setDoubleField(3.14d);
  interopBuilder.setEnumField(Kind.B);
  interopBuilder.setFixedField(
      new MD5(new byte[] {4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1}));
  interopBuilder.setFloatField(6.022f);
  interopBuilder.setIntField(32);
  interopBuilder.setLongField(64L);
  Map<String, Foo> map = new HashMap<String, Foo>(1);
  map.put("foo", Foo.newBuilder().setLabel("bar").build());
  interopBuilder.setMapField(map);
  interopBuilder.setNullField(null);
  Node.Builder rootBuilder = Node.newBuilder().setLabel("/");
  Node.Builder homeBuilder = Node.newBuilder().setLabel("home");
  homeBuilder.setChildren(new ArrayList<Node>(0));
  rootBuilder.setChildren(Arrays.asList(new Node[] {homeBuilder.build()}));
  interopBuilder.setRecordField(rootBuilder.build());
  interopBuilder.setStringField("Hello");
  interopBuilder.setUnionField(true);
  Interop interop = interopBuilder.build();

  // Verify that deepCopy works for all fields:
  for (Field field : Interop.SCHEMA$.getFields()) {
    // Original field and deep copy should be equivalent:
    if (interop.get(field.pos()) instanceof ByteBuffer) {
      assertTrue(Arrays.equals(
          ((ByteBuffer) interop.get(field.pos())).array(),
          ((ByteBuffer) GenericData.get().deepCopy(field.schema(), interop.get(field.pos())))
              .array()));
    } else {
      assertEquals(
          interop.get(field.pos()),
          SpecificData.get().deepCopy(field.schema(), interop.get(field.pos())));
    }

    // Original field and deep copy should be different instances:
    if ((field.schema().getType() != Type.ENUM)
        && (field.schema().getType() != Type.NULL)
        && (field.schema().getType() != Type.STRING)) {
      assertFalse(
          "Field " + field.name() + " is same instance in deep copy",
          interop.get(field.pos())
              == GenericData.get().deepCopy(field.schema(), interop.get(field.pos())));
    }
  }
}
/*
 * For each Avro field, search for a nested schema. If the field is nested,
 * create a subtree and recurse; otherwise fetch the field as an element.
 */
private static void fetchToTree(Field field, Tree parent) {
  if (field.schema().getType() == Schema.Type.RECORD) {
    Tree child = new Tree(parent);
    child.setName(field.name());
    for (Field fieldOfField : field.schema().getFields()) {
      fetchToTree(fieldOfField, child);
    }
    parent.getTrees().add(child);
  } else if (field.schema().getType() == Schema.Type.ARRAY) {
    if (field.schema().getElementType().getType() == Schema.Type.RECORD) {
      if (!multipleData) {
        multipleData = true;
      }
      Schema arraySchema = field.schema().getElementType();
      Tree childParent = new Tree(parent);
      childParent.setName(field.name()); // e.g. "employee"
      for (Field fieldOfField : arraySchema.getFields()) {
        fetchToTree(fieldOfField, childParent);
      }
      parent.getTrees().add(childParent);
    }
  } else {
    Element elementNew = new Element(parent);
    elementNew.setName(field.name());
    parent.getElements().add(elementNew);
  }
}
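// For illustration (schema hypothetical): given a record field "address" of type
// record{city, zip} and a field "employees" of type array<record{id}>, fetchToTree
// adds a child Tree "address" holding Elements "city" and "zip", and a child Tree
// "employees" holding Element "id" (setting multipleData because the array contains
// records). Plain scalar fields become Elements of the parent Tree.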
/**
 * Returns true if the types of two Avro schemas are equal. This ignores things like custom
 * field properties that the equals() implementation of Schema checks.
 *
 * @param schema1 The first schema to compare
 * @param schema2 The second schema to compare
 * @return True if the types are equal, otherwise false.
 */
public static boolean avroSchemaTypesEqual(Schema schema1, Schema schema2) {
  if (schema1.getType() != schema2.getType()) {
    // If the types aren't equal, there is no need to go further.
    return false;
  }

  if (schema1.getType() == Schema.Type.ENUM || schema1.getType() == Schema.Type.FIXED) {
    // Enum and Fixed schemas should be compared using the Schema.equals method.
    return schema1.equals(schema2);
  }
  if (schema1.getType() == Schema.Type.ARRAY) {
    // Array element schemas should be equal, which is tested by recursively
    // calling this method.
    return avroSchemaTypesEqual(schema1.getElementType(), schema2.getElementType());
  } else if (schema1.getType() == Schema.Type.MAP) {
    // Map value schemas should be equal, which is tested by recursively
    // calling this method.
    return avroSchemaTypesEqual(schema1.getValueType(), schema2.getValueType());
  } else if (schema1.getType() == Schema.Type.UNION) {
    // Compare union branches in the same position by recursively comparing
    // their schemas.
    if (schema1.getTypes().size() != schema2.getTypes().size()) {
      return false;
    }
    for (int i = 0; i < schema1.getTypes().size(); i++) {
      if (!avroSchemaTypesEqual(schema1.getTypes().get(i), schema2.getTypes().get(i))) {
        return false;
      }
    }
    return true;
  } else if (schema1.getType() == Schema.Type.RECORD) {
    // Compare record fields that match by name by recursively comparing
    // their schemas.
    if (schema1.getFields().size() != schema2.getFields().size()) {
      return false;
    }
    for (Field field1 : schema1.getFields()) {
      Field field2 = schema2.getField(field1.name());
      if (field2 == null) {
        return false;
      }
      if (!avroSchemaTypesEqual(field1.schema(), field2.schema())) {
        return false;
      }
    }
    return true;
  } else {
    // All other types are primitive, so matching types are enough.
    return true;
  }
}
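// Usage sketch, assuming Avro on the classpath; both schemas are hypothetical and
// differ only in a custom field property, which Schema.equals may reject but the
// type comparison above ignores.
Schema s1 = new Schema.Parser().parse(
    "{\"type\":\"record\",\"name\":\"R\",\"fields\":[{\"name\":\"id\",\"type\":\"long\"}]}");
Schema s2 = new Schema.Parser().parse(
    "{\"type\":\"record\",\"name\":\"R\",\"fields\":"
        + "[{\"name\":\"id\",\"type\":\"long\",\"myProp\":\"x\"}]}");
boolean sameTypes = avroSchemaTypesEqual(s1, s2); // expected: true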
@BeforeClass
public static void before() throws Exception {
  final String filePath =
      TestUtils.getFileFromResourceUrl(
          DictionariesTest.class.getClassLoader().getResource(AVRO_DATA));
  if (INDEX_DIR.exists()) {
    FileUtils.deleteQuietly(INDEX_DIR);
  }

  final SegmentGeneratorConfig config =
      SegmentTestUtils.getSegmentGenSpecWithSchemAndProjectedColumns(
          new File(filePath), INDEX_DIR, "time_day", TimeUnit.DAYS, "test");
  final SegmentIndexCreationDriver driver = SegmentCreationDriverFactory.get(null);
  driver.init(config);
  driver.build();

  final Schema schema = AvroUtils.extractSchemaFromAvro(new File(filePath));
  final DataFileStream<GenericRecord> avroReader = AvroUtils.getAvroReader(new File(filePath));
  final org.apache.avro.Schema avroSchema = avroReader.getSchema();
  final String[] columns = new String[avroSchema.getFields().size()];
  int i = 0;
  for (final Field f : avroSchema.getFields()) {
    columns[i] = f.name();
    i++;
  }

  uniqueEntries = new HashMap<String, Set<Object>>();
  for (final String column : columns) {
    uniqueEntries.put(column, new HashSet<Object>());
  }

  while (avroReader.hasNext()) {
    final GenericRecord rec = avroReader.next();
    for (final String column : columns) {
      Object val = rec.get(column);
      if (val instanceof Utf8) {
        val = ((Utf8) val).toString();
      }
      uniqueEntries
          .get(column)
          .add(getAppropriateType(schema.getFieldSpecFor(column).getDataType(), val));
    }
  }
}
@Override
@SuppressWarnings("unchecked")
protected void writeRecord(Schema schema, Object datum, Encoder out) throws IOException {
  if (persistent == null) {
    persistent = (T) datum;
  }
  if (!writeDirtyBits) {
    super.writeRecord(schema, datum, out);
    return;
  }

  // Check if this is the top-level schema.
  if (schema.equals(persistent.getSchema())) {
    // Write readable fields and dirty fields info.
    boolean[] dirtyFields = new boolean[schema.getFields().size()];
    boolean[] readableFields = new boolean[schema.getFields().size()];
    StateManager manager = persistent.getStateManager();

    int i = 0;
    for (@SuppressWarnings("unused") Field field : schema.getFields()) {
      dirtyFields[i] = manager.isDirty(persistent, i);
      readableFields[i] = manager.isReadable(persistent, i);
      i++;
    }

    IOUtils.writeBoolArray(out, dirtyFields);
    IOUtils.writeBoolArray(out, readableFields);

    for (Field field : schema.getFields()) {
      if (readableFields[field.pos()]) {
        write(field.schema(), getData().getField(datum, field.name(), field.pos()), out);
      }
    }
  } else {
    super.writeRecord(schema, datum, out);
  }
}
private void updateRecord(CommonRecord record, GenericRecord delta) {
  List<Field> deltaFields = delta.getSchema().getFields();
  for (Field deltaField : deltaFields) {
    String fieldName = deltaField.name();
    Object rawDeltaField = delta.get(fieldName);
    if (LOG.isDebugEnabled()) {
      LOG.debug(
          "Processing field \"{}\", current value: {}",
          fieldName,
          record.getField(fieldName) != null ? record.getField(fieldName).toString() : null);
    }
    if (AvroGenericUtils.isRecord(rawDeltaField)) {
      processRecordField(record, (GenericRecord) rawDeltaField, fieldName);
    } else if (AvroGenericUtils.isArray(rawDeltaField)) {
      processArrayField(record, (GenericArray) rawDeltaField, fieldName);
    } else if (AvroGenericUtils.isEnum(rawDeltaField)) {
      processEnumField(record, (GenericEnumSymbol) rawDeltaField, fieldName);
    } else if (AvroGenericUtils.isFixed(rawDeltaField)) {
      processFixedField(record, (GenericFixed) rawDeltaField, fieldName);
    } else {
      record.setField(fieldName, commonFactory.createCommonValue(rawDeltaField));
    }
  }
}
/** Writes the given Avro datum into the given record, using the given Avro schema. */
private void extractTree(Object datum, Schema schema, Record outputRecord, String prefix) {
  // Possible types: RECORD, ENUM, ARRAY, MAP, UNION, FIXED, STRING, BYTES,
  // INT, LONG, FLOAT, DOUBLE, BOOLEAN, NULL
  switch (schema.getType()) {
    case RECORD: {
      IndexedRecord avroRecord = (IndexedRecord) datum;
      String prefix2 = prefix + "/";
      for (Field field : schema.getFields()) {
        extractTree(
            avroRecord.get(field.pos()), field.schema(), outputRecord, prefix2 + field.name());
      }
      break;
    }
    case ENUM: {
      GenericEnumSymbol symbol = (GenericEnumSymbol) datum;
      outputRecord.put(prefix, symbol.toString());
      break;
    }
    case ARRAY: {
      for (Object element : (Collection<?>) datum) {
        extractTree(element, schema.getElementType(), outputRecord, prefix);
      }
      break;
    }
    case MAP: {
      Map<CharSequence, ?> map = (Map<CharSequence, ?>) datum;
      for (Map.Entry<CharSequence, ?> entry : map.entrySet()) {
        extractTree(
            entry.getValue(),
            schema.getValueType(),
            outputRecord,
            prefix + "/" + entry.getKey().toString());
      }
      break;
    }
    case UNION: {
      // Resolve which union branch the datum matches and recurse with that schema.
      int index = GenericData.get().resolveUnion(schema, datum);
      extractTree(datum, schema.getTypes().get(index), outputRecord, prefix);
      break;
    }
    case FIXED: {
      GenericFixed fixed = (GenericFixed) datum;
      outputRecord.put(prefix, fixed.bytes());
      break;
    }
    case BYTES: {
      ByteBuffer buf = (ByteBuffer) datum;
      int pos = buf.position();
      byte[] bytes = new byte[buf.remaining()];
      buf.get(bytes);
      buf.position(pos); // undo relative read
      outputRecord.put(prefix, bytes);
      break;
    }
    case STRING: {
      outputRecord.put(prefix, datum.toString());
      break;
    }
    case INT:
    case LONG:
    case FLOAT:
    case DOUBLE:
    case BOOLEAN: {
      outputRecord.put(prefix, datum);
      break;
    }
    case NULL: {
      break;
    }
    default:
      throw new MorphlineRuntimeException("Unknown Avro schema type: " + schema.getType());
  }
}
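// Minimal sketch of the union-resolution step used in the UNION case above,
// assuming only Avro on the classpath; the union schema here is hypothetical.
Schema unionSchema = new Schema.Parser().parse("[\"null\", \"string\", \"int\"]");
// resolveUnion returns the index of the branch that matches the datum, which is
// then used to recurse with the concrete branch schema.
int index = GenericData.get().resolveUnion(unionSchema, "hello"); // expected: 1
Schema branch = unionSchema.getTypes().get(index); // the "string" schema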
public static List<SpecificRecordBase> parse(
    ResultSet resultSet,
    CollectProperties collectProperties,
    Sql2KafkaProperties sql2KafkaProperties) {
  List<SpecificRecordBase> metricsToSend = new ArrayList<SpecificRecordBase>();
  try {
    Class<?> schemaClass = Class.forName(collectProperties.getAvroSchemaClass());
    while (resultSet.next()) {
      SpecificRecordBase specificRecordBase = (SpecificRecordBase) schemaClass.newInstance();
      for (int c = 1; c <= resultSet.getMetaData().getColumnCount(); c++) {
        String fieldName = resultSet.getMetaData().getColumnName(c);
        if (specificRecordBase.getSchema().getField(fieldName) == null) {
          continue;
        }
        Object value;
        // Map the JDBC column to the Java type of the generated Avro field.
        switch (specificRecordBase
            .getClass()
            .getDeclaredField(fieldName)
            .getType()
            .getCanonicalName()) {
          case "java.lang.Boolean":
            value = resultSet.getBoolean(fieldName);
            break;
          case "java.lang.Integer":
            value = resultSet.getInt(fieldName);
            break;
          case "java.lang.Long":
            value = resultSet.getLong(fieldName);
            break;
          case "java.lang.Float":
            value = resultSet.getFloat(fieldName);
            break;
          case "java.lang.Double":
            value = resultSet.getDouble(fieldName);
            break;
          case "java.lang.Byte":
            value = resultSet.getByte(fieldName);
            break;
          case "java.lang.String":
          default:
            value = resultSet.getString(fieldName);
            break;
        }
        specificRecordBase.put(fieldName, value);
      }
      if (sql2KafkaProperties.getCollectorListResultsExit().equals("true")) {
        for (Field field : specificRecordBase.getSchema().getFields()) {
          LOG.info(field.name() + ": " + specificRecordBase.get(field.pos()));
        }
      }
      metricsToSend.add(specificRecordBase);
    }
    if (sql2KafkaProperties.getCollectorListResultsExit().equals("true")) {
      System.exit(0);
    }
  } catch (SQLException
      | InstantiationException
      | IllegalAccessException
      | ClassNotFoundException
      | SecurityException
      | NoSuchFieldException e) {
    LOG.error("Error parsing results.", e);
  }
  return metricsToSend;
}
/**
 * Given an Avro Schema.Field instance, make a clone of it.
 *
 * @param field The field to clone.
 * @return The cloned field.
 */
public static Field cloneField(Field field) {
  return new Field(field.name(), field.schema(), field.doc(), field.defaultValue());
}
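// Usage sketch, assuming an existing record schema named originalSchema
// (hypothetical): a Field instance cannot be added to two record schemas, so
// fields are cloned when building a new schema from an existing one.
List<Field> clonedFields = new ArrayList<Field>();
for (Field f : originalSchema.getFields()) {
  clonedFields.add(cloneField(f));
}
Schema copy = Schema.createRecord("Copy", null, "example.namespace", false);
copy.setFields(clonedFields);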
public void generateSimpleAggregationOnSingleColumnFilters() throws IOException {
  final Map<String, Map<Object, Integer>> cardinalityCountsMap =
      new HashMap<String, Map<Object, Integer>>();
  final Map<String, Map<Object, Map<String, Double>>> sumMap =
      new HashMap<String, Map<Object, Map<String, Double>>>();
  // The string key is columnName:columnValue:MetricName:GroupColumnName:groupKey:metricValue
  final Map<String, Map<Object, Double>> sumGroupBy = new HashMap<String, Map<Object, Double>>();

  aggregationQueries = new ArrayList<AvroQueryGenerator.TestSimpleAggreationQuery>();
  groupByQueries = new ArrayList<AvroQueryGenerator.TestGroupByAggreationQuery>();

  for (final Field f : schema.getFields()) {
    final String fieldName = f.name();
    if (dimensions.contains(fieldName) || metrics.contains(fieldName) || time.equals(fieldName)) {
      isSingleValueMap.put(fieldName, isSingleValueField(f));
      dataTypeMap.put(fieldName, getColumnType(f));
      if (!metrics.contains(fieldName)) {
        cardinalityCountsMap.put(fieldName, new HashMap<Object, Integer>());
      }
    }
  }

  for (final String column : cardinalityCountsMap.keySet()) {
    sumMap.put(column, new HashMap<Object, Map<String, Double>>());
  }

  while (dataStream.hasNext()) {
    final GenericRecord record = dataStream.next();

    for (final String column : cardinalityCountsMap.keySet()) {
      Object value = record.get(column);
      if (value == null) {
        switch (schema.getField(column).schema().getType()) {
          case INT:
            value = 0;
            break;
          case FLOAT:
            value = 0F;
            break;
          case LONG:
            value = 0L;
            break;
          case DOUBLE:
            value = 0D;
            break;
          case STRING:
          case BOOLEAN:
            value = "null";
            break;
        }
      }

      if (value instanceof Utf8) {
        value = ((Utf8) value).toString();
      }
      if (value instanceof Array) {
        continue;
      }

      for (final String metricName : metrics) {
        final String groupbyKeyBase = column + ":" + record.get(column) + ":" + metricName;
        int dimCounter = 1;
        for (final String dim : cardinalityCountsMap.keySet()) {
          if (!dim.equals(column)) {
            dimCounter++;
            final String groupbyKey = groupbyKeyBase + ":" + dim;
            if (sumGroupBy.containsKey(groupbyKey)) {
              if (sumGroupBy.get(groupbyKey).containsKey(record.get(dim))) {
                sumGroupBy
                    .get(groupbyKey)
                    .put(
                        record.get(dim),
                        getAppropriateNumberType(
                            metricName,
                            record.get(metricName),
                            sumGroupBy.get(groupbyKey).get(record.get(dim))));
              } else {
                sumGroupBy
                    .get(groupbyKey)
                    .put(record.get(dim), Double.parseDouble(record.get(metricName).toString()));
              }
            } else {
              sumGroupBy.put(groupbyKey, new HashMap<Object, Double>());
              sumGroupBy
                  .get(groupbyKey)
                  .put(record.get(dim), Double.parseDouble(record.get(metricName).toString()));
            }
          }
          if (dimCounter == 4) {
            break;
          }
        }
      }

      if (cardinalityCountsMap.get(column).containsKey(value)) {
        cardinalityCountsMap
            .get(column)
            .put(value, cardinalityCountsMap.get(column).get(value) + 1);
      } else {
        cardinalityCountsMap.get(column).put(value, 1);
      }

      if (!sumMap.get(column).containsKey(value)) {
        sumMap.get(column).put(value, new HashMap<String, Double>());
      }
      for (final String metric : metrics) {
        if (!sumMap.get(column).get(value).containsKey(metric)) {
          sumMap
              .get(column)
              .get(value)
              .put(metric, getAppropriateNumberType(metric, record.get(metric), 0D));
        } else {
          sumMap
              .get(column)
              .get(value)
              .put(
                  metric,
                  getAppropriateNumberType(
                      metric, record.get(metric), sumMap.get(column).get(value).get(metric)));
        }
      }
    }
  }
  dataStream.close();

  if (!isRealtimeSegment) {
    for (final String column : cardinalityCountsMap.keySet()) {
      for (final Object entry : cardinalityCountsMap.get(column).keySet()) {
        final StringBuilder bld = new StringBuilder();
        bld.append("select count(*) from ");
        bld.append(resourceName);
        bld.append(" where ");
        bld.append(column);
        bld.append("=");
        bld.append("'");
        bld.append(entry);
        bld.append("'");
        bld.append(" ");
        bld.append("limit 0");
        String queryString = bld.toString();
        if (!queryString.contains("null")) {
          aggregationQueries.add(
              new TestSimpleAggreationQuery(
                  queryString, new Double(cardinalityCountsMap.get(column).get(entry))));
        }
      }
    }
  }

  for (final String column : sumMap.keySet()) {
    for (final Object value : sumMap.get(column).keySet()) {
      for (final String metric : sumMap.get(column).get(value).keySet()) {
        final StringBuilder bld = new StringBuilder();
        bld.append("select sum('" + metric + "') from ");
        bld.append(resourceName);
        bld.append(" where ");
        bld.append(column);
        bld.append("=");
        bld.append("'");
        bld.append(value);
        bld.append("'");
        bld.append(" ");
        bld.append("limit 0");
        String queryString = bld.toString();
        if (!queryString.contains("null")) {
          aggregationQueries.add(
              new TestSimpleAggreationQuery(
                  queryString, sumMap.get(column).get(value).get(metric)));
        }
      }
    }
  }

  for (final String groupKey : sumGroupBy.keySet()) {
    final String columnName = groupKey.split(":")[0];
    final String columnValue = groupKey.split(":")[1];
    final String metricColumn = groupKey.split(":")[2];
    final String groupByColumnName = groupKey.split(":")[3];

    final StringBuilder bld = new StringBuilder();
    bld.append("select sum('" + metricColumn + "') from ");
    bld.append(resourceName);
    bld.append(" where ");
    bld.append(columnName);
    bld.append("=");
    bld.append("'");
    bld.append(columnValue);
    bld.append("'");
    bld.append(" ");
    bld.append(" group by ");
    bld.append(groupByColumnName);
    bld.append(" top 10 ");
    bld.append("limit 0");
    String queryString = bld.toString();
    if (!queryString.contains("null")) {
      groupByQueries.add(new TestGroupByAggreationQuery(queryString, sumGroupBy.get(groupKey)));
    }
  }
}
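// For illustration (names hypothetical): with resourceName = "test", the loops
// above emit query strings of the form
//   select count(*) from test where dimCol='dimVal' limit 0
//   select sum('metricCol') from test where dimCol='dimVal' limit 0
//   select sum('metricCol') from test where dimCol='dimVal'  group by groupCol top 10 limit 0
// Any query whose string contains "null" is skipped.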