private void assertField(GenericRecord field) {
  // The serialized field name arrives as an Avro Utf8 and must match the schema name.
  Object fieldName = field.get("name");
  assertThat(fieldName).isInstanceOf(Utf8.class);
  assertThat(fieldName.toString()).isEqualTo(field.getSchema().getName());
  // Boost and the norm / term-frequency flags must round-trip unchanged.
  assertThat(field.get("boost")).isEqualTo(2.3f);
  assertThat(field.get("omitNorms")).isEqualTo(true);
  assertThat(field.get("omitTermFreqAndPositions")).isEqualTo(true);
}
private void buildAttribute(Object element, LuceneWorksBuilder hydrator) {
  // Pre-serialized attributes arrive as raw bytes rather than records.
  if (element instanceof ByteBuffer) {
    hydrator.addSerializedAttribute(asByteArray((ByteBuffer) element));
    return;
  }
  if (!(element instanceof GenericRecord)) {
    throw log.unknownAttributeSerializedRepresentation(element.getClass().getName());
  }
  GenericRecord record = (GenericRecord) element;
  String name = record.getSchema().getName();
  if (name == null) {
    // A null schema name cannot match any known attribute.
    throw log.unknownAttributeSerializedRepresentation(name);
  }
  // Dispatch on the Avro schema name of the serialized attribute.
  switch (name) {
    case "TokenTrackingAttribute": {
      @SuppressWarnings("unchecked")
      List<Integer> positionList = (List<Integer>) record.get("positions");
      hydrator.addTokenTrackingAttribute(positionList);
      break;
    }
    case "CharTermAttribute":
      hydrator.addCharTermAttribute((CharSequence) record.get("sequence"));
      break;
    case "PayloadAttribute":
      hydrator.addPayloadAttribute(asByteArray(record, "payload"));
      break;
    case "KeywordAttribute":
      hydrator.addKeywordAttribute(asBoolean(record, "isKeyword"));
      break;
    case "PositionIncrementAttribute":
      hydrator.addPositionIncrementAttribute(asInt(record, "positionIncrement"));
      break;
    case "FlagsAttribute":
      hydrator.addFlagsAttribute(asInt(record, "flags"));
      break;
    case "TypeAttribute":
      hydrator.addTypeAttribute(asString(record, "type"));
      break;
    case "OffsetAttribute":
      hydrator.addOffsetAttribute(asInt(record, "startOffset"), asInt(record, "endOffset"));
      break;
    default:
      throw log.unknownAttributeSerializedRepresentation(name);
  }
}
@Override
protected void processEvent(GenericRecord record, EventAggregator eventAggregator) {
  // Debug trace of the raw incoming record.
  System.out.println(schemaName + "-Stream: " + record.toString());
  // Pull the SMS fields out of the Avro record and forward to the aggregator.
  final long smsUserId = (long) record.get("userId");
  final long smsTime = (long) record.get("time");
  final String smsContactHash = record.get("contactHash").toString();
  final int smsLength = (int) record.get("msgLength");
  eventAggregator.processSmsReceived(smsUserId, smsTime, smsContactHash, smsLength);
}
private void assertEqualsWithGeneric(List<Bird> expected, List<GenericRecord> actual) { assertEquals(expected.size(), actual.size()); for (int i = 0; i < expected.size(); i++) { Bird fixed = expected.get(i); GenericRecord generic = actual.get(i); assertEquals(fixed.number, generic.get("number")); assertEquals(fixed.quality, generic.get("quality").toString()); // From Avro util.Utf8 assertEquals(fixed.quantity, generic.get("quantity")); assertEquals(fixed.species, generic.get("species").toString()); } }
private Statement decodeStatement(final GenericRecord record) {
  // Positional layout: 0 = subject, 1 = predicate, 2 = object, 3 = context.
  final Resource subj = decodeIdentifier((GenericRecord) record.get(0));
  final URI pred = (URI) decodeIdentifier((GenericRecord) record.get(1));
  final Value obj = decodeValue(record.get(2));
  final Resource ctx = decodeIdentifier((GenericRecord) record.get(3));
  // A null context yields a triple, a non-null one a quad.
  return ctx == null
      ? this.factory.createStatement(subj, pred, obj)
      : this.factory.createStatement(subj, pred, obj, ctx);
}
private void processId(GenericRecord operation, LuceneWorksBuilder hydrator) { GenericRecord id = (GenericRecord) operation.get("id"); Object value = id.get("value"); if (value instanceof ByteBuffer) { hydrator.addIdAsJavaSerialized(asByteArray((ByteBuffer) value)); } else if (value instanceof Utf8) { hydrator.addId(value.toString()); } else { // the rest are serialized objects hydrator.addId((Serializable) value); } }
/**
 * Get a map of field names to default values for an Avro schema.
 *
 * @param avroRecordSchema The schema to get the map of field names to values.
 * @return The map.
 */
public static Map<String, Object> getDefaultValueMap(Schema avroRecordSchema) {
  // Collect only the fields that declare a default value.
  List<Field> defaultFields = new ArrayList<Field>();
  for (Field f : avroRecordSchema.getFields()) {
    if (f.defaultValue() != null) {
      // Need to create a new Field here or we will get
      // org.apache.avro.AvroRuntimeException: Field already used:
      // schemaVersion
      defaultFields.add(new Field(f.name(), f.schema(), f.doc(), f.defaultValue(), f.order()));
    }
  }
  // Trick: deserialize an EMPTY record, telling the reader the writer used the
  // empty schema while the reader expects defaultSchema. Avro schema resolution
  // then materializes every missing field from its declared default value.
  Schema defaultSchema = Schema.createRecord(defaultFields);
  Schema emptyRecordSchema = Schema.createRecord(new ArrayList<Field>());
  DatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>(emptyRecordSchema);
  DatumReader<GenericRecord> reader =
      new GenericDatumReader<GenericRecord>(emptyRecordSchema, defaultSchema);
  GenericRecord emptyRecord = new GenericData.Record(emptyRecordSchema);
  GenericRecord defaultRecord =
      AvroUtils.readAvroEntity(AvroUtils.writeAvroEntity(emptyRecord, writer), reader);
  // Copy the materialized defaults into a plain map keyed by field name.
  Map<String, Object> defaultValueMap = new HashMap<String, Object>();
  for (Field f : defaultFields) {
    defaultValueMap.put(f.name(), defaultRecord.get(f.name()));
  }
  return defaultValueMap;
}
// Merges a nested-record delta field into the common record tree.
// Reuses the existing nested record only when the delta targets the exact same
// record type (full schema-name match); otherwise a fresh record is created.
private void processRecordField(
    CommonRecord record, GenericRecord deltaRecord, String fieldName) {
  CommonRecord nextRecord = null;
  CommonValue nextValue = record.getField(fieldName);
  if (nextValue != null
      && nextValue.isRecord()
      && nextValue
          .getRecord()
          .getSchema()
          .getFullName()
          .equals(deltaRecord.getSchema().getFullName())) {
    nextRecord = nextValue.getRecord();
    GenericFixed uuidFixed = (GenericFixed) deltaRecord.get(UUID);
    if (uuidFixed != null) {
      UUID uuid = AvroGenericUtils.createUuidFromFixed(uuidFixed);
      // Checking if the uuid was changed
      if (!uuid.equals(nextRecord.getUuid())) {
        // Re-key the records index under the new uuid.
        records.remove(nextRecord.getUuid());
        records.put(uuid, nextRecord);
        nextRecord.setUuid(uuid);
      }
    }
  } else {
    // Type changed (or field empty): build and attach a new tracked record.
    nextRecord = createCommonRecord(deltaRecord);
    record.setField(fieldName, commonFactory.createCommonValue(nextRecord));
  }
  // Recursively apply the delta's fields to the (new or reused) record.
  updateRecord(nextRecord, deltaRecord);
}
@Test public void testMapWithUtf8Key() throws Exception { Schema schema = new Schema.Parser().parse(Resources.getResource("map.avsc").openStream()); File tmp = File.createTempFile(getClass().getSimpleName(), ".tmp"); tmp.deleteOnExit(); tmp.delete(); Path file = new Path(tmp.getPath()); AvroParquetWriter<GenericRecord> writer = new AvroParquetWriter<GenericRecord>(file, schema); // Write a record with a map with Utf8 keys. GenericData.Record record = new GenericRecordBuilder(schema) .set("mymap", ImmutableMap.of(new Utf8("a"), 1, new Utf8("b"), 2)) .build(); writer.write(record); writer.close(); AvroParquetReader<GenericRecord> reader = new AvroParquetReader<GenericRecord>(testConf, file); GenericRecord nextRecord = reader.read(); assertNotNull(nextRecord); assertEquals(ImmutableMap.of(str("a"), 1, str("b"), 2), nextRecord.get("mymap")); }
@Test public void testMapWithNulls() throws Exception { Schema schema = new Schema.Parser().parse(Resources.getResource("map_with_nulls.avsc").openStream()); File tmp = File.createTempFile(getClass().getSimpleName(), ".tmp"); tmp.deleteOnExit(); tmp.delete(); Path file = new Path(tmp.getPath()); AvroParquetWriter<GenericRecord> writer = new AvroParquetWriter<GenericRecord>(file, schema); // Write a record with a null value Map<CharSequence, Integer> map = new HashMap<CharSequence, Integer>(); map.put(str("thirty-four"), 34); map.put(str("eleventy-one"), null); map.put(str("one-hundred"), 100); GenericData.Record record = new GenericRecordBuilder(schema).set("mymap", map).build(); writer.write(record); writer.close(); AvroParquetReader<GenericRecord> reader = new AvroParquetReader<GenericRecord>(testConf, file); GenericRecord nextRecord = reader.read(); assertNotNull(nextRecord); assertEquals(map, nextRecord.get("mymap")); }
@SuppressWarnings("unchecked")
private Record decodeRecord(
    final GenericRecord generic, @Nullable final Set<URI> propertiesToDecode) {
  final Record result = Record.create();
  // Positional field 0 holds the optional encoded record identifier.
  final GenericRecord idRecord = (GenericRecord) generic.get(0);
  if (idRecord != null) {
    result.setID((URI) decodeIdentifier(idRecord));
  }
  // Positional field 1 holds the list of (property, values) pairs.
  for (final GenericRecord pair : (Iterable<GenericRecord>) generic.get(1)) {
    final URI property = (URI) decodeIdentifier((GenericRecord) pair.get(0));
    final List<Object> values = decodeNodes(pair.get(1));
    // A null filter set means "decode every property".
    if (propertiesToDecode == null || propertiesToDecode.contains(property)) {
      result.set(property, values);
    }
  }
  return result;
}
public void testWrite() throws IOException { URL url = this.getClass().getClassLoader().getResource("input/Company.avsc"); assertNotNull(url); Schema schema = new Schema.Parser().parse(new File(url.getFile())); assertNotNull(schema); DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(schema); // Another way of loading a file File file = new File("src/test/resources/input/companies.avro"); DataFileReader<GenericRecord> dataFileReader = new DataFileReader<GenericRecord>(file, datumReader); File fileOut = new File("target/companies2.avro"); Schema schemaOut = new Schema.Parser().parse(new File("src/test/resources/input/Company2.avsc")); DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(schemaOut); DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<GenericRecord>(datumWriter); GenericRecord company = null; int count = 0; while (dataFileReader.hasNext()) { company = dataFileReader.next(company); if (company.get("name").toString().equals("aol")) { dataFileWriter.create(schemaOut, fileOut); GenericRecord recordOut = new GenericData.Record(schemaOut); recordOut.put("id", company.get("id")); recordOut.put("name", company.get("name")); assertTrue(recordOut.getSchema().getField("address") != null); assertTrue(recordOut.getSchema().getField("employeeCount") == null); // address is of complex type GenericRecord address = new GenericData.Record((GenericData.Record) company.get("address"), true); recordOut.put("address", address); dataFileWriter.append(recordOut); count++; } } assertTrue(count > 0); dataFileWriter.close(); }
private Resource decodeIdentifier(final GenericRecord record) {
  final Schema schema = record.getSchema();
  // Compressed form: a dictionary key that resolves to the full resource.
  if (schema.equals(Schemas.COMPRESSED_IDENTIFIER)) {
    try {
      return this.dictionary.objectFor((Integer) record.get(0));
    } catch (final IOException ex) {
      throw new IllegalStateException("Cannot access dictionary: " + ex.getMessage(), ex);
    }
  }
  // Plain form: a lexical value, "_:"-prefixed for blank nodes.
  if (schema.equals(Schemas.PLAIN_IDENTIFIER)) {
    final String lexical = record.get(0).toString();
    return lexical.startsWith("_:")
        ? this.factory.createBNode(lexical.substring(2))
        : this.factory.createURI(lexical);
  }
  throw new IllegalArgumentException("Unsupported encoded identifier: " + record);
}
@Override
public void map(GenericRecord in, AvroCollector<Pair<Utf8, Long>> collector, Reporter reporter)
    throws IOException {
  final Utf8 shape = (Utf8) in.get("shape");
  // Records without a shape are silently skipped.
  if (shape == null) {
    return;
  }
  // Emit (shape, 1) so the reducer can sum the counts per shape.
  final Pair<Utf8, Long> pair = new Pair<Utf8, Long>(PAIR_SCHEMA);
  pair.set(shape, 1L);
  collector.collect(pair);
}
private void buildAttributes(GenericRecord record, String field, LuceneWorksBuilder hydrator) {
  // Each entry of the field is one token, itself a list of serialized attributes.
  @SuppressWarnings("unchecked")
  List<List<?>> tokens = (List<List<?>>) record.get(field);
  for (List<?> attributes : tokens) {
    for (Object attribute : attributes) {
      buildAttribute(attribute, hydrator);
    }
    // Close out the current token once all its attributes are hydrated.
    hydrator.addToken();
  }
}
// Entry point: consumes Avro-binary messages from Kafka topic "mytopic" via a
// direct Spark stream and prints the decoded fields of each record.
public static void main(String[] args) {
  SparkConf conf = new SparkConf().setAppName("kafka-sandbox").setMaster("local[*]");
  JavaSparkContext sc = new JavaSparkContext(conf);
  // 2-second micro-batches.
  JavaStreamingContext ssc = new JavaStreamingContext(sc, new Duration(2000));
  Set<String> topics = Collections.singleton("mytopic");
  Map<String, String> kafkaParams = new HashMap<>();
  kafkaParams.put("metadata.broker.list", "sandbox.hortonworks.com:6667");
  // Direct (receiver-less) Kafka stream of raw key/value byte payloads.
  JavaPairInputDStream<String, byte[]> directKafkaStream =
      KafkaUtils.createDirectStream(
          ssc,
          String.class,
          byte[].class,
          StringDecoder.class,
          DefaultDecoder.class,
          kafkaParams,
          topics);
  directKafkaStream.foreachRDD(
      rdd -> {
        rdd.foreach(
            avroRecord -> {
              // NOTE(review): the schema is re-parsed and the injection rebuilt
              // for EVERY record — presumably because Schema/Injection are not
              // serializable into the Spark closure; confirm before hoisting
              // this out of the loop for performance.
              Schema.Parser parser = new Schema.Parser();
              Schema schema = parser.parse(AvroVulabProducer.USER_SCHEMA);
              Injection<GenericRecord, byte[]> recordInjection = GenericAvroCodecs.toBinary(schema);
              // Decode the Avro-binary payload (the tuple's value) into a record.
              GenericRecord record = recordInjection.invert(avroRecord._2).get();
              System.out.println(
                  "str1= "
                      + record.get("str1")
                      + ", str2= "
                      + record.get("str2")
                      + ", int1="
                      + record.get("int1"));
            });
      });
  ssc.start();
  ssc.awaitTermination();
}
private CommonRecord createCommonRecord(GenericRecord avroRecord) {
  GenericFixed uuidFixed = (GenericFixed) avroRecord.get(UUID);
  if (uuidFixed == null) {
    // No uuid in the delta: create an untracked record.
    return commonFactory.createCommonRecord(avroRecord.getSchema());
  }
  // Register the new record in the index under the uuid carried by the delta.
  UUID uuid = AvroGenericUtils.createUuidFromFixed(uuidFixed);
  CommonRecord tracked = commonFactory.createCommonRecord(uuid, avroRecord.getSchema());
  records.put(uuid, tracked);
  return tracked;
}
private Map<String, String> getAnalyzers(GenericRecord operation) {
  Map<?, ?> rawAnalyzers = (Map<?, ?>) operation.get("fieldToAnalyzerMap");
  if (rawAnalyzers == null) {
    return null;
  }
  // Normalize the Avro Utf8 keys and values to plain Strings.
  Map<String, String> analyzers = new HashMap<>(rawAnalyzers.size());
  for (Map.Entry<?, ?> entry : rawAnalyzers.entrySet()) {
    analyzers.put(entry.getKey().toString(), entry.getValue().toString());
  }
  return analyzers;
}
public void serializeGeneric() throws IOException { // Create a datum to serialize. Schema schema = new Schema.Parser().parse(getClass().getResourceAsStream("/MyPair.avsc")); GenericRecord datum = new GenericData.Record(schema); datum.put("left", new Utf8("dog")); datum.put("right", new Utf8("cat")); // Serialize it. ByteArrayOutputStream out = new ByteArrayOutputStream(); DatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>(schema); Encoder encoder = EncoderFactory.get().binaryEncoder(out, null); writer.write(datum, encoder); encoder.flush(); out.close(); System.out.println("Serialization: " + out); // Deserialize it. DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>(schema); BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(out.toByteArray(), null); GenericRecord result = reader.read(null, decoder); System.out.printf("Left: %s, Right: %s\n", result.get("left"), result.get("right")); }
private void assertNumericField(GenericRecord field) {
  // The name is serialized as an Avro Utf8 and must read back as "int".
  Object fieldName = field.get("name");
  assertThat(fieldName).isInstanceOf(Utf8.class);
  assertThat(fieldName.toString()).isEqualTo("int");
  // Numeric-specific setting plus the common boost value.
  assertThat(field.get("precisionStep")).isEqualTo(3);
  assertThat(field.get("boost")).isEqualTo(2.3f);
  // Index flags must round-trip unchanged.
  assertThat(field.get("indexed")).isEqualTo(true);
  assertThat(field.get("omitNorms")).isEqualTo(true);
  assertThat(field.get("omitTermFreqAndPositions")).isEqualTo(true);
  // Store is serialized as an Avro enum symbol.
  Object store = field.get("store");
  assertThat(store).isInstanceOf(GenericData.EnumSymbol.class);
  assertThat(store.toString()).isEqualTo("YES");
}
@Test public void testWrite() throws IOException { // Write all test records for (String record : TestConstants.JSON_RECORDS) { this.writer.write(convertRecord(record)); } Assert.assertEquals(this.writer.recordsWritten(), 3); this.writer.close(); this.writer.commit(); File outputFile = new File( TestConstants.TEST_OUTPUT_DIR + Path.SEPARATOR + this.filePath, TestConstants.TEST_FILE_NAME + "." + TestConstants.TEST_WRITER_ID + "." + TestConstants.TEST_FILE_EXTENSION); DataFileReader<GenericRecord> reader = new DataFileReader<GenericRecord>( outputFile, new GenericDatumReader<GenericRecord>(this.schema)); // Read the records back and assert they are identical to the ones written GenericRecord user1 = reader.next(); // Strings are in UTF8, so we have to call toString() here and below Assert.assertEquals(user1.get("name").toString(), "Alyssa"); Assert.assertEquals(user1.get("favorite_number"), 256); Assert.assertEquals(user1.get("favorite_color").toString(), "yellow"); GenericRecord user2 = reader.next(); Assert.assertEquals(user2.get("name").toString(), "Ben"); Assert.assertEquals(user2.get("favorite_number"), 7); Assert.assertEquals(user2.get("favorite_color").toString(), "red"); GenericRecord user3 = reader.next(); Assert.assertEquals(user3.get("name").toString(), "Charlie"); Assert.assertEquals(user3.get("favorite_number"), 68); Assert.assertEquals(user3.get("favorite_color").toString(), "blue"); reader.close(); }
/**
 * Merges all non-null fields of the grouped records into one output record,
 * emitting it only when the PSAM, longitude and latitude fields are all present.
 *
 * @param key the grouping key
 * @param values the records sharing the key
 * @param context a <code>Reducer.Context</code> value
 * @exception IOException if an error occurs
 * @exception InterruptedException if an error occurs
 */
@Override
public final void reduce(Text key, Iterable<AvroValue<GenericRecord>> values, Context context)
    throws IOException, InterruptedException {
  GenericRecord merged = new GenericData.Record(outputSchema);
  // Later records overwrite earlier ones field-by-field; nulls never overwrite.
  for (AvroValue<GenericRecord> value : values) {
    GenericRecord datum = value.datum();
    for (Schema.Field field : datum.getSchema().getFields()) {
      Object fieldValue = datum.get(field.name());
      if (fieldValue != null) {
        merged.put(field.name(), fieldValue);
      }
    }
  }
  // Only emit records that carry all three required attributes.
  CharSequence psam = (CharSequence) merged.get(PSAM);
  CharSequence longitude = (CharSequence) merged.get(LONGITUDE);
  CharSequence latitude = (CharSequence) merged.get(LATITUDE);
  if (psam != null && longitude != null && latitude != null) {
    context.write(new AvroKey<GenericRecord>(merged), NullWritable.get());
  }
}
public void validateAvroFile(File file) throws IOException { // read the events back using GenericRecord DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>(); DataFileReader<GenericRecord> fileReader = new DataFileReader<GenericRecord>(file, reader); GenericRecord record = new GenericData.Record(fileReader.getSchema()); int numEvents = 0; while (fileReader.hasNext()) { fileReader.next(record); String bodyStr = record.get("message").toString(); System.out.println(bodyStr); numEvents++; } fileReader.close(); Assert.assertEquals("Should have found a total of 3 events", 3, numEvents); }
// Applies an incoming delta to the tracked record tree. On full resync (or an
// unknown uuid) all tracked records are discarded and the root is rebuilt.
@Override
public synchronized void onDeltaReceived(int index, GenericRecord data, boolean fullResync) {
  // Every delta carries the uuid of its target record as an Avro fixed field.
  GenericFixed uuidFixed = (GenericFixed) data.get(UUID);
  UUID uuid = AvroGenericUtils.createUuidFromFixed(uuidFixed);
  if (LOG.isDebugEnabled()) {
    LOG.debug("Processing delta with uuid {}", uuidFixed.toString());
  }
  CommonRecord currentRecord = null;
  if (!fullResync && records.containsKey(uuid)) {
    // Incremental delta for a record we already track.
    currentRecord = records.get(uuid);
  } else {
    // Full resync or unknown uuid: drop everything and rebuild from this delta.
    records.clear();
    currentRecord = createCommonRecord(data);
    rootRecord = currentRecord;
  }
  updateRecord(currentRecord, data);
}
// Test-suite setup: builds a segment from the bundled Avro data and collects,
// for each column, the set of distinct (type-normalized) values seen.
@BeforeClass
public static void before() throws Exception {
  // Locate the Avro test data bundled on the classpath.
  final String filePath =
      TestUtils.getFileFromResourceUrl(
          DictionariesTest.class.getClassLoader().getResource(AVRO_DATA));
  // Start from a clean index directory.
  if (INDEX_DIR.exists()) {
    FileUtils.deleteQuietly(INDEX_DIR);
  }
  // Build the segment ("time_day" is the time column, measured in days).
  final SegmentGeneratorConfig config =
      SegmentTestUtils.getSegmentGenSpecWithSchemAndProjectedColumns(
          new File(filePath), INDEX_DIR, "time_day", TimeUnit.DAYS, "test");
  final SegmentIndexCreationDriver driver = SegmentCreationDriverFactory.get(null);
  driver.init(config);
  driver.build();
  // Re-read the Avro file to gather the per-column unique value sets.
  final Schema schema = AvroUtils.extractSchemaFromAvro(new File(filePath));
  final DataFileStream<GenericRecord> avroReader = AvroUtils.getAvroReader(new File(filePath));
  // NOTE(review): avroReader is never closed — consider closing it after the scan.
  final org.apache.avro.Schema avroSchema = avroReader.getSchema();
  final String[] columns = new String[avroSchema.getFields().size()];
  int i = 0;
  for (final Field f : avroSchema.getFields()) {
    columns[i] = f.name();
    i++;
  }
  uniqueEntries = new HashMap<String, Set<Object>>();
  for (final String column : columns) {
    uniqueEntries.put(column, new HashSet<Object>());
  }
  while (avroReader.hasNext()) {
    final GenericRecord rec = avroReader.next();
    for (final String column : columns) {
      Object val = rec.get(column);
      // Avro strings come back as Utf8; normalize to java.lang.String.
      if (val instanceof Utf8) {
        val = ((Utf8) val).toString();
      }
      uniqueEntries
          .get(column)
          .add(getAppropriateType(schema.getFieldSpecFor(column).getDataType(), val));
    }
  }
}
// Decodes an Avro-encoded literal back into a Sesame Literal, dispatching
// first on the record schema (for typed encodings) and then on the Java
// runtime type of plain values.
private Literal decodeLiteral(final Object generic) {
  if (generic instanceof GenericRecord) {
    final GenericRecord record = (GenericRecord) generic;
    final Schema schema = record.getSchema();
    if (schema.equals(Schemas.STRING_LANG)) {
      final String label = record.get(0).toString(); // Utf8 class used
      final Object language = record.get(1);
      return this.factory.createLiteral(label, language.toString());
    } else if (schema.equals(Schemas.SHORT)) {
      // Short and byte are stored widened to Integer; narrow them back.
      return this.factory.createLiteral(((Integer) record.get(0)).shortValue());
    } else if (schema.equals(Schemas.BYTE)) {
      return this.factory.createLiteral(((Integer) record.get(0)).byteValue());
    } else if (schema.equals(Schemas.BIGINTEGER)) {
      // Arbitrary-precision numbers are stored as their string form.
      return this.factory.createLiteral(record.get(0).toString(), XMLSchema.INTEGER);
    } else if (schema.equals(Schemas.BIGDECIMAL)) {
      return this.factory.createLiteral(record.get(0).toString(), XMLSchema.DECIMAL);
    } else if (schema.equals(Schemas.CALENDAR)) {
      // Calendar is encoded as (timezone offset, epoch millis); the offset is
      // turned into a "GMT+hh:mm" / "GMT-hh:mm" timezone id below, so it is
      // evidently expressed in minutes.
      final int tz = (Integer) record.get(0);
      final GregorianCalendar calendar = new GregorianCalendar();
      calendar.setTimeInMillis((Long) record.get(1));
      calendar.setTimeZone(
          TimeZone.getTimeZone(
              String.format(
                  "GMT%s%02d:%02d", tz >= 0 ? "+" : "-", Math.abs(tz) / 60, Math.abs(tz) % 60)));
      return this.factory.createLiteral(this.datatypeFactory.newXMLGregorianCalendar(calendar));
    }
  } else if (generic instanceof CharSequence) {
    return this.factory.createLiteral(generic.toString()); // Utf8 class used
  } else if (generic instanceof Boolean) {
    return this.factory.createLiteral((Boolean) generic);
  } else if (generic instanceof Long) {
    return this.factory.createLiteral((Long) generic);
  } else if (generic instanceof Integer) {
    return this.factory.createLiteral((Integer) generic);
  } else if (generic instanceof Double) {
    return this.factory.createLiteral((Double) generic);
  } else if (generic instanceof Float) {
    return this.factory.createLiteral((Float) generic);
  }
  // Null is rejected explicitly; anything else is an unsupported encoding.
  Preconditions.checkNotNull(generic);
  throw new IllegalArgumentException("Unsupported generic data: " + generic);
}
@Test public void testEmptyArray() throws Exception { Schema schema = new Schema.Parser().parse(Resources.getResource("array.avsc").openStream()); File tmp = File.createTempFile(getClass().getSimpleName(), ".tmp"); tmp.deleteOnExit(); tmp.delete(); Path file = new Path(tmp.getPath()); AvroParquetWriter<GenericRecord> writer = new AvroParquetWriter<GenericRecord>(file, schema); // Write a record with an empty array. List<Integer> emptyArray = new ArrayList<Integer>(); GenericData.Record record = new GenericRecordBuilder(schema).set("myarray", emptyArray).build(); writer.write(record); writer.close(); AvroParquetReader<GenericRecord> reader = new AvroParquetReader<GenericRecord>(testConf, file); GenericRecord nextRecord = reader.read(); assertNotNull(nextRecord); assertEquals(emptyArray, nextRecord.get("myarray")); }
// Applies every field of the delta onto the common record, dispatching on the
// Avro runtime type of each delta value (record, array, enum, fixed, other).
private void updateRecord(CommonRecord record, GenericRecord delta) {
  List<Field> deltaFields = delta.getSchema().getFields();
  for (Field deltaField : deltaFields) {
    String fieldName = deltaField.name();
    Object rawDeltaField = delta.get(fieldName);
    if (LOG.isDebugEnabled()) {
      LOG.debug(
          "Processing field \"{}\", current value: {}",
          fieldName,
          record.getField(fieldName) != null ? record.getField(fieldName).toString() : null);
    }
    if (AvroGenericUtils.isRecord(rawDeltaField)) {
      // Nested record: merge recursively.
      processRecordField(record, (GenericRecord) rawDeltaField, fieldName);
    } else if (AvroGenericUtils.isArray(rawDeltaField)) {
      processArrayField(record, (GenericArray) rawDeltaField, fieldName);
    } else if (AvroGenericUtils.isEnum(rawDeltaField)) {
      processEnumField(record, (GenericEnumSymbol) rawDeltaField, fieldName);
    } else if (AvroGenericUtils.isFixed(rawDeltaField)) {
      processFixedField(record, (GenericFixed) rawDeltaField, fieldName);
    } else {
      // Primitive (or null): overwrite the field value directly.
      record.setField(fieldName, commonFactory.createCommonValue(rawDeltaField));
    }
  }
}
// Recursively translates an Avro generic datum into its Pegasus Data
// representation, walking the Pegasus DataSchema and the Avro Schema in
// parallel. On a mismatch a message is appended and BAD_RESULT is returned.
private Object translate(Object value, DataSchema dataSchema, Schema avroSchema) {
  // Custom translators registered via an Avro override take precedence.
  AvroOverride avroOverride = getAvroOverride(dataSchema);
  if (avroOverride != null) {
    return avroOverride
        .getCustomDataTranslator()
        .avroGenericToData(this, value, avroSchema, dataSchema);
  }
  DataSchema dereferencedDataSchema = dataSchema.getDereferencedDataSchema();
  DataSchema.Type type = dereferencedDataSchema.getType();
  Object result;
  switch (type) {
    case NULL:
      if (value != null) {
        appendMessage("value must be null for null schema");
        result = BAD_RESULT;
        break;
      }
      result = Data.NULL;
      break;
    case BOOLEAN:
      result = ((Boolean) value).booleanValue();
      break;
    case INT:
      result = ((Number) value).intValue();
      break;
    case LONG:
      result = ((Number) value).longValue();
      break;
    case FLOAT:
      result = ((Number) value).floatValue();
      break;
    case DOUBLE:
      result = ((Number) value).doubleValue();
      break;
    case STRING:
      // Avro strings may be Utf8 instances; toString() normalizes them.
      result = value.toString();
      break;
    case BYTES:
      ByteBuffer byteBuffer = (ByteBuffer) value;
      ByteString byteString = ByteString.copy(byteBuffer);
      // Rewind so the buffer can be consumed again by other readers.
      byteBuffer.rewind();
      result = byteString;
      break;
    case ENUM:
      String enumValue = value.toString();
      EnumDataSchema enumDataSchema = (EnumDataSchema) dereferencedDataSchema;
      if (enumDataSchema.getSymbols().contains(enumValue) == false) {
        appendMessage(
            "enum value %1$s not one of %2$s", enumValue, enumDataSchema.getSymbols());
        result = BAD_RESULT;
        break;
      }
      result = enumValue;
      break;
    case FIXED:
      GenericFixed fixed = (GenericFixed) value;
      byte[] fixedBytes = fixed.bytes();
      FixedDataSchema fixedDataSchema = (FixedDataSchema) dereferencedDataSchema;
      // The fixed payload must match the declared size exactly.
      if (fixedDataSchema.getSize() != fixedBytes.length) {
        appendMessage(
            "GenericFixed size %1$d != FixedDataSchema size %2$d",
            fixedBytes.length, fixedDataSchema.getSize());
        result = BAD_RESULT;
        break;
      }
      byteString = ByteString.copy(fixedBytes);
      result = byteString;
      break;
    case MAP:
      @SuppressWarnings("unchecked")
      Map<?, Object> map = (Map<?, Object>) value;
      DataSchema valueDataSchema = ((MapDataSchema) dereferencedDataSchema).getValues();
      Schema valueAvroSchema = avroSchema.getValueType();
      DataMap dataMap = new DataMap(map.size());
      // Translate each entry, pushing the key onto _path for error context.
      for (Map.Entry<?, Object> entry : map.entrySet()) {
        String key = entry.getKey().toString();
        _path.addLast(key);
        Object entryValue = translate(entry.getValue(), valueDataSchema, valueAvroSchema);
        _path.removeLast();
        dataMap.put(key, entryValue);
      }
      result = dataMap;
      break;
    case ARRAY:
      GenericArray<?> list = (GenericArray<?>) value;
      DataSchema elementDataSchema = ((ArrayDataSchema) dereferencedDataSchema).getItems();
      Schema elementAvroSchema = avroSchema.getElementType();
      DataList dataList = new DataList(list.size());
      // Translate each element, pushing its index onto _path for error context.
      for (int i = 0; i < list.size(); i++) {
        _path.addLast(i);
        Object entryValue = translate(list.get(i), elementDataSchema, elementAvroSchema);
        _path.removeLast();
        dataList.add(entryValue);
      }
      result = dataList;
      break;
    case RECORD:
      GenericRecord record = (GenericRecord) value;
      RecordDataSchema recordDataSchema = (RecordDataSchema) dereferencedDataSchema;
      dataMap = new DataMap(avroSchema.getFields().size());
      for (RecordDataSchema.Field field : recordDataSchema.getFields()) {
        String fieldName = field.getName();
        Object fieldValue = record.get(fieldName);
        // fieldValue could be null if the Avro schema does not contain the named field or
        // the field is present with a null value. In either case we do not add a value
        // to the translated DataMap. We do not consider optional/required/default here
        // either (i.e. it is not an error if a required field is missing); the user can
        // later call ValidateDataAgainstSchema with various
        // settings for RequiredMode to obtain the desired behaviour.
        if (fieldValue == null) {
          continue;
        }
        boolean isOptional = field.getOptional();
        DataSchema fieldDataSchema = field.getType();
        Schema fieldAvroSchema = avroSchema.getField(fieldName).schema();
        if (isOptional && (fieldDataSchema.getDereferencedType() != DataSchema.Type.UNION)) {
          // Avro schema should be union with 2 types: null and the field's type.
          Map.Entry<String, Schema> fieldAvroEntry =
              findUnionMember(fieldDataSchema, fieldAvroSchema);
          if (fieldAvroEntry == null) {
            continue;
          }
          fieldAvroSchema = fieldAvroEntry.getValue();
        }
        _path.addLast(fieldName);
        dataMap.put(fieldName, translate(fieldValue, fieldDataSchema, fieldAvroSchema));
        _path.removeLast();
      }
      result = dataMap;
      break;
    case UNION:
      UnionDataSchema unionDataSchema = (UnionDataSchema) dereferencedDataSchema;
      Map.Entry<DataSchema, Schema> memberSchemas =
          findUnionMemberSchema(value, unionDataSchema, avroSchema);
      if (memberSchemas == null) {
        result = BAD_RESULT;
        break;
      }
      if (value == null) {
        // schema must be "null" schema
        result = Data.NULL;
      } else {
        // Non-null member: wrap in a single-entry map keyed by the member key.
        DataSchema memberDataSchema = memberSchemas.getKey();
        Schema memberAvroSchema = memberSchemas.getValue();
        String key = memberDataSchema.getUnionMemberKey();
        dataMap = new DataMap(1);
        _path.addLast(key);
        dataMap.put(key, translate(value, memberDataSchema, memberAvroSchema));
        _path.removeLast();
        result = dataMap;
      }
      break;
    default:
      appendMessage("schema type unknown %1$s", dereferencedDataSchema.getType());
      result = BAD_RESULT;
      break;
  }
  return result;
}
private byte[] asByteArray(GenericRecord operation, String field) {
  // Avro materializes bytes-typed fields as ByteBuffers; delegate the copy.
  return asByteArray((ByteBuffer) operation.get(field));
}