@Test
public void testMapWithNulls() throws Exception {
  Schema schema = new Schema.Parser().parse(
      Resources.getResource("map_with_nulls.avsc").openStream());

  File tmp = File.createTempFile(getClass().getSimpleName(), ".tmp");
  tmp.deleteOnExit();
  tmp.delete();
  Path file = new Path(tmp.getPath());

  AvroParquetWriter<GenericRecord> writer =
      new AvroParquetWriter<GenericRecord>(file, schema);

  // Write a record with a null value
  Map<CharSequence, Integer> map = new HashMap<CharSequence, Integer>();
  map.put(str("thirty-four"), 34);
  map.put(str("eleventy-one"), null);
  map.put(str("one-hundred"), 100);

  GenericData.Record record = new GenericRecordBuilder(schema)
      .set("mymap", map).build();
  writer.write(record);
  writer.close();

  AvroParquetReader<GenericRecord> reader =
      new AvroParquetReader<GenericRecord>(testConf, file);
  GenericRecord nextRecord = reader.read();

  assertNotNull(nextRecord);
  assertEquals(map, nextRecord.get("mymap"));
}
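For reference, a minimal sketch of what the map_with_nulls.avsc schema loaded above could look like, built inline rather than read from the classpath. The record name is an assumption; the map field with nullable int values follows from what the test writes.

// Assumed shape of map_with_nulls.avsc: a single map field whose values are a
// union of null and int, so null entries like "eleventy-one" above are legal.
Schema assumedSchema = new Schema.Parser().parse(
    "{\"type\": \"record\", \"name\": \"MapWithNulls\", \"fields\": ["
    + "  {\"name\": \"mymap\", \"type\": {\"type\": \"map\", \"values\": [\"null\", \"int\"]}}"
    + "]}");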
@Test
public void testMapWithUtf8Key() throws Exception {
  Schema schema = new Schema.Parser().parse(
      Resources.getResource("map.avsc").openStream());

  File tmp = File.createTempFile(getClass().getSimpleName(), ".tmp");
  tmp.deleteOnExit();
  tmp.delete();
  Path file = new Path(tmp.getPath());

  AvroParquetWriter<GenericRecord> writer =
      new AvroParquetWriter<GenericRecord>(file, schema);

  // Write a record with a map with Utf8 keys.
  GenericData.Record record = new GenericRecordBuilder(schema)
      .set("mymap", ImmutableMap.of(new Utf8("a"), 1, new Utf8("b"), 2))
      .build();
  writer.write(record);
  writer.close();

  AvroParquetReader<GenericRecord> reader =
      new AvroParquetReader<GenericRecord>(testConf, file);
  GenericRecord nextRecord = reader.read();

  assertNotNull(nextRecord);
  assertEquals(ImmutableMap.of(str("a"), 1, str("b"), 2), nextRecord.get("mymap"));
}
@Test public void testRead_SpecificReader() throws IOException { GenericRecord savedRecord = new GenericData.Record(schema); savedRecord.put("name", "John Doe"); savedRecord.put("age", 42); savedRecord.put("siblingnames", Lists.newArrayList("Jimmy", "Jane")); populateGenericFile(Lists.newArrayList(savedRecord)); AvroFileReaderFactory<Person> genericReader = new AvroFileReaderFactory<Person>(Avros.records(Person.class), new Configuration()); Iterator<Person> recordIterator = genericReader.read( FileSystem.getLocal(new Configuration()), new Path(this.avroFile.getAbsolutePath())); Person expectedPerson = new Person(); expectedPerson.setAge(42); expectedPerson.setName("John Doe"); List<CharSequence> siblingNames = Lists.newArrayList(); siblingNames.add("Jimmy"); siblingNames.add("Jane"); expectedPerson.setSiblingnames(siblingNames); Person person = recordIterator.next(); assertEquals(expectedPerson, person); assertFalse(recordIterator.hasNext()); }
private static GenericRecord buildUser(Schema schema, String name, String office, String color) { GenericRecord user = new GenericData.Record(schema); user.put("name", name); user.put("office", office); if (color != null) user.put("favorite_color", color); return user; }
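A small usage sketch for the helper above; the schema and parquetWriter variables are assumed to come from the enclosing test fixture and are not part of the original snippet.

// Hypothetical usage of buildUser: schema and parquetWriter are assumptions here.
GenericRecord user = buildUser(schema, "Alice", "Engineering", null);
parquetWriter.write(user);  // favorite_color is simply left unset when color is null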
private void buildAttribute(Object element, LuceneWorksBuilder hydrator) { if (element instanceof GenericRecord) { GenericRecord record = (GenericRecord) element; String name = record.getSchema().getName(); if ("TokenTrackingAttribute".equals(name)) { @SuppressWarnings("unchecked") List<Integer> positionList = (List<Integer>) record.get("positions"); hydrator.addTokenTrackingAttribute(positionList); } else if ("CharTermAttribute".equals(name)) { hydrator.addCharTermAttribute((CharSequence) record.get("sequence")); } else if ("PayloadAttribute".equals(name)) { hydrator.addPayloadAttribute(asByteArray(record, "payload")); } else if ("KeywordAttribute".equals(name)) { hydrator.addKeywordAttribute(asBoolean(record, "isKeyword")); } else if ("PositionIncrementAttribute".equals(name)) { hydrator.addPositionIncrementAttribute(asInt(record, "positionIncrement")); } else if ("FlagsAttribute".equals(name)) { hydrator.addFlagsAttribute(asInt(record, "flags")); } else if ("TypeAttribute".equals(name)) { hydrator.addTypeAttribute(asString(record, "type")); } else if ("OffsetAttribute".equals(name)) { hydrator.addOffsetAttribute(asInt(record, "startOffset"), asInt(record, "endOffset")); } else { throw log.unknownAttributeSerializedRepresentation(name); } } else if (element instanceof ByteBuffer) { hydrator.addSerializedAttribute(asByteArray((ByteBuffer) element)); } else { throw log.unknownAttributeSerializedRepresentation(element.getClass().getName()); } }
private void assertField(GenericRecord field) { assertThat(field.get("name")).isInstanceOf(Utf8.class); assertThat(field.get("name").toString()).isEqualTo(field.getSchema().getName()); assertThat(field.get("boost")).isEqualTo(2.3f); assertThat(field.get("omitNorms")).isEqualTo(true); assertThat(field.get("omitTermFreqAndPositions")).isEqualTo(true); }
private void processRecordField(CommonRecord record, GenericRecord deltaRecord, String fieldName) {
  CommonRecord nextRecord = null;
  CommonValue nextValue = record.getField(fieldName);
  if (nextValue != null
      && nextValue.isRecord()
      && nextValue.getRecord().getSchema().getFullName()
          .equals(deltaRecord.getSchema().getFullName())) {
    nextRecord = nextValue.getRecord();
    GenericFixed uuidFixed = (GenericFixed) deltaRecord.get(UUID);
    if (uuidFixed != null) {
      UUID uuid = AvroGenericUtils.createUuidFromFixed(uuidFixed);
      // Checking if the uuid was changed
      if (!uuid.equals(nextRecord.getUuid())) {
        records.remove(nextRecord.getUuid());
        records.put(uuid, nextRecord);
        nextRecord.setUuid(uuid);
      }
    }
  } else {
    nextRecord = createCommonRecord(deltaRecord);
    record.setField(fieldName, commonFactory.createCommonValue(nextRecord));
  }
  updateRecord(nextRecord, deltaRecord);
}
/**
 * Uses the (optional) resource key and the REST JSON entry as templates and fills them in
 * using the Avro record as a reference.
 *
 * <p>e.g. REST JSON entry HOCON template:
 * AccountId=${sf_account_id},Member_Id__c=${member_id}
 * Avro: {"sf_account_id":{"string":"0016000000UiCYHAA3"},"member_id":{"long":296458833}}
 *
 * <p>Converted JSON: {"AccountId":"0016000000UiCYHAA3","Member_Id__c":296458833}
 *
 * <p>Because this is a template-based approach, it can produce a nested JSON structure even
 * when the Avro record is flat (or vice versa).
 *
 * <p>e.g. REST resource template: /sobject/account/memberId/${member_id}
 * Avro: {"sf_account_id":{"string":"0016000000UiCYHAA3"},"member_id":{"long":296458833}}
 * Converted resource: /sobject/account/memberId/296458833
 *
 * <p>The converted resource is used to form the endpoint, e.g.
 * http://www.server.com:9090/sobject/account/memberId/296458833
 *
 * <p>{@inheritDoc}
 *
 * @see gobblin.converter.Converter#convertRecord(java.lang.Object, java.lang.Object,
 *     gobblin.configuration.WorkUnitState)
 */
@Override
public Iterable<RestEntry<JsonObject>> convertRecord(
    Void outputSchema, GenericRecord inputRecord, WorkUnitState workUnit)
    throws DataConversionException {
  Config srcConfig = ConfigFactory.parseString(
      inputRecord.toString(), ConfigParseOptions.defaults().setSyntax(ConfigSyntax.JSON));

  String resourceKey = workUnit.getProp(CONVERTER_AVRO_REST_ENTRY_RESOURCE_KEY, "");
  if (!StringUtils.isEmpty(resourceKey)) {
    final String dummyKey = "DUMMY";
    Config tmpConfig =
        ConfigFactory.parseString(dummyKey + "=" + resourceKey).resolveWith(srcConfig);
    resourceKey = tmpConfig.getString(dummyKey);
  }

  String hoconInput = workUnit.getProp(CONVERTER_AVRO_REST_JSON_ENTRY_TEMPLATE);
  if (StringUtils.isEmpty(hoconInput)) {
    return new SingleRecordIterable<>(
        new RestEntry<>(resourceKey, parser.parse(inputRecord.toString()).getAsJsonObject()));
  }

  Config destConfig = ConfigFactory.parseString(hoconInput).resolveWith(srcConfig);
  JsonObject json =
      parser.parse(destConfig.root().render(ConfigRenderOptions.concise())).getAsJsonObject();
  return new SingleRecordIterable<>(new RestEntry<>(resourceKey, json));
}
/**
 * Get a map of field names to default values for an Avro record schema.
 *
 * @param avroRecordSchema The record schema from which to extract the default values.
 * @return A map from field name to that field's default value.
 */
public static Map<String, Object> getDefaultValueMap(Schema avroRecordSchema) {
  List<Field> defaultFields = new ArrayList<Field>();
  for (Field f : avroRecordSchema.getFields()) {
    if (f.defaultValue() != null) {
      // Need to create a new Field here or we will get
      // org.apache.avro.AvroRuntimeException: Field already used: schemaVersion
      defaultFields.add(new Field(f.name(), f.schema(), f.doc(), f.defaultValue(), f.order()));
    }
  }

  Schema defaultSchema = Schema.createRecord(defaultFields);
  Schema emptyRecordSchema = Schema.createRecord(new ArrayList<Field>());
  DatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>(emptyRecordSchema);
  DatumReader<GenericRecord> reader =
      new GenericDatumReader<GenericRecord>(emptyRecordSchema, defaultSchema);

  // Writing an empty record with the empty schema and reading it back with the
  // defaults-only schema makes Avro's schema resolution materialize the default values.
  GenericRecord emptyRecord = new GenericData.Record(emptyRecordSchema);
  GenericRecord defaultRecord =
      AvroUtils.readAvroEntity(AvroUtils.writeAvroEntity(emptyRecord, writer), reader);

  Map<String, Object> defaultValueMap = new HashMap<String, Object>();
  for (Field f : defaultFields) {
    defaultValueMap.put(f.name(), defaultRecord.get(f.name()));
  }
  return defaultValueMap;
}
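A brief usage sketch of the helper above; the schema literal is invented for illustration and only shows that fields with defaults appear in the returned map.

// Hypothetical usage: the schema below is made up for illustration only.
Schema s = new Schema.Parser().parse(
    "{\"type\": \"record\", \"name\": \"Example\", \"fields\": ["
    + "  {\"name\": \"schemaVersion\", \"type\": \"int\", \"default\": 1},"
    + "  {\"name\": \"name\", \"type\": \"string\"}"
    + "]}");
Map<String, Object> defaults = getDefaultValueMap(s);
// defaults contains {"schemaVersion": 1}; "name" has no default and is absent.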
private byte[] createAvroData(String name, int age, List<String> emails) throws IOException { String AVRO_SCHEMA = "{\n" + "\"type\": \"record\",\n" + "\"name\": \"Employee\",\n" + "\"fields\": [\n" + " {\"name\": \"name\", \"type\": \"string\"},\n" + " {\"name\": \"age\", \"type\": \"int\"},\n" + " {\"name\": \"emails\", \"type\": {\"type\": \"array\", \"items\": \"string\"}},\n" + " {\"name\": \"boss\", \"type\": [\"Employee\",\"null\"]}\n" + "]}"; Schema schema = new Schema.Parser().parse(AVRO_SCHEMA); ByteArrayOutputStream out = new ByteArrayOutputStream(); GenericRecord e1 = new GenericData.Record(schema); e1.put("name", name); e1.put("age", age); e1.put("emails", emails); e1.put("boss", null); DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema); DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(datumWriter); dataFileWriter.create(schema, out); dataFileWriter.append(e1); dataFileWriter.close(); return out.toByteArray(); }
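A short sketch of how the bytes returned above could be read back. It uses the standard Avro container-format reader and is not part of the original snippet; the sample arguments are arbitrary.

// Sketch: decode the byte[] produced by createAvroData back into GenericRecords,
// assuming the standard Avro container format written by DataFileWriter above.
try (DataFileStream<GenericRecord> in = new DataFileStream<>(
    new ByteArrayInputStream(createAvroData("Jane", 30, Arrays.asList("jane@example.com"))),
    new GenericDatumReader<GenericRecord>())) {
  for (GenericRecord rec : in) {
    System.out.println(rec.get("name") + " / " + rec.get("age"));
  }
}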
private GenericRecord createField(Schema schema) { GenericRecord field = new GenericData.Record(schema); field.put("name", schema.getName()); field.put("boost", 2.3f); field.put("omitNorms", true); field.put("omitTermFreqAndPositions", true); return field; }
@Override protected void processEvent(GenericRecord record, EventAggregator eventAggregator) { System.out.println(schemaName + "-Stream: " + record.toString()); long userId = (long) record.get("userId"); long time = (long) record.get("time"); String contactHash = record.get("contactHash").toString(); int msgLength = (int) record.get("msgLength"); eventAggregator.processSmsReceived(userId, time, contactHash, msgLength); }
@Override public void deserialize(byte[] data, LuceneWorksBuilder hydrator) { final ByteArrayInputStream inputStream = new ByteArrayInputStream(data); final int majorVersion = inputStream.read(); final int minorVersion = inputStream.read(); final Protocol protocol = protocols.getProtocol(majorVersion, minorVersion); Decoder decoder = DecoderFactory.get().binaryDecoder(inputStream, null); GenericDatumReader<GenericRecord> reader = new GenericDatumReader<>(protocol.getType("Message")); GenericRecord result; try { result = reader.read(null, decoder); } catch (IOException e) { throw log.unableToDeserializeAvroStream(e); } classReferences = asListOfString(result, "classReferences"); final List<GenericRecord> operations = asListOfGenericRecords(result, "operations"); final ConversionContext conversionContext = new ContextualExceptionBridgeHelper(); for (GenericRecord operation : operations) { String schema = operation.getSchema().getName(); if ("OptimizeAll".equals(schema)) { hydrator.addOptimizeAll(); } else if ("PurgeAll".equals(schema)) { hydrator.addPurgeAllLuceneWork(asClass(operation, "class")); } else if ("Flush".equals(schema)) { hydrator.addFlush(); } else if ("Delete".equals(schema)) { processId(operation, hydrator); hydrator.addDeleteLuceneWork(asClass(operation, "class"), conversionContext); } else if ("DeleteByQuery".equals(schema)) { String entityClassName = asClass(operation, "class"); int queryKey = asInt(operation, "key"); DeleteByQuerySupport.StringToQueryMapper mapper = DeleteByQuerySupport.getStringToQueryMapper(queryKey); List<Utf8> stringList = asListOfString(operation, "query"); String[] query = new String[stringList.size()]; for (int i = 0; i < stringList.size(); ++i) { query[i] = stringList.get(i).toString(); } hydrator.addDeleteByQueryLuceneWork(entityClassName, mapper.fromString(query)); } else if ("Add".equals(schema)) { buildLuceneDocument(asGenericRecord(operation, "document"), hydrator); Map<String, String> analyzers = getAnalyzers(operation); processId(operation, hydrator); hydrator.addAddLuceneWork(asClass(operation, "class"), analyzers, conversionContext); } else if ("Update".equals(schema)) { buildLuceneDocument(asGenericRecord(operation, "document"), hydrator); Map<String, String> analyzers = getAnalyzers(operation); processId(operation, hydrator); hydrator.addUpdateLuceneWork(asClass(operation, "class"), analyzers, conversionContext); } else { throw log.cannotDeserializeOperation(schema); } } }
private Value decodeValue(final Object generic) { if (generic instanceof GenericRecord) { final GenericRecord record = (GenericRecord) generic; final Schema schema = record.getSchema(); if (schema.equals(Schemas.COMPRESSED_IDENTIFIER) || schema.equals(Schemas.PLAIN_IDENTIFIER)) { return decodeIdentifier(record); } } return decodeLiteral(generic); }
private Statement decodeStatement(final GenericRecord record) { final Resource subj = decodeIdentifier((GenericRecord) record.get(0)); final URI pred = (URI) decodeIdentifier((GenericRecord) record.get(1)); final Value obj = decodeValue(record.get(2)); final Resource ctx = decodeIdentifier((GenericRecord) record.get(3)); if (ctx == null) { return this.factory.createStatement(subj, pred, obj); } else { return this.factory.createStatement(subj, pred, obj, ctx); } }
private CommonRecord createCommonRecord(GenericRecord avroRecord) { GenericFixed uuidFixed = (GenericFixed) avroRecord.get(UUID); if (uuidFixed != null) { UUID uuid = AvroGenericUtils.createUuidFromFixed(uuidFixed); CommonRecord newRecord = commonFactory.createCommonRecord(uuid, avroRecord.getSchema()); records.put(uuid, newRecord); return newRecord; } else { return commonFactory.createCommonRecord(avroRecord.getSchema()); } }
private void assertEqualsWithGeneric(List<Bird> expected, List<GenericRecord> actual) {
  assertEquals(expected.size(), actual.size());
  for (int i = 0; i < expected.size(); i++) {
    Bird fixed = expected.get(i);
    GenericRecord generic = actual.get(i);
    assertEquals(fixed.number, generic.get("number"));
    assertEquals(fixed.quality, generic.get("quality").toString()); // From Avro util.Utf8
    assertEquals(fixed.quantity, generic.get("quantity"));
    assertEquals(fixed.species, generic.get("species").toString());
  }
}
private GenericRecord convertRecord(String inputRecord) { Gson gson = new Gson(); JsonElement element = gson.fromJson(inputRecord, JsonElement.class); Map<String, Object> fields = gson.fromJson(element, FIELD_ENTRY_TYPE); GenericRecord outputRecord = new GenericData.Record(schema); for (Map.Entry<String, Object> entry : fields.entrySet()) { outputRecord.put(entry.getKey(), entry.getValue()); } return outputRecord; }
private void processId(GenericRecord operation, LuceneWorksBuilder hydrator) {
  GenericRecord id = (GenericRecord) operation.get("id");
  Object value = id.get("value");
  if (value instanceof ByteBuffer) {
    hydrator.addIdAsJavaSerialized(asByteArray((ByteBuffer) value));
  } else if (value instanceof Utf8) {
    hydrator.addId(value.toString());
  } else {
    // the rest are serialized objects
    hydrator.addId((Serializable) value);
  }
}
public void execute(TridentTuple tuple, TridentCollector collector) { GenericRecord docEntry = new GenericData.Record(schema); docEntry.put("docid", tuple.getStringByField("documentId")); docEntry.put("time", Time.currentTimeMillis()); docEntry.put("line", tuple.getStringByField("document")); try { dataFileWriter.append(docEntry); dataFileWriter.flush(); } catch (IOException e) { LOG.error("Error writing to document record: " + e); throw new RuntimeException(e); } }
public void reduce( Utf8 key, Iterable<Long> values, AvroCollector<GenericRecord> collector, Reporter reporter) throws IOException { long sum = 0; for (Long val : values) { sum += val; } GenericRecord value = new GenericData.Record(OUTPUT_SCHEMA); value.put("shape", key); value.put("count", sum); collector.collect(value); }
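For context, a plausible definition of the OUTPUT_SCHEMA populated by the reducer above. The record name is an assumption; the field names and types follow from the puts (a "shape" string key and a "count" long sum).

// Plausible OUTPUT_SCHEMA for the reducer above (the record name is assumed).
private static final Schema OUTPUT_SCHEMA = new Schema.Parser().parse(
    "{\"type\": \"record\", \"name\": \"ShapeCount\", \"fields\": ["
    + "  {\"name\": \"shape\", \"type\": \"string\"},"
    + "  {\"name\": \"count\", \"type\": \"long\"}"
    + "]}");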
@Override
public void execute(Tuple inputTuple) {
  /* Processing tuples of the shape (DATASOURCE_ID, TIMESTAMP_FIELD, CONTENT_FIELD) */

  // get datasource
  String datasource = inputTuple.getStringByField(RestIngestionSpout.DATASOURCE_ID);
  // compute month
  long timestamp = inputTuple.getLongByField(RestIngestionSpout.TIMESTAMP_FIELD);
  // this computation is completely stateless
  String month = timestampToMonth(timestamp);

  // now get the DataFileWriter
  DataFileWriter<GenericRecord> writer = null;
  try {
    writer = this.writersCache.get(DatasourceMonth.create(datasource, month));
  } catch (ExecutionException ee) {
    LOGGER.error("Error getting DataFileWriter for tuple for datasource " + datasource
        + " and timestamp " + timestamp + " : " + ee.getMessage());
    this.collector.fail(inputTuple);
    return;
  }

  // create and write a new record
  GenericRecord newDataRecord = new GenericData.Record(AVRO_SCHEMA);
  newDataRecord.put(AVRO_TIMESTAMP_FIELD, new Long(timestamp));
  newDataRecord.put(AVRO_CONTENT_FIELD,
      inputTuple.getStringByField(RestIngestionSpout.CONTENT_FIELD));
  try {
    writer.append(newDataRecord);
  } catch (IOException ioe) {
    LOGGER.error("Error writing Avro record for datasource " + datasource
        + " and timestamp " + timestamp + " : " + ioe.getMessage());
    this.collector.fail(inputTuple);
    return;
  }

  // ACK processing for this tuple as ok
  this.collector.ack(inputTuple);
}
private Object decodeNode(final Object generic) { if (generic instanceof GenericRecord) { final GenericRecord record = (GenericRecord) generic; final Schema schema = record.getSchema(); if (schema.equals(Schemas.RECORD)) { return decodeRecord(record, null); } else if (schema.equals(Schemas.PLAIN_IDENTIFIER) || schema.equals(Schemas.COMPRESSED_IDENTIFIER)) { return decodeIdentifier(record); } else if (schema.equals(Schemas.STATEMENT)) { return decodeStatement(record); } } return decodeLiteral(generic); }
public void validateAvroFile(File file) throws IOException {
  // read the events back using GenericRecord
  DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
  DataFileReader<GenericRecord> fileReader = new DataFileReader<GenericRecord>(file, reader);
  GenericRecord record = new GenericData.Record(fileReader.getSchema());
  int numEvents = 0;
  while (fileReader.hasNext()) {
    fileReader.next(record);
    String bodyStr = record.get("message").toString();
    System.out.println(bodyStr);
    numEvents++;
  }
  fileReader.close();
  Assert.assertEquals("Should have found a total of 3 events", 3, numEvents);
}
public Iterable<GenericRecord> convertRecord(
    Schema outputSchema, JsonElement inputRecord, WorkUnitState workUnit) {
  JsonElement element = GSON.fromJson(inputRecord, JsonElement.class);
  Map<String, Object> fields = GSON.fromJson(element, FIELD_ENTRY_TYPE);
  GenericRecord record = new GenericData.Record(outputSchema);
  for (Map.Entry<String, Object> entry : fields.entrySet()) {
    if (entry.getKey().equals("*")) {
      // switch '*' to 'content' since '*' is not a valid avro schema field name
      record.put(JSON_CONTENT_MEMBER, entry.getValue());
    } else {
      record.put(entry.getKey(), entry.getValue());
    }
  }
  return new SingleRecordIterable<GenericRecord>(record);
}
public void testWrite() throws IOException {
  URL url = this.getClass().getClassLoader().getResource("input/Company.avsc");
  assertNotNull(url);
  Schema schema = new Schema.Parser().parse(new File(url.getFile()));
  assertNotNull(schema);
  DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(schema);

  // Another way of loading a file
  File file = new File("src/test/resources/input/companies.avro");
  DataFileReader<GenericRecord> dataFileReader =
      new DataFileReader<GenericRecord>(file, datumReader);

  File fileOut = new File("target/companies2.avro");
  Schema schemaOut = new Schema.Parser().parse(new File("src/test/resources/input/Company2.avsc"));
  DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(schemaOut);
  DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<GenericRecord>(datumWriter);

  GenericRecord company = null;
  int count = 0;
  while (dataFileReader.hasNext()) {
    company = dataFileReader.next(company);
    if (company.get("name").toString().equals("aol")) {
      dataFileWriter.create(schemaOut, fileOut);
      GenericRecord recordOut = new GenericData.Record(schemaOut);
      recordOut.put("id", company.get("id"));
      recordOut.put("name", company.get("name"));
      assertTrue(recordOut.getSchema().getField("address") != null);
      assertTrue(recordOut.getSchema().getField("employeeCount") == null);

      // address is of complex type
      GenericRecord address =
          new GenericData.Record((GenericData.Record) company.get("address"), true);
      recordOut.put("address", address);

      dataFileWriter.append(recordOut);
      count++;
    }
  }
  assertTrue(count > 0);
  dataFileWriter.close();
}
@BeforeClass public static void before() throws Exception { final String filePath = TestUtils.getFileFromResourceUrl( DictionariesTest.class.getClassLoader().getResource(AVRO_DATA)); if (INDEX_DIR.exists()) { FileUtils.deleteQuietly(INDEX_DIR); } final SegmentGeneratorConfig config = SegmentTestUtils.getSegmentGenSpecWithSchemAndProjectedColumns( new File(filePath), INDEX_DIR, "time_day", TimeUnit.DAYS, "test"); final SegmentIndexCreationDriver driver = SegmentCreationDriverFactory.get(null); driver.init(config); driver.build(); final Schema schema = AvroUtils.extractSchemaFromAvro(new File(filePath)); final DataFileStream<GenericRecord> avroReader = AvroUtils.getAvroReader(new File(filePath)); final org.apache.avro.Schema avroSchema = avroReader.getSchema(); final String[] columns = new String[avroSchema.getFields().size()]; int i = 0; for (final Field f : avroSchema.getFields()) { columns[i] = f.name(); i++; } uniqueEntries = new HashMap<String, Set<Object>>(); for (final String column : columns) { uniqueEntries.put(column, new HashSet<Object>()); } while (avroReader.hasNext()) { final GenericRecord rec = avroReader.next(); for (final String column : columns) { Object val = rec.get(column); if (val instanceof Utf8) { val = ((Utf8) val).toString(); } uniqueEntries .get(column) .add(getAppropriateType(schema.getFieldSpecFor(column).getDataType(), val)); } } }
@SuppressWarnings("unchecked") private Record decodeRecord( final GenericRecord generic, @Nullable final Set<URI> propertiesToDecode) { final Record record = Record.create(); final GenericRecord encodedID = (GenericRecord) generic.get(0); if (encodedID != null) { record.setID((URI) decodeIdentifier(encodedID)); } for (final GenericRecord prop : (Iterable<GenericRecord>) generic.get(1)) { final URI property = (URI) decodeIdentifier((GenericRecord) prop.get(0)); final List<Object> values = decodeNodes(prop.get(1)); if (propertiesToDecode == null || propertiesToDecode.contains(property)) { record.set(property, values); } } return record; }
@Test public void testRead_GenericReader() throws IOException { GenericRecord savedRecord = new GenericData.Record(schema); savedRecord.put("name", "John Doe"); savedRecord.put("age", 42); savedRecord.put("siblingnames", Lists.newArrayList("Jimmy", "Jane")); populateGenericFile(Lists.newArrayList(savedRecord)); AvroFileReaderFactory<GenericData.Record> genericReader = new AvroFileReaderFactory<GenericData.Record>(Avros.generics(schema), new Configuration()); Iterator<GenericData.Record> recordIterator = genericReader.read( FileSystem.getLocal(new Configuration()), new Path(this.avroFile.getAbsolutePath())); GenericRecord genericRecord = recordIterator.next(); assertEquals(savedRecord, genericRecord); assertFalse(recordIterator.hasNext()); }
private Resource decodeIdentifier(final GenericRecord record) { final Schema schema = record.getSchema(); if (schema.equals(Schemas.COMPRESSED_IDENTIFIER)) { try { return this.dictionary.objectFor((Integer) record.get(0)); } catch (final IOException ex) { throw new IllegalStateException("Cannot access dictionary: " + ex.getMessage(), ex); } } else if (schema.equals(Schemas.PLAIN_IDENTIFIER)) { final String string = record.get(0).toString(); if (string.startsWith("_:")) { return this.factory.createBNode(string.substring(2)); } else { return this.factory.createURI(string); } } throw new IllegalArgumentException("Unsupported encoded identifier: " + record); }