// Get the schema for the Avro record from the object container file
public String getRecordSchema() throws IOException {
  Schema schema = AvroUtils.getAvroSchemaFromPath(getInputPath());
  return schema.toString();
}

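// For reference, a minimal illustration (not part of the original class) of the two
// Schema string forms used throughout these snippets: toString() emits the schema as
// compact single-line JSON, while toString(true) pretty-prints the same JSON.
void schemaStringFormsDemo() {
  Schema stringSchema = Schema.create(Schema.Type.STRING);
  String compact = stringSchema.toString();    // "\"string\""
  String pretty = stringSchema.toString(true); // same JSON, indented for complex schemas
}
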
@Test
public void testBasicConversion() throws IOException {
  TestRunner runner = TestRunners.newTestRunner(ConvertCSVToAvro.class);
  runner.assertNotValid();
  runner.setProperty(ConvertCSVToAvro.SCHEMA, SCHEMA.toString());
  runner.assertValid();

  runner.enqueue(streamFor(CSV_CONTENT));
  runner.run();

  long converted = runner.getCounterValue("Converted records");
  long errors = runner.getCounterValue("Conversion errors");
  Assert.assertEquals("Should convert 2 rows", 2, converted);
  Assert.assertEquals("Should reject 1 row", 1, errors);

  runner.assertTransferCount("success", 1);
  runner.assertTransferCount("failure", 0);
  runner.assertTransferCount("incompatible", 1);

  MockFlowFile incompatible = runner.getFlowFilesForRelationship("incompatible").get(0);
  String failureContent =
      new String(runner.getContentAsByteArray(incompatible), StandardCharsets.UTF_8);
  Assert.assertEquals("Should reject an invalid string and double", CSV_CONTENT, failureContent);
  Assert.assertEquals("Should accumulate error messages",
      FAILURE_SUMMARY, incompatible.getAttribute("errors"));
}

@Override
public void createDestination() throws Exception {
  // read the original schema and write an evolved copy next to it
  FileInputStream schemaIn = new FileInputStream(avsc);
  Schema original = new Schema.Parser().parse(schemaIn);
  schemaIn.close();

  Schema evolved = getEvolvedSchema(original);

  FileOutputStream schemaOut = new FileOutputStream(evolvedAvsc);
  // use an explicit charset rather than the platform default
  schemaOut.write(evolved.toString(true).getBytes(StandardCharsets.UTF_8));
  schemaOut.close();

  List<String> createArgs = Lists.newArrayList(
      "create", dest,
      "-s", evolvedAvsc,
      "-r", repoUri,
      "-d", "target/data");
  createArgs.addAll(getExtraCreateArgs());

  TestUtil.run(LoggerFactory.getLogger(this.getClass()),
      "delete", dest, "-r", repoUri, "-d", "target/data");
  TestUtil.run(LoggerFactory.getLogger(this.getClass()),
      createArgs.toArray(new String[createArgs.size()]));

  this.console = mock(Logger.class);
  this.command = new CopyCommand(console);
  command.setConf(new Configuration());
}

public void createAvroFile(File file, String codec, boolean useSchemaUrl) throws IOException {
  // serialize a few events using the reflection-based avro serializer
  OutputStream out = new FileOutputStream(file);

  Context ctx = new Context();
  if (codec != null) {
    ctx.put("compressionCodec", codec);
  }

  Schema schema = Schema.createRecord("myrecord", null, null, false);
  schema.setFields(Arrays.asList(
      new Schema.Field("message", Schema.create(Schema.Type.STRING), null, null)));
  GenericRecordBuilder recordBuilder = new GenericRecordBuilder(schema);

  File schemaFile = null;
  if (useSchemaUrl) {
    schemaFile = File.createTempFile(getClass().getSimpleName(), ".avsc");
    Files.write(schema.toString(), schemaFile, Charsets.UTF_8);
  }

  EventSerializer.Builder builder = new AvroEventSerializer.Builder();
  EventSerializer serializer = builder.build(ctx, out);
  serializer.afterCreate();

  for (int i = 0; i < 3; i++) {
    GenericRecord record = recordBuilder.set("message", "Hello " + i).build();
    Event event = EventBuilder.withBody(serializeAvro(record, schema));
    if (schemaFile == null) {
      // pass the schema inline in the event header
      event.getHeaders().put(AvroEventSerializer.AVRO_SCHEMA_LITERAL_HEADER, schema.toString());
    } else {
      // point the event at the schema file by URL
      event.getHeaders().put(AvroEventSerializer.AVRO_SCHEMA_URL_HEADER,
          schemaFile.toURI().toURL().toExternalForm());
    }
    serializer.write(event);
  }
  serializer.flush();
  serializer.beforeClose();
  out.flush();
  out.close();
}

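// A minimal read-back check for the file produced above (an assumption, not part of
// the original test): Avro's DataFileReader can open the container file written by
// the serializer and iterate its records.
// Assumes: org.apache.avro.file.DataFileReader, org.apache.avro.generic.*
public int countRecords(File file) throws IOException {
  try (DataFileReader<GenericRecord> reader =
      new DataFileReader<>(file, new GenericDatumReader<GenericRecord>())) {
    int count = 0;
    while (reader.hasNext()) {
      reader.next();
      count++;
    }
    return count;
  }
}
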
private Schema namespacelessSchemaFor(Class<?> type) {
  return schemaCache.computeIfAbsent(type, clazz -> {
    Schema schema = ReflectData.get().getSchema(clazz);
    // kind of a hack to set an empty namespace :)
    return new Schema.Parser().parse(schema.toString().replace(schema.getNamespace(), ""));
  });
}

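// A sketch of the effect (the Point class and its package are hypothetical):
// ReflectData derives "com.example.Point" as the record's full name, and the string
// replacement above strips the namespace, leaving just "Point".
void namespacelessDemo() {
  Schema schema = namespacelessSchemaFor(com.example.Point.class);
  assert "Point".equals(schema.getFullName()); // no "com.example." prefix
}
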
private void writeObject(java.io.ObjectOutputStream out) throws IOException {
  out.defaultWriteObject();
  if (userDefinedSchema != null) {
    // write the schema's JSON form by hand, using an explicit charset rather
    // than the platform default
    byte[] json = userDefinedSchema.toString().getBytes(StandardCharsets.UTF_8);
    out.writeInt(json.length);
    out.write(json);
  } else {
    // no user-defined schema: record a zero length
    out.writeInt(0);
  }
}

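// For the schema to survive a round trip, a matching readObject counterpart is
// needed; a minimal sketch (assuming the same userDefinedSchema field):
private void readObject(java.io.ObjectInputStream in)
    throws IOException, ClassNotFoundException {
  in.defaultReadObject();
  int length = in.readInt();
  if (length > 0) {
    byte[] json = new byte[length];
    in.readFully(json);
    userDefinedSchema = new Schema.Parser().parse(new String(json, StandardCharsets.UTF_8));
  }
}
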
/**
 * Checks that the {@code existing} {@link DatasetDescriptor} is compatible with {@code test}.
 *
 * @param existing the current {@code DatasetDescriptor} for a dataset
 * @param test a new {@code DatasetDescriptor} for the same dataset
 */
public static void checkCompatible(DatasetDescriptor existing, DatasetDescriptor test) {
  checkNotChanged("format", existing.getFormat(), test.getFormat());
  checkNotChanged("partitioning", existing.isPartitioned(), test.isPartitioned());

  if (existing.isPartitioned()) {
    checkNotChanged("partition strategy",
        existing.getPartitionStrategy(), test.getPartitionStrategy());
  }

  // check that records written with the existing schema can be read using the new schema
  Schema oldSchema = existing.getSchema();
  Schema testSchema = test.getSchema();
  if (!SchemaValidationUtil.canRead(oldSchema, testSchema)) {
    throw new IncompatibleSchemaException(
        "New schema cannot read data written using the existing schema."
            + " New schema: " + testSchema.toString(true)
            + "\nExisting schema: " + oldSchema.toString(true));
  }
}

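// A hedged usage sketch (the schema literals are made up for illustration): adding a
// field with a default keeps the descriptors compatible, so checkCompatible returns
// normally; an unreadable change would throw IncompatibleSchemaException instead.
void compatibilityDemo() {
  Schema v1 = new Schema.Parser().parse(
      "{\"type\":\"record\",\"name\":\"User\",\"fields\":["
          + "{\"name\":\"id\",\"type\":\"long\"}]}");
  Schema v2 = new Schema.Parser().parse(
      "{\"type\":\"record\",\"name\":\"User\",\"fields\":["
          + "{\"name\":\"id\",\"type\":\"long\"},"
          + "{\"name\":\"email\",\"type\":[\"null\",\"string\"],\"default\":null}]}");
  DatasetDescriptor existing = new DatasetDescriptor.Builder().schema(v1).build();
  DatasetDescriptor test = new DatasetDescriptor.Builder().schema(v2).build();
  checkCompatible(existing, test); // passes: v2 can read records written with v1
}
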
public <T> CompletableFuture<PublishingResponse> publish(Schema schema, T message, String topic)
    throws JsonProcessingException, UnsupportedEncodingException {
  PublishingData data =
      new PublishingData(new Record(message), null, null, schema.toString(), null);
  HttpCallback<PublishingResponse> callback = new HttpCallback<>(PublishingResponse.class);
  executePost(
      baseUri.resolve(topic),
      new StringEntity(mapper.writeValueAsString(data), ContentType.APPLICATION_JSON),
      callback);
  return callback;
}

public AvroType(
    Class<T> typeClass,
    Schema schema,
    MapFn inputMapFn,
    MapFn outputMapFn,
    DeepCopier<T> deepCopier,
    PType... ptypes) {
  this.typeClass = typeClass;
  this.schema = Preconditions.checkNotNull(schema);
  // also cache the schema's JSON string representation
  this.schemaString = schema.toString();
  this.baseInputMapFn = inputMapFn;
  this.baseOutputMapFn = outputMapFn;
  this.deepCopier = deepCopier;
  this.subTypes = ImmutableList.<PType>builder().add(ptypes).build();
}

Schema computeAvroSchema() {
  // deduplicate member schemas by their JSON representation before building the union
  Set<String> observedSchemas = new HashSet<>();
  List<Schema> fields = new ArrayList<>();
  for (InferredType it : unionTypes) {
    Schema itS = it.getAvroSchema();
    if (itS == null) {
      continue;
    }
    String schemaDesc = itS.toString();
    if (!observedSchemas.contains(schemaDesc)) {
      observedSchemas.add(schemaDesc);
      fields.add(itS);
    }
  }
  return Schema.createUnion(fields);
}

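// Why the deduplication matters (an illustration, not from the original source):
// Avro rejects a union that contains the same branch twice, so repeated inferred
// types must be filtered out before calling createUnion.
void unionDedupDemo() {
  Schema dup = Schema.create(Schema.Type.STRING);
  // Schema.createUnion(Arrays.asList(dup, dup)); // would throw AvroRuntimeException
  Schema ok = Schema.createUnion(Arrays.asList(
      Schema.create(Schema.Type.STRING), Schema.create(Schema.Type.INT)));
}
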
@Override
public synchronized void onSchemaUpdated(Schema schema) {
  if (!ignoreNextUpdate) {
    try {
      byte[] schemaBuffer = schema.toString().getBytes("UTF-8");
      ByteBuffer buffer = ByteBuffer.wrap(schemaBuffer);
      if (storage != null) {
        storage.saveSchema(buffer);
      }
    } catch (UnsupportedEncodingException ex) {
      LOG.error("Failed to save schema: ", ex);
      throw new SchemaRuntimeException("Failed to save schema");
    }
  } else {
    ignoreNextUpdate = false;
  }
}

@Test
public void testBasicConversionNoErrors() throws IOException {
  TestRunner runner = TestRunners.newTestRunner(ConvertCSVToAvro.class);
  runner.assertNotValid();
  runner.setProperty(ConvertCSVToAvro.SCHEMA, SCHEMA.toString());
  runner.assertValid();

  runner.enqueue(streamFor("1,green\n2,blue,\n3,grey,12.95"));
  runner.run();

  long converted = runner.getCounterValue("Converted records");
  long errors = runner.getCounterValue("Conversion errors");
  Assert.assertEquals("Should convert 3 rows", 3, converted);
  Assert.assertEquals("Should reject 0 rows", 0, errors);

  runner.assertTransferCount("success", 1);
  runner.assertTransferCount("failure", 0);
  runner.assertTransferCount("incompatible", 0);
}

@Test
public void testEmptyContent() throws IOException {
  TestRunner runner = TestRunners.newTestRunner(ConvertCSVToAvro.class);
  runner.assertNotValid();
  runner.setProperty(ConvertCSVToAvro.SCHEMA, SCHEMA.toString());
  runner.assertValid();

  runner.enqueue(streamFor(""));
  runner.run();

  long converted = runner.getCounterValue("Converted records");
  long errors = runner.getCounterValue("Conversion errors");
  Assert.assertEquals("Should convert 0 rows", 0, converted);
  Assert.assertEquals("Should reject 0 rows", 0, errors);

  runner.assertTransferCount("success", 0);
  runner.assertTransferCount("failure", 1);
  runner.assertTransferCount("incompatible", 0);

  // empty input is routed to "failure", not "incompatible"
  MockFlowFile failure = runner.getFlowFilesForRelationship("failure").get(0);
  Assert.assertEquals("Should set an error message",
      "No incoming records", failure.getAttribute("errors"));
}

@Test
public void testAlternateCharset() throws IOException {
  TestRunner runner = TestRunners.newTestRunner(ConvertCSVToAvro.class);
  runner.setProperty(ConvertCSVToAvro.SCHEMA, SCHEMA.toString());
  runner.setProperty(ConvertCSVToAvro.CHARSET, "utf16");
  runner.assertValid();

  runner.enqueue(streamFor(CSV_CONTENT, Charset.forName("UTF-16")));
  runner.run();

  long converted = runner.getCounterValue("Converted records");
  long errors = runner.getCounterValue("Conversion errors");
  Assert.assertEquals("Should convert 2 rows", 2, converted);
  Assert.assertEquals("Should reject 1 row", 1, errors);

  runner.assertTransferCount("success", 1);
  runner.assertTransferCount("failure", 0);
  runner.assertTransferCount("incompatible", 1);

  MockFlowFile incompatible = runner.getFlowFilesForRelationship("incompatible").get(0);
  Assert.assertEquals("Should accumulate error messages",
      FAILURE_SUMMARY, incompatible.getAttribute("errors"));
}

@Before
public void beforeTest() throws IOException, DeltaCalculatorException, ControlServiceException {
  String dataSchema =
      OperationsServiceIT.getResourceAsString(OperationsServiceIT.DATA_SCHEMA_LOCATION);
  PROFILE_BYTES = avroConverter.encode(ENDPOINT_PROFILE);
  PROFILE_JSON = avroConverter.encodeToJson(ENDPOINT_PROFILE);

  // tenant and application
  tenant = new TenantDto();
  tenant.setName(CUSTOMER_ID);
  tenant = userService.saveTenant(tenant);
  assertNotNull(tenant);
  assertNotNull(tenant.getId());

  ApplicationDto applicationDto = new ApplicationDto();
  applicationDto.setTenantId(tenant.getId());
  applicationDto.setApplicationToken(APPLICATION_ID);
  applicationDto.setName(APPLICATION_NAME);
  applicationDto.setSequenceNumber(NEW_APPLICATION_SEQ_NUMBER);
  applicationDto = applicationService.saveApp(applicationDto);
  APP_TOKEN = applicationDto.getApplicationToken();
  assertNotNull(applicationDto);
  assertNotNull(applicationDto.getId());

  application = applicationService.findAppById(applicationDto.getId());
  EndpointGroupDto groupAll =
      endpointService.findEndpointGroupsByAppId(application.getId()).get(0);

  // CTL schema for the endpoint profile
  CTLSchemaDto profileCtlSchema = new CTLSchemaDto();
  CtlSchemaMetaInfoDto metaInfo = new CtlSchemaMetaInfoDto(
      BasicEndpointProfile.SCHEMA$.getFullName(),
      application.getTenantId(),
      application.getId());
  profileCtlSchema.setMetaInfo(metaInfo);
  profileCtlSchema.setBody(BasicEndpointProfile.SCHEMA$.toString());
  profileCtlSchema.setVersion(1);
  profileCtlSchema.setDependencySet(new HashSet<CTLSchemaDto>());
  profileCtlSchema = ctlService.saveCtlSchema(profileCtlSchema);

  // CTL schema for the configuration data
  Schema schema = new Schema.Parser().parse(dataSchema);
  CTLSchemaDto confCtlSchema = new CTLSchemaDto();
  CtlSchemaMetaInfoDto confMetaInfo = new CtlSchemaMetaInfoDto(
      schema.getFullName(),
      application.getTenantId(),
      application.getId());
  confCtlSchema.setMetaInfo(confMetaInfo);
  confCtlSchema.setBody(schema.toString());
  confCtlSchema.setVersion(CONF_SCHEMA_VERSION);
  confCtlSchema.setDependencySet(new HashSet<CTLSchemaDto>());
  confCtlSchema = ctlService.saveCtlSchema(confCtlSchema);

  EndpointProfileSchemaDto profileSchemaObj = new EndpointProfileSchemaDto();
  profileSchemaObj.setVersion(PROFILE_SCHEMA_VERSION);
  profileSchemaObj.setCtlSchemaId(profileCtlSchema.getId());
  profileSchemaObj.setApplicationId(application.getId());
  EndpointProfileSchemaDto profileSchemaDto = profileService.saveProfileSchema(profileSchemaObj);
  profileSchema = profileService.findProfileSchemaById(profileSchemaDto.getId());

  // endpoint group and profile filter
  EndpointGroupDto endpointGroup = new EndpointGroupDto();
  endpointGroup.setApplicationId(application.getId());
  endpointGroup.setName("Test group");
  endpointGroup.setWeight(277);
  endpointGroup.setDescription("Test Description");
  endpointGroup = endpointService.saveEndpointGroup(endpointGroup);

  ProfileFilterDto profileFilterObj = new ProfileFilterDto();
  profileFilterObj.setApplicationId(application.getId());
  profileFilterObj.setEndpointGroupId(endpointGroup.getId());
  profileFilterObj.setBody("profileBody.contains(\"dummy\")");
  profileFilterObj.setEndpointProfileSchemaId(profileSchema.getId());
  profileFilter = profileService.saveProfileFilter(profileFilterObj);
  profileService.activateProfileFilter(profileFilter.getId(), null);

  confSchema = new ConfigurationSchemaDto();
  confSchema.setApplicationId(application.getId());
  confSchema.setVersion(CONF_SCHEMA_VERSION);
  confSchema.setCtlSchemaId(confCtlSchema.getId());
  try {
    confSchema = configurationService.saveConfSchema(confSchema);
  } catch (IncorrectParameterException e) {
    Assert.fail("Can't generate schemas");
  }
  Assert.assertNotNull(confSchema);
  Assert.assertNotNull(confSchema.getId());

  egAllId = groupAll.getId();
  pfAllId = profileFilter.getId();
  ConfigurationDto confDto = configurationService.findConfigurationByEndpointGroupIdAndVersion(
      egAllId, CONF_SCHEMA_VERSION);
  cfAllId = confDto.getId();

  // endpoint configuration and profile
  endpointConfiguration = new EndpointConfigurationDto();
  endpointConfiguration.setConfiguration(confDto.getBody().getBytes(UTF_8));
  endpointConfiguration.setConfigurationHash(
      EndpointObjectHash.fromSha1(confDto.getBody()).getData());
  endpointConfiguration = endpointService.saveEndpointConfiguration(endpointConfiguration);
  assertNotNull(endpointConfiguration);

  EndpointGroupStateDto egs = new EndpointGroupStateDto();
  egs.setConfigurationId(cfAllId);
  egs.setEndpointGroupId(egAllId);
  egs.setProfileFilterId(pfAllId);

  endpointProfile = new EndpointProfileDto();
  endpointProfile.setApplicationId(application.getId());
  endpointProfile.setEndpointKeyHash(Base64Utils.decodeFromString("EndpointId"));
  endpointProfile.setClientProfileBody(PROFILE_JSON);
  endpointProfile.setProfileHash(EndpointObjectHash.fromSha1(PROFILE_BYTES).getData());
  endpointProfile.setConfigurationHash(endpointConfiguration.getConfigurationHash());
  endpointProfile.setConfigurationVersion(CONF_SCHEMA_VERSION);
  endpointProfile.setClientProfileVersion(PROFILE_VERSION);
  endpointProfile.setGroupState(Collections.singletonList(egs));
  endpointProfile = endpointService.saveEndpointProfile(endpointProfile);
  assertNotNull(endpointProfile);
  assertNotNull(endpointProfile.getId());
}

@Override
public <E> Dataset<E> update(String name, DatasetDescriptor descriptor) {
  Preconditions.checkArgument(name != null, "Dataset name cannot be null");
  Preconditions.checkArgument(descriptor != null, "DatasetDescriptor cannot be null");

  DatasetDescriptor oldDescriptor = metadataProvider.load(name);
  // oldDescriptor is valid if load didn't throw NoSuchDatasetException

  if (!oldDescriptor.getFormat().equals(descriptor.getFormat())) {
    throw new DatasetRepositoryException(
        "Cannot change dataset format from " + oldDescriptor.getFormat()
            + " to " + descriptor.getFormat());
  }

  final URI oldLocation = oldDescriptor.getLocation();
  if ((oldLocation != null) && !(oldLocation.equals(descriptor.getLocation()))) {
    throw new DatasetRepositoryException("Cannot change the dataset's location");
  }

  if (oldDescriptor.isPartitioned() != descriptor.isPartitioned()) {
    throw new DatasetRepositoryException(
        "Cannot change an unpartitioned dataset to partitioned or vice versa.");
  } else if (oldDescriptor.isPartitioned() && descriptor.isPartitioned()
      && !oldDescriptor.getPartitionStrategy().equals(descriptor.getPartitionStrategy())) {
    throw new DatasetRepositoryException(
        "Cannot change partition strategy from " + oldDescriptor.getPartitionStrategy()
            + " to " + descriptor.getPartitionStrategy());
  }

  // check that records written with the old schema can be read using the new schema
  final Schema oldSchema = oldDescriptor.getSchema();
  final Schema newSchema = descriptor.getSchema();
  if (!SchemaValidationUtil.canRead(oldSchema, newSchema)) {
    throw new IncompatibleSchemaException(
        "New schema cannot read data written using old schema."
            + " New schema: " + newSchema.toString(true)
            + "\nOld schema: " + oldSchema.toString(true));
  }

  DatasetDescriptor updatedDescriptor = metadataProvider.update(name, descriptor);
  updatedDescriptor = addRepositoryUri(updatedDescriptor);

  logger.debug("Updated dataset:{} schema:{} datasetPath:{}",
      new Object[] {name, updatedDescriptor.getSchema(),
          updatedDescriptor.getLocation().toString()});

  return new FileSystemDataset.Builder()
      .name(name)
      .configuration(conf)
      .descriptor(updatedDescriptor)
      .partitionKey(updatedDescriptor.isPartitioned()
          ? org.kitesdk.data.impl.Accessor.getDefault().newPartitionKey()
          : null)
      .partitionListener(getPartitionListener())
      .build();
}

public AvroDeepCopier(Schema schema) {
  // hold the schema in its JSON string form
  this.jsonSchema = schema.toString();
}

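// A plausible counterpart (a sketch, not taken from the original class): the Schema
// can be rebuilt lazily from the stored JSON, since the String field survives Java
// serialization even if the parsed Schema object would not.
private transient Schema schema;

protected Schema getSchema() {
  if (schema == null) {
    schema = new Schema.Parser().parse(jsonSchema);
  }
  return schema;
}
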
@Override
public BaseSchema createSchema(Schema schema) {
  return getSchemaFactory().createBaseSchema(schema.toString());
}