// Get the schema for the Avro Record from the object container file
  public String getRecordSchema() throws IOException {
    Schema schema = AvroUtils.getAvroSchemaFromPath(getInputPath());

    String recSchema = schema.toString();

    return recSchema;
  }
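The JSON string returned here round-trips through Avro's parser; a minimal sketch (the call site is an assumption, not part of the source):

  // Re-parse the JSON produced by getRecordSchema() back into a Schema object.
  String json = getRecordSchema();
  Schema parsed = new Schema.Parser().parse(json);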
Example #2
  @Test
  public void testBasicConversion() throws IOException {
    TestRunner runner = TestRunners.newTestRunner(ConvertCSVToAvro.class);
    runner.assertNotValid();
    runner.setProperty(ConvertCSVToAvro.SCHEMA, SCHEMA.toString());
    runner.assertValid();

    runner.enqueue(streamFor(CSV_CONTENT));
    runner.run();

    long converted = runner.getCounterValue("Converted records");
    long errors = runner.getCounterValue("Conversion errors");
    Assert.assertEquals("Should convert 2 rows", 2, converted);
    Assert.assertEquals("Should reject 1 row", 1, errors);

    runner.assertTransferCount("success", 1);
    runner.assertTransferCount("failure", 0);
    runner.assertTransferCount("incompatible", 1);

    MockFlowFile incompatible = runner.getFlowFilesForRelationship("incompatible").get(0);
    String failureContent =
        new String(runner.getContentAsByteArray(incompatible), StandardCharsets.UTF_8);
    Assert.assertEquals("Should reject an invalid string and double", CSV_CONTENT, failureContent);
    Assert.assertEquals(
        "Should accumulate error messages", FAILURE_SUMMARY, incompatible.getAttribute("errors"));
  }
Example #3
  @Override
  public void createDestination() throws Exception {
    FileInputStream schemaIn = new FileInputStream(avsc);
    Schema original = new Schema.Parser().parse(schemaIn);
    schemaIn.close();

    Schema evolved = getEvolvedSchema(original);

    FileOutputStream schemaOut = new FileOutputStream(evolvedAvsc);
    schemaOut.write(evolved.toString(true).getBytes());
    schemaOut.close();

    List<String> createArgs =
        Lists.newArrayList("create", dest, "-s", evolvedAvsc, "-r", repoUri, "-d", "target/data");
    createArgs.addAll(getExtraCreateArgs());

    TestUtil.run(
        LoggerFactory.getLogger(this.getClass()),
        "delete",
        dest,
        "-r",
        repoUri,
        "-d",
        "target/data");
    TestUtil.run(
        LoggerFactory.getLogger(this.getClass()),
        createArgs.toArray(new String[createArgs.size()]));
    this.console = mock(Logger.class);
    this.command = new CopyCommand(console);
    command.setConf(new Configuration());
  }
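The parse/close sequence at the top of createDestination() can be written with try-with-resources so the stream is closed even if parsing throws; a behavior-equivalent sketch:

    // Parse the original .avsc, closing the stream on all paths.
    Schema original;
    try (FileInputStream schemaIn = new FileInputStream(avsc)) {
      original = new Schema.Parser().parse(schemaIn);
    }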
Example #4
  public void createAvroFile(File file, String codec, boolean useSchemaUrl) throws IOException {

    // serialize a few events using the reflection-based avro serializer
    OutputStream out = new FileOutputStream(file);

    Context ctx = new Context();
    if (codec != null) {
      ctx.put("compressionCodec", codec);
    }

    Schema schema = Schema.createRecord("myrecord", null, null, false);
    schema.setFields(
        Arrays.asList(
            new Schema.Field[] {
              new Schema.Field("message", Schema.create(Schema.Type.STRING), null, null)
            }));
    GenericRecordBuilder recordBuilder = new GenericRecordBuilder(schema);
    File schemaFile = null;
    if (useSchemaUrl) {
      schemaFile = File.createTempFile(getClass().getSimpleName(), ".avsc");
      Files.write(schema.toString(), schemaFile, Charsets.UTF_8);
    }

    EventSerializer.Builder builder = new AvroEventSerializer.Builder();
    EventSerializer serializer = builder.build(ctx, out);

    serializer.afterCreate();
    for (int i = 0; i < 3; i++) {
      GenericRecord record = recordBuilder.set("message", "Hello " + i).build();
      Event event = EventBuilder.withBody(serializeAvro(record, schema));
      if (schemaFile == null) {
        event.getHeaders().put(AvroEventSerializer.AVRO_SCHEMA_LITERAL_HEADER, schema.toString());
      } else {
        event
            .getHeaders()
            .put(
                AvroEventSerializer.AVRO_SCHEMA_URL_HEADER,
                schemaFile.toURI().toURL().toExternalForm());
      }
      serializer.write(event);
    }
    serializer.flush();
    serializer.beforeClose();
    out.flush();
    out.close();
  }
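To verify the output, the container file can be read back with Avro's DataFileReader; a minimal sketch, assuming the file written above:

    // Read the container file back; GenericDatumReader resolves the writer
    // schema embedded in the file automatically.
    try (DataFileReader<GenericRecord> reader =
        new DataFileReader<>(file, new GenericDatumReader<GenericRecord>())) {
      for (GenericRecord record : reader) {
        System.out.println(record.get("message"));
      }
    }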
Example #5
 private Schema namespacelessSchemaFor(Class<?> type) {
   return schemaCache.computeIfAbsent(
       type,
       clazz -> {
         Schema schema = ReflectData.get().getSchema(clazz);
         // kind of a hack to set an empty namespace :)
         return new Schema.Parser().parse(schema.toString().replace(schema.getNamespace(), ""));
       });
 }
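One caveat: String.replace() throws a NullPointerException when the reflected schema has no namespace. A slightly safer variant (a sketch, not from the source):

   // Only strip the namespace if there is one to strip.
   Schema schema = ReflectData.get().getSchema(clazz);
   String ns = schema.getNamespace();
   return ns == null
       ? schema
       : new Schema.Parser().parse(schema.toString().replace(ns, ""));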
Example #6
  private void writeObject(java.io.ObjectOutputStream out) throws IOException {
    out.defaultWriteObject();

    if (userDefinedSchema != null) {
      byte[] json = userDefinedSchema.toString().getBytes();
      out.writeInt(json.length);
      out.write(json);
    } else {
      out.writeInt(0);
    }
  }
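A sketch of the matching readObject() counterpart (an assumption; the rest of the class is not shown). The writer calls getBytes() with the platform-default charset, so the reader mirrors that; pinning UTF-8 on both sides would make the round trip deterministic:

  private void readObject(java.io.ObjectInputStream in)
      throws IOException, ClassNotFoundException {
    in.defaultReadObject();

    int length = in.readInt();
    if (length > 0) {
      // Restore userDefinedSchema from the serialized JSON; 0 means none was written.
      byte[] json = new byte[length];
      in.readFully(json);
      userDefinedSchema = new Schema.Parser().parse(new String(json));
    }
  }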
Example #7
  /**
   * Checks that the {@code existing} {@link DatasetDescriptor} is compatible with {@code test}.
   *
   * @param existing the current {@code DatasetDescriptor} for a dataset
   * @param test a new {@code DatasetDescriptor} for the same dataset
   */
  public static void checkCompatible(DatasetDescriptor existing, DatasetDescriptor test) {
    checkNotChanged("format", existing.getFormat(), test.getFormat());

    checkNotChanged("partitioning", existing.isPartitioned(), test.isPartitioned());

    if (existing.isPartitioned()) {
      checkNotChanged(
          "partition strategy", existing.getPartitionStrategy(), test.getPartitionStrategy());
    }

    // check can read records written with old schema using new schema
    Schema oldSchema = existing.getSchema();
    Schema testSchema = test.getSchema();
    if (!SchemaValidationUtil.canRead(oldSchema, testSchema)) {
      throw new IncompatibleSchemaException(
          "Schema cannot read data "
              + "written using existing schema. Schema: "
              + testSchema.toString(true)
              + "\nExisting schema: "
              + oldSchema.toString(true));
    }
  }
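The same can-read check is available through Avro's public SchemaValidatorBuilder API (Avro 1.7.2+), as an alternative to the internal SchemaValidationUtil; a minimal sketch for a method declared to throw SchemaValidationException:

    // Validates that testSchema can read data written with oldSchema; the
    // direction matches SchemaValidationUtil.canRead(oldSchema, testSchema).
    SchemaValidator validator = new SchemaValidatorBuilder().canReadStrategy().validateLatest();
    validator.validate(testSchema, Collections.singletonList(oldSchema));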
Example #8
  public <T> CompletableFuture<PublishingResponse> publish(Schema schema, T message, String topic)
      throws JsonProcessingException, UnsupportedEncodingException {

    PublishingData data =
        new PublishingData(new Record(message), null, null, schema.toString(), null);
    HttpCallback<PublishingResponse> callback = new HttpCallback<>(PublishingResponse.class);

    executePost(
        baseUri.resolve(topic),
        new StringEntity(mapper.writeValueAsString(data), ContentType.APPLICATION_JSON),
        callback);

    return callback;
  }
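A hypothetical call site (client, orderSchema, order, and the topic name are assumptions, not from the source); the HttpCallback returned doubles as the CompletableFuture:

    // Blocks until the POST completes and the response is deserialized.
    CompletableFuture<PublishingResponse> pending = client.publish(orderSchema, order, "orders-topic");
    PublishingResponse response = pending.join();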
Example #9
File: AvroType.java Project: poses/crunch
 public AvroType(
     Class<T> typeClass,
     Schema schema,
     MapFn inputMapFn,
     MapFn outputMapFn,
     DeepCopier<T> deepCopier,
     PType... ptypes) {
   this.typeClass = typeClass;
   this.schema = Preconditions.checkNotNull(schema);
   this.schemaString = schema.toString();
   this.baseInputMapFn = inputMapFn;
   this.baseOutputMapFn = outputMapFn;
   this.deepCopier = deepCopier;
   this.subTypes = ImmutableList.<PType>builder().add(ptypes).build();
 }
Example #10
 Schema computeAvroSchema() {
   HashSet<String> observedSchemas = new HashSet<String>();
   List<Schema> fields = new ArrayList<Schema>();
   for (InferredType it : unionTypes) {
     Schema itS = it.getAvroSchema();
     if (itS == null) {
       continue;
     }
     String schemaDesc = itS.toString();
     if (!observedSchemas.contains(schemaDesc)) {
       observedSchemas.add(schemaDesc);
       fields.add(it.getAvroSchema());
     }
   }
   return Schema.createUnion(fields);
 }
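Since org.apache.avro.Schema implements structural equals() and hashCode(), the string-based dedup above can be simplified; a behavior-equivalent sketch:

   // Dedup union branches with a LinkedHashSet, preserving discovery order.
   Set<Schema> branches = new LinkedHashSet<Schema>();
   for (InferredType it : unionTypes) {
     Schema s = it.getAvroSchema();
     if (s != null) {
       branches.add(s);
     }
   }
   return Schema.createUnion(new ArrayList<Schema>(branches));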
Example #11
 @Override
 public synchronized void onSchemaUpdated(Schema schema) {
   if (!ignoreNextUpdate) {
     try {
       byte[] schemaBuffer = schema.toString().getBytes("UTF-8");
       ByteBuffer buffer = ByteBuffer.wrap(schemaBuffer);
       if (storage != null) {
         storage.saveSchema(buffer);
       }
     } catch (UnsupportedEncodingException ex) {
       LOG.error("Failed to save schema: ", ex);
       throw new SchemaRuntimeException("Failed to save schema");
     }
   } else {
     ignoreNextUpdate = false;
   }
 }
Example #12
  @Test
  public void testBasicConversionNoErrors() throws IOException {
    TestRunner runner = TestRunners.newTestRunner(ConvertCSVToAvro.class);
    runner.assertNotValid();
    runner.setProperty(ConvertCSVToAvro.SCHEMA, SCHEMA.toString());
    runner.assertValid();

    runner.enqueue(streamFor("1,green\n2,blue,\n3,grey,12.95"));
    runner.run();

    long converted = runner.getCounterValue("Converted records");
    long errors = runner.getCounterValue("Conversion errors");
    Assert.assertEquals("Should convert 3 rows", 3, converted);
    Assert.assertEquals("Should reject 0 row", 0, errors);

    runner.assertTransferCount("success", 1);
    runner.assertTransferCount("failure", 0);
    runner.assertTransferCount("incompatible", 0);
  }
Example #13
  @Test
  public void testEmptyContent() throws IOException {
    TestRunner runner = TestRunners.newTestRunner(ConvertCSVToAvro.class);
    runner.assertNotValid();
    runner.setProperty(ConvertCSVToAvro.SCHEMA, SCHEMA.toString());
    runner.assertValid();

    runner.enqueue(streamFor(""));
    runner.run();

    long converted = runner.getCounterValue("Converted records");
    long errors = runner.getCounterValue("Conversion errors");
    Assert.assertEquals("Should convert 0 rows", 0, converted);
    Assert.assertEquals("Should reject 0 row", 0, errors);

    runner.assertTransferCount("success", 0);
    runner.assertTransferCount("failure", 1);
    runner.assertTransferCount("incompatible", 0);

    MockFlowFile incompatible = runner.getFlowFilesForRelationship("failure").get(0);
    Assert.assertEquals(
        "Should set an error message", "No incoming records", incompatible.getAttribute("errors"));
  }
Example #14
  @Test
  public void testAlternateCharset() throws IOException {
    TestRunner runner = TestRunners.newTestRunner(ConvertCSVToAvro.class);
    runner.setProperty(ConvertCSVToAvro.SCHEMA, SCHEMA.toString());
    runner.setProperty(ConvertCSVToAvro.CHARSET, "utf16");
    runner.assertValid();

    runner.enqueue(streamFor(CSV_CONTENT, Charset.forName("UTF-16")));
    runner.run();

    long converted = runner.getCounterValue("Converted records");
    long errors = runner.getCounterValue("Conversion errors");
    Assert.assertEquals("Should convert 2 rows", 2, converted);
    Assert.assertEquals("Should reject 1 row", 1, errors);

    runner.assertTransferCount("success", 1);
    runner.assertTransferCount("failure", 0);
    runner.assertTransferCount("incompatible", 1);

    MockFlowFile incompatible = runner.getFlowFilesForRelationship("incompatible").get(0);
    Assert.assertEquals(
        "Should accumulate error messages", FAILURE_SUMMARY, incompatible.getAttribute("errors"));
  }
Example #15
  @Before
  public void beforeTest() throws IOException, DeltaCalculatorException, ControlServiceException {
    String dataSchema =
        OperationsServiceIT.getResourceAsString(OperationsServiceIT.DATA_SCHEMA_LOCATION);
    PROFILE_BYTES = avroConverter.encode(ENDPOINT_PROFILE);
    PROFILE_JSON = avroConverter.encodeToJson(ENDPOINT_PROFILE);

    tenant = new TenantDto();
    tenant.setName(CUSTOMER_ID);
    tenant = userService.saveTenant(tenant);
    assertNotNull(tenant);
    assertNotNull(tenant.getId());

    ApplicationDto applicationDto = new ApplicationDto();
    applicationDto.setTenantId(tenant.getId());
    applicationDto.setApplicationToken(APPLICATION_ID);
    applicationDto.setName(APPLICATION_NAME);
    applicationDto.setSequenceNumber(NEW_APPLICATION_SEQ_NUMBER);
    applicationDto = applicationService.saveApp(applicationDto);
    APP_TOKEN = applicationDto.getApplicationToken();
    assertNotNull(applicationDto);
    assertNotNull(applicationDto.getId());

    application = applicationService.findAppById(applicationDto.getId());

    EndpointGroupDto groupAll =
        endpointService.findEndpointGroupsByAppId(application.getId()).get(0);

    CTLSchemaDto profileCtlSchema = new CTLSchemaDto();
    CtlSchemaMetaInfoDto metaInfo =
        new CtlSchemaMetaInfoDto(
            BasicEndpointProfile.SCHEMA$.getFullName(),
            application.getTenantId(),
            application.getId());
    profileCtlSchema.setMetaInfo(metaInfo);
    profileCtlSchema.setBody(BasicEndpointProfile.SCHEMA$.toString());
    profileCtlSchema.setVersion(1);
    profileCtlSchema.setDependencySet(new HashSet<CTLSchemaDto>());
    profileCtlSchema = ctlService.saveCtlSchema(profileCtlSchema);

    Schema schema = new Schema.Parser().parse(dataSchema);
    CTLSchemaDto confCtlSchema = new CTLSchemaDto();
    CtlSchemaMetaInfoDto confMetaInfo =
        new CtlSchemaMetaInfoDto(
            schema.getFullName(), application.getTenantId(), application.getId());
    confCtlSchema.setMetaInfo(confMetaInfo);
    confCtlSchema.setBody(schema.toString());
    confCtlSchema.setVersion(CONF_SCHEMA_VERSION);
    confCtlSchema.setDependencySet(new HashSet<CTLSchemaDto>());
    confCtlSchema = ctlService.saveCtlSchema(confCtlSchema);

    EndpointProfileSchemaDto profileSchemaObj = new EndpointProfileSchemaDto();
    profileSchemaObj.setVersion(PROFILE_SCHEMA_VERSION);
    profileSchemaObj.setCtlSchemaId(profileCtlSchema.getId());
    profileSchemaObj.setApplicationId(application.getId());
    EndpointProfileSchemaDto profileSchemaDto = profileService.saveProfileSchema(profileSchemaObj);

    profileSchema = profileService.findProfileSchemaById(profileSchemaDto.getId());

    EndpointGroupDto endpointGroup = new EndpointGroupDto();
    endpointGroup.setApplicationId(application.getId());
    endpointGroup.setName("Test group");
    endpointGroup.setWeight(277);
    endpointGroup.setDescription("Test Description");
    endpointGroup = endpointService.saveEndpointGroup(endpointGroup);

    ProfileFilterDto profileFilterObj = new ProfileFilterDto();
    profileFilterObj.setApplicationId(application.getId());
    profileFilterObj.setEndpointGroupId(endpointGroup.getId());
    profileFilterObj.setBody("profileBody.contains(\"dummy\")");
    profileFilterObj.setEndpointProfileSchemaId(profileSchema.getId());
    profileFilter = profileService.saveProfileFilter(profileFilterObj);
    profileService.activateProfileFilter(profileFilter.getId(), null);

    confSchema = new ConfigurationSchemaDto();
    confSchema.setApplicationId(application.getId());
    confSchema.setVersion(CONF_SCHEMA_VERSION);
    confSchema.setCtlSchemaId(confCtlSchema.getId());

    try {
      confSchema = configurationService.saveConfSchema(confSchema);
    } catch (IncorrectParameterException e) {
      Assert.fail("Can't generate schemas");
    }
    Assert.assertNotNull(confSchema);
    Assert.assertNotNull(confSchema.getId());

    egAllId = groupAll.getId();
    pfAllId = profileFilter.getId();
    ConfigurationDto confDto =
        configurationService.findConfigurationByEndpointGroupIdAndVersion(
            egAllId, CONF_SCHEMA_VERSION);
    cfAllId = confDto.getId();

    endpointConfiguration = new EndpointConfigurationDto();
    endpointConfiguration.setConfiguration(confDto.getBody().getBytes(UTF_8));
    endpointConfiguration.setConfigurationHash(
        EndpointObjectHash.fromSha1(confDto.getBody()).getData());
    endpointConfiguration = endpointService.saveEndpointConfiguration(endpointConfiguration);
    assertNotNull(endpointConfiguration);

    EndpointGroupStateDto egs = new EndpointGroupStateDto();
    egs.setConfigurationId(cfAllId);
    egs.setEndpointGroupId(egAllId);
    egs.setProfileFilterId(pfAllId);

    endpointProfile = new EndpointProfileDto();
    endpointProfile.setApplicationId(application.getId());
    endpointProfile.setEndpointKeyHash(Base64Utils.decodeFromString("EndpointId"));
    endpointProfile.setClientProfileBody(PROFILE_JSON);
    endpointProfile.setProfileHash(EndpointObjectHash.fromSha1(PROFILE_BYTES).getData());
    endpointProfile.setConfigurationHash(endpointConfiguration.getConfigurationHash());
    endpointProfile.setConfigurationVersion(CONF_SCHEMA_VERSION);
    endpointProfile.setClientProfileVersion(PROFILE_VERSION);
    endpointProfile.setGroupState(Collections.singletonList(egs));
    endpointProfile = endpointService.saveEndpointProfile(endpointProfile);
    assertNotNull(endpointProfile);
    assertNotNull(endpointProfile.getId());
  }
Example #16
  @Override
  public <E> Dataset<E> update(String name, DatasetDescriptor descriptor) {
    Preconditions.checkArgument(name != null, "Dataset name cannot be null");
    Preconditions.checkArgument(descriptor != null, "DatasetDescriptor cannot be null");

    DatasetDescriptor oldDescriptor = metadataProvider.load(name);

    // oldDescriptor is valid if load didn't throw NoSuchDatasetException

    if (!oldDescriptor.getFormat().equals(descriptor.getFormat())) {
      throw new DatasetRepositoryException(
          "Cannot change dataset format from "
              + oldDescriptor.getFormat()
              + " to "
              + descriptor.getFormat());
    }

    final URI oldLocation = oldDescriptor.getLocation();
    if ((oldLocation != null) && !(oldLocation.equals(descriptor.getLocation()))) {
      throw new DatasetRepositoryException("Cannot change the dataset's location");
    }

    if (oldDescriptor.isPartitioned() != descriptor.isPartitioned()) {
      throw new DatasetRepositoryException(
          "Cannot change an unpartitioned dataset to partitioned or vice versa.");
    } else if (oldDescriptor.isPartitioned()
        && descriptor.isPartitioned()
        && !oldDescriptor.getPartitionStrategy().equals(descriptor.getPartitionStrategy())) {
      throw new DatasetRepositoryException(
          "Cannot change partition strategy from "
              + oldDescriptor.getPartitionStrategy()
              + " to "
              + descriptor.getPartitionStrategy());
    }

    // check can read records written with old schema using new schema
    final Schema oldSchema = oldDescriptor.getSchema();
    final Schema newSchema = descriptor.getSchema();
    if (!SchemaValidationUtil.canRead(oldSchema, newSchema)) {
      throw new IncompatibleSchemaException(
          "New schema cannot read data "
              + "written using "
              + "old schema. New schema: "
              + newSchema.toString(true)
              + "\nOld schema: "
              + oldSchema.toString(true));
    }

    DatasetDescriptor updatedDescriptor = metadataProvider.update(name, descriptor);
    updatedDescriptor = addRepositoryUri(updatedDescriptor);

    logger.debug(
        "Updated dataset:{} schema:{} datasetPath:{}",
        new Object[] {
          name, updatedDescriptor.getSchema(), updatedDescriptor.getLocation().toString()
        });

    return new FileSystemDataset.Builder()
        .name(name)
        .configuration(conf)
        .descriptor(updatedDescriptor)
        .partitionKey(
            updatedDescriptor.isPartitioned()
                ? org.kitesdk.data.impl.Accessor.getDefault().newPartitionKey()
                : null)
        .partitionListener(getPartitionListener())
        .build();
  }
Example #17
 public AvroDeepCopier(Schema schema) {
   this.jsonSchema = schema.toString();
 }
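Keeping the schema as its JSON string makes the copier Java-serializable even on Avro versions where Schema itself is not Serializable. A sketch of the lazy re-parse this pattern usually pairs with (hypothetical; the rest of the class is not shown):

   // Schema is rebuilt from jsonSchema on first use after deserialization.
   private transient Schema schema;

   protected Schema getSchema() {
     if (schema == null) {
       schema = new Schema.Parser().parse(jsonSchema);
     }
     return schema;
   }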
Example #18
 @Override
 public BaseSchema createSchema(Schema schema) {
   return getSchemaFactory().createBaseSchema(schema.toString());
 }