/**
  * Wraps an Avro schema in a union with the null schema when requested. For instance, schema
  * "int" becomes ["null", "int"].
  *
  * <p>The schema is returned untouched when {@code nullable} is false, or when it is already a
  * union that {@code isAcceptableUnion} accepts.
  *
  * @param schema the Avro schema to wrap
  * @param nullable whether wrapping should be attempted at all
  * @return {@code schema} itself, or a new union of {@code NullSchema} and {@code schema}
  */
 public static Schema wrapAsUnion(Schema schema, boolean nullable) {
   if (!nullable) {
     // Nothing to wrap when nullability was not requested.
     return schema;
   }

   boolean alreadyAcceptable =
       schema.getType().equals(Schema.Type.UNION) && isAcceptableUnion(schema);
   if (alreadyAcceptable) {
     // An acceptable union is used as-is rather than being wrapped again.
     return schema;
   }

   return Schema.createUnion(Arrays.asList(NullSchema, schema));
 }
Exemple #2
0
 /**
  * Returns the Avro schema used to represent the given field type.
  *
  * <p>Scalar types map to Avro primitives: STRING to string, BINARY to bytes, DOUBLE and DECIMAL
  * to double, BOOLEAN to boolean, DATE, TIME and INTEGER to int, LONG and TIMESTAMP to long. A
  * type reporting {@code isMap()} or {@code isArray()} becomes an Avro map or array whose values
  * are a union of null and the schema of the contained type.
  *
  * @param type the field type to translate
  * @return the corresponding Avro schema
  * @throws IllegalStateException if the type is not a handled scalar, a map, or an array
  */
 public static Schema getAvroSchema(FieldType type) {
   switch (type) {
     case STRING:
       return Schema.create(Schema.Type.STRING);
     case BINARY:
       return Schema.create(Schema.Type.BYTES);
     case DOUBLE:
     case DECIMAL:
       return Schema.create(Schema.Type.DOUBLE);
     case BOOLEAN:
       return Schema.create(Schema.Type.BOOLEAN);
     case DATE:
     case TIME:
     case INTEGER:
       return Schema.create(Schema.Type.INT);
     case LONG:
     case TIMESTAMP:
       return Schema.create(Schema.Type.LONG);
     default:
       // Types without a case label above are only supported when they are containers.
       if (type.isMap()) {
         Schema nullableValue =
             Schema.createUnion(
                 ImmutableList.of(
                     Schema.create(Schema.Type.NULL), getAvroSchema(type.getMapValueType())));
         return Schema.createMap(nullableValue);
       }
       if (type.isArray()) {
         Schema nullableElement =
             Schema.createUnion(
                 ImmutableList.of(
                     Schema.create(Schema.Type.NULL), getAvroSchema(type.getArrayElementType())));
         return Schema.createArray(nullableElement);
       }
       // Name the offending type so the failure can be diagnosed from the stack trace alone.
       throw new IllegalStateException("Unsupported field type: " + type);
   }
 }
 Schema computeAvroSchema() {
   HashSet<String> observedSchemas = new HashSet<String>();
   List<Schema> fields = new ArrayList<Schema>();
   for (InferredType it : unionTypes) {
     Schema itS = it.getAvroSchema();
     if (itS == null) {
       continue;
     }
     String schemaDesc = itS.toString();
     if (!observedSchemas.contains(schemaDesc)) {
       observedSchemas.add(schemaDesc);
       fields.add(it.getAvroSchema());
     }
   }
   return Schema.createUnion(fields);
 }
    static {
      Schemas.STRING_LANG.setFields(
          ImmutableList.<Schema.Field>of(
              new Schema.Field("label", Schemas.STRING, null, null),
              new Schema.Field("language", Schemas.STRING, null, null)));
      Schemas.SHORT.setFields(
          ImmutableList.<Schema.Field>of(new Schema.Field("short", Schemas.INT, null, null)));
      Schemas.BYTE.setFields(
          ImmutableList.<Schema.Field>of(new Schema.Field("byte", Schemas.INT, null, null)));
      Schemas.BIGINTEGER.setFields(
          ImmutableList.<Schema.Field>of(
              new Schema.Field("biginteger", Schemas.STRING, null, null)));
      Schemas.BIGDECIMAL.setFields(
          ImmutableList.<Schema.Field>of(
              new Schema.Field("bigdecimal", Schemas.STRING, null, null)));
      Schemas.PLAIN_IDENTIFIER.setFields(
          ImmutableList.<Schema.Field>of(
              new Schema.Field("identifier", Schemas.STRING, null, null)));
      Schemas.COMPRESSED_IDENTIFIER.setFields(
          ImmutableList.<Schema.Field>of(new Schema.Field("identifier", Schemas.INT, null, null)));
      Schemas.CALENDAR.setFields(
          ImmutableList.<Schema.Field>of(
              new Schema.Field("timezone", Schemas.INT, null, null),
              new Schema.Field("timestamp", Schemas.LONG, null, null)));

      Schemas.STATEMENT.setFields(
          ImmutableList.<Schema.Field>of(
              new Schema.Field("subject", Schemas.IDENTIFIER, null, null),
              new Schema.Field("predicate", Schemas.IDENTIFIER, null, null),
              new Schema.Field(
                  "object",
                  Schema.createUnion(
                      ImmutableList.<Schema>of(
                          Schemas.BOOLEAN,
                          Schemas.STRING,
                          Schemas.STRING_LANG,
                          Schemas.LONG,
                          Schemas.INT,
                          Schemas.SHORT,
                          Schemas.BYTE,
                          Schemas.DOUBLE,
                          Schemas.FLOAT,
                          Schemas.BIGINTEGER,
                          Schemas.BIGDECIMAL,
                          Schemas.CALENDAR,
                          Schemas.PLAIN_IDENTIFIER,
                          Schemas.COMPRESSED_IDENTIFIER)),
                  null,
                  null), //
              new Schema.Field("context", Schemas.IDENTIFIER, null, null)));

      Schemas.PROPERTY.setFields(
          ImmutableList.<Schema.Field>of(
              new Schema.Field("propertyURI", Schemas.COMPRESSED_IDENTIFIER, null, null),
              new Schema.Field(
                  "propertyValue",
                  Schema.createUnion(
                      ImmutableList.<Schema>of(
                          Schemas.BOOLEAN,
                          Schemas.STRING,
                          Schemas.STRING_LANG,
                          Schemas.LONG,
                          Schemas.INT,
                          Schemas.SHORT,
                          Schemas.BYTE,
                          Schemas.DOUBLE,
                          Schemas.FLOAT,
                          Schemas.BIGINTEGER,
                          Schemas.BIGDECIMAL,
                          Schemas.CALENDAR,
                          Schemas.PLAIN_IDENTIFIER,
                          Schemas.COMPRESSED_IDENTIFIER,
                          Schemas.STATEMENT,
                          Schemas.RECORD,
                          Schemas.LIST)),
                  null,
                  null)));

      Schemas.RECORD.setFields(
          ImmutableList.<Schema.Field>of(
              new Schema.Field(
                  "id",
                  Schema.createUnion(
                      ImmutableList.<Schema>of(
                          Schemas.NULL, Schemas.PLAIN_IDENTIFIER, Schemas.COMPRESSED_IDENTIFIER)),
                  null,
                  null), //
              new Schema.Field("properties", Schema.createArray(Schemas.PROPERTY), null, null)));
    }
  /**
   * Holder of the AVRO schemas used for KS data. Record schemas are created empty by the field
   * initializers and receive their fields in the static initializer at the end of the class, so
   * that schemas may reference one another.
   */
  private static final class Schemas {

    /** Namespace under which every KS-specific named AVRO schema is created. */
    public static final String NAMESPACE = "eu.fbk.knowledgestore";

    /** AVRO NULL schema. */
    public static final Schema NULL = Schema.create(Schema.Type.NULL);

    /** AVRO schema used for boolean literals. */
    public static final Schema BOOLEAN = Schema.create(Schema.Type.BOOLEAN);

    /** AVRO schema used for string literals. */
    public static final Schema STRING = Schema.create(Schema.Type.STRING);

    /** AVRO record schema used for string literals carrying a language. */
    public static final Schema STRING_LANG = newRecord("stringlang");

    /** AVRO schema used for long literals. */
    public static final Schema LONG = Schema.create(Schema.Type.LONG);

    /** AVRO schema used for int literals. */
    public static final Schema INT = Schema.create(Schema.Type.INT);

    /** AVRO record schema used for short literals. */
    public static final Schema SHORT = newRecord("short");

    /** AVRO record schema used for byte literals. */
    public static final Schema BYTE = newRecord("byte");

    /** AVRO schema used for double literals. */
    public static final Schema DOUBLE = Schema.create(Schema.Type.DOUBLE);

    /** AVRO schema used for float literals. */
    public static final Schema FLOAT = Schema.create(Schema.Type.FLOAT);

    /** AVRO record schema used for big integer literals. */
    public static final Schema BIGINTEGER = newRecord("biginteger");

    /** AVRO record schema used for big decimal literals. */
    public static final Schema BIGDECIMAL = newRecord("bigdecimal");

    /** AVRO record schema used for non-compressed IDs (URIs, BNodes). */
    public static final Schema PLAIN_IDENTIFIER = newRecord("plainidentifier");

    /** AVRO record schema used for compressed IDs (URIs, BNodes). */
    public static final Schema COMPRESSED_IDENTIFIER = newRecord("compressedidentifier");

    /** AVRO union schema accepting either kind of ID (URIs, BNodes). */
    public static final Schema IDENTIFIER =
        Schema.createUnion(ImmutableList.<Schema>of(PLAIN_IDENTIFIER, COMPRESSED_IDENTIFIER));

    /** AVRO record schema used for calendar literals. */
    public static final Schema CALENDAR = newRecord("calendar");

    /** AVRO record schema used for RDF statements. */
    public static final Schema STATEMENT = newRecord("statement");

    /** AVRO record schema used for record nodes ({@code Record}). */
    public static final Schema RECORD = newRecord("struct");

    /** AVRO union schema used for generic data model nodes. */
    public static final Schema NODE =
        Schema.createUnion(
            ImmutableList.<Schema>of(
                BOOLEAN,
                STRING,
                STRING_LANG,
                LONG,
                INT,
                SHORT,
                BYTE,
                DOUBLE,
                FLOAT,
                BIGINTEGER,
                BIGDECIMAL,
                PLAIN_IDENTIFIER,
                COMPRESSED_IDENTIFIER,
                CALENDAR,
                STATEMENT,
                RECORD));

    /** AVRO array schema used for lists of nodes. */
    public static final Schema LIST = Schema.createArray(NODE);

    /** AVRO record schema used for the properties of a record node. */
    public static final Schema PROPERTY = newRecord("property");

    private Schemas() {}

    /** Creates a non-error record schema without documentation in {@link #NAMESPACE}. */
    private static Schema newRecord(final String name) {
      return Schema.createRecord(name, null, NAMESPACE, false);
    }

    /** Creates a record field with neither documentation nor default value. */
    private static Schema.Field newField(final String name, final Schema schema) {
      return new Schema.Field(name, schema, null, null);
    }

    /**
     * Returns the schemas that open, in this order, both the statement "object" union and the
     * property "propertyValue" union.
     */
    private static ImmutableList<Schema> valueTypes() {
      return ImmutableList.<Schema>of(
          BOOLEAN,
          STRING,
          STRING_LANG,
          LONG,
          INT,
          SHORT,
          BYTE,
          DOUBLE,
          FLOAT,
          BIGINTEGER,
          BIGDECIMAL,
          CALENDAR,
          PLAIN_IDENTIFIER,
          COMPRESSED_IDENTIFIER);
    }

    static {
      // Wrapper records around a primitive value.
      STRING_LANG.setFields(
          ImmutableList.<Schema.Field>of(newField("label", STRING), newField("language", STRING)));
      SHORT.setFields(ImmutableList.<Schema.Field>of(newField("short", INT)));
      BYTE.setFields(ImmutableList.<Schema.Field>of(newField("byte", INT)));
      BIGINTEGER.setFields(ImmutableList.<Schema.Field>of(newField("biginteger", STRING)));
      BIGDECIMAL.setFields(ImmutableList.<Schema.Field>of(newField("bigdecimal", STRING)));
      PLAIN_IDENTIFIER.setFields(ImmutableList.<Schema.Field>of(newField("identifier", STRING)));
      COMPRESSED_IDENTIFIER.setFields(ImmutableList.<Schema.Field>of(newField("identifier", INT)));
      CALENDAR.setFields(
          ImmutableList.<Schema.Field>of(newField("timezone", INT), newField("timestamp", LONG)));

      // Statement: subject, predicate, object, context.
      STATEMENT.setFields(
          ImmutableList.<Schema.Field>of(
              newField("subject", IDENTIFIER),
              newField("predicate", IDENTIFIER),
              newField("object", Schema.createUnion(valueTypes())),
              newField("context", IDENTIFIER)));

      // Property: the value union extends the shared list with statement, record and list.
      final ImmutableList<Schema> propertyValueTypes =
          ImmutableList.<Schema>builder()
              .addAll(valueTypes())
              .add(STATEMENT, RECORD, LIST)
              .build();
      PROPERTY.setFields(
          ImmutableList.<Schema.Field>of(
              newField("propertyURI", COMPRESSED_IDENTIFIER),
              newField("propertyValue", Schema.createUnion(propertyValueTypes))));

      // Record: an id union that includes null, plus an array of properties.
      final Schema idUnion =
          Schema.createUnion(
              ImmutableList.<Schema>of(NULL, PLAIN_IDENTIFIER, COMPRESSED_IDENTIFIER));
      RECORD.setFields(
          ImmutableList.<Schema.Field>of(
              newField("id", idUnion), newField("properties", Schema.createArray(PROPERTY))));
    }
  }
  /**
   * Converts the test table's fields with {@code BigQueryAvroUtils.toGenericAvroSchema} and checks
   * the Avro schema produced for each named field.
   */
  @Test
  public void testConvertBigQuerySchemaToAvroSchema() {
    TableSchema tableSchema = new TableSchema();
    tableSchema.setFields(fields);
    Schema avroSchema =
        BigQueryAvroUtils.toGenericAvroSchema("testSchema", tableSchema.getFields());

    // Expected nullable primitives, each a union of null and the primitive type.
    Schema nullableString =
        Schema.createUnion(Schema.create(Type.NULL), Schema.create(Type.STRING));
    Schema nullableDouble =
        Schema.createUnion(Schema.create(Type.NULL), Schema.create(Type.DOUBLE));
    Schema nullableLong = Schema.createUnion(Schema.create(Type.NULL), Schema.create(Type.LONG));
    Schema nullableBoolean =
        Schema.createUnion(Schema.create(Type.NULL), Schema.create(Type.BOOLEAN));
    Schema nullableBytes =
        Schema.createUnion(Schema.create(Type.NULL), Schema.create(Type.BYTES));

    // Expected nested records; each gets its own Field instance.
    Schema scionRecord =
        Schema.createRecord(
            "scion",
            "org.apache.beam.sdk.io.gcp.bigquery",
            "Translated Avro Schema for scion",
            false,
            ImmutableList.of(new Field("species", nullableString, null, (Object) null)));
    Schema associatesRecord =
        Schema.createRecord(
            "associates",
            "org.apache.beam.sdk.io.gcp.bigquery",
            "Translated Avro Schema for associates",
            false,
            ImmutableList.of(new Field("species", nullableString, null, (Object) null)));

    assertThat(avroSchema.getField("number").schema(), equalTo(Schema.create(Type.LONG)));
    assertThat(avroSchema.getField("species").schema(), equalTo(nullableString));
    assertThat(avroSchema.getField("quality").schema(), equalTo(nullableDouble));
    assertThat(avroSchema.getField("quantity").schema(), equalTo(nullableLong));
    assertThat(avroSchema.getField("birthday").schema(), equalTo(nullableLong));
    assertThat(avroSchema.getField("flighted").schema(), equalTo(nullableBoolean));
    assertThat(avroSchema.getField("sound").schema(), equalTo(nullableBytes));
    assertThat(avroSchema.getField("anniversaryDate").schema(), equalTo(nullableString));
    assertThat(avroSchema.getField("anniversaryDatetime").schema(), equalTo(nullableString));
    assertThat(avroSchema.getField("anniversaryTime").schema(), equalTo(nullableString));

    assertThat(
        avroSchema.getField("scion").schema(),
        equalTo(Schema.createUnion(Schema.create(Type.NULL), scionRecord)));
    assertThat(
        avroSchema.getField("associates").schema(),
        equalTo(Schema.createArray(associatesRecord)));
  }
Exemple #7
0
 /**
  * Builds a union of the Avro null schema and the schema {@code getAvroSchema} returns for the
  * given field type, with null as the first branch.
  *
  * @param field the field type to translate
  * @return a two-branch union schema: null, then the field's schema
  */
 public static Schema generateAvroSchema(FieldType field) {
   Schema nullBranch = Schema.create(NULL);
   Schema valueBranch = getAvroSchema(field);
   return Schema.createUnion(Lists.newArrayList(nullBranch, valueBranch));
 }