// Extract schema of the key field
  /**
   * Returns the string form of the key field's schema.
   *
   * <p>The record schema is loaded from {@code getInputPath()} and the field named by {@code
   * keyField} is looked up in it.
   *
   * @return the result of {@code toString()} on the key field's schema
   * @throws IOException declared for failures while loading the schema from the input path
   */
  public String getKeySchema() throws IOException {
    final Schema recordSchema = AvroUtils.getAvroSchemaFromPath(getInputPath());
    final Schema keyFieldSchema = recordSchema.getField(keyField).schema();
    return keyFieldSchema.toString();
  }
  /**
   * Returns the string form of the value field's schema.
   *
   * <p>The record schema is loaded from {@code getInputPath()} and the field named by {@code
   * valueField} is looked up in it.
   *
   * @return the result of {@code toString()} on the value field's schema
   * @throws IOException declared for failures while loading the schema from the input path
   */
  public String getValueSchema() throws IOException {
    final Schema recordSchema = AvroUtils.getAvroSchemaFromPath(getInputPath());
    final Schema valueFieldSchema = recordSchema.getField(valueField).schema();
    return valueFieldSchema.toString();
  }
  /**
   * Opens an {@code AvroKVRecordKeyValueStore} over a generic-record Avro file using a reader
   * schema limited to the key and value fields, then checks the entries for keys 1 and 2.
   */
  @Test
  public void testGenericAvroKVRecordKeyValueStore() throws Exception {
    // Reader schema that projects only 'key' and 'value'; the 'blah' field is skipped.
    final Schema projection = Schema.createRecord("record", null, null, false);
    final Schema.Field keyColumn =
        new Schema.Field("key", Schema.create(Schema.Type.INT), null, null);
    final Schema.Field valueColumn =
        new Schema.Field("value", Schema.create(Schema.Type.STRING), null, null);
    projection.setFields(Lists.newArrayList(keyColumn, valueColumn));

    // Build the store over a freshly written Avro file.
    final Path inputFile = writeGenericRecordAvroFile();
    final AvroKVRecordKeyValueStore<Integer, CharSequence> kvStore =
        AvroKVRecordKeyValueStore.builder()
            .withConfiguration(getConf())
            .withInputPath(inputFile)
            .withReaderSchema(projection)
            .build();

    final KeyValueStoreReader<Integer, CharSequence> lookup = kvStore.open();
    try {
      assertTrue(lookup.containsKey(1));
      assertEquals("one", lookup.get(1).toString());

      assertTrue(lookup.containsKey(2));
      // First field in wins.
      assertEquals("two", lookup.get(2).toString());
    } finally {
      lookup.close();
    }
  }
  /**
   * Returns the string form of the record schema loaded from {@code getInputPath()}.
   *
   * @return the result of {@code toString()} on the loaded schema
   * @throws IOException declared for failures while loading the schema from the input path
   */
  public String getRecordSchema() throws IOException {
    final Schema recordSchema = AvroUtils.getAvroSchemaFromPath(getInputPath());
    return recordSchema.toString();
  }
Example #5
0
  /**
   * Reads the Avro file selected in the Create Datamapper Diagram wizard and converts its schema
   * into a tree.
   *
   * <p>The root node is named after the file's schema, and every top-level field of that schema is
   * handed to {@code fetchToTree} together with the root. If the file cannot be read, the stack
   * trace is printed and the tree built so far (possibly just the empty root) is returned.
   *
   * @return the root of the generated tree; never {@code null}
   */
  public Tree generateInputTree() {
    Tree root = new Tree(); // root node of the tree data structure
    GenericDatumReader<GenericData> genericReader = new GenericDatumReader<GenericData>();

    // Path of the Avro file selected in the Create Datamapper Diagram wizard.
    String path = DataMapperCreationWizardPage.avroFilePath;

    // try-with-resources: the reader holds an open file handle that must be released even when
    // reading the header or walking the schema fails.
    try (DataFileReader<GenericData> dataFileReader =
        new DataFileReader<GenericData>(new File(path), genericReader)) {
      Schema schm = dataFileReader.getSchema();
      multipleData = false;
      root.setName(schm.getName());

      for (Field field : schm.getFields()) {
        fetchToTree(field, root);
      }
    } catch (IOException e) {
      // Best-effort behaviour kept from the original: report and return what we have.
      e.printStackTrace();
    }

    return root;
  }
  /**
   * Configures and runs the "UFO count" Avro MapReduce job.
   *
   * @param args command-line arguments; once the generic Hadoop options are stripped, exactly two
   *     must remain: the input path and the output path
   * @return {@code 0} after {@code JobClient.runJob} has returned
   * @throws Exception if configuring or running the job fails
   */
  @Override
  public int run(String[] args) throws Exception {
    final JobConf jobConf = new JobConf(getConf(), getClass());
    jobConf.setJobName("UFO count");

    final String[] remaining = new GenericOptionsParser(jobConf, args).getRemainingArgs();
    if (remaining.length != 2) {
      System.err.println("Usage: avro UFO counter <in> <out>");
      System.exit(2);
    }

    // Wire up input and output locations, then delete the output path before the job runs.
    FileInputFormat.addInputPath(jobConf, new Path(remaining[0]));
    final Path outputDir = new Path(remaining[1]);
    FileOutputFormat.setOutputPath(jobConf, outputDir);
    outputDir.getFileSystem(jobConf).delete(outputDir);

    // Input schema comes from the bundled resource; map output is a (string, long) pair.
    final Schema ufoSchema = Schema.parse(getClass().getResourceAsStream("ufo.avsc"));
    AvroJob.setInputSchema(jobConf, ufoSchema);
    final Schema mapOutputSchema =
        Pair.getPairSchema(Schema.create(Schema.Type.STRING), Schema.create(Schema.Type.LONG));
    AvroJob.setMapOutputSchema(jobConf, mapOutputSchema);

    AvroJob.setOutputSchema(jobConf, OUTPUT_SCHEMA);
    AvroJob.setMapperClass(jobConf, AvroRecordMapper.class);
    AvroJob.setReducerClass(jobConf, AvroRecordReducer.class);
    jobConf.setInputFormat(AvroInputFormat.class);

    JobClient.runJob(jobConf);
    return 0;
  }
  /**
   * Creates an Avro file of &lt;docid, text&gt; pairs to use for test input:
   *
   * <pre>
   * +-----+-----------------------+
   * | KEY | VALUE                 |
   * +-----+-----------------------+
   * |  1  | "apple banana carrot" |
   * |  2  | "apple banana"        |
   * |  3  | "apple"               |
   * +-----+-----------------------+
   * </pre>
   *
   * @return The avro file.
   */
  private File createInputFile() throws IOException {
    final Schema pairSchema =
        AvroKeyValue.getSchema(Schema.create(Schema.Type.INT), Schema.create(Schema.Type.STRING));

    // Row 1
    final AvroKeyValue<Integer, CharSequence> first =
        new AvroKeyValue<Integer, CharSequence>(new GenericData.Record(pairSchema));
    first.setKey(1);
    first.setValue("apple banana carrot");

    // Row 2
    final AvroKeyValue<Integer, CharSequence> second =
        new AvroKeyValue<Integer, CharSequence>(new GenericData.Record(pairSchema));
    second.setKey(2);
    second.setValue("apple banana");

    // Row 3
    final AvroKeyValue<Integer, CharSequence> third =
        new AvroKeyValue<Integer, CharSequence>(new GenericData.Record(pairSchema));
    third.setKey(3);
    third.setValue("apple");

    final File target = new File(mTempDir.getRoot(), "inputKeyValues.avro");
    return AvroFiles.createFile(target, pairSchema, first.get(), second.get(), third.get());
  }
Example #8
0
 /**
  * Builds a key schema whose fields are copied from the {@code SCHEMA$} of a specific record
  * class, restricted to the fields named in {@code keySchema}.
  *
  * @param specificClass class whose public static {@code SCHEMA$} field is read reflectively
  * @param keySchema key schema supplying the field names, record name, doc, namespace, error
  *     flag and partition strategy of the result
  * @return a new {@code AvroKeySchema} with the copied fields and the original partition strategy
  * @throws DatasetException if {@code SCHEMA$} cannot be read from {@code specificClass}
  */
 public static AvroKeySchema mergeSpecificStringTypes(
     Class<? extends SpecificRecord> specificClass, AvroKeySchema keySchema) {
   final Schema classSchema;
   try {
     classSchema = (Schema) specificClass.getField("SCHEMA$").get(null);
   } catch (IllegalArgumentException
       | SecurityException
       | IllegalAccessException
       | NoSuchFieldException e) {
     throw new DatasetException(e);
   }

   // Ensure schema is limited to keySchema's fields. The class may have more fields in the case
   // that the entity is being used as a key.
   final List<Field> keyFields = Lists.newArrayList();
   for (Schema.Field keyField : keySchema.getAvroSchema().getFields()) {
     final String fieldName = keyField.name();
     keyFields.add(copy(classSchema.getField(fieldName)));
   }

   final Schema merged =
       Schema.createRecord(
           keySchema.getAvroSchema().getName(),
           keySchema.getAvroSchema().getDoc(),
           keySchema.getAvroSchema().getNamespace(),
           keySchema.getAvroSchema().isError());
   merged.setFields(keyFields);
   return new AvroKeySchema(merged, keySchema.getPartitionStrategy());
 }
Example #9
0
 /**
  * Finds the union member that matches {@code value}, on both the data-schema and Avro sides.
  *
  * <p>The Avro member is chosen with {@code _genericData.resolveUnion}. Its lookup key is the
  * full name for ENUM, FIXED and RECORD members and the lower-cased type name otherwise. If the
  * union data schema has no member under that key, members whose {@code AvroOverride} declares a
  * matching Avro full name are tried next.
  *
  * @param value the datum whose union branch is being resolved
  * @param unionDataSchema the union on the data-schema side
  * @param avroSchema the corresponding Avro union schema
  * @return an entry pairing the member data schema with the member Avro schema, or {@code null}
  *     (after recording a message via {@code appendMessage}) when no member matches
  */
 private final Map.Entry<DataSchema, Schema> findUnionMemberSchema(
     Object value, UnionDataSchema unionDataSchema, Schema avroSchema) {
   int index = _genericData.resolveUnion(avroSchema, value);
   Schema memberAvroSchema = avroSchema.getTypes().get(index);
   String key;
   switch (memberAvroSchema.getType()) {
     case ENUM:
     case FIXED:
     case RECORD:
       key = memberAvroSchema.getFullName();
       break;
     default:
       // Locale.ROOT keeps the key stable regardless of the JVM default locale; with a Turkish
       // default locale, "INT".toLowerCase() would otherwise yield a dotless-i key.
       key = memberAvroSchema.getType().toString().toLowerCase(java.util.Locale.ROOT);
   }
   DataSchema memberDataSchema = unionDataSchema.getType(key);
   if (memberDataSchema == null) {
     // Fall back to members that declare an Avro override with a matching full name.
     for (DataSchema dataSchema : unionDataSchema.getTypes()) {
       AvroOverride avroOverride = getAvroOverride(dataSchema);
       if (avroOverride != null && avroOverride.getAvroSchemaFullName().equals(key)) {
         memberDataSchema = dataSchema;
         break;
       }
     }
   }
   if (memberDataSchema == null) {
     appendMessage("cannot find %1$s in union %2$s for value %3$s", key, unionDataSchema, value);
     return null;
   }
   return new AbstractMap.SimpleEntry<DataSchema, Schema>(memberDataSchema, memberAvroSchema);
 }
Example #10
0
  /**
   * Get a map of field names to default values for an Avro schema.
   *
   * <p>Only fields that declare a default are included. The defaults are materialized by writing
   * an empty record and reading it back with a reader schema made of the defaulted fields.
   *
   * @param avroRecordSchema The schema to get the map of field names to values.
   * @return The map.
   */
  public static Map<String, Object> getDefaultValueMap(Schema avroRecordSchema) {
    final List<Field> fieldsWithDefaults = new ArrayList<Field>();
    for (Field source : avroRecordSchema.getFields()) {
      if (source.defaultValue() == null) {
        continue;
      }
      // A fresh Field is required here; re-using the original would raise
      // org.apache.avro.AvroRuntimeException: Field already used: schemaVersion
      final Field copy =
          new Field(
              source.name(), source.schema(), source.doc(), source.defaultValue(), source.order());
      fieldsWithDefaults.add(copy);
    }

    // Reader schema: defaulted fields only. Writer schema: a record with no fields.
    final Schema readerSchema = Schema.createRecord(fieldsWithDefaults);
    final Schema writerSchema = Schema.createRecord(new ArrayList<Field>());
    final DatumWriter<GenericRecord> datumWriter =
        new GenericDatumWriter<GenericRecord>(writerSchema);
    final DatumReader<GenericRecord> datumReader =
        new GenericDatumReader<GenericRecord>(writerSchema, readerSchema);

    // Round-trip an empty record to obtain a record populated from the reader schema.
    final GenericRecord blank = new GenericData.Record(writerSchema);
    final GenericRecord populated =
        AvroUtils.readAvroEntity(AvroUtils.writeAvroEntity(blank, datumWriter), datumReader);

    final Map<String, Object> defaultsByName = new HashMap<String, Object>();
    for (Field defaulted : fieldsWithDefaults) {
      final String fieldName = defaulted.name();
      defaultsByName.put(fieldName, populated.get(fieldName));
    }
    return defaultsByName;
  }
  /**
   * Prepares the destination for the copy test.
   *
   * <p>Parses the schema in {@code avsc}, writes its evolved form to {@code evolvedAvsc}, runs
   * the {@code delete} and then the {@code create} command for {@code dest}, and finally sets up
   * a {@code CopyCommand} backed by a mocked console logger.
   *
   * @throws Exception if reading or writing a schema file, or running a command, fails
   */
  @Override
  public void createDestination() throws Exception {
    // try-with-resources so the input stream is closed even if parsing throws.
    final Schema original;
    try (FileInputStream schemaIn = new FileInputStream(avsc)) {
      original = new Schema.Parser().parse(schemaIn);
    }

    Schema evolved = getEvolvedSchema(original);

    // Schema JSON is written explicitly as UTF-8 rather than in the platform default charset,
    // and the stream is closed even if the write fails.
    try (FileOutputStream schemaOut = new FileOutputStream(evolvedAvsc)) {
      schemaOut.write(evolved.toString(true).getBytes(java.nio.charset.StandardCharsets.UTF_8));
    }

    List<String> createArgs =
        Lists.newArrayList("create", dest, "-s", evolvedAvsc, "-r", repoUri, "-d", "target/data");
    createArgs.addAll(getExtraCreateArgs());

    // Run "delete" before "create" for the same destination.
    TestUtil.run(
        LoggerFactory.getLogger(this.getClass()),
        "delete",
        dest,
        "-r",
        repoUri,
        "-d",
        "target/data");
    TestUtil.run(
        LoggerFactory.getLogger(this.getClass()),
        createArgs.toArray(new String[createArgs.size()]));

    this.console = mock(Logger.class);
    this.command = new CopyCommand(console);
    command.setConf(new Configuration());
  }
 /**
  * Register a new schema with this repository.
  *
  * <p>The schema is stored under each of its aliases and under its full name, and becomes the
  * most recently added schema.
  *
  * @param schema the schema to register
  * @return this composer, to allow chained calls
  */
 public AvroSchemaComposer add(Schema schema) {
   // Aliases first, then the canonical full name.
   for (String aliasName : schema.getAliases()) {
     schemas.put(aliasName, schema);
   }
   final String fullName = schema.getFullName();
   schemas.put(fullName, schema);

   mostRecent = schema;
   return this;
 }
Example #13
0
  /**
   * Parses a schema string and stores the result in {@code schemas} under its full name.
   *
   * @param schemaString schema text; it is passed through {@code resolveSchema} before parsing
   * @return the parsed schema
   */
  public static Schema parseSchema(String schemaString) {
    final Schema parsed = Schema.parse(resolveSchema(schemaString));
    schemas.put(parsed.getFullName(), parsed);
    return parsed;
  }
Example #14
0
 /**
  * Wrap an avro schema as a nullable union if needed. For instance, wrap schema "int" as ["null",
  * "int"].
  *
  * @param schema the schema to wrap
  * @param nullable whether the result must be nullable; when {@code false} the schema is
  *     returned untouched
  * @return {@code schema} itself when no wrapping is required, otherwise a union of {@code
  *     NullSchema} and {@code schema}
  */
 public static Schema wrapAsUnion(Schema schema, boolean nullable) {
   // Non-nullable: never wrap.
   if (!nullable) {
     return schema;
   }

   // An already acceptable union is returned as-is.
   final boolean alreadyAcceptable =
       schema.getType().equals(Schema.Type.UNION) && isAcceptableUnion(schema);
   if (alreadyAcceptable) {
     return schema;
   }
   return Schema.createUnion(Arrays.asList(NullSchema, schema));
 }
Example #15
0
 /** Checks that {@code getOrcField} yields the string type info for an Avro enum field. */
 @Test
 public void test_getOrcField_enum() throws Exception {
   final Schema recordSchema =
       SchemaBuilder.record("testRecord")
           .namespace("any.data")
           .fields()
           .name("enumField")
           .type()
           .enumeration("enum")
           .symbols("a", "b", "c")
           .enumDefault("a")
           .endRecord();

   final Schema enumFieldSchema = recordSchema.getField("enumField").schema();
   final TypeInfo actual = NiFiOrcUtils.getOrcField(enumFieldSchema);

   assertEquals(TypeInfoCreator.createString(), actual);
 }
Example #16
0
 /** Checks that {@code getOrcField} yields a list-of-long type info for an array of longs. */
 @Test
 public void test_getOrcField_array() throws Exception {
   final Schema recordSchema =
       SchemaBuilder.record("testRecord")
           .namespace("any.data")
           .fields()
           .name("array")
           .type()
           .array()
           .items()
           .longType()
           .noDefault()
           .endRecord();

   final Schema arrayFieldSchema = recordSchema.getField("array").schema();
   final TypeInfo actual = NiFiOrcUtils.getOrcField(arrayFieldSchema);

   assertEquals(TypeInfoFactory.getListTypeInfo(TypeInfoCreator.createLong()), actual);
 }
Example #17
0
  /**
   * Builds an Avro record schema named "collection" from the given schema fields.
   *
   * @param fields the fields to convert; each is mapped with {@code AvroUtil.generateAvroField}
   * @return a record schema containing the converted fields in iteration order
   */
  public static Schema convertAvroSchema(Collection<SchemaField> fields) {
    final Schema record = Schema.createRecord("collection", null, null, false);
    record.setFields(
        fields.stream().map(field -> AvroUtil.generateAvroField(field)).collect(Collectors.toList()));
    return record;
  }
 /**
  * Returns the reflection-derived schema for {@code type} with its namespace text removed,
  * computing it at most once per class via {@code schemaCache}.
  *
  * <p>The namespace is removed by deleting every occurrence of the namespace string from the
  * schema's JSON text and re-parsing it. A schema that reports no namespace is returned as-is.
  *
  * @param type the class to derive a schema for
  * @return the cached or newly computed schema
  */
 private Schema namespacelessSchemaFor(Class<?> type) {
   return schemaCache.computeIfAbsent(
       type,
       clazz -> {
         Schema schema = ReflectData.get().getSchema(clazz);
         String namespace = schema.getNamespace();
         if (namespace == null) {
           // Nothing to strip; String.replace(null, ...) would throw NullPointerException.
           return schema;
         }
         // kind of a hack to set an empty namespace :)
         return new Schema.Parser().parse(schema.toString().replace(namespace, ""));
       });
 }
Example #19
0
 /**
  * Computes a hash code for the next datum read from {@code data.decoder}, interpreting the
  * encoded bytes according to {@code schema}.
  *
  * <p>The decoder is advanced past the datum as a side effect, so the order of reads below must
  * match the order in which the value was encoded.
  *
  * @param data holder of the decoder to read from; also passed on to {@code hashBytes}
  * @param schema schema describing the datum at the decoder's current position
  * @return the hash code of the datum
  * @throws IOException if reading from the decoder fails
  * @throws AvroRuntimeException for MAP schemas and for unrecognized schema types
  */
 private static int hashCode(HashData data, Schema schema) throws IOException {
   Decoder decoder = data.decoder;
   switch (schema.getType()) {
     case RECORD:
       {
         // Combine field hashes in declaration order with the usual 31 multiplier.
         int hashCode = 1;
         for (Field field : schema.getFields()) {
           if (field.order() == Field.Order.IGNORE) {
             // Ignored fields do not contribute, but their bytes must still be consumed.
             GenericDatumReader.skip(field.schema(), decoder);
             continue;
           }
           hashCode = hashCode * 31 + hashCode(data, field.schema());
         }
         return hashCode;
       }
     // ENUM deliberately shares the INT branch: both are hashed as the int that is read.
     case ENUM:
     case INT:
       return decoder.readInt();
     case FLOAT:
       return Float.floatToIntBits(decoder.readFloat());
     case LONG:
       {
         // Fold the upper 32 bits into the lower 32 bits.
         long l = decoder.readLong();
         return (int) (l ^ (l >>> 32));
       }
     case DOUBLE:
       {
         // Same fold as LONG, applied to the raw bit pattern of the double.
         long l = Double.doubleToLongBits(decoder.readDouble());
         return (int) (l ^ (l >>> 32));
       }
     case ARRAY:
       {
         Schema elementType = schema.getElementType();
         int hashCode = 1;
         // Outer loop walks the blocks reported by the decoder until a zero count is returned;
         // inner loop hashes the items of each block.
         for (long l = decoder.readArrayStart(); l != 0; l = decoder.arrayNext())
           for (long i = 0; i < l; i++) hashCode = hashCode * 31 + hashCode(data, elementType);
         return hashCode;
       }
     case MAP:
       throw new AvroRuntimeException("Can't hashCode maps!");
     case UNION:
       // The int read here selects the branch schema used to hash the value that follows.
       return hashCode(data, schema.getTypes().get(decoder.readInt()));
     // NOTE(review): hashBytes is defined elsewhere; its first argument looks like an initial
     // hash value and its last a mode flag - confirm against its definition.
     case FIXED:
       return hashBytes(1, data, schema.getFixedSize(), false);
     case STRING:
       // The int read here is passed as the length argument.
       return hashBytes(0, data, decoder.readInt(), false);
     case BYTES:
       return hashBytes(1, data, decoder.readInt(), true);
     case BOOLEAN:
       return decoder.readBoolean() ? 1231 : 1237;
     case NULL:
       return 0;
     default:
       throw new AvroRuntimeException("Unexpected schema to hashCode!");
   }
 }
 /**
  * Decodes a generic datum into a {@code Value}.
  *
  * <p>A {@code GenericRecord} whose schema equals {@code Schemas.COMPRESSED_IDENTIFIER} or
  * {@code Schemas.PLAIN_IDENTIFIER} is decoded with {@code decodeIdentifier}; everything else is
  * decoded with {@code decodeLiteral}.
  *
  * @param generic the datum to decode
  * @return the decoded value
  */
 private Value decodeValue(final Object generic) {
   if (!(generic instanceof GenericRecord)) {
     return decodeLiteral(generic);
   }

   final GenericRecord candidate = (GenericRecord) generic;
   final Schema candidateSchema = candidate.getSchema();
   final boolean isIdentifier =
       candidateSchema.equals(Schemas.COMPRESSED_IDENTIFIER)
           || candidateSchema.equals(Schemas.PLAIN_IDENTIFIER);
   return isIdentifier ? decodeIdentifier(candidate) : decodeLiteral(generic);
 }
Example #21
0
  /**
   * Tells whether a field holds a single value, i.e. its schema (after {@code
   * extractSchemaFromUnionIfNeeded}) is not an array.
   *
   * @param field the field to inspect
   * @return {@code false} for array-typed fields, {@code true} otherwise
   */
  private static boolean isSingleValueField(Field field) {
    final org.apache.avro.Schema effectiveSchema = extractSchemaFromUnionIfNeeded(field.schema());
    return effectiveSchema.getType() != Type.ARRAY;
  }
 /**
  * Initializes {@code pair} and {@code projector} from the map output schema stored in the
  * configuration under {@code AvroJob.MAP_OUTPUT_SCHEMA}.
  *
  * @param conf the job configuration; a {@code null} value is ignored
  */
 @Override
 public void setConf(org.apache.hadoop.conf.Configuration conf) {
   // you first get a null configuration - ignore that
   if (conf == null) {
     return;
   }

   final Schema mapOutputSchema = Schema.parse(conf.get(AvroJob.MAP_OUTPUT_SCHEMA));
   pair = new Pair<Object, Object>(mapOutputSchema);

   // The projector works on a record of the pair's key schema and that schema's fields.
   final Schema pairKeySchema = Pair.getKeySchema(mapOutputSchema);
   final List<Field> keyFields = pairKeySchema.getFields();
   final GenericRecord keyRecord = new GenericData.Record(pairKeySchema);
   projector = new Projector(keyRecord, keyFields);
 }
  /**
   * Initializes the shared test fixtures before any test runs: a "jira" record schema with a
   * single string field named "json" (flagged with {@code TALEND_IS_LOCKED}) and a sample JSON
   * payload.
   */
  @BeforeClass
  public static void setUp() {
    final Schema jsonValueSchema = new AvroRegistry().getConverter(String.class).getSchema();
    final Schema.Field jsonColumn =
        new Schema.Field("json", jsonValueSchema, null, null, Order.ASCENDING);

    testSchema =
        Schema.createRecord("jira", null, null, false, Collections.singletonList(jsonColumn));
    testSchema.addProp(TALEND_IS_LOCKED, "true");

    testJson = "{\"startAt\":0,\"maxResults\":2,\"total\":1,\"issues\":[]}";
  }
Example #24
0
 /** Checks that {@code getOrcField} yields an (int, boolean) union type info for a union field. */
 @Test
 public void test_getOrcField_union() throws Exception {
   final Schema recordSchema =
       SchemaBuilder.record("testRecord")
           .namespace("any.data")
           .fields()
           .name("union")
           .type()
           .unionOf()
           .intType()
           .and()
           .booleanType()
           .endUnion()
           .noDefault()
           .endRecord();

   final Schema unionFieldSchema = recordSchema.getField("union").schema();
   final TypeInfo actual = NiFiOrcUtils.getOrcField(unionFieldSchema);

   final TypeInfo expected =
       TypeInfoFactory.getUnionTypeInfo(
           Arrays.asList(TypeInfoCreator.createInt(), TypeInfoCreator.createBoolean()));
   assertEquals(expected, actual);
 }
 /**
  * {@inheritDoc}
  *
  * <p>Copies the strings into a new list and wraps it in an Avro array whose schema is an array
  * of strings.
  */
 @Override
 public Array<CharSequence> convert(List<String> recommendationList) {
   // The copy constructor performs the same element-by-element copy as an explicit loop.
   final List<CharSequence> items = new ArrayList<CharSequence>(recommendationList);
   final Schema stringArraySchema = Schema.createArray(Schema.create(Schema.Type.STRING));
   return new Array<CharSequence>(stringArraySchema, items);
 }
Example #26
0
 /** Checks that {@code getOrcField} yields a string-to-double map type info for a map field. */
 @Test
 public void test_getOrcField_map() throws Exception {
   final Schema recordSchema =
       SchemaBuilder.record("testRecord")
           .namespace("any.data")
           .fields()
           .name("map")
           .type()
           .map()
           .values()
           .doubleType()
           .noDefault()
           .endRecord();

   final Schema mapFieldSchema = recordSchema.getField("map").schema();
   final TypeInfo actual = NiFiOrcUtils.getOrcField(mapFieldSchema);

   final TypeInfo expected =
       TypeInfoFactory.getMapTypeInfo(
           TypeInfoCreator.createString(), TypeInfoCreator.createDouble());
   assertEquals(expected, actual);
 }
Example #27
0
  /**
   * Checks that each field of the primitive Avro schema maps, in order, to the expected Hive type
   * name.
   */
  @Test
  public void test_getHiveTypeFromAvroType_primitive() throws Exception {
    // Expected ORC types, in the same order as the fields of the primitive schema.
    final String[] expectedTypes = {
      "INT", "BIGINT", "BOOLEAN", "FLOAT", "DOUBLE", "BINARY", "STRING",
    };

    final List<Schema.Field> primitiveFields = buildPrimitiveAvroSchema().getFields();
    int position = 0;
    for (Schema.Field primitiveField : primitiveFields) {
      final String actualType = NiFiOrcUtils.getHiveTypeFromAvroType(primitiveField.schema());
      assertEquals(expectedTypes[position], actualType);
      position++;
    }
  }
Example #28
0
 static {
   AVRO_SCHEMA =
       SchemaBuilder.record(AVRO_RECORD_NAME)
           .namespace(AVRO_RECORD_NAMESPACE)
           .fields()
           .name(AVRO_TIMESTAMP_FIELD)
           .type(Schema.create(Schema.Type.LONG))
           .noDefault()
           .name(AVRO_CONTENT_FIELD)
           .type(Schema.create(Schema.Type.STRING))
           .noDefault()
           .endRecord();
 }
 /**
  * Finds the first schema whose definition parses to an Avro schema equal to the parsed {@code
  * definition}.
  *
  * @param schemas candidate schemas, checked in iteration order
  * @param definition the Avro schema definition to look for
  * @return the first matching candidate, or {@code null} when none matches
  */
 @Override
 public Schema match(List<Schema> schemas, String definition) {
   final org.apache.avro.Schema wanted = new org.apache.avro.Schema.Parser().parse(definition);
   for (Schema candidate : schemas) {
     // Each candidate is parsed with its own parser instance.
     final org.apache.avro.Schema parsed =
         new org.apache.avro.Schema.Parser().parse(candidate.getDefinition());
     if (parsed.equals(wanted)) {
       return candidate;
     }
   }
   return null;
 }
Example #30
0
  /**
   * Determines whether a schema is an acceptable union: a union with at most one branch, or a
   * union of exactly two branches of which at least one is NULL. Note that this function doesn't
   * check containing types of the input union recursively.
   *
   * @param in the schema to inspect
   * @return {@code true} if {@code in} is an acceptable union, {@code false} otherwise (including
   *     when it is not a union at all)
   */
  public static boolean isAcceptableUnion(Schema in) {
    if (!in.getType().equals(Schema.Type.UNION)) {
      return false;
    }

    final List<Schema> branches = in.getTypes();
    final int branchCount = branches.size();
    if (branchCount <= 1) {
      return true;
    }
    if (branchCount > 2) {
      return false; /* contains more than 2 types */
    }

    /* exactly two branches: one of them must be NULL */
    final boolean firstIsNull = branches.get(0).getType().equals(Schema.Type.NULL);
    return firstIsNull || branches.get(1).getType().equals(Schema.Type.NULL);
  }