// Extract schema of the key field public String getKeySchema() throws IOException { Schema schema = AvroUtils.getAvroSchemaFromPath(getInputPath()); String keySchema = schema.getField(keyField).schema().toString(); return keySchema; }
// Extract schema of the value field public String getValueSchema() throws IOException { Schema schema = AvroUtils.getAvroSchemaFromPath(getInputPath()); String valueSchema = schema.getField(valueField).schema().toString(); return valueSchema; }
@Test public void testGenericAvroKVRecordKeyValueStore() throws Exception { // Only read the key and value fields (skip the 'blah' field). final Schema readerSchema = Schema.createRecord("record", null, null, false); readerSchema.setFields( Lists.newArrayList( new Schema.Field("key", Schema.create(Schema.Type.INT), null, null), new Schema.Field("value", Schema.create(Schema.Type.STRING), null, null))); // Open the store. final Path avroFilePath = writeGenericRecordAvroFile(); final AvroKVRecordKeyValueStore<Integer, CharSequence> store = AvroKVRecordKeyValueStore.builder() .withConfiguration(getConf()) .withInputPath(avroFilePath) .withReaderSchema(readerSchema) .build(); final KeyValueStoreReader<Integer, CharSequence> reader = store.open(); try { assertTrue(reader.containsKey(1)); assertEquals("one", reader.get(1).toString()); assertTrue(reader.containsKey(2)); assertEquals("two", reader.get(2).toString()); // First field in wins. } finally { reader.close(); } }
// Get the schema for the Avro Record from the object container file public String getRecordSchema() throws IOException { Schema schema = AvroUtils.getAvroSchemaFromPath(getInputPath()); String recSchema = schema.toString(); return recSchema; }
/** * @param args * <p>read *.avro file convert into java tree view. return tree */ public Tree generateInputTree() { GenericDatumReader<GenericData> genericReader = new GenericDatumReader<GenericData>(); // DatumReader inputDatumReader = new SpecificDatumReader(UserInfo.class); DataFileReader<GenericData> dataFileReader; Tree root = new Tree(); // root tree for Tree data struture try { /// // home/lali/old/svn/trunck/eclipse/data-mapper/org.wso2.developerstudio.visualdatamapper.diagram/resource/inputs.avro String path = DataMapperCreationWizardPage .avroFilePath; // path for avro file selected in Create Datamapper Diagram swizard dataFileReader = new DataFileReader<GenericData>(new File(path), genericReader); Schema schm = dataFileReader.getSchema(); multipleData = false; root.setName(schm.getName()); List<Field> list = dataFileReader.getSchema().getFields(); Iterator<Field> it = list.iterator(); while (it.hasNext()) { Field field = it.next(); fetchToTree(field, root); } } catch (IOException e) { e.printStackTrace(); } return root; }
/**
 * Configures and runs the "UFO count" Avro MapReduce job.
 *
 * <p>Expects exactly two remaining arguments after generic option parsing: the input path and
 * the output path. Any existing output path is deleted before the job is submitted. The input
 * schema is loaded from the {@code ufo.avsc} class-path resource.
 *
 * @param args command-line arguments, including any generic Hadoop options
 * @return {@code 0} once the job has been run
 * @throws IllegalStateException if the {@code ufo.avsc} resource cannot be found
 * @throws Exception if configuring or running the job fails
 */
@Override
public int run(String[] args) throws Exception {
  JobConf conf = new JobConf(getConf(), getClass());
  conf.setJobName("UFO count");

  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  if (otherArgs.length != 2) {
    System.err.println("Usage: avro UFO counter <in> <out>");
    System.exit(2);
  }

  FileInputFormat.addInputPath(conf, new Path(otherArgs[0]));
  Path outputPath = new Path(otherArgs[1]);
  FileOutputFormat.setOutputPath(conf, outputPath);
  // Explicit recursive delete; the single-argument overload is deprecated.
  outputPath.getFileSystem(conf).delete(outputPath, true);

  // Load the input schema and make sure the resource stream is closed afterwards.
  Schema inputSchema;
  try (java.io.InputStream schemaStream = getClass().getResourceAsStream("ufo.avsc")) {
    if (schemaStream == null) {
      throw new IllegalStateException("Schema resource ufo.avsc not found on the class path");
    }
    inputSchema = new Schema.Parser().parse(schemaStream);
  }

  AvroJob.setInputSchema(conf, inputSchema);
  AvroJob.setMapOutputSchema(
      conf,
      Pair.getPairSchema(Schema.create(Schema.Type.STRING), Schema.create(Schema.Type.LONG)));
  AvroJob.setOutputSchema(conf, OUTPUT_SCHEMA);
  AvroJob.setMapperClass(conf, AvroRecordMapper.class);
  AvroJob.setReducerClass(conf, AvroRecordReducer.class);
  conf.setInputFormat(AvroInputFormat.class);

  JobClient.runJob(conf);
  return 0;
}
/**
 * Creates an Avro file of (docid, text) pairs to use for test input:
 *
 * <pre>
 * +-----+-----------------------+
 * | KEY | VALUE                 |
 * +-----+-----------------------+
 * | 1   | "apple banana carrot" |
 * | 2   | "apple banana"        |
 * | 3   | "apple"               |
 * +-----+-----------------------+
 * </pre>
 *
 * @return The avro file.
 */
private File createInputFile() throws IOException {
  final Schema pairSchema =
      AvroKeyValue.getSchema(Schema.create(Schema.Type.INT), Schema.create(Schema.Type.STRING));

  final AvroKeyValue<Integer, CharSequence> first =
      new AvroKeyValue<Integer, CharSequence>(new GenericData.Record(pairSchema));
  first.setKey(1);
  first.setValue("apple banana carrot");

  final AvroKeyValue<Integer, CharSequence> second =
      new AvroKeyValue<Integer, CharSequence>(new GenericData.Record(pairSchema));
  second.setKey(2);
  second.setValue("apple banana");

  final AvroKeyValue<Integer, CharSequence> third =
      new AvroKeyValue<Integer, CharSequence>(new GenericData.Record(pairSchema));
  third.setKey(3);
  third.setValue("apple");

  final File target = new File(mTempDir.getRoot(), "inputKeyValues.avro");
  return AvroFiles.createFile(target, pairSchema, first.get(), second.get(), third.get());
}
public static AvroKeySchema mergeSpecificStringTypes( Class<? extends SpecificRecord> specificClass, AvroKeySchema keySchema) { Schema schemaField; try { schemaField = (Schema) specificClass.getField("SCHEMA$").get(null); } catch (IllegalArgumentException e) { throw new DatasetException(e); } catch (SecurityException e) { throw new DatasetException(e); } catch (IllegalAccessException e) { throw new DatasetException(e); } catch (NoSuchFieldException e) { throw new DatasetException(e); } // Ensure schema is limited to keySchema's fields. The class may have more // fields // in the case that the entity is being used as a key. List<Field> fields = Lists.newArrayList(); for (Schema.Field field : keySchema.getAvroSchema().getFields()) { fields.add(copy(schemaField.getField(field.name()))); } Schema schema = Schema.createRecord( keySchema.getAvroSchema().getName(), keySchema.getAvroSchema().getDoc(), keySchema.getAvroSchema().getNamespace(), keySchema.getAvroSchema().isError()); schema.setFields(fields); return new AvroKeySchema(schema, keySchema.getPartitionStrategy()); }
/**
 * Finds the union member that matches a value, on both the data-schema and Avro-schema side.
 *
 * <p>The Avro member is chosen by {@code _genericData.resolveUnion}. The lookup key into the
 * data-schema union is the member's full name for ENUM, FIXED and RECORD, otherwise the
 * lower-cased Avro type name. If no member is registered under that key, members whose
 * {@code AvroOverride} declares a matching Avro full name are considered.
 *
 * @param value the value whose union member is being resolved
 * @param unionDataSchema the data-schema union to search
 * @param avroSchema the corresponding Avro union schema
 * @return the matching (data schema, Avro schema) pair, or {@code null} after recording a
 *     message when no data-schema member matches
 */
private final Map.Entry<DataSchema, Schema> findUnionMemberSchema(
    Object value, UnionDataSchema unionDataSchema, Schema avroSchema) {
  int index = _genericData.resolveUnion(avroSchema, value);
  Schema memberAvroSchema = avroSchema.getTypes().get(index);

  String key;
  switch (memberAvroSchema.getType()) {
    case ENUM:
    case FIXED:
    case RECORD:
      key = memberAvroSchema.getFullName();
      break;
    default:
      // Locale.ROOT keeps the key stable regardless of the JVM default locale
      // (e.g. under a Turkish locale "INT" would otherwise lower-case to "ınt").
      key = memberAvroSchema.getType().toString().toLowerCase(java.util.Locale.ROOT);
  }

  DataSchema memberDataSchema = unionDataSchema.getType(key);
  if (memberDataSchema == null) {
    // Fall back to members that map to this Avro type through an override.
    for (DataSchema dataSchema : unionDataSchema.getTypes()) {
      AvroOverride avroOverride = getAvroOverride(dataSchema);
      if (avroOverride != null && avroOverride.getAvroSchemaFullName().equals(key)) {
        memberDataSchema = dataSchema;
        break;
      }
    }
  }

  if (memberDataSchema == null) {
    appendMessage("cannot find %1$s in union %2$s for value %3$s", key, unionDataSchema, value);
    return null;
  }
  return new AbstractMap.SimpleEntry<DataSchema, Schema>(memberDataSchema, memberAvroSchema);
}
/** * Get a map of field names to default values for an Avro schema. * * @param avroRecordSchema The schema to get the map of field names to values. * @return The map. */ public static Map<String, Object> getDefaultValueMap(Schema avroRecordSchema) { List<Field> defaultFields = new ArrayList<Field>(); for (Field f : avroRecordSchema.getFields()) { if (f.defaultValue() != null) { // Need to create a new Field here or we will get // org.apache.avro.AvroRuntimeException: Field already used: // schemaVersion defaultFields.add(new Field(f.name(), f.schema(), f.doc(), f.defaultValue(), f.order())); } } Schema defaultSchema = Schema.createRecord(defaultFields); Schema emptyRecordSchema = Schema.createRecord(new ArrayList<Field>()); DatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>(emptyRecordSchema); DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>(emptyRecordSchema, defaultSchema); GenericRecord emptyRecord = new GenericData.Record(emptyRecordSchema); GenericRecord defaultRecord = AvroUtils.readAvroEntity(AvroUtils.writeAvroEntity(emptyRecord, writer), reader); Map<String, Object> defaultValueMap = new HashMap<String, Object>(); for (Field f : defaultFields) { defaultValueMap.put(f.name(), defaultRecord.get(f.name())); } return defaultValueMap; }
/**
 * Prepares the copy destination using an evolved version of the source schema.
 *
 * <p>Parses the schema at {@code avsc}, derives the evolved schema via
 * {@code getEvolvedSchema}, writes it pretty-printed to {@code evolvedAvsc}, then runs "delete"
 * followed by "create" for {@code dest} and finally installs a mock console and a fresh
 * {@code CopyCommand}.
 *
 * @throws Exception if reading or writing a schema file fails, or a command run fails
 */
@Override
public void createDestination() throws Exception {
  // try-with-resources so neither stream leaks when parsing or writing throws.
  final Schema original;
  try (FileInputStream schemaIn = new FileInputStream(avsc)) {
    original = new Schema.Parser().parse(schemaIn);
  }

  Schema evolved = getEvolvedSchema(original);
  try (FileOutputStream schemaOut = new FileOutputStream(evolvedAvsc)) {
    // The schema file is JSON: encode explicitly as UTF-8 rather than the platform charset.
    schemaOut.write(evolved.toString(true).getBytes(java.nio.charset.StandardCharsets.UTF_8));
  }

  List<String> createArgs =
      Lists.newArrayList("create", dest, "-s", evolvedAvsc, "-r", repoUri, "-d", "target/data");
  createArgs.addAll(getExtraCreateArgs());

  // Remove any previous destination before re-creating it.
  TestUtil.run(
      LoggerFactory.getLogger(this.getClass()),
      "delete",
      dest,
      "-r",
      repoUri,
      "-d",
      "target/data");
  TestUtil.run(
      LoggerFactory.getLogger(this.getClass()),
      createArgs.toArray(new String[createArgs.size()]));

  this.console = mock(Logger.class);
  this.command = new CopyCommand(console);
  command.setConf(new Configuration());
}
/**
 * Registers a new schema with this repository.
 *
 * <p>The schema is stored under each of its aliases and then under its full name, and becomes
 * the most recently added schema.
 *
 * @param schema the schema to register
 * @return this composer, to allow chaining
 */
public AvroSchemaComposer add(Schema schema) {
  // Aliases first, so the full-name entry is written last.
  schema.getAliases().forEach(alias -> schemas.put(alias, schema));
  schemas.put(schema.getFullName(), schema);
  mostRecent = schema;
  return this;
}
/**
 * Parses a schema string and registers the result under its full name.
 *
 * <p>The input is first passed through {@code resolveSchema}; the parsed schema is stored in
 * {@code schemas} keyed by its full name.
 *
 * @param schemaString the schema text to resolve and parse
 * @return the parsed schema
 */
public static Schema parseSchema(String schemaString) {
  final Schema parsed = Schema.parse(resolveSchema(schemaString));
  schemas.put(parsed.getFullName(), parsed);
  return parsed;
}
/**
 * Wraps an Avro schema as a nullable union if needed. For instance, schema "int" becomes
 * ["null", "int"].
 *
 * @param schema the schema to wrap
 * @param nullable whether the result must be nullable; when {@code false} the schema is returned
 *     untouched
 * @return the schema itself when no wrapping is needed (not nullable, or already an acceptable
 *     union), otherwise a new union of {@code NullSchema} and the schema
 */
public static Schema wrapAsUnion(Schema schema, boolean nullable) {
  // Nothing to do when nullability is not requested.
  if (!nullable) {
    return schema;
  }

  final boolean alreadyAcceptableUnion =
      schema.getType().equals(Schema.Type.UNION) && isAcceptableUnion(schema);
  if (alreadyAcceptableUnion) {
    return schema;
  }
  return Schema.createUnion(Arrays.asList(NullSchema, schema));
}
/** Checks that an Avro enum field is mapped to the ORC string type. */
@Test
public void test_getOrcField_enum() throws Exception {
  final Schema recordSchema =
      SchemaBuilder.record("testRecord")
          .namespace("any.data")
          .fields()
          .name("enumField")
          .type()
          .enumeration("enum")
          .symbols("a", "b", "c")
          .enumDefault("a")
          .endRecord();

  final Schema enumSchema = recordSchema.getField("enumField").schema();
  final TypeInfo actual = NiFiOrcUtils.getOrcField(enumSchema);

  assertEquals(TypeInfoCreator.createString(), actual);
}
/** Checks that an Avro array of longs is mapped to an ORC list of longs. */
@Test
public void test_getOrcField_array() throws Exception {
  final Schema recordSchema =
      SchemaBuilder.record("testRecord")
          .namespace("any.data")
          .fields()
          .name("array")
          .type()
          .array()
          .items()
          .longType()
          .noDefault()
          .endRecord();

  final Schema arraySchema = recordSchema.getField("array").schema();
  final TypeInfo actual = NiFiOrcUtils.getOrcField(arraySchema);

  assertEquals(TypeInfoFactory.getListTypeInfo(TypeInfoCreator.createLong()), actual);
}
/**
 * Builds an Avro record schema named "collection" from the given fields.
 *
 * <p>Each field is converted with {@code AvroUtil.generateAvroField}, in iteration order.
 *
 * @param fields the fields to convert
 * @return a record schema containing one Avro field per input field
 */
public static Schema convertAvroSchema(Collection<SchemaField> fields) {
  final Schema record = Schema.createRecord("collection", null, null, false);
  record.setFields(
      fields.stream().map(AvroUtil::generateAvroField).collect(Collectors.toList()));
  return record;
}
private Schema namespacelessSchemaFor(Class<?> type) { return schemaCache.computeIfAbsent( type, clazz -> { Schema schema = ReflectData.get().getSchema(clazz); // kind of a hack to set an empty namespace :) return new Schema.Parser().parse(schema.toString().replace(schema.getNamespace(), "")); }); }
/**
 * Computes a hash code for the next datum read from {@code data.decoder}, guided by the schema.
 *
 * <p>The decoder is advanced past the datum as a side effect, so the call order inside this
 * method matters. Records and arrays combine their parts with the usual {@code 31 * h + part}
 * scheme, starting from 1.
 *
 * @param data holder of the decoder to read from; also passed on to {@code hashBytes}
 * @param schema schema describing the datum at the decoder's current position
 * @return the hash code of the datum
 * @throws IOException if reading from the decoder fails
 * @throws AvroRuntimeException for MAP schemas and for unrecognised schema types
 */
private static int hashCode(HashData data, Schema schema) throws IOException {
  Decoder decoder = data.decoder;
  switch (schema.getType()) {
    case RECORD:
      {
        int hashCode = 1;
        for (Field field : schema.getFields()) {
          // Fields ordered IGNORE do not contribute, but their bytes must still be consumed.
          if (field.order() == Field.Order.IGNORE) {
            GenericDatumReader.skip(field.schema(), decoder);
            continue;
          }
          hashCode = hashCode * 31 + hashCode(data, field.schema());
        }
        return hashCode;
      }
    case ENUM:
    case INT:
      // Both are read with readInt() and hash to the value read.
      return decoder.readInt();
    case FLOAT:
      return Float.floatToIntBits(decoder.readFloat());
    case LONG:
      {
        long l = decoder.readLong();
        // Fold the high and low 32 bits together.
        return (int) (l ^ (l >>> 32));
      }
    case DOUBLE:
      {
        long l = Double.doubleToLongBits(decoder.readDouble());
        // Same 64-to-32 bit fold as for LONG, applied to the raw bit pattern.
        return (int) (l ^ (l >>> 32));
      }
    case ARRAY:
      {
        Schema elementType = schema.getElementType();
        int hashCode = 1;
        // Outer loop walks the blocks reported by the decoder (a count of 0 ends the array);
        // inner loop hashes each element of the current block.
        for (long l = decoder.readArrayStart(); l != 0; l = decoder.arrayNext())
          for (long i = 0; i < l; i++) hashCode = hashCode * 31 + hashCode(data, elementType);
        return hashCode;
      }
    case MAP:
      throw new AvroRuntimeException("Can't hashCode maps!");
    case UNION:
      // The int read from the decoder selects the branch schema to hash with.
      return hashCode(data, schema.getTypes().get(decoder.readInt()));
    case FIXED:
      // NOTE(review): first argument looks like the initial hash and the last flag differs
      // only for BYTES — confirm against hashBytes.
      return hashBytes(1, data, schema.getFixedSize(), false);
    case STRING:
      // Length comes from the decoder; note the initial value 0 here versus 1 for FIXED/BYTES.
      return hashBytes(0, data, decoder.readInt(), false);
    case BYTES:
      return hashBytes(1, data, decoder.readInt(), true);
    case BOOLEAN:
      // 1231 / 1237 are the same constants java.lang.Boolean#hashCode uses.
      return decoder.readBoolean() ? 1231 : 1237;
    case NULL:
      // Nothing is read for null.
      return 0;
    default:
      throw new AvroRuntimeException("Unexpected schema to hashCode!");
  }
}
/**
 * Decodes a generic Avro datum into a {@code Value}.
 *
 * <p>A {@code GenericRecord} whose schema equals the compressed or plain identifier schema is
 * decoded as an identifier; anything else is decoded as a literal.
 *
 * @param generic the datum to decode
 * @return the decoded value
 */
private Value decodeValue(final Object generic) {
  if (!(generic instanceof GenericRecord)) {
    return decodeLiteral(generic);
  }

  final GenericRecord candidate = (GenericRecord) generic;
  final Schema recordSchema = candidate.getSchema();
  final boolean isIdentifier =
      recordSchema.equals(Schemas.COMPRESSED_IDENTIFIER)
          || recordSchema.equals(Schemas.PLAIN_IDENTIFIER);
  if (isIdentifier) {
    return decodeIdentifier(candidate);
  }
  return decodeLiteral(generic);
}
/**
 * Tells whether a field holds a single value, i.e. its schema (after
 * {@code extractSchemaFromUnionIfNeeded}) is not an array.
 *
 * @param field the field to inspect
 * @return {@code false} for array-typed fields, {@code true} otherwise
 */
private static boolean isSingleValueField(Field field) {
  final org.apache.avro.Schema effectiveSchema = extractSchemaFromUnionIfNeeded(field.schema());
  return effectiveSchema.getType() != Type.ARRAY;
}
@Override public void setConf(org.apache.hadoop.conf.Configuration conf) { if (conf == null) return; // you first get a null configuration - ignore that String mos = conf.get(AvroJob.MAP_OUTPUT_SCHEMA); Schema schema = Schema.parse(mos); pair = new Pair<Object, Object>(schema); Schema keySchema = Pair.getKeySchema(schema); final List<Field> fields = keySchema.getFields(); final GenericRecord key = new GenericData.Record(keySchema); projector = new Projector(key, fields); }
/**
 * Initializes the shared test fixtures before the tests run: a locked "jira" record schema with
 * a single string field named "json", and a sample JSON payload.
 */
@BeforeClass
public static void setUp() {
  final Schema stringType = new AvroRegistry().getConverter(String.class).getSchema();
  final Schema.Field payloadField =
      new Schema.Field("json", stringType, null, null, Order.ASCENDING);

  testSchema =
      Schema.createRecord("jira", null, null, false, Collections.singletonList(payloadField));
  testSchema.addProp(TALEND_IS_LOCKED, "true");

  testJson = "{\"startAt\":0,\"maxResults\":2,\"total\":1,\"issues\":[]}";
}
/** Checks that an Avro union of int and boolean is mapped to the matching ORC union type. */
@Test
public void test_getOrcField_union() throws Exception {
  final Schema recordSchema =
      SchemaBuilder.record("testRecord")
          .namespace("any.data")
          .fields()
          .name("union")
          .type()
          .unionOf()
          .intType()
          .and()
          .booleanType()
          .endUnion()
          .noDefault()
          .endRecord();

  final Schema unionSchema = recordSchema.getField("union").schema();
  final TypeInfo actual = NiFiOrcUtils.getOrcField(unionSchema);

  final TypeInfo expected =
      TypeInfoFactory.getUnionTypeInfo(
          Arrays.asList(TypeInfoCreator.createInt(), TypeInfoCreator.createBoolean()));
  assertEquals(expected, actual);
}
/** {@inheritDoc} */
@Override
public Array<CharSequence> convert(List<String> recommendationList) {
  // Copy the strings into a CharSequence list, preserving order.
  final List<CharSequence> items = new ArrayList<CharSequence>(recommendationList);
  final Schema stringArraySchema = Schema.createArray(Schema.create(Schema.Type.STRING));
  return new Array<CharSequence>(stringArraySchema, items);
}
/** Checks that an Avro map of doubles is mapped to an ORC map from string to double. */
@Test
public void test_getOrcField_map() throws Exception {
  final Schema recordSchema =
      SchemaBuilder.record("testRecord")
          .namespace("any.data")
          .fields()
          .name("map")
          .type()
          .map()
          .values()
          .doubleType()
          .noDefault()
          .endRecord();

  final Schema mapSchema = recordSchema.getField("map").schema();
  final TypeInfo actual = NiFiOrcUtils.getOrcField(mapSchema);

  final TypeInfo expected =
      TypeInfoFactory.getMapTypeInfo(
          TypeInfoCreator.createString(), TypeInfoCreator.createDouble());
  assertEquals(expected, actual);
}
@Test public void test_getHiveTypeFromAvroType_primitive() throws Exception { // Expected ORC types String[] expectedTypes = { "INT", "BIGINT", "BOOLEAN", "FLOAT", "DOUBLE", "BINARY", "STRING", }; Schema testSchema = buildPrimitiveAvroSchema(); List<Schema.Field> fields = testSchema.getFields(); for (int i = 0; i < fields.size(); i++) { assertEquals(expectedTypes[i], NiFiOrcUtils.getHiveTypeFromAvroType(fields.get(i).schema())); } }
static { AVRO_SCHEMA = SchemaBuilder.record(AVRO_RECORD_NAME) .namespace(AVRO_RECORD_NAMESPACE) .fields() .name(AVRO_TIMESTAMP_FIELD) .type(Schema.create(Schema.Type.LONG)) .noDefault() .name(AVRO_CONTENT_FIELD) .type(Schema.create(Schema.Type.STRING)) .noDefault() .endRecord(); }
/**
 * Finds the first schema whose parsed Avro definition equals the parsed given definition.
 *
 * @param schemas the candidate schemas, checked in list order
 * @param definition the Avro schema definition to look for
 * @return the first matching schema, or {@code null} when none matches
 */
@Override
public Schema match(List<Schema> schemas, String definition) {
  final org.apache.avro.Schema wanted = new org.apache.avro.Schema.Parser().parse(definition);
  for (Schema candidate : schemas) {
    // A fresh parser per candidate, exactly one parse per definition.
    final org.apache.avro.Schema parsedCandidate =
        new org.apache.avro.Schema.Parser().parse(candidate.getDefinition());
    if (parsedCandidate.equals(wanted)) {
      return candidate;
    }
  }
  return null;
}
/**
 * Determines whether a union is a nullable union. Note that this function doesn't check the
 * containing types of the input union recursively.
 *
 * <p>A union with at most one member is accepted; a union with more than two members is
 * rejected; a two-member union is accepted only when one of its members is NULL.
 *
 * @param in the schema to test
 * @return {@code true} if the schema is a union that satisfies the rules above
 */
public static boolean isAcceptableUnion(Schema in) {
  if (!in.getType().equals(Schema.Type.UNION)) {
    return false;
  }

  final List<Schema> members = in.getTypes();
  final int memberCount = members.size();
  if (memberCount <= 1) {
    return true;
  }
  if (memberCount > 2) {
    return false; /* contains more than 2 types */
  }

  /* exactly two members: one of them must be NULL */
  for (Schema member : members) {
    if (member.getType().equals(Schema.Type.NULL)) {
      return true;
    }
  }
  return false;
}