/**
 * Returns the merger of the given schemas. The merged schema contains the union of all columns,
 * in the order in which each column path is first encountered.
 *
 * <p>If the same schema path appears with more than one distinct minor type, the merged column is
 * given a UNION type whose subtypes are those minor types, in first-seen order. A column that is
 * already a UNION contributes its subtypes rather than UNION itself. Every column of the merged
 * schema is created with OPTIONAL data mode, regardless of the mode of the input columns.
 *
 * <p>The selection vector mode of the result is taken from the first schema.
 *
 * @param schemas the schemas to merge; at least one schema is required
 * @return a new schema containing one field per distinct schema path found in {@code schemas}
 * @throws IllegalArgumentException if no schemas are supplied
 * @throws RuntimeException if any column has a MAP or LIST type, which is not supported here
 */
public static BatchSchema mergeSchemas(BatchSchema... schemas) {
  // Fail fast with a clear message instead of an ArrayIndexOutOfBoundsException at schemas[0]
  // after all the merge work has already been done.
  if (schemas.length == 0) {
    throw new IllegalArgumentException("At least one schema is required to merge schemas");
  }

  // Insertion-ordered so that the merged columns keep first-seen order.
  Map<SchemaPath, Set<MinorType>> typeSetMap = Maps.newLinkedHashMap();

  for (BatchSchema s : schemas) {
    for (MaterializedField field : s) {
      SchemaPath path = field.getPath();
      Set<MinorType> currentTypes = typeSetMap.get(path);
      if (currentTypes == null) {
        // Insertion-ordered so that the subtype order of a generated UNION is deterministic;
        // a plain hash set of enum values iterates in an order that may vary between runs.
        currentTypes = Sets.newLinkedHashSet();
        typeSetMap.put(path, currentTypes);
      }

      MinorType newType = field.getType().getMinorType();
      if (newType == MinorType.MAP || newType == MinorType.LIST) {
        throw new RuntimeException(
            "Schema change not currently supported for schemas with complex types");
      }

      if (newType == MinorType.UNION) {
        // Flatten an existing union: record its member types, not UNION itself.
        for (MinorType subType : field.getType().getSubTypeList()) {
          currentTypes.add(subType);
        }
      } else {
        currentTypes.add(newType);
      }
    }
  }

  List<MaterializedField> fields = Lists.newArrayList();
  for (Map.Entry<SchemaPath, Set<MinorType>> entry : typeSetMap.entrySet()) {
    SchemaPath path = entry.getKey();
    Set<MinorType> types = entry.getValue();

    if (types.size() > 1) {
      // Conflicting types for the same path: expose them as an optional UNION.
      MajorType.Builder builder =
          MajorType.newBuilder().setMinorType(MinorType.UNION).setMode(DataMode.OPTIONAL);
      for (MinorType t : types) {
        builder.addSubType(t);
      }
      fields.add(MaterializedField.create(path, builder.build()));
    } else {
      // Exactly one type was seen for this path.
      fields.add(MaterializedField.create(path, Types.optional(types.iterator().next())));
    }
  }

  SchemaBuilder schemaBuilder = new SchemaBuilder();
  return schemaBuilder
      .addFields(fields)
      .setSelectionVectorMode(schemas[0].getSelectionVectorMode())
      .build();
}