@Test public void testCreateAndGetNestedTable1() throws Exception { // schema creation // three level nested schema // // s1 // |- s2 // |- s3 // |- s4 // |- s7 // |- s5 // |- s6 // |- s8 // |- s9 SchemaBuilder nestedSchema = SchemaBuilder.builder(); nestedSchema.add("s1", Type.INT8); nestedSchema.add("s2", Type.INT8); Schema s5 = SchemaBuilder.builder().add("s6", Type.INT8).build(); Schema s7 = SchemaBuilder.builder().add("s5", new TypeDesc(s5)).build(); Schema s3 = SchemaBuilder.builder() .add("s4", Type.INT8) .add("s7", new TypeDesc(s7)) .add("s8", Type.INT8) .build(); nestedSchema.add("s3", new TypeDesc(s3)); nestedSchema.add("s9", Type.INT8); assertSchemaEquality("nested_schema1", nestedSchema.build()); }
/** * Load a record batch from a single buffer. * * @param def The definition for the record batch. * @param buf The buffer that holds the data associated with the record batch * @return Whether or not the schema changed since the previous load. * @throws SchemaChangeException */ public boolean load(RecordBatchDef def, ByteBuf buf) throws SchemaChangeException { // logger.debug("Loading record batch with def {} and data {}", def, buf); this.valueCount = def.getRecordCount(); boolean schemaChanged = schema == null; Map<MaterializedField, ValueVector> oldFields = Maps.newHashMap(); for (VectorWrapper<?> w : container) { ValueVector v = w.getValueVector(); oldFields.put(v.getField(), v); } VectorContainer newVectors = new VectorContainer(); List<FieldMetadata> fields = def.getFieldList(); int bufOffset = 0; for (FieldMetadata fmd : fields) { FieldDef fieldDef = fmd.getDef(); ValueVector v = oldFields.remove(fieldDef); if (v != null) { container.add(v); continue; } // if we arrive here, we didn't have a matching vector. schemaChanged = true; MaterializedField m = new MaterializedField(fieldDef); v = TypeHelper.getNewVector(m, allocator); if (fmd.getValueCount() == 0) { v.clear(); } else { v.load(fmd, buf.slice(bufOffset, fmd.getBufferLength())); } bufOffset += fmd.getBufferLength(); newVectors.add(v); } if (!oldFields.isEmpty()) { schemaChanged = true; for (ValueVector v : oldFields.values()) { v.close(); } } // rebuild the schema. SchemaBuilder b = BatchSchema.newBuilder(); for (VectorWrapper<?> v : newVectors) { b.addField(v.getField()); } b.setSelectionVectorMode(BatchSchema.SelectionVectorMode.NONE); this.schema = b.build(); container = newVectors; return schemaChanged; }
/**
 * Merges several batch schemas into a single schema containing the union of all their columns.
 *
 * <p>Columns are identified by their {@code SchemaPath} and appear in the result in the order in
 * which each path was first encountered. When the same path is seen with more than one minor
 * type (the sub-types of an incoming UNION column are counted individually), the merged column
 * is an optional UNION over all of those types; otherwise the merged column is built with
 * {@code Types.optional} for the single type seen.
 *
 * <p>The selection vector mode of the result is taken from the first schema, so at least one
 * schema has to be supplied.
 *
 * @param schemas the schemas to merge
 * @return the merged schema
 * @throws RuntimeException if any column has minor type MAP or LIST
 */
public static BatchSchema mergeSchemas(BatchSchema... schemas) {
  // Linked map: keeps the first-seen ordering of column paths.
  Map<SchemaPath, Set<MinorType>> typesByPath = Maps.newLinkedHashMap();

  for (BatchSchema schema : schemas) {
    for (MaterializedField column : schema) {
      SchemaPath columnPath = column.getPath();
      Set<MinorType> seenTypes = typesByPath.get(columnPath);
      if (seenTypes == null) {
        seenTypes = Sets.newHashSet();
        typesByPath.put(columnPath, seenTypes);
      }

      MajorType columnType = column.getType();
      MinorType minorType = columnType.getMinorType();
      if (minorType == MinorType.MAP || minorType == MinorType.LIST) {
        throw new RuntimeException(
            "Schema change not currently supported for schemas with complex types");
      }

      if (minorType == MinorType.UNION) {
        // Flatten an existing union into its member types.
        seenTypes.addAll(columnType.getSubTypeList());
      } else {
        seenTypes.add(minorType);
      }
    }
  }

  List<MaterializedField> mergedFields = Lists.newArrayList();
  for (Map.Entry<SchemaPath, Set<MinorType>> entry : typesByPath.entrySet()) {
    SchemaPath columnPath = entry.getKey();
    Set<MinorType> columnTypes = entry.getValue();

    if (columnTypes.size() > 1) {
      MajorType.Builder unionType =
          MajorType.newBuilder().setMinorType(MinorType.UNION).setMode(DataMode.OPTIONAL);
      for (MinorType memberType : columnTypes) {
        unionType.addSubType(memberType);
      }
      mergedFields.add(MaterializedField.create(columnPath, unionType.build()));
    } else {
      MinorType onlyType = columnTypes.iterator().next();
      mergedFields.add(MaterializedField.create(columnPath, Types.optional(onlyType)));
    }
  }

  return new SchemaBuilder()
      .addFields(mergedFields)
      .setSelectionVectorMode(schemas[0].getSelectionVectorMode())
      .build();
}
/**
 * Creates a table carrying a RANGE partition method on the {@code id} column, reads the table
 * descriptor back from the catalog and checks the name, partition type and first partition
 * expression column, then drops the table again.
 */
@Test
public final void testAddAndDeleteTablePartitionByRange() throws Exception {
  Schema schema =
      SchemaBuilder.builder()
          .add("id", Type.INT4)
          .add("name", Type.TEXT)
          .add("age", Type.INT4)
          .add("score", Type.FLOAT8)
          .build();

  String tableName =
      IdentifierUtil.buildFQName(TajoConstants.DEFAULT_DATABASE_NAME, "addedtable");

  KeyValueSet opts = new KeyValueSet();
  opts.set("file.delimiter", ",");
  TableMeta meta = CatalogUtil.newTableMeta("TEXT", opts);

  Schema partSchema = SchemaBuilder.builder().add("id", Type.INT4).build();
  PartitionMethodDesc partitionDesc =
      new PartitionMethodDesc(
          DEFAULT_DATABASE_NAME, tableName, CatalogProtos.PartitionType.RANGE, "id", partSchema);

  TableDesc desc =
      new TableDesc(
          tableName, schema, meta, new Path(CommonTestingUtil.getTestDir(), "addedtable").toUri());
  desc.setPartitionMethod(partitionDesc);

  assertFalse(catalog.existsTable(tableName));
  catalog.createTable(desc);
  assertTrue(catalog.existsTable(tableName));

  // assertEquals takes (expected, actual); keeping that order makes failure messages accurate.
  TableDesc retrieved = catalog.getTableDesc(tableName);
  assertEquals(tableName, retrieved.getName());
  assertEquals(
      CatalogProtos.PartitionType.RANGE, retrieved.getPartitionMethod().getPartitionType());
  assertEquals(
      "id", retrieved.getPartitionMethod().getExpressionSchema().getColumn(0).getSimpleName());

  catalog.dropTable(tableName);
  assertFalse(catalog.existsTable(tableName));
}
/**
 * Builds a {@code TableDesc} for tests without registering it anywhere.
 *
 * <p>As a side effect the three-column schema created here is stored in {@code schema1}. The
 * table uses a {@code "TEXT"} meta with an empty option set and a location named after the table
 * under the common test directory.
 *
 * @param databaseName database part of the fully qualified table name
 * @param tableName simple table name, also used as the directory name
 * @return the newly constructed descriptor
 * @throws IOException declared for callers; presumably raised while resolving the test
 *     directory -- TODO confirm
 */
private TableDesc createMockupTable(String databaseName, String tableName) throws IOException {
  schema1 =
      SchemaBuilder.builder()
          .add(FieldName1, Type.BLOB)
          .add(FieldName2, Type.INT4)
          .add(FieldName3, Type.INT8)
          .build();

  Path location = new Path(CommonTestingUtil.getTestDir(), tableName);
  String qualifiedName = IdentifierUtil.buildFQName(databaseName, tableName);
  TableMeta textMeta = new TableMeta("TEXT", new KeyValueSet());

  // NOTE(review): the meaning of the trailing boolean constructor argument is not visible here.
  return new TableDesc(qualifiedName, schema1, textMeta, location.toUri(), true);
}
/**
 * Builds the descriptor of the {@code indexed} table in the default database.
 *
 * <p>As a side effect the four-column schema created here is stored in {@code relationSchema};
 * its column names are qualified with the default database name and {@code indexed}.
 *
 * @return a descriptor for the {@code indexed} table located under the common test directory
 * @throws IOException declared for callers; presumably raised while resolving the test
 *     directory -- TODO confirm
 */
public static TableDesc prepareTable() throws IOException {
  relationSchema =
      SchemaBuilder.builder()
          .add(DEFAULT_DATABASE_NAME + ".indexed.id", Type.INT4)
          .add(DEFAULT_DATABASE_NAME + ".indexed.name", Type.TEXT)
          .add(DEFAULT_DATABASE_NAME + ".indexed.age", Type.INT4)
          .add(DEFAULT_DATABASE_NAME + ".indexed.score", Type.FLOAT8)
          .build();

  String tableName = "indexed";
  TableMeta textMeta = CatalogUtil.newTableMeta(BuiltinStorages.TEXT, server.getConf());

  String qualifiedName =
      IdentifierUtil.buildFQName(TajoConstants.DEFAULT_DATABASE_NAME, tableName);
  Path location = new Path(CommonTestingUtil.getTestDir(), "indexed");

  return new TableDesc(qualifiedName, relationSchema, textMeta, location.toUri());
}
/**
 * Checks the create / exists / drop round trip for a table named {@code getTable} in the default
 * database. The schema built here is also stored in {@code schema1}.
 */
@Test
public void testGetTable() throws Exception {
  schema1 =
      SchemaBuilder.builder()
          .add(FieldName1, Type.BLOB)
          .add(FieldName2, Type.INT4)
          .add(FieldName3, Type.INT8)
          .build();

  Path location = new Path(CommonTestingUtil.getTestDir(), "table1");
  String qualifiedName = IdentifierUtil.buildFQName(DEFAULT_DATABASE_NAME, "getTable");
  TableDesc desc =
      new TableDesc(qualifiedName, schema1, "TEXT", new KeyValueSet(), location.toUri());

  // The table must not exist before creation ...
  assertFalse(catalog.existsTable(DEFAULT_DATABASE_NAME, "getTable"));

  // ... must exist once created ...
  catalog.createTable(desc);
  assertTrue(catalog.existsTable(DEFAULT_DATABASE_NAME, "getTable"));

  // ... and must be gone again after being dropped.
  catalog.dropTable(IdentifierUtil.buildFQName(DEFAULT_DATABASE_NAME, "getTable"));
  assertFalse(catalog.existsTable(DEFAULT_DATABASE_NAME, "getTable"));
}
// TODO: This should be added at TAJO-1891 public final void testAddAndDeleteTablePartitionByColumn() throws Exception { Schema schema = SchemaBuilder.builder() .add("id", Type.INT4) .add("name", Type.TEXT) .add("age", Type.INT4) .add("score", Type.FLOAT8) .build(); String simpleTableName = "addedtable"; String tableName = IdentifierUtil.buildFQName(DEFAULT_DATABASE_NAME, simpleTableName); KeyValueSet opts = new KeyValueSet(); opts.set("file.delimiter", ","); TableMeta meta = CatalogUtil.newTableMeta("TEXT", opts); Schema partSchema = SchemaBuilder.builder().add("id", Type.INT4).add("name", Type.TEXT).build(); PartitionMethodDesc partitionMethodDesc = new PartitionMethodDesc( DEFAULT_DATABASE_NAME, tableName, CatalogProtos.PartitionType.COLUMN, "id,name", partSchema); TableDesc desc = new TableDesc( tableName, schema, meta, new Path(CommonTestingUtil.getTestDir(), simpleTableName).toUri()); desc.setPartitionMethod(partitionMethodDesc); assertFalse(catalog.existsTable(tableName)); catalog.createTable(desc); assertTrue(catalog.existsTable(tableName)); TableDesc retrieved = catalog.getTableDesc(tableName); assertEquals(retrieved.getName(), tableName); assertEquals( retrieved.getPartitionMethod().getPartitionType(), CatalogProtos.PartitionType.COLUMN); assertEquals( retrieved.getPartitionMethod().getExpressionSchema().getColumn(0).getSimpleName(), "id"); testAddPartition(tableName, "id=10/name=aaa"); testAddPartition(tableName, "id=20/name=bbb"); List<CatalogProtos.PartitionDescProto> partitions = catalog.getPartitionsOfTable(DEFAULT_DATABASE_NAME, simpleTableName); assertNotNull(partitions); assertEquals(partitions.size(), 2); assertEquals(partitions.get(0).getNumBytes(), 0L); testGetPartitionsByAlgebra(DEFAULT_DATABASE_NAME, simpleTableName); testDropPartition(tableName, "id=10/name=aaa"); testDropPartition(tableName, "id=20/name=bbb"); partitions = catalog.getPartitionsOfTable(DEFAULT_DATABASE_NAME, simpleTableName); assertNotNull(partitions); 
assertEquals(partitions.size(), 0); catalog.dropTable(tableName); assertFalse(catalog.existsTable(tableName)); }