Exemplo n.º 1
0
  @Test
  public void testCreateAndGetNestedTable1() throws Exception {
    // schema creation
    // three level nested schema
    //
    // s1
    //  |- s2
    //  |- s3
    //      |- s4
    //      |- s7
    //          |- s5
    //              |- s6
    //      |- s8
    //  |- s9

    SchemaBuilder nestedSchema = SchemaBuilder.builder();
    nestedSchema.add("s1", Type.INT8);
    nestedSchema.add("s2", Type.INT8);

    Schema s5 = SchemaBuilder.builder().add("s6", Type.INT8).build();
    Schema s7 = SchemaBuilder.builder().add("s5", new TypeDesc(s5)).build();

    Schema s3 =
        SchemaBuilder.builder()
            .add("s4", Type.INT8)
            .add("s7", new TypeDesc(s7))
            .add("s8", Type.INT8)
            .build();

    nestedSchema.add("s3", new TypeDesc(s3));
    nestedSchema.add("s9", Type.INT8);

    assertSchemaEquality("nested_schema1", nestedSchema.build());
  }
  /**
   * Load a record batch from a single buffer.
   *
   * @param def The definition for the record batch.
   * @param buf The buffer that holds the data associated with the record batch
   * @return Whether or not the schema changed since the previous load.
   * @throws SchemaChangeException
   */
  public boolean load(RecordBatchDef def, ByteBuf buf) throws SchemaChangeException {
    //    logger.debug("Loading record batch with def {} and data {}", def, buf);
    this.valueCount = def.getRecordCount();
    boolean schemaChanged = schema == null;

    Map<MaterializedField, ValueVector> oldFields = Maps.newHashMap();
    for (VectorWrapper<?> w : container) {
      ValueVector v = w.getValueVector();
      oldFields.put(v.getField(), v);
    }

    VectorContainer newVectors = new VectorContainer();

    List<FieldMetadata> fields = def.getFieldList();

    int bufOffset = 0;
    for (FieldMetadata fmd : fields) {
      FieldDef fieldDef = fmd.getDef();
      ValueVector v = oldFields.remove(fieldDef);
      if (v != null) {
        container.add(v);
        continue;
      }

      // if we arrive here, we didn't have a matching vector.
      schemaChanged = true;
      MaterializedField m = new MaterializedField(fieldDef);
      v = TypeHelper.getNewVector(m, allocator);
      if (fmd.getValueCount() == 0) {
        v.clear();
      } else {
        v.load(fmd, buf.slice(bufOffset, fmd.getBufferLength()));
      }
      bufOffset += fmd.getBufferLength();
      newVectors.add(v);
    }

    if (!oldFields.isEmpty()) {
      schemaChanged = true;
      for (ValueVector v : oldFields.values()) {
        v.close();
      }
    }

    // rebuild the schema.
    SchemaBuilder b = BatchSchema.newBuilder();
    for (VectorWrapper<?> v : newVectors) {
      b.addField(v.getField());
    }
    b.setSelectionVectorMode(BatchSchema.SelectionVectorMode.NONE);
    this.schema = b.build();
    container = newVectors;
    return schemaChanged;
  }
Exemplo n.º 3
0
  /**
   * Returns the merger of schemas. The merged schema will include the union all columns. If there
   * is a type conflict between columns with the same schemapath but different types, the merged
   * schema will contain a Union type.
   *
   * @param schemas
   * @return
   */
  public static BatchSchema mergeSchemas(BatchSchema... schemas) {
    Map<SchemaPath, Set<MinorType>> typeSetMap = Maps.newLinkedHashMap();

    for (BatchSchema s : schemas) {
      for (MaterializedField field : s) {
        SchemaPath path = field.getPath();
        Set<MinorType> currentTypes = typeSetMap.get(path);
        if (currentTypes == null) {
          currentTypes = Sets.newHashSet();
          typeSetMap.put(path, currentTypes);
        }
        MinorType newType = field.getType().getMinorType();
        if (newType == MinorType.MAP || newType == MinorType.LIST) {
          throw new RuntimeException(
              "Schema change not currently supported for schemas with complex types");
        }
        if (newType == MinorType.UNION) {
          for (MinorType subType : field.getType().getSubTypeList()) {
            currentTypes.add(subType);
          }
        } else {
          currentTypes.add(newType);
        }
      }
    }

    List<MaterializedField> fields = Lists.newArrayList();

    for (SchemaPath path : typeSetMap.keySet()) {
      Set<MinorType> types = typeSetMap.get(path);
      if (types.size() > 1) {
        MajorType.Builder builder =
            MajorType.newBuilder().setMinorType(MinorType.UNION).setMode(DataMode.OPTIONAL);
        for (MinorType t : types) {
          builder.addSubType(t);
        }
        MaterializedField field = MaterializedField.create(path, builder.build());
        fields.add(field);
      } else {
        MaterializedField field =
            MaterializedField.create(path, Types.optional(types.iterator().next()));
        fields.add(field);
      }
    }

    SchemaBuilder schemaBuilder = new SchemaBuilder();
    BatchSchema s =
        schemaBuilder
            .addFields(fields)
            .setSelectionVectorMode(schemas[0].getSelectionVectorMode())
            .build();
    return s;
  }
Exemplo n.º 4
0
  @Test
  public final void testAddAndDeleteTablePartitionByRange() throws Exception {
    Schema schema =
        SchemaBuilder.builder()
            .add("id", Type.INT4)
            .add("name", Type.TEXT)
            .add("age", Type.INT4)
            .add("score", Type.FLOAT8)
            .build();

    String tableName =
        IdentifierUtil.buildFQName(TajoConstants.DEFAULT_DATABASE_NAME, "addedtable");
    KeyValueSet opts = new KeyValueSet();
    opts.set("file.delimiter", ",");
    TableMeta meta = CatalogUtil.newTableMeta("TEXT", opts);

    Schema partSchema = SchemaBuilder.builder().add("id", Type.INT4).build();
    PartitionMethodDesc partitionDesc =
        new PartitionMethodDesc(
            DEFAULT_DATABASE_NAME, tableName, CatalogProtos.PartitionType.RANGE, "id", partSchema);

    TableDesc desc =
        new TableDesc(
            tableName,
            schema,
            meta,
            new Path(CommonTestingUtil.getTestDir(), "addedtable").toUri());
    desc.setPartitionMethod(partitionDesc);
    assertFalse(catalog.existsTable(tableName));
    catalog.createTable(desc);
    assertTrue(catalog.existsTable(tableName));

    TableDesc retrieved = catalog.getTableDesc(tableName);

    assertEquals(retrieved.getName(), tableName);
    assertEquals(
        retrieved.getPartitionMethod().getPartitionType(), CatalogProtos.PartitionType.RANGE);
    assertEquals(
        retrieved.getPartitionMethod().getExpressionSchema().getColumn(0).getSimpleName(), "id");

    catalog.dropTable(tableName);
    assertFalse(catalog.existsTable(tableName));
  }
Exemplo n.º 5
0
 private TableDesc createMockupTable(String databaseName, String tableName) throws IOException {
   schema1 =
       SchemaBuilder.builder()
           .add(FieldName1, Type.BLOB)
           .add(FieldName2, Type.INT4)
           .add(FieldName3, Type.INT8)
           .build();
   Path path = new Path(CommonTestingUtil.getTestDir(), tableName);
   TableDesc table =
       new TableDesc(
           IdentifierUtil.buildFQName(databaseName, tableName),
           schema1,
           new TableMeta("TEXT", new KeyValueSet()),
           path.toUri(),
           true);
   return table;
 }
Exemplo n.º 6
0
  public static TableDesc prepareTable() throws IOException {
    relationSchema =
        SchemaBuilder.builder()
            .add(DEFAULT_DATABASE_NAME + ".indexed.id", Type.INT4)
            .add(DEFAULT_DATABASE_NAME + ".indexed.name", Type.TEXT)
            .add(DEFAULT_DATABASE_NAME + ".indexed.age", Type.INT4)
            .add(DEFAULT_DATABASE_NAME + ".indexed.score", Type.FLOAT8)
            .build();

    String tableName = "indexed";

    TableMeta meta = CatalogUtil.newTableMeta(BuiltinStorages.TEXT, server.getConf());
    return new TableDesc(
        IdentifierUtil.buildFQName(TajoConstants.DEFAULT_DATABASE_NAME, tableName),
        relationSchema,
        meta,
        new Path(CommonTestingUtil.getTestDir(), "indexed").toUri());
  }
Exemplo n.º 7
0
  @Test
  public void testGetTable() throws Exception {
    schema1 =
        SchemaBuilder.builder()
            .add(FieldName1, Type.BLOB)
            .add(FieldName2, Type.INT4)
            .add(FieldName3, Type.INT8)
            .build();
    Path path = new Path(CommonTestingUtil.getTestDir(), "table1");
    TableDesc meta =
        new TableDesc(
            IdentifierUtil.buildFQName(DEFAULT_DATABASE_NAME, "getTable"),
            schema1,
            "TEXT",
            new KeyValueSet(),
            path.toUri());

    assertFalse(catalog.existsTable(DEFAULT_DATABASE_NAME, "getTable"));
    catalog.createTable(meta);
    assertTrue(catalog.existsTable(DEFAULT_DATABASE_NAME, "getTable"));

    catalog.dropTable(IdentifierUtil.buildFQName(DEFAULT_DATABASE_NAME, "getTable"));
    assertFalse(catalog.existsTable(DEFAULT_DATABASE_NAME, "getTable"));
  }
Exemplo n.º 8
0
  // TODO: This should be added at TAJO-1891
  public final void testAddAndDeleteTablePartitionByColumn() throws Exception {
    Schema schema =
        SchemaBuilder.builder()
            .add("id", Type.INT4)
            .add("name", Type.TEXT)
            .add("age", Type.INT4)
            .add("score", Type.FLOAT8)
            .build();

    String simpleTableName = "addedtable";
    String tableName = IdentifierUtil.buildFQName(DEFAULT_DATABASE_NAME, simpleTableName);
    KeyValueSet opts = new KeyValueSet();
    opts.set("file.delimiter", ",");
    TableMeta meta = CatalogUtil.newTableMeta("TEXT", opts);

    Schema partSchema = SchemaBuilder.builder().add("id", Type.INT4).add("name", Type.TEXT).build();

    PartitionMethodDesc partitionMethodDesc =
        new PartitionMethodDesc(
            DEFAULT_DATABASE_NAME,
            tableName,
            CatalogProtos.PartitionType.COLUMN,
            "id,name",
            partSchema);

    TableDesc desc =
        new TableDesc(
            tableName,
            schema,
            meta,
            new Path(CommonTestingUtil.getTestDir(), simpleTableName).toUri());
    desc.setPartitionMethod(partitionMethodDesc);
    assertFalse(catalog.existsTable(tableName));
    catalog.createTable(desc);
    assertTrue(catalog.existsTable(tableName));

    TableDesc retrieved = catalog.getTableDesc(tableName);

    assertEquals(retrieved.getName(), tableName);
    assertEquals(
        retrieved.getPartitionMethod().getPartitionType(), CatalogProtos.PartitionType.COLUMN);
    assertEquals(
        retrieved.getPartitionMethod().getExpressionSchema().getColumn(0).getSimpleName(), "id");

    testAddPartition(tableName, "id=10/name=aaa");
    testAddPartition(tableName, "id=20/name=bbb");

    List<CatalogProtos.PartitionDescProto> partitions =
        catalog.getPartitionsOfTable(DEFAULT_DATABASE_NAME, simpleTableName);
    assertNotNull(partitions);
    assertEquals(partitions.size(), 2);
    assertEquals(partitions.get(0).getNumBytes(), 0L);

    testGetPartitionsByAlgebra(DEFAULT_DATABASE_NAME, simpleTableName);

    testDropPartition(tableName, "id=10/name=aaa");
    testDropPartition(tableName, "id=20/name=bbb");

    partitions = catalog.getPartitionsOfTable(DEFAULT_DATABASE_NAME, simpleTableName);
    assertNotNull(partitions);
    assertEquals(partitions.size(), 0);

    catalog.dropTable(tableName);
    assertFalse(catalog.existsTable(tableName));
  }