Example #1
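 /**
  * Maps each key column of the schema to its TupleType. keyIndex holds the
  * positions of the key columns within the schema.
  */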
 private static TupleType[] tupleTypes(Schema schema, int[] keyIndex) {
   TupleType[] types = new TupleType[keyIndex.length];
   for (int i = 0; i < keyIndex.length; i++) {
     types[i] = tupleType(schema.getColumn(keyIndex[i]).getDataType().getType());
   }
   return types;
 }
Example #2
  /**
   * Creates a new TajoRecordConverter.
   *
   * @param parquetSchema The Parquet schema of the projection.
   * @param tajoReadSchema The Tajo schema of the table.
   * @param projectionMap An array mapping the projection column to the column index in the table.
   */
  public TajoRecordConverter(GroupType parquetSchema, Schema tajoReadSchema, int[] projectionMap) {
    this.parquetSchema = parquetSchema;
    this.tajoReadSchema = tajoReadSchema;
    this.projectionMap = projectionMap;
    this.tupleSize = tajoReadSchema.size();

    // The projectionMap.length does not match parquetSchema.getFieldCount()
    // when the projection contains NULL_TYPE columns. We skip over the
    // NULL_TYPE columns when we construct the converters and populate the
    // NULL_TYPE columns with NullDatums in end() (see Example #4).
    int index = 0;
    this.converters = new Converter[parquetSchema.getFieldCount()];
    for (int i = 0; i < projectionMap.length; ++i) {
      final int projectionIndex = projectionMap[i];
      Column column = tajoReadSchema.getColumn(projectionIndex);
      if (column.getDataType().getType() == TajoDataTypes.Type.NULL_TYPE) {
        continue;
      }
      Type type = parquetSchema.getType(index);
      final int writeIndex = i;
      converters[index] =
          newConverter(
              column,
              type,
              new ParentValueContainer() {
                @Override
                void add(Object value) {
                  TajoRecordConverter.this.set(writeIndex, value);
                }
              });
      ++index;
    }
  }
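Note how this wiring pairs with Example #4: every non-NULL_TYPE projected
column gets a Parquet Converter whose ParentValueContainer callback writes the
converted value into the output tuple, while NULL_TYPE columns get no
converter at all and are back-filled with NullDatums once the record has been
fully processed.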
Example #3
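  /**
   * Builds a SortSpec for every column of the schema, each in ascending
   * order with nulls last (the boolean arguments are the ascending and
   * null-first flags).
   */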
  public static SortSpec[] schemaToSortSpecs(Schema schema) {
    SortSpec[] specs = new SortSpec[schema.getColumnNum()];

    for (int i = 0; i < schema.getColumnNum(); i++) {
      specs[i] = new SortSpec(schema.getColumn(i), true, false);
    }

    return specs;
  }
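For comparison, Example #9 below builds a one-element SortSpec array by hand
before constructing a TupleComparator. With this helper the same setup
collapses to the sketch below (a minimal sketch, assuming the helper is
visible from the call site, e.g. as a static utility like the PlannerUtil
methods in Example #6):

  Schema idxSchema = new Schema();
  idxSchema.addColumn("managerid", Type.INT4);
  // One ascending sort key per schema column.
  SortSpec[] sortKeys = schemaToSortSpecs(idxSchema);
  TupleComparator comp = new TupleComparator(idxSchema, sortKeys);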
Example #4
 /** Called after all fields have been processed. */
 @Override
 public void end() {
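   // Back-fill NULL_TYPE columns (which never received a converter in the
   // constructor) and any column the converters left blank with NullDatum.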
   for (int i = 0; i < projectionMap.length; ++i) {
     final int projectionIndex = projectionMap[i];
     Column column = tajoReadSchema.getColumn(projectionIndex);
     if (column.getDataType().getType() == TajoDataTypes.Type.NULL_TYPE
         || currentTuple.isBlankOrNull(i)) {
       set(projectionIndex, NullDatum.get());
     }
   }
 }
Example #5
  private boolean isClonedSchema(Schema origSchema, Schema newSchema) {
    // Check schema of tables
    boolean schemaEqual = (origSchema.size() == newSchema.size());
    if (!schemaEqual) {
      fail("Number of columns in schema not equal");
      return false;
    }

    for (int col = 0; col < origSchema.size(); col++) {
      Column colA = origSchema.getColumn(col);
      Column colB = newSchema.getColumn(col);
      if (!colA.getSimpleName().equals(colB.getSimpleName())) {
        fail("Column names at index " + col + " do not match");
        return false;
      }
      if (!colA.getDataType().equals(colB.getDataType())) {
        fail("Column datatypes at index " + col + " do not match");
        return false;
      }
    }
    return true;
  }
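A hedged usage sketch (the copy mechanism is an assumption here; whichever
clone method or copy constructor the codebase provides would do):

  Schema copy = new Schema(origSchema); // assumed copy constructor
  assertTrue(isClonedSchema(origSchema, copy));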
Example #6
  @Test
  public final void testGetSortKeysFromJoinQual() {
    Schema outerSchema = new Schema();
    outerSchema.addColumn("employee.id1", CatalogUtil.newSimpleDataType(Type.INT4));
    outerSchema.addColumn("employee.id2", CatalogUtil.newSimpleDataType(Type.INT4));
    Schema innerSchema = new Schema();
    innerSchema.addColumn("people.fid1", CatalogUtil.newSimpleDataType(Type.INT4));
    innerSchema.addColumn("people.fid2", CatalogUtil.newSimpleDataType(Type.INT4));

    FieldEval f1 = new FieldEval("employee.id1", CatalogUtil.newSimpleDataType(Type.INT4));
    FieldEval f2 = new FieldEval("people.fid1", CatalogUtil.newSimpleDataType(Type.INT4));
    FieldEval f3 = new FieldEval("employee.id2", CatalogUtil.newSimpleDataType(Type.INT4));
    FieldEval f4 = new FieldEval("people.fid2", CatalogUtil.newSimpleDataType(Type.INT4));

    EvalNode joinQual = new BinaryEval(EvalType.EQUAL, f1, f2);
    SortSpec[][] sortSpecs =
        PlannerUtil.getSortKeysFromJoinQual(joinQual, outerSchema, innerSchema);
    assertEquals(2, sortSpecs.length);
    assertEquals(1, sortSpecs[0].length);
    assertEquals(1, sortSpecs[1].length);
    assertEquals(outerSchema.getColumn("id1"), sortSpecs[0][0].getSortKey());
    assertEquals(innerSchema.getColumn("fid1"), sortSpecs[1][0].getSortKey());

    // tests for a composite join key
    EvalNode joinQual2 = new BinaryEval(EvalType.EQUAL, f3, f4);
    EvalNode compositedJoinQual = new BinaryEval(EvalType.AND, joinQual, joinQual2);

    sortSpecs = PlannerUtil.getSortKeysFromJoinQual(compositedJoinQual, outerSchema, innerSchema);
    assertEquals(2, sortSpecs.length);
    assertEquals(2, sortSpecs[0].length);
    assertEquals(2, sortSpecs[1].length);
    assertEquals(outerSchema.getColumn("id1"), sortSpecs[0][0].getSortKey());
    assertEquals(outerSchema.getColumn("id2"), sortSpecs[0][1].getSortKey());
    assertEquals(innerSchema.getColumn("fid1"), sortSpecs[1][0].getSortKey());
    assertEquals(innerSchema.getColumn("fid2"), sortSpecs[1][1].getSortKey());
  }
Example #7
  /**
   * Computes the value cardinality of a tuple range.
   *
   * @return the estimated number of distinct values covered by the range
   */
  public static BigDecimal computeCardinalityForAllColumns(
      Schema schema, TupleRange range, boolean inclusive) {
    Tuple start = range.getStart();
    Tuple end = range.getEnd();
    Column col;

    BigDecimal cardinality = BigDecimal.ONE;
    BigDecimal columnCard;
    for (int i = 0; i < schema.getColumnNum(); i++) {
      col = schema.getColumn(i);
      columnCard = computeCardinality(col.getDataType(), start.get(i), end.get(i), inclusive);

      if (BigDecimal.ZERO.compareTo(columnCard) < 0) {
        cardinality = cardinality.multiply(columnCard);
      }
    }

    return cardinality;
  }
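As a concrete check: with two columns whose inclusive per-column ranges yield
cardinalities of 5 and 10, the method returns 5 * 10 = 50. A column whose
cardinality comes back as zero or negative is skipped rather than zeroing out
the whole product.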
Example #8
 private void writeRecordFields(GroupType schema, Schema tajoSchema, Tuple tuple) {
   List<Type> fields = schema.getFields();
   // Parquet ignores Tajo NULL_TYPE columns, so the index may differ.
   int index = 0;
   for (int tajoIndex = 0; tajoIndex < tajoSchema.size(); ++tajoIndex) {
     Column column = tajoSchema.getColumn(tajoIndex);
     if (column.getDataType().getType() == TajoDataTypes.Type.NULL_TYPE) {
       continue;
     }
     Type fieldType = fields.get(index);
     if (!tuple.isBlankOrNull(tajoIndex)) {
       recordConsumer.startField(fieldType.getName(), index);
       writeValue(column, tuple, tajoIndex);
       recordConsumer.endField(fieldType.getName(), index);
     } else if (fieldType.isRepetition(Type.Repetition.REQUIRED)) {
       throw new RuntimeException("Null-value for required field: " + column.getSimpleName());
     }
     ++index;
   }
 }
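The index bookkeeping mirrors Example #2: index tracks the position within the
Parquet field list and advances only for non-NULL_TYPE columns, so it can lag
behind tajoIndex whenever the Tajo schema contains NULL_TYPE columns.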
Example #9
  @Before
  public void setup() throws Exception {
    this.randomValues = new HashMap<Integer, Integer>();
    this.conf = new TajoConf();
    util = new TajoTestingCluster();
    util.startCatalogCluster();
    catalog = util.getMiniCatalogCluster().getCatalog();

    Path workDir = CommonTestingUtil.getTestDir();
    catalog.createTablespace(DEFAULT_TABLESPACE_NAME, workDir.toUri().toString());
    catalog.createDatabase(TajoConstants.DEFAULT_DATABASE_NAME, DEFAULT_TABLESPACE_NAME);
    sm = StorageManagerFactory.getStorageManager(conf, workDir);

    idxPath = new Path(workDir, "test.idx");

    Schema schema = new Schema();
    schema.addColumn("managerid", Type.INT4);
    schema.addColumn("empid", Type.INT4);
    schema.addColumn("deptname", Type.TEXT);

    this.idxSchema = new Schema();
    idxSchema.addColumn("managerid", Type.INT4);
    SortSpec[] sortKeys = new SortSpec[1];
    sortKeys[0] = new SortSpec(idxSchema.getColumn("managerid"), true, false);
    this.comp = new TupleComparator(idxSchema, sortKeys);

    this.writer =
        new BSTIndex(conf)
            .getIndexWriter(idxPath, BSTIndex.TWO_LEVEL_INDEX, this.idxSchema, this.comp);
    writer.setLoadNum(100);
    writer.open();
    long offset;

    meta = CatalogUtil.newTableMeta(StoreType.CSV);
    tablePath = StorageUtil.concatPath(workDir, "employee", "table.csv");
    fs = tablePath.getFileSystem(conf);
    fs.mkdirs(tablePath.getParent());

    FileAppender appender =
        (FileAppender)
            StorageManagerFactory.getStorageManager(conf).getAppender(meta, schema, tablePath);
    appender.init();
    Tuple tuple = new VTuple(schema.size());
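    // Append 10,000 rows: record each row's file offset in the BST index
    // under its randomly generated managerid key, and count how many times
    // each key value occurs in randomValues.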
    for (int i = 0; i < 10000; i++) {
      Tuple key = new VTuple(this.idxSchema.size());
      int rndKey = rnd.nextInt(250);
      if (this.randomValues.containsKey(rndKey)) {
        int t = this.randomValues.remove(rndKey) + 1;
        this.randomValues.put(rndKey, t);
      } else {
        this.randomValues.put(rndKey, 1);
      }

      key.put(new Datum[] {DatumFactory.createInt4(rndKey)});
      tuple.put(
          new Datum[] {
            DatumFactory.createInt4(rndKey),
            DatumFactory.createInt4(rnd.nextInt(10)),
            DatumFactory.createText("dept_" + rnd.nextInt(10))
          });
      offset = appender.getOffset();
      appender.addTuple(tuple);
      writer.write(key, offset);
    }
    appender.flush();
    appender.close();
    writer.close();

    TableDesc desc =
        new TableDesc(
            CatalogUtil.buildFQName(TajoConstants.DEFAULT_DATABASE_NAME, "employee"),
            schema,
            meta,
            sm.getTablePath("employee"));
    catalog.createTable(desc);

    analyzer = new SQLAnalyzer();
    planner = new LogicalPlanner(catalog);
    optimizer = new LogicalOptimizer(conf);
  }