/**
 * Resolves the tuple type of every key column.
 *
 * @param schema schema that the entries of {@code keyIndex} refer to
 * @param keyIndex positions of the key columns within {@code schema}
 * @return one {@code TupleType} per entry of {@code keyIndex}, in the same order
 */
private static TupleType[] tupleTypes(Schema schema, int[] keyIndex) {
  TupleType[] resolved = new TupleType[keyIndex.length];
  int slot = 0;
  for (int columnId : keyIndex) {
    // Map the column's declared data type onto its tuple type.
    resolved[slot] = tupleType(schema.getColumn(columnId).getDataType().getType());
    slot++;
  }
  return resolved;
}
/** * Creates a new TajoRecordConverter. * * @param parquetSchema The Parquet schema of the projection. * @param tajoReadSchema The Tajo schema of the table. * @param projectionMap An array mapping the projection column to the column index in the table. */ public TajoRecordConverter(GroupType parquetSchema, Schema tajoReadSchema, int[] projectionMap) { this.parquetSchema = parquetSchema; this.tajoReadSchema = tajoReadSchema; this.projectionMap = projectionMap; this.tupleSize = tajoReadSchema.size(); // The projectionMap.length does not match parquetSchema.getFieldCount() // when the projection contains NULL_TYPE columns. We will skip over the // NULL_TYPE columns when we construct the converters and populate the // NULL_TYPE columns with NullDatums in start(). int index = 0; this.converters = new Converter[parquetSchema.getFieldCount()]; for (int i = 0; i < projectionMap.length; ++i) { final int projectionIndex = projectionMap[i]; Column column = tajoReadSchema.getColumn(projectionIndex); if (column.getDataType().getType() == TajoDataTypes.Type.NULL_TYPE) { continue; } Type type = parquetSchema.getType(index); final int writeIndex = i; converters[index] = newConverter( column, type, new ParentValueContainer() { @Override void add(Object value) { TajoRecordConverter.this.set(writeIndex, value); } }); ++index; } }
/**
 * Builds one sort spec per column of the given schema, in column order.
 *
 * <p>Every spec is constructed with the flags {@code (true, false)}
 * (presumably ascending / nulls-last; confirm against the SortSpec constructor).
 *
 * @param schema schema whose columns become sort keys
 * @return an array with exactly one {@code SortSpec} per schema column
 */
public static SortSpec[] schemaToSortSpecs(Schema schema) {
  final int columnCount = schema.getColumnNum();
  SortSpec[] sortSpecs = new SortSpec[columnCount];
  for (int pos = 0; pos < columnCount; pos++) {
    sortSpecs[pos] = new SortSpec(schema.getColumn(pos), true, false);
  }
  return sortSpecs;
}
/**
 * Called after all fields have been processed.
 *
 * <p>Every projected slot that belongs to a NULL_TYPE column, or that is still
 * blank/null after the field converters ran, is filled with {@code NullDatum}.
 */
@Override
public void end() {
  for (int i = 0; i < projectionMap.length; ++i) {
    // projectionMap[i] is the column's index in the table schema; it is used
    // only to look up the column's type.
    Column column = tajoReadSchema.getColumn(projectionMap[i]);
    if (column.getDataType().getType() == TajoDataTypes.Type.NULL_TYPE
        || currentTuple.isBlankOrNull(i)) {
      // Fill the slot that was just tested. The tuple is addressed by the
      // position in the projection (the field converters write to slot i as
      // well), not by the table column index.
      set(i, NullDatum.get());
    }
  }
}
private boolean isClonedSchema(Schema origSchema, Schema newSchema) { // Check schema of tables boolean schemaEqual = (origSchema.size() == newSchema.size()); if (schemaEqual == false) { fail("Number of columns in schema not equal"); return false; } for (int col = 0; col < origSchema.size(); col++) { Column colA = origSchema.getColumn(col); Column colB = newSchema.getColumn(col); if (colA.getSimpleName().equals(colB.getSimpleName()) == false) { fail("Column names at index " + col + " do not match"); return false; } if (colA.getDataType().equals(colB.getDataType()) == false) { fail("Column datatypes at index " + col + " do not match"); return false; } } return true; }
@Test public final void testGetSortKeysFromJoinQual() { Schema outerSchema = new Schema(); outerSchema.addColumn("employee.id1", CatalogUtil.newSimpleDataType(Type.INT4)); outerSchema.addColumn("employee.id2", CatalogUtil.newSimpleDataType(Type.INT4)); Schema innerSchema = new Schema(); innerSchema.addColumn("people.fid1", CatalogUtil.newSimpleDataType(Type.INT4)); innerSchema.addColumn("people.fid2", CatalogUtil.newSimpleDataType(Type.INT4)); FieldEval f1 = new FieldEval("employee.id1", CatalogUtil.newSimpleDataType(Type.INT4)); FieldEval f2 = new FieldEval("people.fid1", CatalogUtil.newSimpleDataType(Type.INT4)); FieldEval f3 = new FieldEval("employee.id2", CatalogUtil.newSimpleDataType(Type.INT4)); FieldEval f4 = new FieldEval("people.fid2", CatalogUtil.newSimpleDataType(Type.INT4)); EvalNode joinQual = new BinaryEval(EvalType.EQUAL, f1, f2); SortSpec[][] sortSpecs = PlannerUtil.getSortKeysFromJoinQual(joinQual, outerSchema, innerSchema); assertEquals(2, sortSpecs.length); assertEquals(1, sortSpecs[0].length); assertEquals(1, sortSpecs[1].length); assertEquals(outerSchema.getColumn("id1"), sortSpecs[0][0].getSortKey()); assertEquals(innerSchema.getColumn("fid1"), sortSpecs[1][0].getSortKey()); // tests for composited join key EvalNode joinQual2 = new BinaryEval(EvalType.EQUAL, f3, f4); EvalNode compositedJoinQual = new BinaryEval(EvalType.AND, joinQual, joinQual2); sortSpecs = PlannerUtil.getSortKeysFromJoinQual(compositedJoinQual, outerSchema, innerSchema); assertEquals(2, sortSpecs.length); assertEquals(2, sortSpecs[0].length); assertEquals(2, sortSpecs[1].length); assertEquals(outerSchema.getColumn("id1"), sortSpecs[0][0].getSortKey()); assertEquals(outerSchema.getColumn("id2"), sortSpecs[0][1].getSortKey()); assertEquals(innerSchema.getColumn("fid1"), sortSpecs[1][0].getSortKey()); assertEquals(innerSchema.getColumn("fid2"), sortSpecs[1][1].getSortKey()); }
/**
 * Computes the value cardinality of a tuple range over all columns of a schema.
 *
 * <p>For each column, the per-column cardinality between the range's start and
 * end values is obtained from {@code computeCardinality}. All strictly positive
 * per-column cardinalities are multiplied together; zero or negative ones are
 * skipped (presumably so that a single degenerate column does not collapse the
 * whole product).
 *
 * @param schema schema whose columns line up positionally with the range's tuples
 * @param range range supplying the start and end tuples
 * @param inclusive forwarded unchanged to {@code computeCardinality} for every column
 * @return the product of the positive per-column cardinalities, or 1 when no
 *     column has a positive cardinality
 */
public static BigDecimal computeCardinalityForAllColumns(
    Schema schema, TupleRange range, boolean inclusive) {
  Tuple start = range.getStart();
  Tuple end = range.getEnd();

  BigDecimal cardinality = BigDecimal.ONE;
  final int columnCount = schema.getColumnNum();
  for (int i = 0; i < columnCount; i++) {
    BigDecimal columnCard =
        computeCardinality(schema.getColumn(i).getDataType(), start.get(i), end.get(i), inclusive);
    // signum() > 0 is the same test as "0 < columnCard", without allocating
    // a fresh BigDecimal zero on every iteration.
    if (columnCard.signum() > 0) {
      cardinality = cardinality.multiply(columnCard);
    }
  }
  return cardinality;
}
private void writeRecordFields(GroupType schema, Schema tajoSchema, Tuple tuple) { List<Type> fields = schema.getFields(); // Parquet ignores Tajo NULL_TYPE columns, so the index may differ. int index = 0; for (int tajoIndex = 0; tajoIndex < tajoSchema.size(); ++tajoIndex) { Column column = tajoSchema.getColumn(tajoIndex); if (column.getDataType().getType() == TajoDataTypes.Type.NULL_TYPE) { continue; } Type fieldType = fields.get(index); if (!tuple.isBlankOrNull(tajoIndex)) { recordConsumer.startField(fieldType.getName(), index); writeValue(column, tuple, tajoIndex); recordConsumer.endField(fieldType.getName(), index); } else if (fieldType.isRepetition(Type.Repetition.REQUIRED)) { throw new RuntimeException("Null-value for required field: " + column.getSimpleName()); } ++index; } }
/**
 * Prepares the fixture shared by the tests.
 *
 * <p>Starts a catalog cluster, creates the default tablespace and database,
 * writes 10000 random rows to a CSV table {@code employee}
 * (managerid, empid, deptname), and builds a two-level BST index on
 * {@code managerid}. Each row is indexed under the offset reported by the
 * appender just before that row is appended. {@code randomValues} records how
 * many rows were generated for each managerid. Finally the table is registered
 * in the catalog and the analyzer, planner and optimizer are created.
 */
@Before
public void setup() throws Exception {
  this.randomValues = new HashMap<Integer, Integer>();
  this.conf = new TajoConf();

  // Catalog and storage.
  util = new TajoTestingCluster();
  util.startCatalogCluster();
  catalog = util.getMiniCatalogCluster().getCatalog();

  Path testDir = CommonTestingUtil.getTestDir();
  catalog.createTablespace(DEFAULT_TABLESPACE_NAME, testDir.toUri().toString());
  catalog.createDatabase(TajoConstants.DEFAULT_DATABASE_NAME, DEFAULT_TABLESPACE_NAME);
  sm = StorageManagerFactory.getStorageManager(conf, testDir);

  idxPath = new Path(testDir, "test.idx");

  // Table schema.
  Schema employeeSchema = new Schema();
  employeeSchema.addColumn("managerid", Type.INT4);
  employeeSchema.addColumn("empid", Type.INT4);
  employeeSchema.addColumn("deptname", Type.TEXT);

  // Index schema: a single key column, managerid.
  this.idxSchema = new Schema();
  idxSchema.addColumn("managerid", Type.INT4);
  SortSpec[] indexSortSpecs = {new SortSpec(idxSchema.getColumn("managerid"), true, false)};
  this.comp = new TupleComparator(idxSchema, indexSortSpecs);

  this.writer =
      new BSTIndex(conf)
          .getIndexWriter(idxPath, BSTIndex.TWO_LEVEL_INDEX, this.idxSchema, this.comp);
  writer.setLoadNum(100);
  writer.open();

  // Table file.
  meta = CatalogUtil.newTableMeta(StoreType.CSV);
  tablePath = StorageUtil.concatPath(testDir, "employee", "table.csv");
  fs = tablePath.getFileSystem(conf);
  fs.mkdirs(tablePath.getParent());

  FileAppender appender =
      (FileAppender)
          StorageManagerFactory.getStorageManager(conf)
              .getAppender(meta, employeeSchema, tablePath);
  appender.init();

  Tuple row = new VTuple(employeeSchema.size());
  for (int rowNo = 0; rowNo < 10000; rowNo++) {
    Tuple indexKey = new VTuple(this.idxSchema.size());

    // Draw order matters for the random sequence: managerid, empid, dept suffix.
    int managerId = rnd.nextInt(250);

    // Count how many rows carry this managerid.
    Integer seenSoFar = this.randomValues.get(managerId);
    this.randomValues.put(managerId, seenSoFar == null ? 1 : seenSoFar + 1);

    indexKey.put(new Datum[] {DatumFactory.createInt4(managerId)});

    int empId = rnd.nextInt(10);
    int deptNo = rnd.nextInt(10);
    row.put(
        new Datum[] {
          DatumFactory.createInt4(managerId),
          DatumFactory.createInt4(empId),
          DatumFactory.createText("dept_" + deptNo)
        });

    // Capture the offset before appending, then index the row under it.
    long offset = appender.getOffset();
    appender.addTuple(row);
    writer.write(indexKey, offset);
  }
  appender.flush();
  appender.close();
  writer.close();

  TableDesc employeeTable =
      new TableDesc(
          CatalogUtil.buildFQName(TajoConstants.DEFAULT_DATABASE_NAME, "employee"),
          employeeSchema,
          meta,
          sm.getTablePath("employee"));
  catalog.createTable(employeeTable);

  analyzer = new SQLAnalyzer();
  planner = new LogicalPlanner(catalog);
  optimizer = new LogicalOptimizer(conf);
}