@Override
public FunctionDefinition[] getFunctionDefintions() {
  return new FunctionDefinition[] {
    FunctionDefinition.simple(
        "bytesubstring",
        new BasicArgumentValidator(
            new Arg(
                Types.required(TypeProtos.MinorType.VARBINARY),
                Types.optional(TypeProtos.MinorType.VARBINARY)),
            new Arg(false, false, "offset", TypeProtos.MinorType.BIGINT),
            new Arg(false, false, "length", TypeProtos.MinorType.BIGINT)),
        new OutputTypeDeterminer.SameAsFirstInput(),
        "byte_substr")
  };
}
public OrderedPartitionRecordBatch(
    OrderedPartitionSender pop, RecordBatch incoming, FragmentContext context)
    throws OutOfMemoryException {
  super(pop, context);
  this.incoming = incoming;
  this.partitions = pop.getDestinations().size();
  this.sendingMajorFragmentWidth = pop.getSendingWidth();
  this.recordsToSample = pop.getRecordsToSample();
  this.samplingFactor = pop.getSamplingFactor();
  this.completionFactor = pop.getCompletionFactor();

  DistributedCache cache = context.getDrillbitContext().getCache();
  this.mmap = cache.getMultiMap(MULTI_CACHE_CONFIG);
  this.tableMap = cache.getMap(SINGLE_CACHE_CONFIG);
  Preconditions.checkNotNull(tableMap);

  this.mapKey = String.format(
      "%s_%d",
      context.getHandle().getQueryId(),
      context.getHandle().getMajorFragmentId());
  this.minorFragmentSampleCount = cache.getCounter(mapKey);

  SchemaPath outputPath = popConfig.getRef();
  MaterializedField outputField =
      MaterializedField.create(outputPath, Types.required(TypeProtos.MinorType.INT));
  this.partitionKeyVector =
      (IntVector) TypeHelper.getNewVector(outputField, oContext.getAllocator());
}
@Test
public void testVectorMetadataIsAccurate() throws Exception {
  final VectorVerifier noChild = new ChildVerifier();
  final VectorVerifier offsetChild = new ChildVerifier(UInt4Holder.TYPE);

  final ImmutableMap.Builder<Class, VectorVerifier> builder = ImmutableMap.builder();
  builder.put(UInt4Vector.class, noChild);
  builder.put(BitVector.class, noChild);
  builder.put(VarCharVector.class, offsetChild);
  builder.put(
      NullableVarCharVector.class,
      new ChildVerifier(UInt1Holder.TYPE, Types.optional(TypeProtos.MinorType.VARCHAR)));
  builder.put(
      RepeatedListVector.class,
      new ChildVerifier(UInt4Holder.TYPE, Types.LATE_BIND_TYPE));
  builder.put(MapVector.class, noChild);
  builder.put(RepeatedMapVector.class, offsetChild);
  final ImmutableMap<Class, VectorVerifier> children = builder.build();

  testVectors(
      new VectorVerifier() {
        @Override
        public void verify(ValueVector vector) throws Exception {
          final Class klazz = vector.getClass();
          final VectorVerifier verifier = children.get(klazz);
          verifier.verify(vector);
        }
      });
}
private void initCols(Schema schema) throws SchemaChangeException {
  ImmutableList.Builder<ProjectedColumnInfo> pciBuilder = ImmutableList.builder();

  for (int i = 0; i < schema.getColumnCount(); i++) {
    ColumnSchema col = schema.getColumnByIndex(i);

    final String name = col.getName();
    final Type kuduType = col.getType();
    MinorType minorType = TYPES.get(kuduType);
    if (minorType == null) {
      logger.warn(
          "Ignoring column that is unsupported.",
          UserException.unsupportedError()
              .message(
                  "A column you queried has a data type that is not currently supported by the Kudu storage plugin. "
                      + "The column's name was %s and its Kudu data type was %s. ",
                  name, kuduType.toString())
              .addContext("column Name", name)
              .addContext("plugin", "kudu")
              .build(logger));
      continue;
    }

    MajorType majorType;
    if (col.isNullable()) {
      majorType = Types.optional(minorType);
    } else {
      majorType = Types.required(minorType);
    }
    MaterializedField field = MaterializedField.create(name, majorType);
    final Class<? extends ValueVector> clazz =
        (Class<? extends ValueVector>)
            TypeHelper.getValueVectorClass(minorType, majorType.getMode());
    ValueVector vector = output.addField(field, clazz);
    vector.allocateNew();

    ProjectedColumnInfo pci = new ProjectedColumnInfo();
    pci.vv = vector;
    pci.kuduColumn = col;
    pci.index = i;
    pciBuilder.add(pci);
  }

  projectedCols = pciBuilder.build();
}
@Override
public boolean equals(DataValue v) {
  if (v == null) {
    return false;
  }
  if (Types.isNumericType(v.getDataType())) {
    return this.compareTo(v) == 0;
  } else {
    return false;
  }
}
@Override
public LogicalExpression visitFunctionCall(
    FunctionCall call, FunctionImplementationRegistry registry) {
  List<LogicalExpression> args = Lists.newArrayList();
  for (int i = 0; i < call.args.size(); ++i) {
    LogicalExpression newExpr = call.args.get(i).accept(this, registry);
    args.add(newExpr);
  }

  // Replace with a new function call, since its arguments could have changed.
  call = new FunctionCall(call.getDefinition(), args, call.getPosition());

  // Call the function resolver to get the best match.
  FunctionResolver resolver = FunctionResolverFactory.getResolver(call);
  DrillFuncHolder matchedFuncHolder =
      resolver.getBestMatch(registry.getMethods().get(call.getDefinition().getName()), call);

  // New argument list, possibly with implicit casts inserted.
  List<LogicalExpression> argsWithCast = Lists.newArrayList();

  if (matchedFuncHolder == null) {
    // TODO: no matching function holder was found. Raise an exception here?
    return validateNewExpr(call);
  } else {
    // Compare each parameter type against the argument type and insert a cast on top of the
    // argument whenever necessary.
    for (int i = 0; i < call.args.size(); ++i) {
      MajorType parmType = matchedFuncHolder.getParmMajorType(i);

      // Case 1: the argument is a NullExpression, and either the matched parameter allows null
      // input or the function's null handling is NULL_IF_NULL (meaning null and non-null are
      // interchangeable). Replace the NullExpression with a TypedNullConstant.
      if (call.args.get(i).equals(NullExpression.INSTANCE)
          && (parmType.getMode().equals(DataMode.OPTIONAL)
              || matchedFuncHolder.getNullHandling() == NullHandling.NULL_IF_NULL)) {
        argsWithCast.add(new TypedNullConstant(parmType));
      } else if (Types.softEquals(
          parmType,
          call.args.get(i).getMajorType(),
          matchedFuncHolder.getNullHandling() == NullHandling.NULL_IF_NULL)) {
        // Case 2: the argument and parameter types match. Do nothing.
        argsWithCast.add(call.args.get(i));
      } else {
        // Case 3: insert a cast because the parameter type differs from the argument type.
        FunctionDefinition castFuncDef =
            CastFunctionDefs.getCastFuncDef(parmType.getMinorType());
        List<LogicalExpression> castArgs = Lists.newArrayList();
        castArgs.add(call.args.get(i)); // input_expr
        argsWithCast.add(new FunctionCall(castFuncDef, castArgs, ExpressionPosition.UNKNOWN));
      }
    }
  }

  return validateNewExpr(
      new FunctionCall(call.getDefinition(), argsWithCast, call.getPosition()));
}
/**
 * Returns the merger of the given schemas. The merged schema includes the union of all columns
 * across the inputs. If columns with the same SchemaPath have conflicting types, the merged
 * schema carries a Union type for that column.
 *
 * @param schemas the schemas to merge
 * @return the merged schema
 */
public static BatchSchema mergeSchemas(BatchSchema... schemas) {
  Map<SchemaPath, Set<MinorType>> typeSetMap = Maps.newLinkedHashMap();
  for (BatchSchema s : schemas) {
    for (MaterializedField field : s) {
      SchemaPath path = field.getPath();
      Set<MinorType> currentTypes = typeSetMap.get(path);
      if (currentTypes == null) {
        currentTypes = Sets.newHashSet();
        typeSetMap.put(path, currentTypes);
      }
      MinorType newType = field.getType().getMinorType();
      if (newType == MinorType.MAP || newType == MinorType.LIST) {
        throw new RuntimeException(
            "Schema change not currently supported for schemas with complex types");
      }
      if (newType == MinorType.UNION) {
        for (MinorType subType : field.getType().getSubTypeList()) {
          currentTypes.add(subType);
        }
      } else {
        currentTypes.add(newType);
      }
    }
  }

  List<MaterializedField> fields = Lists.newArrayList();
  for (SchemaPath path : typeSetMap.keySet()) {
    Set<MinorType> types = typeSetMap.get(path);
    if (types.size() > 1) {
      MajorType.Builder builder =
          MajorType.newBuilder().setMinorType(MinorType.UNION).setMode(DataMode.OPTIONAL);
      for (MinorType t : types) {
        builder.addSubType(t);
      }
      MaterializedField field = MaterializedField.create(path, builder.build());
      fields.add(field);
    } else {
      MaterializedField field =
          MaterializedField.create(path, Types.optional(types.iterator().next()));
      fields.add(field);
    }
  }

  SchemaBuilder schemaBuilder = new SchemaBuilder();
  BatchSchema s =
      schemaBuilder
          .addFields(fields)
          .setSelectionVectorMode(schemas[0].getSelectionVectorMode())
          .build();
  return s;
}
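// A minimal usage sketch of mergeSchemas(...), built only from calls that already appear above
// (MaterializedField.create, Types.optional, SchemaBuilder chaining). SchemaPath.getSimplePath(...)
// and BatchSchema.SelectionVectorMode.NONE are assumed Drill APIs not shown in this snippet; the
// column name "a" and the two input schemas are hypothetical.
SchemaPath a = SchemaPath.getSimplePath("a"); // assumed helper

BatchSchema left = new SchemaBuilder()
    .addFields(Lists.newArrayList(
        MaterializedField.create(a, Types.optional(MinorType.BIGINT))))
    .setSelectionVectorMode(BatchSchema.SelectionVectorMode.NONE) // assumed enum constant
    .build();

BatchSchema right = new SchemaBuilder()
    .addFields(Lists.newArrayList(
        MaterializedField.create(a, Types.optional(MinorType.VARCHAR))))
    .setSelectionVectorMode(BatchSchema.SelectionVectorMode.NONE)
    .build();

BatchSchema merged = mergeSchemas(left, right);
// Because "a" appears with two different minor types, the loop above emits an OPTIONAL UNION
// MajorType for it, with BIGINT and VARCHAR as sub-types:
// merged.getColumn(0).getType().getMinorType() == MinorType.UNION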
/**
 * Initializes and adds the repeated VARCHAR vector to the record batch, and performs sanity
 * checks that the selected columns are valid.
 *
 * @param outputMutator used to create/modify the schema in the record batch
 * @param columns list of columns selected in the query
 * @param isStarQuery whether all fields are selected
 * @throws SchemaChangeException
 */
public RepeatedVarCharOutput(
    OutputMutator outputMutator, Collection<SchemaPath> columns, boolean isStarQuery)
    throws SchemaChangeException {
  super();

  MaterializedField field =
      MaterializedField.create(REF, Types.repeated(TypeProtos.MinorType.VARCHAR));
  this.vector = outputMutator.addField(field, RepeatedVarCharVector.class);
  this.mutator = vector.getMutator();

  { // setup fields
    List<Integer> columnIds = new ArrayList<Integer>();
    if (!isStarQuery) {
      String pathStr;
      for (SchemaPath path : columns) {
        assert path.getRootSegment().isNamed() : "root segment should be named";
        pathStr = path.getRootSegment().getPath();
        Preconditions.checkArgument(
            pathStr.equals(COL_NAME)
                || (pathStr.equals("*") && path.getRootSegment().getChild() == null),
            String.format(
                "Selected column '%s' must have name 'columns' or must be plain '*'", pathStr));

        if (path.getRootSegment().getChild() != null) {
          Preconditions.checkArgument(
              path.getRootSegment().getChild().isArray(),
              String.format("Selected column '%s' must be an array index", pathStr));
          int index = path.getRootSegment().getChild().getArraySegment().getIndex();
          columnIds.add(index);
        }
      }
      Collections.sort(columnIds);
    }

    boolean[] fields = new boolean[MAXIMUM_NUMBER_COLUMNS];
    int maxField = fields.length;

    if (isStarQuery) {
      Arrays.fill(fields, true);
    } else {
      for (Integer i : columnIds) {
        maxField = 0;
        maxField = Math.max(maxField, i);
        fields[i] = true;
      }
    }
    this.collectedFields = fields;
    this.maxField = maxField;
  }
}
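// To make the selection-mask bookkeeping above concrete, here is a small sketch that replays the
// same loop with plain integers standing in for the array indices extracted from columns[n]
// paths (java.util imports assumed; the MAXIMUM_NUMBER_COLUMNS value below is a hypothetical
// stand-in for the class constant).
int MAXIMUM_NUMBER_COLUMNS = 64 * 1024; // stand-in value
List<Integer> columnIds = new ArrayList<>(Arrays.asList(4, 1, 7)); // from columns[4], columns[1], columns[7]
Collections.sort(columnIds); // [1, 4, 7]

boolean[] fields = new boolean[MAXIMUM_NUMBER_COLUMNS];
int maxField = fields.length;
for (Integer i : columnIds) {
  maxField = 0;                     // reset, then take the current id; because columnIds is
  maxField = Math.max(maxField, i); // sorted, the loop exits with the largest selected index
  fields[i] = true;
}
// fields[1], fields[4] and fields[7] are now true, and maxField == 7; for a star query the
// branch above instead fills the whole mask and leaves maxField at fields.length.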
protected FieldWriter getWriter(MinorType type) {
  if (state == State.UNION) {
    return writer;
  }
  if (state == State.UNTYPED) {
    if (type == null) {
      return null;
    }
    ValueVector v =
        listVector.addOrGetVector(new VectorDescriptor(Types.optional(type))).getVector();
    v.allocateNew();
    setWriter(v);
    writer.setPosition(position);
  }
  if (type != this.type) {
    return promoteToUnion();
  }
  return writer;
}
private FieldWriter promoteToUnion() {
  String name = vector.getField().getLastName();
  TransferPair tp =
      vector.getTransferPair(
          vector.getField().getType().getMinorType().name().toLowerCase(),
          vector.getAllocator());
  tp.transfer();
  if (parentContainer != null) {
    unionVector =
        parentContainer.addOrGet(name, Types.optional(MinorType.UNION), UnionVector.class);
  } else if (listVector != null) {
    unionVector = listVector.promoteToUnion();
  }
  unionVector.addVector(tp.getTo());
  writer = new UnionWriter(unionVector);
  writer.setPosition(idx());
  for (int i = 0; i < idx(); i++) {
    unionVector.getMutator().setType(i, vector.getField().getType().getMinorType());
  }
  vector = null;
  state = State.UNION;
  return writer;
}
@Override
public TypeProtos.MajorType getVectorType(SchemaPath column, PlannerSettings plannerSettings) {
  return Types.optional(TypeProtos.MinorType.VARCHAR);
}
@Override
public void setup(OperatorContext operatorContext, OutputMutator output)
    throws ExecutionSetupException {
  this.operatorContext = operatorContext;
  if (!isStarQuery()) {
    columnsFound = new boolean[getColumns().size()];
    nullFilledVectors = new ArrayList<>();
  }
  columnStatuses = new ArrayList<>();
  // totalRecords = footer.getBlocks().get(rowGroupIndex).getRowCount();
  List<ColumnDescriptor> columns = footer.getFileMetaData().getSchema().getColumns();
  allFieldsFixedLength = true;
  ColumnDescriptor column;
  ColumnChunkMetaData columnChunkMetaData;
  int columnsToScan = 0;
  mockRecordsRead = 0;

  MaterializedField field;
  // ParquetMetadataConverter metaConverter = new ParquetMetadataConverter();
  FileMetaData fileMetaData;

  logger.debug(
      "Reading row group({}) with {} records in file {}.",
      rowGroupIndex,
      footer.getBlocks().get(rowGroupIndex).getRowCount(),
      hadoopPath.toUri().getPath());
  totalRecordsRead = 0;

  // TODO - figure out how to deal with this better once we add nested reading, note also look
  // where this map is used below
  // store a map from column name to converted types if they are non-null
  Map<String, SchemaElement> schemaElements =
      ParquetReaderUtility.getColNameToSchemaElementMapping(footer);

  // loop to add up the length of the fixed width columns and build the schema
  for (int i = 0; i < columns.size(); ++i) {
    column = columns.get(i);
    SchemaElement se = schemaElements.get(column.getPath()[0]);
    MajorType mt =
        ParquetToDrillTypeConverter.toMajorType(
            column.getType(),
            se.getType_length(),
            getDataMode(column),
            se,
            fragmentContext.getOptions());
    field = MaterializedField.create(toFieldName(column.getPath()), mt);
    if (!fieldSelected(field)) {
      continue;
    }
    columnsToScan++;
    int dataTypeLength = getDataTypeLength(column, se);
    if (dataTypeLength == -1) {
      allFieldsFixedLength = false;
    } else {
      bitWidthAllFixedFields += dataTypeLength;
    }
  }
  // rowGroupOffset =
  //     footer.getBlocks().get(rowGroupIndex).getColumns().get(0).getFirstDataPageOffset();

  if (columnsToScan != 0 && allFieldsFixedLength) {
    recordsPerBatch =
        (int)
            Math.min(
                Math.min(
                    batchSize / bitWidthAllFixedFields,
                    footer.getBlocks().get(0).getColumns().get(0).getValueCount()),
                65535);
  } else {
    recordsPerBatch = DEFAULT_RECORDS_TO_READ_IF_NOT_FIXED_WIDTH;
  }

  try {
    ValueVector vector;
    SchemaElement schemaElement;
    final ArrayList<VarLengthColumn<? extends ValueVector>> varLengthColumns = new ArrayList<>();
    // initialize all of the column read status objects
    boolean fieldFixedLength;
    // the column chunk meta-data is not guaranteed to be in the same order as the columns in
    // the schema, so a map is constructed for fast access to the correct columnChunkMetadata
    // corresponding to an element in the schema
    Map<String, Integer> columnChunkMetadataPositionsInList = new HashMap<>();
    BlockMetaData rowGroupMetadata = footer.getBlocks().get(rowGroupIndex);

    int colChunkIndex = 0;
    for (ColumnChunkMetaData colChunk : rowGroupMetadata.getColumns()) {
      columnChunkMetadataPositionsInList.put(
          Arrays.toString(colChunk.getPath().toArray()), colChunkIndex);
      colChunkIndex++;
    }
    for (int i = 0; i < columns.size(); ++i) {
      column = columns.get(i);
      columnChunkMetaData =
          rowGroupMetadata
              .getColumns()
              .get(columnChunkMetadataPositionsInList.get(Arrays.toString(column.getPath())));
      schemaElement = schemaElements.get(column.getPath()[0]);
      MajorType type =
          ParquetToDrillTypeConverter.toMajorType(
              column.getType(),
              schemaElement.getType_length(),
              getDataMode(column),
              schemaElement,
              fragmentContext.getOptions());
      field = MaterializedField.create(toFieldName(column.getPath()), type);
      // the field was not requested to be read
      if (!fieldSelected(field)) {
        continue;
      }

      fieldFixedLength = column.getType() != PrimitiveType.PrimitiveTypeName.BINARY;
      vector =
          output.addField(
              field,
              (Class<? extends ValueVector>)
                  TypeHelper.getValueVectorClass(type.getMinorType(), type.getMode()));
      if (column.getType() != PrimitiveType.PrimitiveTypeName.BINARY) {
        if (column.getMaxRepetitionLevel() > 0) {
          final RepeatedValueVector repeatedVector = RepeatedValueVector.class.cast(vector);
          ColumnReader<?> dataReader =
              ColumnReaderFactory.createFixedColumnReader(
                  this,
                  fieldFixedLength,
                  column,
                  columnChunkMetaData,
                  recordsPerBatch,
                  repeatedVector.getDataVector(),
                  schemaElement);
          varLengthColumns.add(
              new FixedWidthRepeatedReader(
                  this,
                  dataReader,
                  getTypeLengthInBits(column.getType()),
                  -1,
                  column,
                  columnChunkMetaData,
                  false,
                  repeatedVector,
                  schemaElement));
        } else {
          columnStatuses.add(
              ColumnReaderFactory.createFixedColumnReader(
                  this,
                  fieldFixedLength,
                  column,
                  columnChunkMetaData,
                  recordsPerBatch,
                  vector,
                  schemaElement));
        }
      } else {
        // create a reader and add it to the appropriate list
        varLengthColumns.add(
            ColumnReaderFactory.getReader(
                this, -1, column, columnChunkMetaData, false, vector, schemaElement));
      }
    }
    varLengthReader = new VarLenBinaryReader(this, varLengthColumns);

    if (!isStarQuery()) {
      List<SchemaPath> projectedColumns = Lists.newArrayList(getColumns());
      SchemaPath col;
      for (int i = 0; i < columnsFound.length; i++) {
        col = projectedColumns.get(i);
        assert col != null;
        if (!columnsFound[i] && !col.equals(STAR_COLUMN)) {
          nullFilledVectors.add(
              (NullableIntVector)
                  output.addField(
                      MaterializedField.create(
                          col.getAsUnescapedPath(), Types.optional(TypeProtos.MinorType.INT)),
                      (Class<? extends ValueVector>)
                          TypeHelper.getValueVectorClass(
                              TypeProtos.MinorType.INT, DataMode.OPTIONAL)));
        }
      }
    }
  } catch (Exception e) {
    handleAndRaise("Failure in setting up reader", e);
  }
}
/**
 * Gets an AvaticaType carrying both the JDBC {@code java.sql.Types.*} type code and the SQL type
 * name for the given SQL type name.
 */
private static AvaticaType getAvaticaType(String sqlTypeName) {
  final int jdbcTypeId = Types.getJdbcTypeCode(sqlTypeName);
  return ColumnMetaData.scalar(jdbcTypeId, sqlTypeName, Rep.BOOLEAN /* dummy value, unused */);
}
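// A hypothetical call for reference (a minimal sketch, assuming "INTEGER" is among the names
// Types.getJdbcTypeCode(...) understands and maps to java.sql.Types.INTEGER, and that
// AvaticaType exposes public id/name/rep fields as in Avatica's ColumnMetaData):
AvaticaType intType = getAvaticaType("INTEGER");
// intType.id   -> java.sql.Types.INTEGER (per the assumption above)
// intType.name -> "INTEGER"
// intType.rep  -> Rep.BOOLEAN (the dummy, unused value noted in the method)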
public void updateColumnMetaData(
    String catalogName,
    String schemaName,
    String tableName,
    BatchSchema schema,
    List<Class<?>> getObjectClasses) {
  final List<ColumnMetaData> newColumns = new ArrayList<>(schema.getFieldCount());
  for (int colOffset = 0; colOffset < schema.getFieldCount(); colOffset++) {
    final MaterializedField field = schema.getColumn(colOffset);
    Class<?> objectClass = getObjectClasses.get(colOffset);

    final String columnName = field.getPath();
    final MajorType rpcDataType = field.getType();
    final AvaticaType bundledSqlDataType = getAvaticaType(rpcDataType);
    final String columnClassName = objectClass.getName();

    final int nullability;
    switch (field.getDataMode()) {
      case OPTIONAL:
        nullability = ResultSetMetaData.columnNullable;
        break;
      case REQUIRED:
        nullability = ResultSetMetaData.columnNoNulls;
        break;
      // Should REPEATED still map to columnNoNulls? or to columnNullable?
      case REPEATED:
        nullability = ResultSetMetaData.columnNoNulls;
        break;
      default:
        throw new AssertionError(
            "Unexpected new DataMode value '" + field.getDataMode().name() + "'");
    }

    final boolean isSigned = Types.isJdbcSignedType(rpcDataType);

    // TODO(DRILL-3355): TODO(DRILL-3356): When string lengths, precisions,
    // interval kinds, etc., are available from RPC-level data, implement:
    // - precision for ResultSetMetadata.getPrecision(...) (like getColumns()'s COLUMN_SIZE),
    // - scale for getScale(...), and
    // - displaySize for getColumnDisplaySize(...).
    final int precision = rpcDataType.hasPrecision() ? rpcDataType.getPrecision() : 0;
    final int scale = rpcDataType.hasScale() ? rpcDataType.getScale() : 0;
    final int displaySize = 10;

    ColumnMetaData col =
        new ColumnMetaData(
            colOffset, // zero-based ordinal (for Java arrays/lists)
            false, /* autoIncrement */
            false, /* caseSensitive */
            true, /* searchable */
            false, /* currency */
            nullability,
            isSigned,
            displaySize,
            columnName, /* label */
            columnName, /* columnName */
            schemaName,
            precision,
            scale,
            tableName,
            catalogName,
            bundledSqlDataType,
            true, /* readOnly */
            false, /* writable */
            false, /* definitelyWritable */
            columnClassName);
    newColumns.add(col);
  }
  columns = newColumns;
}
@Override
public boolean supportsCompare(DataValue dv2) {
  return Types.isNumericType(dv2.getDataType());
}
@Test
public void testHashFunctionResolution(@Injectable DrillConfig config)
    throws JClassAlreadyExistsException, IOException {
  FunctionImplementationRegistry registry = new FunctionImplementationRegistry(config);

  // test required vs nullable Int input
  resolveHash(
      config,
      new TypedNullConstant(Types.optional(TypeProtos.MinorType.INT)),
      Types.optional(TypeProtos.MinorType.INT),
      Types.required(TypeProtos.MinorType.INT),
      TypeProtos.DataMode.OPTIONAL,
      registry);
  resolveHash(
      config,
      new ValueExpressions.IntExpression(1, ExpressionPosition.UNKNOWN),
      Types.required(TypeProtos.MinorType.INT),
      Types.required(TypeProtos.MinorType.INT),
      TypeProtos.DataMode.REQUIRED,
      registry);

  // test required vs nullable float input
  resolveHash(
      config,
      new TypedNullConstant(Types.optional(TypeProtos.MinorType.FLOAT4)),
      Types.optional(TypeProtos.MinorType.FLOAT4),
      Types.required(TypeProtos.MinorType.FLOAT4),
      TypeProtos.DataMode.OPTIONAL,
      registry);
  resolveHash(
      config,
      new ValueExpressions.FloatExpression(5.0f, ExpressionPosition.UNKNOWN),
      Types.required(TypeProtos.MinorType.FLOAT4),
      Types.required(TypeProtos.MinorType.FLOAT4),
      TypeProtos.DataMode.REQUIRED,
      registry);

  // test required vs nullable long input
  resolveHash(
      config,
      new TypedNullConstant(Types.optional(TypeProtos.MinorType.BIGINT)),
      Types.optional(TypeProtos.MinorType.BIGINT),
      Types.required(TypeProtos.MinorType.BIGINT),
      TypeProtos.DataMode.OPTIONAL,
      registry);
  resolveHash(
      config,
      new ValueExpressions.LongExpression(100L, ExpressionPosition.UNKNOWN),
      Types.required(TypeProtos.MinorType.BIGINT),
      Types.required(TypeProtos.MinorType.BIGINT),
      TypeProtos.DataMode.REQUIRED,
      registry);

  // test required vs nullable double input
  resolveHash(
      config,
      new TypedNullConstant(Types.optional(TypeProtos.MinorType.FLOAT8)),
      Types.optional(TypeProtos.MinorType.FLOAT8),
      Types.required(TypeProtos.MinorType.FLOAT8),
      TypeProtos.DataMode.OPTIONAL,
      registry);
  resolveHash(
      config,
      new ValueExpressions.DoubleExpression(100.0, ExpressionPosition.UNKNOWN),
      Types.required(TypeProtos.MinorType.FLOAT8),
      Types.required(TypeProtos.MinorType.FLOAT8),
      TypeProtos.DataMode.REQUIRED,
      registry);
}