private void inferOutputFieldsFromLeftSide() {
  outputFields = Lists.newArrayList();
  Iterator<MaterializedField> iter = leftSide.getRecordBatch().getSchema().iterator();
  while (iter.hasNext()) {
    MaterializedField field = iter.next();
    outputFields.add(MaterializedField.create(field.getPath(), field.getType()));
  }
}
@Test
public void testFixedType() {
  // Build a required uint field definition
  MajorType.Builder typeBuilder = MajorType.newBuilder();
  FieldDef.Builder defBuilder = FieldDef.newBuilder();
  typeBuilder.setMinorType(MinorType.UINT4).setMode(DataMode.REQUIRED).setWidth(4);
  defBuilder.setMajorType(typeBuilder.build());
  MaterializedField field = MaterializedField.create(defBuilder.build());

  // Create a new value vector for 1024 integers
  UInt4Vector v = new UInt4Vector(field, allocator);
  UInt4Vector.Mutator m = v.getMutator();
  v.allocateNew(1024);

  // Put and set a few values
  m.set(0, 100);
  m.set(1, 101);
  m.set(100, 102);
  m.set(1022, 103);
  m.set(1023, 104);
  assertEquals(100, v.getAccessor().get(0));
  assertEquals(101, v.getAccessor().get(1));
  assertEquals(102, v.getAccessor().get(100));
  assertEquals(103, v.getAccessor().get(1022));
  assertEquals(104, v.getAccessor().get(1023));

  // Ensure unallocated space returns 0
  assertEquals(0, v.getAccessor().get(3));
}
@Test
public void testReAllocNullableVariableWidthVector() {
  final MaterializedField field =
      MaterializedField.create(EMPTY_SCHEMA_PATH, NullableVarCharHolder.TYPE);

  // Create a new nullable varchar value vector
  try (final NullableVarCharVector vector =
      (NullableVarCharVector) TypeHelper.getNewVector(field, allocator)) {
    final NullableVarCharVector.Mutator m = vector.getMutator();
    vector.allocateNew();

    int initialCapacity = vector.getValueCapacity();

    // Put values in indexes that fall within the initial allocation
    m.setSafe(0, STR1, 0, STR1.length);
    m.setSafe(initialCapacity - 1, STR2, 0, STR2.length);

    // Now try to put values in space that falls beyond the initial allocation
    m.setSafe(initialCapacity + 200, STR3, 0, STR3.length);

    // Check valueCapacity is more than initial allocation
    assertEquals((initialCapacity + 1) * 2 - 1, vector.getValueCapacity());

    final NullableVarCharVector.Accessor accessor = vector.getAccessor();
    assertArrayEquals(STR1, accessor.get(0));
    assertArrayEquals(STR2, accessor.get(initialCapacity - 1));
    assertArrayEquals(STR3, accessor.get(initialCapacity + 200));

    // Set the valueCount to be more than the valueCapacity of the current allocation.
    // This is possible for NullableValueVectors because we don't call setSafe for null
    // values, but we do call setValueCount when the current batch is processed.
    m.setValueCount(vector.getValueCapacity() + 200);
  }
}
@Test
public void testNullableVarLen2() {
  final MaterializedField field =
      MaterializedField.create(EMPTY_SCHEMA_PATH, NullableVarCharHolder.TYPE);

  // Create a new value vector for 1024 nullable varchar values.
  try (final NullableVarCharVector vector = new NullableVarCharVector(field, allocator)) {
    final NullableVarCharVector.Mutator m = vector.getMutator();
    vector.allocateNew(1024 * 10, 1024);

    m.set(0, STR1);
    m.set(1, STR2);
    m.set(2, STR3);

    // Check the sample strings.
    final NullableVarCharVector.Accessor accessor = vector.getAccessor();
    assertArrayEquals(STR1, accessor.get(0));
    assertArrayEquals(STR2, accessor.get(1));
    assertArrayEquals(STR3, accessor.get(2));

    // Ensure getting a null value throws.
    boolean b = false;
    try {
      vector.getAccessor().get(3);
    } catch (IllegalStateException e) {
      b = true;
    } finally {
      assertTrue(b);
    }
  }
}
@Test(expected = OversizedAllocationException.class)
public void testVariableVectorReallocation() {
  final MaterializedField field = MaterializedField.create(EMPTY_SCHEMA_PATH, UInt4Holder.TYPE);
  final VarCharVector vector = new VarCharVector(field, allocator);

  // edge case 1: value count = MAX_VALUE_ALLOCATION
  final int expectedAllocationInBytes = BaseValueVector.MAX_ALLOCATION_SIZE;
  final int expectedOffsetSize = 10;
  try {
    vector.allocateNew(expectedAllocationInBytes, 10);
    assertEquals(expectedOffsetSize, vector.getValueCapacity());
    assertEquals(expectedAllocationInBytes, vector.getBuffer().capacity());
    vector.reAlloc();
    assertEquals(expectedOffsetSize * 2, vector.getValueCapacity());
    assertEquals(expectedAllocationInBytes * 2, vector.getBuffer().capacity());
  } finally {
    vector.close();
  }

  // common case: value count < MAX_VALUE_ALLOCATION
  try {
    vector.allocateNew(BaseValueVector.MAX_ALLOCATION_SIZE / 2, 0);
    vector.reAlloc(); // value allocation reaches MAX_VALUE_ALLOCATION
    vector.reAlloc(); // this should throw an OversizedAllocationException
  } finally {
    vector.close();
  }
}
@Test(expected = OversizedAllocationException.class)
public void testBitVectorReallocation() {
  final MaterializedField field = MaterializedField.create(EMPTY_SCHEMA_PATH, UInt4Holder.TYPE);
  final BitVector vector = new BitVector(field, allocator);

  // edge case 1: buffer size ~ max value capacity
  final int expectedValueCapacity = 1 << 29;
  try {
    vector.allocateNew(expectedValueCapacity);
    assertEquals(expectedValueCapacity, vector.getValueCapacity());
    vector.reAlloc();
    assertEquals(expectedValueCapacity * 2, vector.getValueCapacity());
  } finally {
    vector.close();
  }

  // common case: value count < MAX_VALUE_ALLOCATION
  try {
    vector.allocateNew(expectedValueCapacity);
    for (int i = 0; i < 3; i++) {
      vector.reAlloc(); // expand buffer size
    }
    assertEquals(Integer.MAX_VALUE, vector.getValueCapacity());
    vector.reAlloc(); // buffer size ~ max allocation
    assertEquals(Integer.MAX_VALUE, vector.getValueCapacity());
    vector.reAlloc(); // overflow
  } finally {
    vector.close();
  }
}
public OrderedPartitionRecordBatch(
    OrderedPartitionSender pop, RecordBatch incoming, FragmentContext context)
    throws OutOfMemoryException {
  super(pop, context);
  this.incoming = incoming;
  this.partitions = pop.getDestinations().size();
  this.sendingMajorFragmentWidth = pop.getSendingWidth();
  this.recordsToSample = pop.getRecordsToSample();
  this.samplingFactor = pop.getSamplingFactor();
  this.completionFactor = pop.getCompletionFactor();

  DistributedCache cache = context.getDrillbitContext().getCache();
  this.mmap = cache.getMultiMap(MULTI_CACHE_CONFIG);
  this.tableMap = cache.getMap(SINGLE_CACHE_CONFIG);
  Preconditions.checkNotNull(tableMap);

  this.mapKey =
      String.format(
          "%s_%d", context.getHandle().getQueryId(), context.getHandle().getMajorFragmentId());
  this.minorFragmentSampleCount = cache.getCounter(mapKey);

  SchemaPath outputPath = popConfig.getRef();
  MaterializedField outputField =
      MaterializedField.create(outputPath, Types.required(TypeProtos.MinorType.INT));
  this.partitionKeyVector =
      (IntVector) TypeHelper.getNewVector(outputField, oContext.getAllocator());
}
@Override
public <T extends ValueVector> T addField(MaterializedField field, Class<T> clazz)
    throws SchemaChangeException {
  ValueVector v = fieldVectorMap.get(field.key());
  if (v == null || v.getClass() != clazz) {
    // Field does not exist; add it to the map
    v = TypeHelper.getNewVector(field, oContext.getAllocator());
    if (!clazz.isAssignableFrom(v.getClass())) {
      throw new SchemaChangeException(
          String.format(
              "Class %s was provided, expected %s.",
              clazz.getSimpleName(), v.getClass().getSimpleName()));
    }
    fieldVectorMap.put(field.key(), v);
  }
  return clazz.cast(v);
}
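A minimal usage sketch for this method, as a record reader might call it through its OutputMutator. The field name "amount" and the outputMutator reference are illustrative, not taken from the source; the Types.optional helper and the nullable vector class follow the conventions used elsewhere in this code.

// Hypothetical caller: build a field, ask the mutator for a typed vector, then allocate it.
MaterializedField amountField =
    MaterializedField.create("amount", Types.optional(TypeProtos.MinorType.FLOAT8));
NullableFloat8Vector amountVector =
    outputMutator.addField(amountField, NullableFloat8Vector.class);
amountVector.allocateNew(); // ready for the reader to fill via its Mutator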
/**
 * Creates a copy of a record batch, converting any fields as necessary to coerce it into the
 * provided schema.
 *
 * @param in
 * @param toSchema
 * @param context
 * @return
 */
public static VectorContainer coerceContainer(
    VectorAccessible in, BatchSchema toSchema, OperatorContext context) {
  int recordCount = in.getRecordCount();
  Map<SchemaPath, ValueVector> vectorMap = Maps.newHashMap();
  for (VectorWrapper w : in) {
    ValueVector v = w.getValueVector();
    vectorMap.put(v.getField().getPath(), v);
  }

  VectorContainer c = new VectorContainer(context);

  for (MaterializedField field : toSchema) {
    ValueVector v = vectorMap.remove(field.getPath());
    if (v != null) {
      int valueCount = v.getAccessor().getValueCount();
      TransferPair tp = v.getTransferPair();
      tp.transfer();
      if (v.getField().getType().getMinorType().equals(field.getType().getMinorType())) {
        if (field.getType().getMinorType() == MinorType.UNION) {
          UnionVector u = (UnionVector) tp.getTo();
          for (MinorType t : field.getType().getSubTypeList()) {
            if (u.getField().getType().getSubTypeList().contains(t)) {
              continue;
            }
            u.addSubType(t);
          }
        }
        c.add(tp.getTo());
      } else {
        ValueVector newVector = TypeHelper.getNewVector(field, context.getAllocator());
        Preconditions.checkState(
            field.getType().getMinorType() == MinorType.UNION,
            "Can only convert vector to Union vector");
        UnionVector u = (UnionVector) newVector;
        u.addVector(tp.getTo());
        MinorType type = v.getField().getType().getMinorType();
        for (int i = 0; i < valueCount; i++) {
          u.getMutator().setType(i, type);
        }
        for (MinorType t : field.getType().getSubTypeList()) {
          if (u.getField().getType().getSubTypeList().contains(t)) {
            continue;
          }
          u.addSubType(t);
        }
        u.getMutator().setValueCount(valueCount);
        c.add(u);
      }
    } else {
      v = TypeHelper.getNewVector(field, context.getAllocator());
      v.allocateNew();
      v.getMutator().setValueCount(recordCount);
      c.add(v);
    }
  }
  c.buildSchema(in.getSchema().getSelectionVectorMode());
  c.setRecordCount(recordCount);
  Preconditions.checkState(vectorMap.size() == 0, "Leftover vector from incoming batch");
  return c;
}
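A hedged sketch of a call site, assuming the call is made from the class that declares this static method and that incomingBatch, unifiedSchema, and oContext are in scope (all three names are illustrative). The point is the contract: after coercion the container has exactly the fields of the target schema.

// Hypothetical usage: missing fields come back as empty vectors sized to the record
// count, and type conflicts come back wrapped in a Union vector.
VectorContainer coerced = coerceContainer(incomingBatch, unifiedSchema, oContext);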
/**
 * Convenience method that allows running tests on various {@link ValueVector vector} instances.
 *
 * @param test test function to execute
 */
private void testVectors(VectorVerifier test) throws Exception {
  final MaterializedField[] fields = {
    MaterializedField.create(EMPTY_SCHEMA_PATH, UInt4Holder.TYPE),
    MaterializedField.create(EMPTY_SCHEMA_PATH, BitHolder.TYPE),
    MaterializedField.create(EMPTY_SCHEMA_PATH, VarCharHolder.TYPE),
    MaterializedField.create(EMPTY_SCHEMA_PATH, NullableVarCharHolder.TYPE),
    MaterializedField.create(EMPTY_SCHEMA_PATH, RepeatedListVector.TYPE),
    MaterializedField.create(EMPTY_SCHEMA_PATH, MapVector.TYPE),
    MaterializedField.create(EMPTY_SCHEMA_PATH, RepeatedMapVector.TYPE)
  };

  final ValueVector[] vectors = {
    new UInt4Vector(fields[0], allocator),
    new BitVector(fields[1], allocator),
    new VarCharVector(fields[2], allocator),
    new NullableVarCharVector(fields[3], allocator),
    new RepeatedListVector(fields[4], allocator, null),
    new MapVector(fields[5], allocator, null),
    new RepeatedMapVector(fields[6], allocator, null)
  };

  try {
    for (final ValueVector vector : vectors) {
      test.verify(vector);
    }
  } finally {
    AutoCloseables.close(vectors);
  }
}
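An illustrative verifier, assuming VectorVerifier is the single-method callback its use above implies (void verify(ValueVector vector) throws Exception). The test name and the checks inside are hypothetical; the sketch only shows how a caller would exercise all seven vector types through this helper.

// Hypothetical caller of testVectors(); the verifier body is applied to each vector type.
@Test
public void testVectorsAllocateAndClear() throws Exception {
  testVectors(new VectorVerifier() {
    @Override
    public void verify(ValueVector vector) throws Exception {
      // Applied uniformly to each of the seven vector types built above.
      vector.setInitialCapacity(32);
      vector.allocateNew();
      vector.clear();
    }
  });
}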
private VectorContainer constructHyperBatch(List<BatchGroup> batchGroupList) {
  VectorContainer cont = new VectorContainer();
  for (MaterializedField field : schema) {
    ValueVector[] vectors = new ValueVector[batchGroupList.size()];
    int i = 0;
    for (BatchGroup group : batchGroupList) {
      vectors[i++] =
          group
              .getValueAccessorById(
                  field.getValueClass(),
                  group.getValueVectorId(SchemaPath.getSimplePath(field.getPath())).getFieldIds())
              .getValueVector();
    }
    cont.add(vectors);
  }
  cont.buildSchema(BatchSchema.SelectionVectorMode.FOUR_BYTE);
  return cont;
}
@Test
public void testNullableVarCharVectorLoad() {
  final MaterializedField field =
      MaterializedField.create(EMPTY_SCHEMA_PATH, NullableVarCharHolder.TYPE);

  // Create a new value vector for 1024 nullable variable length strings.
  final NullableVarCharVector vector1 = new NullableVarCharVector(field, allocator);
  final NullableVarCharVector.Mutator mutator = vector1.getMutator();
  vector1.allocateNew(1024 * 10, 1024);

  // Populate the vector.
  final StringBuilder stringBuilder = new StringBuilder();
  final int valueCount = 10;
  for (int i = 0; i < valueCount; ++i) {
    stringBuilder.append('x');
    mutator.set(i, stringBuilder.toString().getBytes(utf8Charset));
  }

  // Check the contents.
  final NullableVarCharVector.Accessor accessor1 = vector1.getAccessor();
  stringBuilder.setLength(0);
  for (int i = 0; i < valueCount; ++i) {
    stringBuilder.append('x');
    final Object object = accessor1.getObject(i);
    assertEquals(stringBuilder.toString(), object.toString());
  }

  mutator.setValueCount(valueCount);
  assertEquals(valueCount, vector1.getAccessor().getValueCount());

  // Still ok after setting value count?
  stringBuilder.setLength(0);
  for (int i = 0; i < valueCount; ++i) {
    stringBuilder.append('x');
    final Object object = accessor1.getObject(i);
    assertEquals(stringBuilder.toString(), object.toString());
  }

  // Combine into a single buffer so we can load it into a new vector.
  final DrillBuf[] buffers1 = vector1.getBuffers(false);
  final DrillBuf buffer1 = combineBuffers(allocator, buffers1);
  final NullableVarCharVector vector2 = new NullableVarCharVector(field, allocator);
  vector2.load(vector1.getMetadata(), buffer1);

  // Check the vector's contents.
  final NullableVarCharVector.Accessor accessor2 = vector2.getAccessor();
  stringBuilder.setLength(0);
  for (int i = 0; i < valueCount; ++i) {
    stringBuilder.append('x');
    final Object object = accessor2.getObject(i);
    assertEquals(stringBuilder.toString(), object.toString());
  }

  vector1.close();
  vector2.close();
  buffer1.release();
}
@Test
public void testRepeatedIntVector() {
  final MaterializedField field =
      MaterializedField.create(EMPTY_SCHEMA_PATH, RepeatedIntHolder.TYPE);

  // Create a new value vector.
  final RepeatedIntVector vector1 = new RepeatedIntVector(field, allocator);

  // Populate the vector.
  final int[] values = {2, 3, 5, 7, 11, 13, 17, 19, 23, 27}; // some tricksy primes
  final int nRecords = 7;
  final int nElements = values.length;
  vector1.allocateNew(nRecords, nRecords * nElements);
  final RepeatedIntVector.Mutator mutator = vector1.getMutator();
  for (int recordIndex = 0; recordIndex < nRecords; ++recordIndex) {
    mutator.startNewValue(recordIndex);
    for (int elementIndex = 0; elementIndex < nElements; ++elementIndex) {
      mutator.add(recordIndex, recordIndex * values[elementIndex]);
    }
  }
  mutator.setValueCount(nRecords);

  // Verify the contents.
  final RepeatedIntVector.Accessor accessor1 = vector1.getAccessor();
  assertEquals(nRecords, accessor1.getValueCount());
  for (int recordIndex = 0; recordIndex < nRecords; ++recordIndex) {
    for (int elementIndex = 0; elementIndex < nElements; ++elementIndex) {
      final int value = accessor1.get(recordIndex, elementIndex);
      assertEquals(recordIndex * values[elementIndex], value);
    }
  }

  /* TODO(cwestin) the interface to load has changed
  // Serialize, reify, and verify.
  final DrillBuf[] buffers1 = vector1.getBuffers(false);
  final DrillBuf buffer1 = combineBuffers(allocator, buffers1);
  final RepeatedIntVector vector2 = new RepeatedIntVector(field, allocator);
  vector2.load(nRecords, nRecords * nElements, buffer1);

  final RepeatedIntVector.Accessor accessor2 = vector2.getAccessor();
  for (int recordIndex = 0; recordIndex < nRecords; ++recordIndex) {
    for (int elementIndex = 0; elementIndex < nElements; ++elementIndex) {
      final int value = accessor2.get(recordIndex, elementIndex);
      assertEquals(accessor1.get(recordIndex, elementIndex), value);
    }
  }
  */

  vector1.close();
  /* TODO(cwestin)
  vector2.close();
  buffer1.release();
  */
}
@Test
public void testNullableFloat() {
  // Build an optional float field definition
  MajorType.Builder typeBuilder = MajorType.newBuilder();
  FieldDef.Builder defBuilder = FieldDef.newBuilder();
  typeBuilder.setMinorType(MinorType.FLOAT4).setMode(DataMode.OPTIONAL).setWidth(4);
  defBuilder.setMajorType(typeBuilder.build());
  MaterializedField field = MaterializedField.create(defBuilder.build());

  // Create a new value vector for 1024 floats
  NullableFloat4Vector v = (NullableFloat4Vector) TypeHelper.getNewVector(field, allocator);
  NullableFloat4Vector.Mutator m = v.getMutator();
  v.allocateNew(1024);

  // Put and set a few values
  m.set(0, 100.1f);
  m.set(1, 101.2f);
  m.set(100, 102.3f);
  m.set(1022, 103.4f);
  m.set(1023, 104.5f);
  assertEquals(100.1f, v.getAccessor().get(0), 0);
  assertEquals(101.2f, v.getAccessor().get(1), 0);
  assertEquals(102.3f, v.getAccessor().get(100), 0);
  assertEquals(103.4f, v.getAccessor().get(1022), 0);
  assertEquals(104.5f, v.getAccessor().get(1023), 0);

  // Ensure null values throw
  {
    boolean b = false;
    try {
      v.getAccessor().get(3);
    } catch (AssertionError e) {
      b = true;
    } finally {
      if (!b) {
        assert false;
      }
    }
  }

  // After re-allocation the vector is reset, so a previously set index reads as null again
  v.allocateNew(2048);
  {
    boolean b = false;
    try {
      v.getAccessor().get(0);
    } catch (AssertionError e) {
      b = true;
    } finally {
      if (!b) {
        assert false;
      }
    }
  }
}
/**
 * We initialize and add the repeated varchar vector to the record batch in this constructor.
 * Performs some sanity checks on whether the selected columns are valid.
 *
 * @param outputMutator Used to create/modify schema in the record batch
 * @param columns List of columns selected in the query
 * @param isStarQuery boolean to indicate if all fields are selected or not
 * @throws SchemaChangeException
 */
public RepeatedVarCharOutput(
    OutputMutator outputMutator, Collection<SchemaPath> columns, boolean isStarQuery)
    throws SchemaChangeException {
  super();
  MaterializedField field =
      MaterializedField.create(REF, Types.repeated(TypeProtos.MinorType.VARCHAR));
  this.vector = outputMutator.addField(field, RepeatedVarCharVector.class);
  this.mutator = vector.getMutator();

  { // setup fields
    List<Integer> columnIds = new ArrayList<Integer>();
    if (!isStarQuery) {
      String pathStr;
      for (SchemaPath path : columns) {
        assert path.getRootSegment().isNamed() : "root segment should be named";
        pathStr = path.getRootSegment().getPath();
        Preconditions.checkArgument(
            pathStr.equals(COL_NAME)
                || (pathStr.equals("*") && path.getRootSegment().getChild() == null),
            String.format(
                "Selected column '%s' must have name 'columns' or must be plain '*'", pathStr));

        if (path.getRootSegment().getChild() != null) {
          Preconditions.checkArgument(
              path.getRootSegment().getChild().isArray(),
              String.format("Selected column '%s' must be an array index", pathStr));
          int index = path.getRootSegment().getChild().getArraySegment().getIndex();
          columnIds.add(index);
        }
      }
      Collections.sort(columnIds);
    }

    boolean[] fields = new boolean[MAXIMUM_NUMBER_COLUMNS];
    int maxField = fields.length;
    if (isStarQuery) {
      Arrays.fill(fields, true);
    } else {
      // track the highest selected column index; the reset belongs before the loop
      maxField = 0;
      for (Integer i : columnIds) {
        maxField = Math.max(maxField, i);
        fields[i] = true;
      }
    }
    this.collectedFields = fields;
    this.maxField = maxField;
  }
}
public static FragmentWritableBatch getEmptyLastWithSchema(
    QueryId queryId,
    int sendMajorFragmentId,
    int sendMinorFragmentId,
    int receiveMajorFragmentId,
    int receiveMinorFragmentId,
    BatchSchema schema) {
  List<SerializedField> fields = Lists.newArrayList();
  for (MaterializedField field : schema) {
    fields.add(field.getAsBuilder().build());
  }
  RecordBatchDef def = RecordBatchDef.newBuilder().addAllField(fields).build();
  return new FragmentWritableBatch(
      true,
      queryId,
      sendMajorFragmentId,
      sendMinorFragmentId,
      receiveMajorFragmentId,
      receiveMinorFragmentId,
      def);
}
@Test
public void testVVInitialCapacity() throws Exception {
  final MaterializedField[] fields = new MaterializedField[9];
  final ValueVector[] valueVectors = new ValueVector[9];

  fields[0] = MaterializedField.create(EMPTY_SCHEMA_PATH, BitHolder.TYPE);
  fields[1] = MaterializedField.create(EMPTY_SCHEMA_PATH, IntHolder.TYPE);
  fields[2] = MaterializedField.create(EMPTY_SCHEMA_PATH, VarCharHolder.TYPE);
  fields[3] = MaterializedField.create(EMPTY_SCHEMA_PATH, NullableVar16CharHolder.TYPE);
  fields[4] = MaterializedField.create(EMPTY_SCHEMA_PATH, RepeatedFloat4Holder.TYPE);
  fields[5] = MaterializedField.create(EMPTY_SCHEMA_PATH, RepeatedVarBinaryHolder.TYPE);

  fields[6] = MaterializedField.create(EMPTY_SCHEMA_PATH, MapVector.TYPE);
  fields[6].addChild(fields[0] /* bit */);
  fields[6].addChild(fields[2] /* varchar */);

  fields[7] = MaterializedField.create(EMPTY_SCHEMA_PATH, RepeatedMapVector.TYPE);
  fields[7].addChild(fields[1] /* int */);
  fields[7].addChild(fields[3] /* optional var16char */);

  fields[8] = MaterializedField.create(EMPTY_SCHEMA_PATH, RepeatedListVector.TYPE);
  fields[8].addChild(fields[1] /* int */);

  final int initialCapacity = 1024;

  try {
    for (int i = 0; i < valueVectors.length; i++) {
      valueVectors[i] = TypeHelper.getNewVector(fields[i], allocator);
      valueVectors[i].setInitialCapacity(initialCapacity);
      valueVectors[i].allocateNew();
    }

    for (int i = 0; i < valueVectors.length; i++) {
      final ValueVector vv = valueVectors[i];
      final int vvCapacity = vv.getValueCapacity();
      assertEquals(
          String.format("Incorrect value capacity for %s [%d]", vv.getField(), vvCapacity),
          initialCapacity,
          vvCapacity);
    }
  } finally {
    AutoCloseables.close(valueVectors);
  }
}
@Test
public void testNullableFloat() {
  final MaterializedField field =
      MaterializedField.create(EMPTY_SCHEMA_PATH, NullableFloat4Holder.TYPE);

  // Create a new value vector for 1024 floats
  try (final NullableFloat4Vector vector =
      (NullableFloat4Vector) TypeHelper.getNewVector(field, allocator)) {
    final NullableFloat4Vector.Mutator m = vector.getMutator();
    vector.allocateNew(1024);

    // Put and set a few values.
    m.set(0, 100.1f);
    m.set(1, 101.2f);
    m.set(100, 102.3f);
    m.set(1022, 103.4f);
    m.set(1023, 104.5f);

    final NullableFloat4Vector.Accessor accessor = vector.getAccessor();
    assertEquals(100.1f, accessor.get(0), 0);
    assertEquals(101.2f, accessor.get(1), 0);
    assertEquals(102.3f, accessor.get(100), 0);
    assertEquals(103.4f, accessor.get(1022), 0);
    assertEquals(104.5f, accessor.get(1023), 0);

    // Ensure null values throw.
    {
      boolean b = false;
      try {
        vector.getAccessor().get(3);
      } catch (IllegalStateException e) {
        b = true;
      } finally {
        assertTrue(b);
      }
    }

    // After re-allocation the vector is reset, so a previously set index reads as null again.
    vector.allocateNew(2048);
    {
      boolean b = false;
      try {
        accessor.get(0);
      } catch (IllegalStateException e) {
        b = true;
      } finally {
        assertTrue(b);
      }
    }
  }
}
/**
 * Creates a copier that projects every Nth record from the incoming VectorContainer into the
 * outgoing VectorContainer. Each Ordering in orderings generates a column, and evaluation of the
 * expression associated with each Ordering determines the value of each column. These records
 * will later be sorted based on the values in each column, in the same order as the orderings.
 *
 * @param sv4
 * @param incoming
 * @param outgoing
 * @param orderings
 * @return
 * @throws SchemaChangeException
 */
private SampleCopier getCopier(
    SelectionVector4 sv4,
    VectorContainer incoming,
    VectorContainer outgoing,
    List<Ordering> orderings,
    List<ValueVector> localAllocationVectors)
    throws SchemaChangeException {
  final ErrorCollector collector = new ErrorCollectorImpl();
  final ClassGenerator<SampleCopier> cg =
      CodeGenerator.getRoot(SampleCopier.TEMPLATE_DEFINITION, context.getFunctionRegistry());

  int i = 0;
  for (Ordering od : orderings) {
    final LogicalExpression expr =
        ExpressionTreeMaterializer.materialize(
            od.getExpr(), incoming, collector, context.getFunctionRegistry());
    SchemaPath schemaPath = SchemaPath.getSimplePath("f" + i++);
    TypeProtos.MajorType.Builder builder =
        TypeProtos.MajorType.newBuilder()
            .mergeFrom(expr.getMajorType())
            .clearMode()
            .setMode(TypeProtos.DataMode.REQUIRED);
    TypeProtos.MajorType newType = builder.build();
    MaterializedField outputField = MaterializedField.create(schemaPath, newType);
    if (collector.hasErrors()) {
      throw new SchemaChangeException(
          String.format(
              "Failure while trying to materialize incoming schema. Errors:\n %s.",
              collector.toErrorString()));
    }

    ValueVector vector = TypeHelper.getNewVector(outputField, oContext.getAllocator());
    localAllocationVectors.add(vector);
    TypedFieldId fid = outgoing.add(vector);
    ValueVectorWriteExpression write = new ValueVectorWriteExpression(fid, expr, true);
    HoldingContainer hc = cg.addExpr(write);
    cg.getEvalBlock()._if(hc.getValue().eq(JExpr.lit(0)))._then()._return(JExpr.FALSE);
  }
  cg.rotateBlock();
  cg.getEvalBlock()._return(JExpr.TRUE);
  outgoing.buildSchema(BatchSchema.SelectionVectorMode.NONE);
  try {
    SampleCopier sampleCopier = context.getImplementationClass(cg);
    sampleCopier.setupCopier(context, sv4, incoming, outgoing);
    return sampleCopier;
  } catch (ClassTransformationException | IOException e) {
    throw new SchemaChangeException(e);
  }
}
/**
 * Returns the merger of schemas. The merged schema will include the union of all columns. If
 * there is a type conflict between columns with the same schema path but different types, the
 * merged schema will contain a Union type.
 *
 * @param schemas
 * @return
 */
public static BatchSchema mergeSchemas(BatchSchema... schemas) {
  Map<SchemaPath, Set<MinorType>> typeSetMap = Maps.newLinkedHashMap();
  for (BatchSchema s : schemas) {
    for (MaterializedField field : s) {
      SchemaPath path = field.getPath();
      Set<MinorType> currentTypes = typeSetMap.get(path);
      if (currentTypes == null) {
        currentTypes = Sets.newHashSet();
        typeSetMap.put(path, currentTypes);
      }
      MinorType newType = field.getType().getMinorType();
      if (newType == MinorType.MAP || newType == MinorType.LIST) {
        throw new RuntimeException(
            "Schema change not currently supported for schemas with complex types");
      }
      if (newType == MinorType.UNION) {
        for (MinorType subType : field.getType().getSubTypeList()) {
          currentTypes.add(subType);
        }
      } else {
        currentTypes.add(newType);
      }
    }
  }

  List<MaterializedField> fields = Lists.newArrayList();
  for (SchemaPath path : typeSetMap.keySet()) {
    Set<MinorType> types = typeSetMap.get(path);
    if (types.size() > 1) {
      MajorType.Builder builder =
          MajorType.newBuilder().setMinorType(MinorType.UNION).setMode(DataMode.OPTIONAL);
      for (MinorType t : types) {
        builder.addSubType(t);
      }
      MaterializedField field = MaterializedField.create(path, builder.build());
      fields.add(field);
    } else {
      MaterializedField field =
          MaterializedField.create(path, Types.optional(types.iterator().next()));
      fields.add(field);
    }
  }

  SchemaBuilder schemaBuilder = new SchemaBuilder();
  BatchSchema s =
      schemaBuilder
          .addFields(fields)
          .setSelectionVectorMode(schemas[0].getSelectionVectorMode())
          .build();
  return s;
}
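A hedged example of the merge behavior. The two input schemas are hypothetical: each is assumed to contain a single column "n", typed INT in one and VARCHAR in the other. Per the logic above, the conflict should surface as one optional UNION column carrying both subtypes.

// Hypothetical inputs: schemaA has column "n" as INT, schemaB has "n" as VARCHAR.
BatchSchema merged = mergeSchemas(schemaA, schemaB);
MaterializedField n = merged.iterator().next();
assertEquals(MinorType.UNION, n.getType().getMinorType());
assertTrue(n.getType().getSubTypeList().contains(MinorType.INT));
assertTrue(n.getType().getSubTypeList().contains(MinorType.VARCHAR));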
private void initCols(Schema schema) throws SchemaChangeException {
  ImmutableList.Builder<ProjectedColumnInfo> pciBuilder = ImmutableList.builder();

  for (int i = 0; i < schema.getColumnCount(); i++) {
    ColumnSchema col = schema.getColumnByIndex(i);

    final String name = col.getName();
    final Type kuduType = col.getType();
    MinorType minorType = TYPES.get(kuduType);
    if (minorType == null) {
      logger.warn(
          "Ignoring unsupported column.",
          UserException.unsupportedError()
              .message(
                  "A column you queried has a data type that is not currently supported by the "
                      + "Kudu storage plugin. The column's name was %s and its Kudu data type "
                      + "was %s.",
                  name, kuduType.toString())
              .addContext("column Name", name)
              .addContext("plugin", "kudu")
              .build(logger));
      continue;
    }
    MajorType majorType;
    if (col.isNullable()) {
      majorType = Types.optional(minorType);
    } else {
      majorType = Types.required(minorType);
    }
    MaterializedField field = MaterializedField.create(name, majorType);
    final Class<? extends ValueVector> clazz =
        (Class<? extends ValueVector>)
            TypeHelper.getValueVectorClass(minorType, majorType.getMode());
    ValueVector vector = output.addField(field, clazz);
    vector.allocateNew();

    ProjectedColumnInfo pci = new ProjectedColumnInfo();
    pci.vv = vector;
    pci.kuduColumn = col;
    pci.index = i;
    pciBuilder.add(pci);
  }

  projectedCols = pciBuilder.build();
}
private boolean fieldSelected(MaterializedField field) {
  // TODO - not sure if this is how we want to represent this
  // for now it makes the existing tests pass, simply selecting
  // all available data if no columns are provided
  if (isStarQuery()) {
    return true;
  }

  int i = 0;
  for (SchemaPath expr : getColumns()) {
    if (field.getPath().equalsIgnoreCase(expr.getAsUnescapedPath())) {
      columnsFound[i] = true;
      return true;
    }
    i++;
  }
  return false;
}
@Test
public void testBitVector() {
  // Build a required boolean field definition
  MajorType.Builder typeBuilder = MajorType.newBuilder();
  FieldDef.Builder defBuilder = FieldDef.newBuilder();
  typeBuilder.setMinorType(MinorType.BIT).setMode(DataMode.REQUIRED).setWidth(4);
  defBuilder.setMajorType(typeBuilder.build());
  MaterializedField field = MaterializedField.create(defBuilder.build());

  // Create a new value vector for 1024 bits
  BitVector v = new BitVector(field, allocator);
  BitVector.Mutator m = v.getMutator();
  v.allocateNew(1024);

  // Put and set a few values
  m.set(0, 1);
  m.set(1, 0);
  m.set(100, 0);
  m.set(1022, 1);
  assertEquals(1, v.getAccessor().get(0));
  assertEquals(0, v.getAccessor().get(1));
  assertEquals(0, v.getAccessor().get(100));
  assertEquals(1, v.getAccessor().get(1022));

  // test setting the same value twice
  m.set(0, 1);
  m.set(0, 1);
  m.set(1, 0);
  m.set(1, 0);
  assertEquals(1, v.getAccessor().get(0));
  assertEquals(0, v.getAccessor().get(1));

  // test toggling the values
  m.set(0, 0);
  m.set(1, 1);
  assertEquals(0, v.getAccessor().get(0));
  assertEquals(1, v.getAccessor().get(1));

  // Ensure unallocated space returns 0
  assertEquals(0, v.getAccessor().get(3));
}
@Test
public void testBitVector() {
  final MaterializedField field = MaterializedField.create(EMPTY_SCHEMA_PATH, BitHolder.TYPE);

  // Create a new value vector for 1024 bits
  try (final BitVector vector = new BitVector(field, allocator)) {
    final BitVector.Mutator m = vector.getMutator();
    vector.allocateNew(1024);

    // Put and set a few values
    m.set(0, 1);
    m.set(1, 0);
    m.set(100, 0);
    m.set(1022, 1);

    final BitVector.Accessor accessor = vector.getAccessor();
    assertEquals(1, accessor.get(0));
    assertEquals(0, accessor.get(1));
    assertEquals(0, accessor.get(100));
    assertEquals(1, accessor.get(1022));

    // test setting the same value twice
    m.set(0, 1);
    m.set(0, 1);
    m.set(1, 0);
    m.set(1, 0);
    assertEquals(1, accessor.get(0));
    assertEquals(0, accessor.get(1));

    // test toggling the values
    m.set(0, 0);
    m.set(1, 1);
    assertEquals(0, accessor.get(0));
    assertEquals(1, accessor.get(1));

    // Ensure unallocated space returns 0
    assertEquals(0, accessor.get(3));
  }
}
@Test
public void testNullableVarLen2() {
  // Build an optional varchar field definition
  MajorType.Builder typeBuilder = MajorType.newBuilder();
  FieldDef.Builder defBuilder = FieldDef.newBuilder();
  typeBuilder.setMinorType(MinorType.VARCHAR).setMode(DataMode.OPTIONAL).setWidth(2);
  defBuilder.setMajorType(typeBuilder.build());
  MaterializedField field = MaterializedField.create(defBuilder.build());

  // Create a new value vector for 1024 nullable varchar values
  NullableVarCharVector v = new NullableVarCharVector(field, allocator);
  NullableVarCharVector.Mutator m = v.getMutator();
  v.allocateNew(1024 * 10, 1024);

  // Create and set 3 sample strings
  String str1 = "AAAAA1";
  String str2 = "BBBBBBBBB2";
  String str3 = "CCCC3";
  m.set(0, str1.getBytes(Charset.forName("UTF-8")));
  m.set(1, str2.getBytes(Charset.forName("UTF-8")));
  m.set(2, str3.getBytes(Charset.forName("UTF-8")));

  // Check the sample strings
  assertEquals(str1, new String(v.getAccessor().get(0), Charset.forName("UTF-8")));
  assertEquals(str2, new String(v.getAccessor().get(1), Charset.forName("UTF-8")));
  assertEquals(str3, new String(v.getAccessor().get(2), Charset.forName("UTF-8")));

  // Ensure getting a null value throws
  boolean b = false;
  try {
    v.getAccessor().get(3);
  } catch (AssertionError e) {
    b = true;
  } finally {
    if (!b) {
      assert false;
    }
  }
}
@Test
public void testReAllocNullableFixedWidthVector() {
  final MaterializedField field =
      MaterializedField.create(EMPTY_SCHEMA_PATH, NullableFloat4Holder.TYPE);

  // Create a new value vector for 1024 floats
  try (final NullableFloat4Vector vector =
      (NullableFloat4Vector) TypeHelper.getNewVector(field, allocator)) {
    final NullableFloat4Vector.Mutator m = vector.getMutator();
    vector.allocateNew(1024);

    assertEquals(1024, vector.getValueCapacity());

    // Put values in indexes that fall within the initial allocation
    m.setSafe(0, 100.1f);
    m.setSafe(100, 102.3f);
    m.setSafe(1023, 104.5f);

    // Now try to put values in space that falls beyond the initial allocation
    m.setSafe(2000, 105.5f);

    // Check valueCapacity is more than initial allocation
    assertEquals(1024 * 2, vector.getValueCapacity());

    final NullableFloat4Vector.Accessor accessor = vector.getAccessor();
    assertEquals(100.1f, accessor.get(0), 0);
    assertEquals(102.3f, accessor.get(100), 0);
    assertEquals(104.5f, accessor.get(1023), 0);
    assertEquals(105.5f, accessor.get(2000), 0);

    // Set the valueCount to be more than the valueCapacity of the current allocation.
    // This is possible for NullableValueVectors because we don't call setSafe for null
    // values, but we do call setValueCount when all values are inserted into the vector.
    m.setValueCount(vector.getValueCapacity() + 200);
  }
}
@Test
public void testFixedType() {
  final MaterializedField field = MaterializedField.create(EMPTY_SCHEMA_PATH, UInt4Holder.TYPE);

  // Create a new value vector for 1024 integers.
  try (final UInt4Vector vector = new UInt4Vector(field, allocator)) {
    final UInt4Vector.Mutator m = vector.getMutator();
    vector.allocateNew(1024);

    // Put and set a few values
    m.setSafe(0, 100);
    m.setSafe(1, 101);
    m.setSafe(100, 102);
    m.setSafe(1022, 103);
    m.setSafe(1023, 104);

    final UInt4Vector.Accessor accessor = vector.getAccessor();
    assertEquals(100, accessor.get(0));
    assertEquals(101, accessor.get(1));
    assertEquals(102, accessor.get(100));
    assertEquals(103, accessor.get(1022));
    assertEquals(104, accessor.get(1023));
  }
}
@Test(expected = OversizedAllocationException.class)
public void testFixedVectorReallocation() {
  final MaterializedField field = MaterializedField.create(EMPTY_SCHEMA_PATH, UInt4Holder.TYPE);
  final UInt4Vector vector = new UInt4Vector(field, allocator);

  // edge case 1: buffer size = max value capacity
  final int expectedValueCapacity = BaseValueVector.MAX_ALLOCATION_SIZE / 4;
  try {
    vector.allocateNew(expectedValueCapacity);
    assertEquals(expectedValueCapacity, vector.getValueCapacity());
    vector.reAlloc();
    assertEquals(expectedValueCapacity * 2, vector.getValueCapacity());
  } finally {
    vector.close();
  }

  // common case: value count < max value capacity
  try {
    vector.allocateNew(BaseValueVector.MAX_ALLOCATION_SIZE / 8);
    vector.reAlloc(); // value allocation reaches MAX_VALUE_ALLOCATION
    vector.reAlloc(); // this should throw an OversizedAllocationException
  } finally {
    vector.close();
  }
}
@Override
public void setup(OperatorContext operatorContext, OutputMutator output)
    throws ExecutionSetupException {
  this.operatorContext = operatorContext;
  if (!isStarQuery()) {
    columnsFound = new boolean[getColumns().size()];
    nullFilledVectors = new ArrayList<>();
  }
  columnStatuses = new ArrayList<>();
  // totalRecords = footer.getBlocks().get(rowGroupIndex).getRowCount();
  List<ColumnDescriptor> columns = footer.getFileMetaData().getSchema().getColumns();
  allFieldsFixedLength = true;
  ColumnDescriptor column;
  ColumnChunkMetaData columnChunkMetaData;
  int columnsToScan = 0;
  mockRecordsRead = 0;

  MaterializedField field;
  // ParquetMetadataConverter metaConverter = new ParquetMetadataConverter();
  FileMetaData fileMetaData;

  logger.debug(
      "Reading row group({}) with {} records in file {}.",
      rowGroupIndex,
      footer.getBlocks().get(rowGroupIndex).getRowCount(),
      hadoopPath.toUri().getPath());
  totalRecordsRead = 0;

  // TODO - figure out how to deal with this better once we add nested reading, note also look
  // where this map is used below
  // store a map from column name to converted types if they are non-null
  Map<String, SchemaElement> schemaElements =
      ParquetReaderUtility.getColNameToSchemaElementMapping(footer);

  // loop to add up the length of the fixed width columns and build the schema
  for (int i = 0; i < columns.size(); ++i) {
    column = columns.get(i);
    SchemaElement se = schemaElements.get(column.getPath()[0]);
    MajorType mt =
        ParquetToDrillTypeConverter.toMajorType(
            column.getType(),
            se.getType_length(),
            getDataMode(column),
            se,
            fragmentContext.getOptions());
    field = MaterializedField.create(toFieldName(column.getPath()), mt);
    if (!fieldSelected(field)) {
      continue;
    }
    columnsToScan++;
    int dataTypeLength = getDataTypeLength(column, se);
    if (dataTypeLength == -1) {
      allFieldsFixedLength = false;
    } else {
      bitWidthAllFixedFields += dataTypeLength;
    }
  }
  // rowGroupOffset =
  //     footer.getBlocks().get(rowGroupIndex).getColumns().get(0).getFirstDataPageOffset();

  if (columnsToScan != 0 && allFieldsFixedLength) {
    recordsPerBatch =
        (int)
            Math.min(
                Math.min(
                    batchSize / bitWidthAllFixedFields,
                    footer.getBlocks().get(0).getColumns().get(0).getValueCount()),
                65535);
  } else {
    recordsPerBatch = DEFAULT_RECORDS_TO_READ_IF_NOT_FIXED_WIDTH;
  }

  try {
    ValueVector vector;
    SchemaElement schemaElement;
    final ArrayList<VarLengthColumn<? extends ValueVector>> varLengthColumns = new ArrayList<>();
    // initialize all of the column read status objects
    boolean fieldFixedLength;
    // the column chunk meta-data is not guaranteed to be in the same order as the columns in
    // the schema, so a map is constructed for fast access to the correct columnChunkMetaData
    // corresponding to an element in the schema
    Map<String, Integer> columnChunkMetadataPositionsInList = new HashMap<>();
    BlockMetaData rowGroupMetadata = footer.getBlocks().get(rowGroupIndex);

    int colChunkIndex = 0;
    for (ColumnChunkMetaData colChunk : rowGroupMetadata.getColumns()) {
      columnChunkMetadataPositionsInList.put(
          Arrays.toString(colChunk.getPath().toArray()), colChunkIndex);
      colChunkIndex++;
    }
    for (int i = 0; i < columns.size(); ++i) {
      column = columns.get(i);
      columnChunkMetaData =
          rowGroupMetadata
              .getColumns()
              .get(columnChunkMetadataPositionsInList.get(Arrays.toString(column.getPath())));
      schemaElement = schemaElements.get(column.getPath()[0]);
      MajorType type =
          ParquetToDrillTypeConverter.toMajorType(
              column.getType(),
              schemaElement.getType_length(),
              getDataMode(column),
              schemaElement,
              fragmentContext.getOptions());
      field = MaterializedField.create(toFieldName(column.getPath()), type);
      // the field was not requested to be read
      if (!fieldSelected(field)) {
        continue;
      }

      fieldFixedLength = column.getType() != PrimitiveType.PrimitiveTypeName.BINARY;
      vector =
          output.addField(
              field,
              (Class<? extends ValueVector>)
                  TypeHelper.getValueVectorClass(type.getMinorType(), type.getMode()));
      if (column.getType() != PrimitiveType.PrimitiveTypeName.BINARY) {
        if (column.getMaxRepetitionLevel() > 0) {
          final RepeatedValueVector repeatedVector = RepeatedValueVector.class.cast(vector);
          ColumnReader<?> dataReader =
              ColumnReaderFactory.createFixedColumnReader(
                  this,
                  fieldFixedLength,
                  column,
                  columnChunkMetaData,
                  recordsPerBatch,
                  repeatedVector.getDataVector(),
                  schemaElement);
          varLengthColumns.add(
              new FixedWidthRepeatedReader(
                  this,
                  dataReader,
                  getTypeLengthInBits(column.getType()),
                  -1,
                  column,
                  columnChunkMetaData,
                  false,
                  repeatedVector,
                  schemaElement));
        } else {
          columnStatuses.add(
              ColumnReaderFactory.createFixedColumnReader(
                  this,
                  fieldFixedLength,
                  column,
                  columnChunkMetaData,
                  recordsPerBatch,
                  vector,
                  schemaElement));
        }
      } else {
        // create a reader and add it to the appropriate list
        varLengthColumns.add(
            ColumnReaderFactory.getReader(
                this, -1, column, columnChunkMetaData, false, vector, schemaElement));
      }
    }
    varLengthReader = new VarLenBinaryReader(this, varLengthColumns);

    if (!isStarQuery()) {
      List<SchemaPath> projectedColumns = Lists.newArrayList(getColumns());
      SchemaPath col;
      for (int i = 0; i < columnsFound.length; i++) {
        col = projectedColumns.get(i);
        assert col != null;
        if (!columnsFound[i] && !col.equals(STAR_COLUMN)) {
          nullFilledVectors.add(
              (NullableIntVector)
                  output.addField(
                      MaterializedField.create(
                          col.getAsUnescapedPath(), Types.optional(TypeProtos.MinorType.INT)),
                      (Class<? extends ValueVector>)
                          TypeHelper.getValueVectorClass(
                              TypeProtos.MinorType.INT, DataMode.OPTIONAL)));
        }
      }
    }
  } catch (Exception e) {
    handleAndRaise("Failure in setting up reader", e);
  }
}
private StreamingAggregator createAggregatorInternal()
    throws SchemaChangeException, ClassTransformationException, IOException {
  ClassGenerator<StreamingAggregator> cg =
      CodeGenerator.getRoot(
          StreamingAggTemplate.TEMPLATE_DEFINITION, context.getFunctionRegistry());
  container.clear();

  LogicalExpression[] keyExprs = new LogicalExpression[popConfig.getKeys().length];
  LogicalExpression[] valueExprs = new LogicalExpression[popConfig.getExprs().length];
  TypedFieldId[] keyOutputIds = new TypedFieldId[popConfig.getKeys().length];

  ErrorCollector collector = new ErrorCollectorImpl();

  for (int i = 0; i < keyExprs.length; i++) {
    final NamedExpression ne = popConfig.getKeys()[i];
    final LogicalExpression expr =
        ExpressionTreeMaterializer.materialize(
            ne.getExpr(), incoming, collector, context.getFunctionRegistry());
    if (expr == null) {
      continue;
    }
    keyExprs[i] = expr;
    final MaterializedField outputField =
        MaterializedField.create(ne.getRef(), expr.getMajorType());
    final ValueVector vector = TypeHelper.getNewVector(outputField, oContext.getAllocator());
    keyOutputIds[i] = container.add(vector);
  }

  for (int i = 0; i < valueExprs.length; i++) {
    final NamedExpression ne = popConfig.getExprs()[i];
    final LogicalExpression expr =
        ExpressionTreeMaterializer.materialize(
            ne.getExpr(), incoming, collector, context.getFunctionRegistry());
    if (expr instanceof IfExpression) {
      throw UserException.unsupportedError(
              new UnsupportedOperationException(
                  "Union type not supported in aggregate functions"))
          .build(logger);
    }
    if (expr == null) {
      continue;
    }

    final MaterializedField outputField =
        MaterializedField.create(ne.getRef(), expr.getMajorType());
    ValueVector vector = TypeHelper.getNewVector(outputField, oContext.getAllocator());
    TypedFieldId id = container.add(vector);
    valueExprs[i] = new ValueVectorWriteExpression(id, expr, true);
  }

  if (collector.hasErrors()) {
    throw new SchemaChangeException(
        "Failure while materializing expression. " + collector.toErrorString());
  }

  setupIsSame(cg, keyExprs);
  setupIsSameApart(cg, keyExprs);
  addRecordValues(cg, valueExprs);
  outputRecordKeys(cg, keyOutputIds, keyExprs);
  outputRecordKeysPrev(cg, keyOutputIds, keyExprs);

  cg.getBlock("resetValues")._return(JExpr.TRUE);
  getIndex(cg);

  container.buildSchema(SelectionVectorMode.NONE);
  StreamingAggregator agg = context.getImplementationClass(cg);
  agg.setup(oContext, incoming, this);
  return agg;
}