/**
 * Creates a copy of a record batch, converting any fields as necessary to coerce it into the
 * provided schema.
 *
 * @param in the batch to copy
 * @param toSchema the schema the copy must conform to
 * @param context operator context used to allocate any new vectors
 * @return a new container holding the coerced copy
 */
public static VectorContainer coerceContainer(
    VectorAccessible in, BatchSchema toSchema, OperatorContext context) {
  int recordCount = in.getRecordCount();
  Map<SchemaPath, ValueVector> vectorMap = Maps.newHashMap();
  for (VectorWrapper w : in) {
    ValueVector v = w.getValueVector();
    vectorMap.put(v.getField().getPath(), v);
  }

  VectorContainer c = new VectorContainer(context);

  for (MaterializedField field : toSchema) {
    ValueVector v = vectorMap.remove(field.getPath());
    if (v != null) {
      int valueCount = v.getAccessor().getValueCount();
      TransferPair tp = v.getTransferPair();
      tp.transfer();
      if (v.getField().getType().getMinorType().equals(field.getType().getMinorType())) {
        if (field.getType().getMinorType() == MinorType.UNION) {
          UnionVector u = (UnionVector) tp.getTo();
          for (MinorType t : field.getType().getSubTypeList()) {
            if (u.getField().getType().getSubTypeList().contains(t)) {
              continue;
            }
            u.addSubType(t);
          }
        }
        c.add(tp.getTo());
      } else {
        ValueVector newVector = TypeHelper.getNewVector(field, context.getAllocator());
        Preconditions.checkState(
            field.getType().getMinorType() == MinorType.UNION,
            "Can only convert vector to Union vector");
        UnionVector u = (UnionVector) newVector;
        u.addVector(tp.getTo());
        MinorType type = v.getField().getType().getMinorType();
        for (int i = 0; i < valueCount; i++) {
          u.getMutator().setType(i, type);
        }
        for (MinorType t : field.getType().getSubTypeList()) {
          if (u.getField().getType().getSubTypeList().contains(t)) {
            continue;
          }
          u.addSubType(t);
        }
        u.getMutator().setValueCount(valueCount);
        c.add(u);
      }
    } else {
      v = TypeHelper.getNewVector(field, context.getAllocator());
      v.allocateNew();
      v.getMutator().setValueCount(recordCount);
      c.add(v);
    }
  }
  c.buildSchema(in.getSchema().getSelectionVectorMode());
  c.setRecordCount(recordCount);
  Preconditions.checkState(vectorMap.size() == 0, "Leftover vector from incoming batch");
  return c;
}
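For context, this coercion helper is what the external sort (see innerNext() near the end of this section) calls when it detects a schema change. Below is a minimal, hypothetical sketch of that call pattern; the class name CoerceExample and the import paths are assumptions, while SchemaUtil.coerceContainer itself appears verbatim in the sort code later in this section.

// Hypothetical usage sketch (not part of the Drill sources above): coerce an incoming batch
// to a previously merged target schema before buffering it for sorting.
import org.apache.drill.exec.ops.OperatorContext;
import org.apache.drill.exec.record.BatchSchema;
import org.apache.drill.exec.record.SchemaUtil;
import org.apache.drill.exec.record.VectorAccessible;
import org.apache.drill.exec.record.VectorContainer;

public class CoerceExample {
  /** Returns a copy of {@code incoming} whose vectors conform to {@code targetSchema}. */
  public static VectorContainer coerce(
      VectorAccessible incoming, BatchSchema targetSchema, OperatorContext oContext) {
    // Missing fields are added as empty vectors sized to the record count; fields whose minor
    // type differs from the target are promoted into union vectors (see coerceContainer above).
    return SchemaUtil.coerceContainer(incoming, targetSchema, oContext);
  }
}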
private void createCopier(
    VectorAccessible batch,
    List<BatchGroup> batchGroupList,
    VectorContainer outputContainer,
    boolean spilling)
    throws SchemaChangeException {
  try {
    if (copier == null) {
      CodeGenerator<PriorityQueueCopier> cg =
          CodeGenerator.get(
              PriorityQueueCopier.TEMPLATE_DEFINITION,
              context.getFunctionRegistry(),
              context.getOptions());
      ClassGenerator<PriorityQueueCopier> g = cg.getRoot();

      generateComparisons(g, batch);

      g.setMappingSet(COPIER_MAPPING_SET);
      CopyUtil.generateCopies(g, batch, true);
      g.setMappingSet(MAIN_MAPPING);
      copier = context.getImplementationClass(cg);
    } else {
      copier.close();
    }

    BufferAllocator allocator = spilling ? copierAllocator : oAllocator;
    for (VectorWrapper<?> i : batch) {
      ValueVector v = TypeHelper.getNewVector(i.getField(), allocator);
      outputContainer.add(v);
    }
    copier.setup(context, allocator, batch, batchGroupList, outputContainer);
  } catch (ClassTransformationException | IOException e) {
    throw new RuntimeException(e);
  }
}
@Test
public void testReAllocNullableVariableWidthVector() {
  final MaterializedField field =
      MaterializedField.create(EMPTY_SCHEMA_PATH, NullableVarCharHolder.TYPE);

  // Create a new nullable VarChar value vector
  try (final NullableVarCharVector vector =
      (NullableVarCharVector) TypeHelper.getNewVector(field, allocator)) {
    final NullableVarCharVector.Mutator m = vector.getMutator();
    vector.allocateNew();

    int initialCapacity = vector.getValueCapacity();

    // Put values in indexes that fall within the initial allocation
    m.setSafe(0, STR1, 0, STR1.length);
    m.setSafe(initialCapacity - 1, STR2, 0, STR2.length);

    // Now try to put values in space that falls beyond the initial allocation
    m.setSafe(initialCapacity + 200, STR3, 0, STR3.length);

    // Check that valueCapacity has grown beyond the initial allocation
    assertEquals((initialCapacity + 1) * 2 - 1, vector.getValueCapacity());

    final NullableVarCharVector.Accessor accessor = vector.getAccessor();
    assertArrayEquals(STR1, accessor.get(0));
    assertArrayEquals(STR2, accessor.get(initialCapacity - 1));
    assertArrayEquals(STR3, accessor.get(initialCapacity + 200));

    // Set the valueCount to be more than the valueCapacity of the current allocation. This is
    // possible for NullableValueVectors because we don't call setSafe for null values, but we do
    // call setValueCount when the current batch is processed.
    m.setValueCount(vector.getValueCapacity() + 200);
  }
}
public OrderedPartitionRecordBatch(
    OrderedPartitionSender pop, RecordBatch incoming, FragmentContext context)
    throws OutOfMemoryException {
  super(pop, context);
  this.incoming = incoming;
  this.partitions = pop.getDestinations().size();
  this.sendingMajorFragmentWidth = pop.getSendingWidth();
  this.recordsToSample = pop.getRecordsToSample();
  this.samplingFactor = pop.getSamplingFactor();
  this.completionFactor = pop.getCompletionFactor();

  DistributedCache cache = context.getDrillbitContext().getCache();
  this.mmap = cache.getMultiMap(MULTI_CACHE_CONFIG);
  this.tableMap = cache.getMap(SINGLE_CACHE_CONFIG);
  Preconditions.checkNotNull(tableMap);

  this.mapKey =
      String.format(
          "%s_%d", context.getHandle().getQueryId(), context.getHandle().getMajorFragmentId());
  this.minorFragmentSampleCount = cache.getCounter(mapKey);

  SchemaPath outputPath = popConfig.getRef();
  MaterializedField outputField =
      MaterializedField.create(outputPath, Types.required(TypeProtos.MinorType.INT));
  this.partitionKeyVector =
      (IntVector) TypeHelper.getNewVector(outputField, oContext.getAllocator());
}
@JsonCreator
public MockScanEntry(
    @JsonProperty("records") int records, @JsonProperty("types") MockColumn[] types) {
  this.records = records;
  this.types = types;
  int size = 0;
  for (MockColumn dt : types) {
    size += TypeHelper.getSize(dt.getMajorType());
  }
  this.recordSize = size;
}
@Test
public void testNullableFloat() {
  final MaterializedField field =
      MaterializedField.create(EMPTY_SCHEMA_PATH, NullableFloat4Holder.TYPE);

  // Create a new value vector for 1024 floats
  try (final NullableFloat4Vector vector =
      (NullableFloat4Vector) TypeHelper.getNewVector(field, allocator)) {
    final NullableFloat4Vector.Mutator m = vector.getMutator();
    vector.allocateNew(1024);

    // Put and set a few values.
    m.set(0, 100.1f);
    m.set(1, 101.2f);
    m.set(100, 102.3f);
    m.set(1022, 103.4f);
    m.set(1023, 104.5f);

    final NullableFloat4Vector.Accessor accessor = vector.getAccessor();
    assertEquals(100.1f, accessor.get(0), 0);
    assertEquals(101.2f, accessor.get(1), 0);
    assertEquals(102.3f, accessor.get(100), 0);
    assertEquals(103.4f, accessor.get(1022), 0);
    assertEquals(104.5f, accessor.get(1023), 0);

    // Ensure that reading a null value throws.
    {
      boolean b = false;
      try {
        vector.getAccessor().get(3);
      } catch (IllegalStateException e) {
        b = true;
      } finally {
        assertTrue(b);
      }
    }

    // Re-allocation resets the vector, so all values are null again and reads must throw.
    vector.allocateNew(2048);
    {
      boolean b = false;
      try {
        accessor.get(0);
      } catch (IllegalStateException e) {
        b = true;
      } finally {
        assertTrue(b);
      }
    }
  }
}
/**
 * Creates a copier that does a project for every Nth record from a VectorContainer incoming into
 * VectorContainer outgoing. Each Ordering in orderings generates a column, and evaluation of the
 * expression associated with each Ordering determines the value of each column. These records
 * will later be sorted based on the values in each column, in the same order as the orderings.
 *
 * @param sv4 selection vector identifying the sampled records
 * @param incoming container the sample records are projected from
 * @param outgoing container the projected sample columns are written to
 * @param orderings orderings whose expressions produce the output columns
 * @param localAllocationVectors collects the newly created output vectors for later allocation
 * @return a copier set up to project the sample records
 * @throws SchemaChangeException if the incoming schema cannot be materialized
 */
private SampleCopier getCopier(
    SelectionVector4 sv4,
    VectorContainer incoming,
    VectorContainer outgoing,
    List<Ordering> orderings,
    List<ValueVector> localAllocationVectors)
    throws SchemaChangeException {
  final ErrorCollector collector = new ErrorCollectorImpl();
  final ClassGenerator<SampleCopier> cg =
      CodeGenerator.getRoot(SampleCopier.TEMPLATE_DEFINITION, context.getFunctionRegistry());

  int i = 0;
  for (Ordering od : orderings) {
    final LogicalExpression expr =
        ExpressionTreeMaterializer.materialize(
            od.getExpr(), incoming, collector, context.getFunctionRegistry());
    SchemaPath schemaPath = SchemaPath.getSimplePath("f" + i++);
    TypeProtos.MajorType.Builder builder =
        TypeProtos.MajorType.newBuilder()
            .mergeFrom(expr.getMajorType())
            .clearMode()
            .setMode(TypeProtos.DataMode.REQUIRED);
    TypeProtos.MajorType newType = builder.build();
    MaterializedField outputField = MaterializedField.create(schemaPath, newType);
    if (collector.hasErrors()) {
      throw new SchemaChangeException(
          String.format(
              "Failure while trying to materialize incoming schema. Errors:\n %s.",
              collector.toErrorString()));
    }

    ValueVector vector = TypeHelper.getNewVector(outputField, oContext.getAllocator());
    localAllocationVectors.add(vector);
    TypedFieldId fid = outgoing.add(vector);
    ValueVectorWriteExpression write = new ValueVectorWriteExpression(fid, expr, true);
    HoldingContainer hc = cg.addExpr(write);
    cg.getEvalBlock()._if(hc.getValue().eq(JExpr.lit(0)))._then()._return(JExpr.FALSE);
  }
  cg.rotateBlock();
  cg.getEvalBlock()._return(JExpr.TRUE);
  outgoing.buildSchema(BatchSchema.SelectionVectorMode.NONE);
  try {
    SampleCopier sampleCopier = context.getImplementationClass(cg);
    sampleCopier.setupCopier(context, sv4, incoming, outgoing);
    return sampleCopier;
  } catch (ClassTransformationException | IOException e) {
    throw new SchemaChangeException(e);
  }
}
@Override
public <T extends ValueVector> T addField(MaterializedField field, Class<T> clazz)
    throws SchemaChangeException {
  ValueVector v = fieldVectorMap.get(field.key());
  if (v == null || v.getClass() != clazz) {
    // Field does not exist; add it to the map
    v = TypeHelper.getNewVector(field, oContext.getAllocator());
    if (!clazz.isAssignableFrom(v.getClass())) {
      throw new SchemaChangeException(
          String.format(
              "Class %s was provided, expected %s.",
              clazz.getSimpleName(), v.getClass().getSimpleName()));
    }
    fieldVectorMap.put(field.key(), v);
  }
  return clazz.cast(v);
}
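This addField() override is the usual way record readers obtain their output vectors; the Kudu and Parquet readers later in this section call it the same way. Below is a small, hypothetical reader-side caller, shown only as a sketch: the helper class, column name, and import paths are assumptions, while the addField signature matches the override above.

// Hypothetical usage sketch: register a nullable INT column through an OutputMutator.
import org.apache.drill.common.types.TypeProtos;
import org.apache.drill.common.types.Types;
import org.apache.drill.exec.exception.SchemaChangeException;
import org.apache.drill.exec.physical.impl.OutputMutator;
import org.apache.drill.exec.record.MaterializedField;
import org.apache.drill.exec.vector.NullableIntVector;

public class AddFieldExample {
  /** Registers a nullable INT column with the mutator and allocates its first buffer. */
  public static NullableIntVector addIntColumn(OutputMutator output, String columnName)
      throws SchemaChangeException {
    MaterializedField field =
        MaterializedField.create(columnName, Types.optional(TypeProtos.MinorType.INT));
    // addField either reuses an existing vector of the requested class or creates a new one
    // through TypeHelper.getNewVector, as shown in the override above.
    NullableIntVector vector = output.addField(field, NullableIntVector.class);
    vector.allocateNew();
    return vector;
  }
}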
private void initCols(Schema schema) throws SchemaChangeException {
  ImmutableList.Builder<ProjectedColumnInfo> pciBuilder = ImmutableList.builder();

  for (int i = 0; i < schema.getColumnCount(); i++) {
    ColumnSchema col = schema.getColumnByIndex(i);

    final String name = col.getName();
    final Type kuduType = col.getType();
    MinorType minorType = TYPES.get(kuduType);
    if (minorType == null) {
      logger.warn(
          "Ignoring column that is unsupported.",
          UserException.unsupportedError()
              .message(
                  "A column you queried has a data type that is not currently supported by the Kudu storage plugin. "
                      + "The column's name was %s and its Kudu data type was %s. ",
                  name, kuduType.toString())
              .addContext("column Name", name)
              .addContext("plugin", "kudu")
              .build(logger));
      continue;
    }
    MajorType majorType;
    if (col.isNullable()) {
      majorType = Types.optional(minorType);
    } else {
      majorType = Types.required(minorType);
    }
    MaterializedField field = MaterializedField.create(name, majorType);
    final Class<? extends ValueVector> clazz =
        (Class<? extends ValueVector>)
            TypeHelper.getValueVectorClass(minorType, majorType.getMode());
    ValueVector vector = output.addField(field, clazz);
    vector.allocateNew();

    ProjectedColumnInfo pci = new ProjectedColumnInfo();
    pci.vv = vector;
    pci.kuduColumn = col;
    pci.index = i;
    pciBuilder.add(pci);
  }

  projectedCols = pciBuilder.build();
}
@Test
public void testVVInitialCapacity() throws Exception {
  final MaterializedField[] fields = new MaterializedField[9];
  final ValueVector[] valueVectors = new ValueVector[9];

  fields[0] = MaterializedField.create(EMPTY_SCHEMA_PATH, BitHolder.TYPE);
  fields[1] = MaterializedField.create(EMPTY_SCHEMA_PATH, IntHolder.TYPE);
  fields[2] = MaterializedField.create(EMPTY_SCHEMA_PATH, VarCharHolder.TYPE);
  fields[3] = MaterializedField.create(EMPTY_SCHEMA_PATH, NullableVar16CharHolder.TYPE);
  fields[4] = MaterializedField.create(EMPTY_SCHEMA_PATH, RepeatedFloat4Holder.TYPE);
  fields[5] = MaterializedField.create(EMPTY_SCHEMA_PATH, RepeatedVarBinaryHolder.TYPE);

  fields[6] = MaterializedField.create(EMPTY_SCHEMA_PATH, MapVector.TYPE);
  fields[6].addChild(fields[0] /*bit*/);
  fields[6].addChild(fields[2] /*varchar*/);

  fields[7] = MaterializedField.create(EMPTY_SCHEMA_PATH, RepeatedMapVector.TYPE);
  fields[7].addChild(fields[1] /*int*/);
  fields[7].addChild(fields[3] /*optional var16char*/);

  fields[8] = MaterializedField.create(EMPTY_SCHEMA_PATH, RepeatedListVector.TYPE);
  fields[8].addChild(fields[1] /*int*/);

  final int initialCapacity = 1024;

  try {
    for (int i = 0; i < valueVectors.length; i++) {
      valueVectors[i] = TypeHelper.getNewVector(fields[i], allocator);
      valueVectors[i].setInitialCapacity(initialCapacity);
      valueVectors[i].allocateNew();
    }

    for (int i = 0; i < valueVectors.length; i++) {
      final ValueVector vv = valueVectors[i];
      final int vvCapacity = vv.getValueCapacity();
      assertEquals(
          String.format("Incorrect value capacity for %s [%d]", vv.getField(), vvCapacity),
          initialCapacity,
          vvCapacity);
    }
  } finally {
    AutoCloseables.close(valueVectors);
  }
}
@Test
public void testReAllocNullableFixedWidthVector() {
  final MaterializedField field =
      MaterializedField.create(EMPTY_SCHEMA_PATH, NullableFloat4Holder.TYPE);

  // Create a new value vector for 1024 floats
  try (final NullableFloat4Vector vector =
      (NullableFloat4Vector) TypeHelper.getNewVector(field, allocator)) {
    final NullableFloat4Vector.Mutator m = vector.getMutator();
    vector.allocateNew(1024);

    assertEquals(1024, vector.getValueCapacity());

    // Put values in indexes that fall within the initial allocation
    m.setSafe(0, 100.1f);
    m.setSafe(100, 102.3f);
    m.setSafe(1023, 104.5f);

    // Now try to put values in space that falls beyond the initial allocation
    m.setSafe(2000, 105.5f);

    // Check that valueCapacity has grown beyond the initial allocation
    assertEquals(1024 * 2, vector.getValueCapacity());

    final NullableFloat4Vector.Accessor accessor = vector.getAccessor();
    assertEquals(100.1f, accessor.get(0), 0);
    assertEquals(102.3f, accessor.get(100), 0);
    assertEquals(104.5f, accessor.get(1023), 0);
    assertEquals(105.5f, accessor.get(2000), 0);

    // Set the valueCount to be more than the valueCapacity of the current allocation. This is
    // possible for NullableValueVectors because we don't call setSafe for null values, but we do
    // call setValueCount when all values are inserted into the vector.
    m.setValueCount(vector.getValueCapacity() + 200);
  }
}
@Override
public void setup(OperatorContext operatorContext, OutputMutator output)
    throws ExecutionSetupException {
  this.operatorContext = operatorContext;
  if (!isStarQuery()) {
    columnsFound = new boolean[getColumns().size()];
    nullFilledVectors = new ArrayList<>();
  }
  columnStatuses = new ArrayList<>();
  // totalRecords = footer.getBlocks().get(rowGroupIndex).getRowCount();
  List<ColumnDescriptor> columns = footer.getFileMetaData().getSchema().getColumns();
  allFieldsFixedLength = true;
  ColumnDescriptor column;
  ColumnChunkMetaData columnChunkMetaData;
  int columnsToScan = 0;
  mockRecordsRead = 0;

  MaterializedField field;
  // ParquetMetadataConverter metaConverter = new ParquetMetadataConverter();
  FileMetaData fileMetaData;

  logger.debug(
      "Reading row group({}) with {} records in file {}.",
      rowGroupIndex,
      footer.getBlocks().get(rowGroupIndex).getRowCount(),
      hadoopPath.toUri().getPath());
  totalRecordsRead = 0;

  // TODO - figure out how to deal with this better once we add nested reading, note also look
  // where this map is used below
  // store a map from column name to converted types if they are non-null
  Map<String, SchemaElement> schemaElements =
      ParquetReaderUtility.getColNameToSchemaElementMapping(footer);

  // loop to add up the length of the fixed width columns and build the schema
  for (int i = 0; i < columns.size(); ++i) {
    column = columns.get(i);
    SchemaElement se = schemaElements.get(column.getPath()[0]);
    MajorType mt =
        ParquetToDrillTypeConverter.toMajorType(
            column.getType(),
            se.getType_length(),
            getDataMode(column),
            se,
            fragmentContext.getOptions());
    field = MaterializedField.create(toFieldName(column.getPath()), mt);
    if (!fieldSelected(field)) {
      continue;
    }
    columnsToScan++;
    int dataTypeLength = getDataTypeLength(column, se);
    if (dataTypeLength == -1) {
      allFieldsFixedLength = false;
    } else {
      bitWidthAllFixedFields += dataTypeLength;
    }
  }
  // rowGroupOffset = footer.getBlocks().get(rowGroupIndex).getColumns().get(0).getFirstDataPageOffset();

  if (columnsToScan != 0 && allFieldsFixedLength) {
    recordsPerBatch =
        (int)
            Math.min(
                Math.min(
                    batchSize / bitWidthAllFixedFields,
                    footer.getBlocks().get(0).getColumns().get(0).getValueCount()),
                65535);
  } else {
    recordsPerBatch = DEFAULT_RECORDS_TO_READ_IF_NOT_FIXED_WIDTH;
  }

  try {
    ValueVector vector;
    SchemaElement schemaElement;
    final ArrayList<VarLengthColumn<? extends ValueVector>> varLengthColumns = new ArrayList<>();
    // initialize all of the column read status objects
    boolean fieldFixedLength;
    // the column chunk meta-data is not guaranteed to be in the same order as the columns in the
    // schema, so a map is constructed for fast access to the correct columnChunkMetaData to
    // correspond to an element in the schema
    Map<String, Integer> columnChunkMetadataPositionsInList = new HashMap<>();
    BlockMetaData rowGroupMetadata = footer.getBlocks().get(rowGroupIndex);

    int colChunkIndex = 0;
    for (ColumnChunkMetaData colChunk : rowGroupMetadata.getColumns()) {
      columnChunkMetadataPositionsInList.put(
          Arrays.toString(colChunk.getPath().toArray()), colChunkIndex);
      colChunkIndex++;
    }
    for (int i = 0; i < columns.size(); ++i) {
      column = columns.get(i);
      columnChunkMetaData =
          rowGroupMetadata
              .getColumns()
              .get(columnChunkMetadataPositionsInList.get(Arrays.toString(column.getPath())));
      schemaElement = schemaElements.get(column.getPath()[0]);
      MajorType type =
          ParquetToDrillTypeConverter.toMajorType(
              column.getType(),
              schemaElement.getType_length(),
              getDataMode(column),
              schemaElement,
              fragmentContext.getOptions());
      field = MaterializedField.create(toFieldName(column.getPath()), type);
      // the field was not requested to be read
      if (!fieldSelected(field)) {
        continue;
      }

      fieldFixedLength = column.getType() != PrimitiveType.PrimitiveTypeName.BINARY;
      vector =
          output.addField(
              field,
              (Class<? extends ValueVector>)
                  TypeHelper.getValueVectorClass(type.getMinorType(), type.getMode()));
      if (column.getType() != PrimitiveType.PrimitiveTypeName.BINARY) {
        if (column.getMaxRepetitionLevel() > 0) {
          final RepeatedValueVector repeatedVector = RepeatedValueVector.class.cast(vector);
          ColumnReader<?> dataReader =
              ColumnReaderFactory.createFixedColumnReader(
                  this,
                  fieldFixedLength,
                  column,
                  columnChunkMetaData,
                  recordsPerBatch,
                  repeatedVector.getDataVector(),
                  schemaElement);
          varLengthColumns.add(
              new FixedWidthRepeatedReader(
                  this,
                  dataReader,
                  getTypeLengthInBits(column.getType()),
                  -1,
                  column,
                  columnChunkMetaData,
                  false,
                  repeatedVector,
                  schemaElement));
        } else {
          columnStatuses.add(
              ColumnReaderFactory.createFixedColumnReader(
                  this,
                  fieldFixedLength,
                  column,
                  columnChunkMetaData,
                  recordsPerBatch,
                  vector,
                  schemaElement));
        }
      } else {
        // create a reader and add it to the appropriate list
        varLengthColumns.add(
            ColumnReaderFactory.getReader(
                this, -1, column, columnChunkMetaData, false, vector, schemaElement));
      }
    }
    varLengthReader = new VarLenBinaryReader(this, varLengthColumns);

    if (!isStarQuery()) {
      List<SchemaPath> projectedColumns = Lists.newArrayList(getColumns());
      SchemaPath col;
      for (int i = 0; i < columnsFound.length; i++) {
        col = projectedColumns.get(i);
        assert col != null;
        if (!columnsFound[i] && !col.equals(STAR_COLUMN)) {
          nullFilledVectors.add(
              (NullableIntVector)
                  output.addField(
                      MaterializedField.create(
                          col.getAsUnescapedPath(), Types.optional(TypeProtos.MinorType.INT)),
                      (Class<? extends ValueVector>)
                          TypeHelper.getValueVectorClass(
                              TypeProtos.MinorType.INT, DataMode.OPTIONAL)));
        }
      }
    }
  } catch (Exception e) {
    handleAndRaise("Failure in setting up reader", e);
  }
}
private StreamingAggregator createAggregatorInternal()
    throws SchemaChangeException, ClassTransformationException, IOException {
  ClassGenerator<StreamingAggregator> cg =
      CodeGenerator.getRoot(
          StreamingAggTemplate.TEMPLATE_DEFINITION, context.getFunctionRegistry());
  container.clear();

  LogicalExpression[] keyExprs = new LogicalExpression[popConfig.getKeys().length];
  LogicalExpression[] valueExprs = new LogicalExpression[popConfig.getExprs().length];
  TypedFieldId[] keyOutputIds = new TypedFieldId[popConfig.getKeys().length];

  ErrorCollector collector = new ErrorCollectorImpl();

  for (int i = 0; i < keyExprs.length; i++) {
    final NamedExpression ne = popConfig.getKeys()[i];
    final LogicalExpression expr =
        ExpressionTreeMaterializer.materialize(
            ne.getExpr(), incoming, collector, context.getFunctionRegistry());
    if (expr == null) {
      continue;
    }
    keyExprs[i] = expr;
    final MaterializedField outputField =
        MaterializedField.create(ne.getRef(), expr.getMajorType());
    final ValueVector vector = TypeHelper.getNewVector(outputField, oContext.getAllocator());
    keyOutputIds[i] = container.add(vector);
  }

  for (int i = 0; i < valueExprs.length; i++) {
    final NamedExpression ne = popConfig.getExprs()[i];
    final LogicalExpression expr =
        ExpressionTreeMaterializer.materialize(
            ne.getExpr(), incoming, collector, context.getFunctionRegistry());
    if (expr instanceof IfExpression) {
      throw UserException.unsupportedError(
              new UnsupportedOperationException(
                  "Union type not supported in aggregate functions"))
          .build(logger);
    }
    if (expr == null) {
      continue;
    }

    final MaterializedField outputField =
        MaterializedField.create(ne.getRef(), expr.getMajorType());
    ValueVector vector = TypeHelper.getNewVector(outputField, oContext.getAllocator());
    TypedFieldId id = container.add(vector);
    valueExprs[i] = new ValueVectorWriteExpression(id, expr, true);
  }

  if (collector.hasErrors()) {
    throw new SchemaChangeException(
        "Failure while materializing expression. " + collector.toErrorString());
  }

  setupIsSame(cg, keyExprs);
  setupIsSameApart(cg, keyExprs);
  addRecordValues(cg, valueExprs);
  outputRecordKeys(cg, keyOutputIds, keyExprs);
  outputRecordKeysPrev(cg, keyOutputIds, keyExprs);

  cg.getBlock("resetValues")._return(JExpr.TRUE);
  getIndex(cg);

  container.buildSchema(SelectionVectorMode.NONE);
  StreamingAggregator agg = context.getImplementationClass(cg);
  agg.setup(oContext, incoming, this);
  return agg;
}
public Class<?> getValueClass() {
  return TypeHelper.getValueVectorClass(getType().getMinorType(), getDataMode());
}
public BatchGroup mergeAndSpill(LinkedList<BatchGroup> batchGroups) throws SchemaChangeException {
  logger.debug("Copier allocator current allocation {}", copierAllocator.getAllocatedMemory());
  logger.debug(
      "mergeAndSpill: starting total size in memory = {}", oAllocator.getAllocatedMemory());
  VectorContainer outputContainer = new VectorContainer();
  List<BatchGroup> batchGroupList = Lists.newArrayList();
  int batchCount = batchGroups.size();
  for (int i = 0; i < batchCount / 2; i++) {
    if (batchGroups.size() == 0) {
      break;
    }
    BatchGroup batch = batchGroups.pollLast();
    assert batch != null : "Encountered a null batch during merge and spill operation";
    batchGroupList.add(batch);
  }

  if (batchGroupList.size() == 0) {
    return null;
  }
  int estimatedRecordSize = 0;
  for (VectorWrapper<?> w : batchGroupList.get(0)) {
    try {
      estimatedRecordSize += TypeHelper.getSize(w.getField().getType());
    } catch (UnsupportedOperationException e) {
      estimatedRecordSize += 50;
    }
  }
  int targetRecordCount = Math.max(1, COPIER_BATCH_MEM_LIMIT / estimatedRecordSize);
  VectorContainer hyperBatch = constructHyperBatch(batchGroupList);
  createCopier(hyperBatch, batchGroupList, outputContainer, true);

  int count = copier.next(targetRecordCount);
  assert count > 0;

  logger.debug(
      "mergeAndSpill: estimated record size = {}, target record count = {}",
      estimatedRecordSize,
      targetRecordCount);

  // 1 output container is kept in memory, so we want to hold on to it and transferClone
  // allows keeping ownership
  VectorContainer c1 = VectorContainer.getTransferClone(outputContainer, oContext);
  c1.buildSchema(BatchSchema.SelectionVectorMode.NONE);
  c1.setRecordCount(count);

  String spillDir = dirs.next();
  Path currSpillPath = new Path(Joiner.on("/").join(spillDir, fileName));
  currSpillDirs.add(currSpillPath);
  String outputFile = Joiner.on("/").join(currSpillPath, spillCount++);
  try {
    fs.deleteOnExit(currSpillPath);
  } catch (IOException e) {
    // since this is meant to be used while a batch is spilling, we don't propagate the exception
    logger.warn("Unable to mark spill directory " + currSpillPath + " for deleting on exit", e);
  }
  stats.setLongStat(Metric.SPILL_COUNT, spillCount);
  BatchGroup newGroup = new BatchGroup(c1, fs, outputFile, oContext);
  try (AutoCloseable a = AutoCloseables.all(batchGroupList)) {
    logger.info("Merging and spilling to {}", outputFile);
    while ((count = copier.next(targetRecordCount)) > 0) {
      outputContainer.buildSchema(BatchSchema.SelectionVectorMode.NONE);
      outputContainer.setRecordCount(count);
      // note that addBatch also clears the outputContainer
      newGroup.addBatch(outputContainer);
    }
    injector.injectChecked(
        context.getExecutionControls(), INTERRUPTION_WHILE_SPILLING, IOException.class);
    newGroup.closeOutputStream();
  } catch (Throwable e) {
    // we only need to clean up newGroup if the spill failed
    try {
      AutoCloseables.close(e, newGroup);
    } catch (Throwable t) {
      /* close() may hit the same IO issue; just ignore */
    }
    throw UserException.resourceError(e)
        .message("External Sort encountered an error while spilling to disk")
        .addContext(e.getMessage() /* more detail */)
        .build(logger);
  } finally {
    hyperBatch.clear();
  }
  logger.debug("mergeAndSpill: final total size in memory = {}", oAllocator.getAllocatedMemory());
  logger.info("Completed spilling to {}", outputFile);
  return newGroup;
}
@Override
public IterOutcome innerNext() {
  if (schema != null) {
    if (spillCount == 0) {
      return (getSelectionVector4().next()) ? IterOutcome.OK : IterOutcome.NONE;
    } else {
      Stopwatch w = Stopwatch.createStarted();
      int count = copier.next(targetRecordCount);
      if (count > 0) {
        long t = w.elapsed(TimeUnit.MICROSECONDS);
        logger.debug("Took {} us to merge {} records", t, count);
        container.setRecordCount(count);
        return IterOutcome.OK;
      } else {
        logger.debug("copier returned 0 records");
        return IterOutcome.NONE;
      }
    }
  }

  int totalCount = 0;
  int totalBatches = 0; // total number of batches received so far

  try {
    container.clear();
    outer:
    while (true) {
      IterOutcome upstream;
      if (first) {
        upstream = IterOutcome.OK_NEW_SCHEMA;
      } else {
        upstream = next(incoming);
      }
      if (upstream == IterOutcome.OK && sorter == null) {
        upstream = IterOutcome.OK_NEW_SCHEMA;
      }
      switch (upstream) {
        case NONE:
          if (first) {
            return upstream;
          }
          break outer;
        case NOT_YET:
          throw new UnsupportedOperationException();
        case STOP:
          return upstream;
        case OK_NEW_SCHEMA:
        case OK:
          VectorContainer convertedBatch;
          // only change in the case that the schema truly changes. Artificial schema changes are
          // ignored.
          if (upstream == IterOutcome.OK_NEW_SCHEMA && !incoming.getSchema().equals(schema)) {
            if (schema != null) {
              if (unionTypeEnabled) {
                this.schema = SchemaUtil.mergeSchemas(schema, incoming.getSchema());
              } else {
                throw new SchemaChangeException(
                    "Schema changes not supported in External Sort. Please enable Union type");
              }
            } else {
              schema = incoming.getSchema();
            }
            convertedBatch = SchemaUtil.coerceContainer(incoming, schema, oContext);
            for (BatchGroup b : batchGroups) {
              b.setSchema(schema);
            }
            for (BatchGroup b : spilledBatchGroups) {
              b.setSchema(schema);
            }
            this.sorter = createNewSorter(context, convertedBatch);
          } else {
            convertedBatch = SchemaUtil.coerceContainer(incoming, schema, oContext);
          }
          if (first) {
            first = false;
          }
          if (convertedBatch.getRecordCount() == 0) {
            for (VectorWrapper<?> w : convertedBatch) {
              w.clear();
            }
            break;
          }
          SelectionVector2 sv2;
          if (incoming.getSchema().getSelectionVectorMode()
              == BatchSchema.SelectionVectorMode.TWO_BYTE) {
            sv2 = incoming.getSelectionVector2().clone();
          } else {
            try {
              sv2 = newSV2();
            } catch (InterruptedException e) {
              return IterOutcome.STOP;
            } catch (OutOfMemoryException e) {
              throw new OutOfMemoryException(e);
            }
          }

          int count = sv2.getCount();
          totalCount += count;
          totalBatches++;
          sorter.setup(context, sv2, convertedBatch);
          sorter.sort(sv2);
          RecordBatchData rbd = new RecordBatchData(convertedBatch, oAllocator);
          boolean success = false;
          try {
            rbd.setSv2(sv2);
            batchGroups.add(new BatchGroup(rbd.getContainer(), rbd.getSv2(), oContext));
            if (peakNumBatches < batchGroups.size()) {
              peakNumBatches = batchGroups.size();
              stats.setLongStat(Metric.PEAK_BATCHES_IN_MEMORY, peakNumBatches);
            }

            batchesSinceLastSpill++;
            if ( // If we haven't spilled so far, do we have enough memory for MSorter if this
                // turns out to be the last incoming batch?
                (spillCount == 0 && !hasMemoryForInMemorySort(totalCount))
                    ||
                    // If we haven't spilled so far, make sure we don't exceed the maximum number
                    // of batches SV4 can address
                    (spillCount == 0 && totalBatches > Character.MAX_VALUE)
                    ||
                    // TODO(DRILL-4438) - consider setting this threshold more intelligently;
                    // lowering it caused a failing low memory condition (test in
                    // BasicPhysicalOpUnitTest) to complete successfully (although it caused a
                    // perf decrease as there was more spilling)
                    // current memory used is more than 95% of memory usage limit of this operator
                    (oAllocator.getAllocatedMemory() > .95 * oAllocator.getLimit())
                    ||
                    // Number of incoming batches (BatchGroups) exceeds the limit and the number
                    // of incoming batches accumulated since the last spill exceeds the defined
                    // limit
                    (batchGroups.size() > SPILL_THRESHOLD
                        && batchesSinceLastSpill >= SPILL_BATCH_GROUP_SIZE)) {

              if (firstSpillBatchCount == 0) {
                firstSpillBatchCount = batchGroups.size();
              }

              if (spilledBatchGroups.size() > firstSpillBatchCount / 2) {
                logger.info("Merging spills");
                final BatchGroup merged = mergeAndSpill(spilledBatchGroups);
                if (merged != null) {
                  spilledBatchGroups.addFirst(merged);
                }
              }
              final BatchGroup merged = mergeAndSpill(batchGroups);
              if (merged != null) { // make sure we don't add null to spilledBatchGroups
                spilledBatchGroups.add(merged);
                batchesSinceLastSpill = 0;
              }
            }
            success = true;
          } finally {
            if (!success) {
              rbd.clear();
            }
          }
          break;
        case OUT_OF_MEMORY:
          logger.debug("received OUT_OF_MEMORY, trying to spill");
          if (batchesSinceLastSpill > 2) {
            final BatchGroup merged = mergeAndSpill(batchGroups);
            if (merged != null) {
              spilledBatchGroups.add(merged);
              batchesSinceLastSpill = 0;
            }
          } else {
            logger.debug("not enough batches to spill, sending OUT_OF_MEMORY downstream");
            return IterOutcome.OUT_OF_MEMORY;
          }
          break;
        default:
          throw new UnsupportedOperationException();
      }
    }

    if (totalCount == 0) {
      return IterOutcome.NONE;
    }
    if (spillCount == 0) {
      if (builder != null) {
        builder.clear();
        builder.close();
      }
      builder = new SortRecordBatchBuilder(oAllocator);

      for (BatchGroup group : batchGroups) {
        RecordBatchData rbd = new RecordBatchData(group.getContainer(), oAllocator);
        rbd.setSv2(group.getSv2());
        builder.add(rbd);
      }

      builder.build(context, container);
      sv4 = builder.getSv4();
      mSorter = createNewMSorter();
      mSorter.setup(context, oAllocator, getSelectionVector4(), this.container);

      // For testing memory-leak purpose, inject exception after mSorter finishes setup
      injector.injectUnchecked(context.getExecutionControls(), INTERRUPTION_AFTER_SETUP);
      mSorter.sort(this.container);

      // sort may have prematurely exited due to shouldContinue() returning false.
      if (!context.shouldContinue()) {
        return IterOutcome.STOP;
      }

      // For testing memory-leak purpose, inject exception after mSorter finishes sorting
      injector.injectUnchecked(context.getExecutionControls(), INTERRUPTION_AFTER_SORT);
      sv4 = mSorter.getSV4();

      container.buildSchema(SelectionVectorMode.FOUR_BYTE);
    } else { // some batches were spilled
      final BatchGroup merged = mergeAndSpill(batchGroups);
      if (merged != null) {
        spilledBatchGroups.add(merged);
      }
      batchGroups.addAll(spilledBatchGroups);
      // no need to clean up spilledBatchGroups, all its batches are in batchGroups now
      spilledBatchGroups = null;

      logger.warn(
          "Starting to merge. {} batch groups. Current allocated memory: {}",
          batchGroups.size(),
          oAllocator.getAllocatedMemory());
      VectorContainer hyperBatch = constructHyperBatch(batchGroups);
      createCopier(hyperBatch, batchGroups, container, false);

      int estimatedRecordSize = 0;
      for (VectorWrapper<?> w : batchGroups.get(0)) {
        try {
          estimatedRecordSize += TypeHelper.getSize(w.getField().getType());
        } catch (UnsupportedOperationException e) {
          estimatedRecordSize += 50;
        }
      }
      targetRecordCount =
          Math.min(MAX_BATCH_SIZE, Math.max(1, COPIER_BATCH_MEM_LIMIT / estimatedRecordSize));
      int count = copier.next(targetRecordCount);
      container.buildSchema(SelectionVectorMode.NONE);
      container.setRecordCount(count);
    }

    return IterOutcome.OK_NEW_SCHEMA;

  } catch (SchemaChangeException ex) {
    kill(false);
    context.fail(
        UserException.unsupportedError(ex)
            .message("Sort doesn't currently support sorts with changing schemas")
            .build(logger));
    return IterOutcome.STOP;
  } catch (ClassTransformationException | IOException ex) {
    kill(false);
    context.fail(ex);
    return IterOutcome.STOP;
  } catch (UnsupportedOperationException e) {
    throw new RuntimeException(e);
  }
}