@Override
public void cleanup() {
  incoming.cleanup();
  super.cleanup();
  this.partitionVectors.clear();
  this.partitionKeyVector.clear();
}
private void getIndex(ClassGenerator<StreamingAggregator> g) {
  switch (incoming.getSchema().getSelectionVectorMode()) {
    case FOUR_BYTE: {
      JVar var = g.declareClassField("sv4_", g.getModel()._ref(SelectionVector4.class));
      g.getBlock("setupInterior")
          .assign(var, JExpr.direct("incoming").invoke("getSelectionVector4"));
      g.getBlock("getVectorIndex")._return(var.invoke("get").arg(JExpr.direct("recordIndex")));
      return;
    }
    case NONE: {
      g.getBlock("getVectorIndex")._return(JExpr.direct("recordIndex"));
      return;
    }
    case TWO_BYTE: {
      JVar var = g.declareClassField("sv2_", g.getModel()._ref(SelectionVector2.class));
      g.getBlock("setupInterior")
          .assign(var, JExpr.direct("incoming").invoke("getSelectionVector2"));
      g.getBlock("getVectorIndex")
          ._return(var.invoke("getIndex").arg(JExpr.direct("recordIndex")));
      return;
    }
    default:
      throw new IllegalStateException();
  }
}
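/**
 * Allocates a new two-byte selection vector sized to the incoming batch. If the initial
 * allocation fails, this first tries to free memory by merging and spilling the in-memory batch
 * groups, then retries the allocation with an exponentially growing wait (1s, 2s, 4s, 8s, 16s)
 * before giving up with an OutOfMemoryException.
 */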
private SelectionVector2 newSV2() throws OutOfMemoryException, InterruptedException {
  SelectionVector2 sv2 = new SelectionVector2(oAllocator);
  if (!sv2.allocateNewSafe(incoming.getRecordCount())) {
    try {
      final BatchGroup merged = mergeAndSpill(batchGroups);
      if (merged != null) {
        spilledBatchGroups.add(merged);
      } else {
        throw UserException.memoryError(
                "Unable to allocate sv2 for %d records, and not enough batchGroups to spill.",
                incoming.getRecordCount())
            .addContext("batchGroups.size", batchGroups.size())
            .addContext("spilledBatchGroups.size", spilledBatchGroups.size())
            .addContext("allocated memory", oAllocator.getAllocatedMemory())
            .addContext("allocator limit", oAllocator.getLimit())
            .build(logger);
      }
    } catch (SchemaChangeException e) {
      throw new RuntimeException(e);
    }
    int waitTime = 1;
    while (true) {
      try {
        Thread.sleep(waitTime * 1000);
      } catch (final InterruptedException e) {
        if (!context.shouldContinue()) {
          throw e;
        }
      }
      waitTime *= 2;
      if (sv2.allocateNewSafe(incoming.getRecordCount())) {
        break;
      }
      if (waitTime >= 32) {
        throw new OutOfMemoryException("Unable to allocate sv2 buffer after repeated attempts");
      }
    }
  }
  for (int i = 0; i < incoming.getRecordCount(); i++) {
    sv2.setIndex(i, (char) i);
  }
  sv2.setRecordCount(incoming.getRecordCount());
  return sv2;
}
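/**
 * Attempts to allocate output memory for every vector in {@code allocationVectors}, sized to the
 * current record count. Returns false instead of propagating the OutOfMemoryException so the
 * caller can report OUT_OF_MEMORY downstream.
 */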
private boolean doAlloc() {
  for (ValueVector v : allocationVectors) {
    try {
      AllocationHelper.allocateNew(v, current.getRecordCount());
    } catch (OutOfMemoryException ex) {
      return false;
    }
  }
  return true;
}
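/**
 * Binds this generated code to its incoming and outgoing batches and records the incoming
 * selection vector mode. Only NONE and TWO_BYTE modes are supported here; a FOUR_BYTE
 * (hyper-batch) input is rejected with an UnsupportedOperationException.
 */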
@Override
public void setup(
    FragmentContext context,
    RecordBatch incoming,
    RecordBatch outgoing,
    TransferPair[] transfers)
    throws SchemaChangeException {
  this.transfers = transfers;
  this.outgoingSelectionVector = outgoing.getSelectionVector2();
  this.svMode = incoming.getSchema().getSelectionVectorMode();
  switch (svMode) {
    case NONE:
      break;
    case TWO_BYTE:
      this.incomingSelectionVector = incoming.getSelectionVector2();
      break;
    default:
      throw new UnsupportedOperationException();
  }
  doSetup(context, incoming, outgoing);
}
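/**
 * Resolves a schema path to a read expression against the current batch's value vectors.
 * Unknown paths are logged and resolved to {@code NullExpression.INSTANCE} rather than failing.
 */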
@Override
public LogicalExpression visitSchemaPath(SchemaPath path) {
  // logger.debug("Visiting schema path {}", path);
  TypedFieldId tfId = batch.getValueVectorId(path);
  if (tfId == null) {
    logger.warn("Unable to find value vector of path {}, returning null instance.", path);
    return NullExpression.INSTANCE;
  } else {
    return new ValueVectorReadExpression(tfId);
  }
}
@Override
public void setupRemover(
    FragmentContext context,
    RecordBatch incoming,
    RecordBatch outgoing,
    VectorAllocator[] allocators)
    throws SchemaChangeException {
  this.allocators = allocators;
  this.incoming = incoming;
  this.sv4 = incoming.getSelectionVector4();
  doSetup(context, incoming, outgoing);
}
/**
 * Creates a new Aggregator based on the current schema. If setup fails, this method is
 * responsible for cleaning up and informing the context of the failure state, as well as
 * informing the upstream operators.
 *
 * @return true if the aggregator was set up successfully, false if there was a failure.
 */
private boolean createAggregator() {
  logger.debug("Creating new aggregator.");
  try {
    stats.startSetup();
    this.aggregator = createAggregatorInternal();
    return true;
  } catch (SchemaChangeException | ClassTransformationException | IOException ex) {
    context.fail(ex);
    container.clear();
    incoming.kill(false);
    return false;
  } finally {
    stats.stopSetup();
  }
}
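/**
 * Advances this input of the union. On the very first call (NOT_YET) a single batch is fetched;
 * afterwards, empty OK batches are skipped so the caller only sees batches that carry data or a
 * terminal outcome.
 */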
public IterOutcome nextBatch() {
  if (upstream == IterOutcome.NONE) {
    throw new IllegalStateException(String.format("Unknown state %s.", upstream));
  }
  if (upstream == IterOutcome.NOT_YET) {
    upstream = unionAllRecordBatch.next(recordBatch);
    return upstream;
  } else {
    do {
      upstream = unionAllRecordBatch.next(recordBatch);
    } while (upstream == IterOutcome.OK && recordBatch.getRecordCount() == 0);
    return upstream;
  }
}
@Override
protected void killIncoming(boolean sendUpstream) {
  incoming.kill(sendUpstream);
}
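/**
 * Drives the streaming aggregation. The first call establishes the schema and creates the
 * aggregator; after that, batch handling is delegated to the aggregator itself. A schema change
 * reported by the aggregator (UPDATE_AGGREGATOR) is treated as a failure, since streaming
 * aggregate does not support schema changes.
 */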
@Override
public IterOutcome innerNext() {
  // if a special batch has been sent, we have no data in the incoming so exit early
  if (specialBatchSent) {
    return IterOutcome.NONE;
  }

  // this is only called on the first batch. Beyond this, the aggregator manages batches.
  if (aggregator == null || first) {
    IterOutcome outcome;
    if (first && incoming.getRecordCount() > 0) {
      first = false;
      outcome = IterOutcome.OK_NEW_SCHEMA;
    } else {
      outcome = next(incoming);
    }
    logger.debug("Next outcome of {}", outcome);
    switch (outcome) {
      case NONE:
        if (first && popConfig.getKeys().length == 0) {
          // if we have a straight aggregate and an empty input batch, we need to handle it in a
          // different way
          constructSpecialBatch();
          first = false;
          // set state to indicate the fact that we have sent a special batch and input is empty
          specialBatchSent = true;
          return IterOutcome.OK;
        }
        // fall through
      case OUT_OF_MEMORY:
      case NOT_YET:
      case STOP:
        return outcome;
      case OK_NEW_SCHEMA:
        if (!createAggregator()) {
          done = true;
          return IterOutcome.STOP;
        }
        break;
      case OK:
        break;
      default:
        throw new IllegalStateException(String.format("unknown outcome %s", outcome));
    }
  }

  AggOutcome out = aggregator.doWork();
  recordCount = aggregator.getOutputCount();
  logger.debug("Aggregator response {}, records {}", out, aggregator.getOutputCount());
  switch (out) {
    case CLEANUP_AND_RETURN:
      if (!first) {
        container.zeroVectors();
      }
      done = true;
      // fall through
    case RETURN_OUTCOME:
      IterOutcome outcome = aggregator.getOutcome();
      if (outcome == IterOutcome.NONE && first) {
        first = false;
        done = true;
        return IterOutcome.OK_NEW_SCHEMA;
      } else if (outcome == IterOutcome.OK && first) {
        outcome = IterOutcome.OK_NEW_SCHEMA;
      } else if (outcome != IterOutcome.OUT_OF_MEMORY) {
        first = false;
      }
      return outcome;
    case UPDATE_AGGREGATOR:
      context.fail(
          UserException.unsupportedError()
              .message("Streaming aggregate does not support schema changes")
              .build(logger));
      close();
      killIncoming(false);
      return IterOutcome.STOP;
    default:
      throw new IllegalStateException(String.format("Unknown state %s.", out));
  }
}
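/**
 * Two-phase external sort. The load phase consumes upstream batches, sorts each one with an SV2,
 * and spills merged batch groups to disk whenever memory pressure or batch-count limits are hit.
 * The delivery phase then either serves records through the in-memory MSorter/SV4 (if nothing
 * was spilled) or streams them through a copier that merges the spilled runs.
 */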
@Override
public IterOutcome innerNext() {
  if (schema != null) {
    if (spillCount == 0) {
      return (getSelectionVector4().next()) ? IterOutcome.OK : IterOutcome.NONE;
    } else {
      Stopwatch w = Stopwatch.createStarted();
      int count = copier.next(targetRecordCount);
      if (count > 0) {
        long t = w.elapsed(TimeUnit.MICROSECONDS);
        logger.debug("Took {} us to merge {} records", t, count);
        container.setRecordCount(count);
        return IterOutcome.OK;
      } else {
        logger.debug("copier returned 0 records");
        return IterOutcome.NONE;
      }
    }
  }

  int totalCount = 0;
  int totalBatches = 0; // total number of batches received so far

  try {
    container.clear();
    outer:
    while (true) {
      IterOutcome upstream;
      if (first) {
        upstream = IterOutcome.OK_NEW_SCHEMA;
      } else {
        upstream = next(incoming);
      }
      if (upstream == IterOutcome.OK && sorter == null) {
        upstream = IterOutcome.OK_NEW_SCHEMA;
      }
      switch (upstream) {
        case NONE:
          if (first) {
            return upstream;
          }
          break outer;
        case NOT_YET:
          throw new UnsupportedOperationException();
        case STOP:
          return upstream;
        case OK_NEW_SCHEMA:
        case OK:
          VectorContainer convertedBatch;
          // only change in the case that the schema truly changes. Artificial schema changes
          // are ignored.
          if (upstream == IterOutcome.OK_NEW_SCHEMA && !incoming.getSchema().equals(schema)) {
            if (schema != null) {
              if (unionTypeEnabled) {
                this.schema = SchemaUtil.mergeSchemas(schema, incoming.getSchema());
              } else {
                throw new SchemaChangeException(
                    "Schema changes not supported in External Sort. Please enable Union type");
              }
            } else {
              schema = incoming.getSchema();
            }
            convertedBatch = SchemaUtil.coerceContainer(incoming, schema, oContext);
            for (BatchGroup b : batchGroups) {
              b.setSchema(schema);
            }
            for (BatchGroup b : spilledBatchGroups) {
              b.setSchema(schema);
            }
            this.sorter = createNewSorter(context, convertedBatch);
          } else {
            convertedBatch = SchemaUtil.coerceContainer(incoming, schema, oContext);
          }
          if (first) {
            first = false;
          }
          if (convertedBatch.getRecordCount() == 0) {
            for (VectorWrapper<?> w : convertedBatch) {
              w.clear();
            }
            break;
          }
          SelectionVector2 sv2;
          if (incoming.getSchema().getSelectionVectorMode()
              == BatchSchema.SelectionVectorMode.TWO_BYTE) {
            sv2 = incoming.getSelectionVector2().clone();
          } else {
            try {
              sv2 = newSV2();
            } catch (InterruptedException e) {
              return IterOutcome.STOP;
            } catch (OutOfMemoryException e) {
              throw new OutOfMemoryException(e);
            }
          }
          int count = sv2.getCount();
          totalCount += count;
          totalBatches++;
          sorter.setup(context, sv2, convertedBatch);
          sorter.sort(sv2);
          RecordBatchData rbd = new RecordBatchData(convertedBatch, oAllocator);
          boolean success = false;
          try {
            rbd.setSv2(sv2);
            batchGroups.add(new BatchGroup(rbd.getContainer(), rbd.getSv2(), oContext));
            if (peakNumBatches < batchGroups.size()) {
              peakNumBatches = batchGroups.size();
              stats.setLongStat(Metric.PEAK_BATCHES_IN_MEMORY, peakNumBatches);
            }
            batchesSinceLastSpill++;
            if (
            // If we haven't spilled so far, do we have enough memory for MSorter if this
            // turns out to be the last incoming batch?
            (spillCount == 0 && !hasMemoryForInMemorySort(totalCount))
                // If we haven't spilled so far, make sure we don't exceed the maximum number of
                // batches SV4 can address
                || (spillCount == 0 && totalBatches > Character.MAX_VALUE)
                // TODO(DRILL-4438) - consider setting this threshold more intelligently;
                // lowering it caused a failing low memory condition (test in
                // BasicPhysicalOpUnitTest) to complete successfully (although it caused a perf
                // decrease as there was more spilling).
                // current memory used is more than 95% of the memory usage limit of this
                // operator
                || (oAllocator.getAllocatedMemory() > .95 * oAllocator.getLimit())
                // The number of incoming batches (BatchGroups) exceeds the limit and the number
                // of batches accumulated since the last spill exceeds the defined limit
                || (batchGroups.size() > SPILL_THRESHOLD
                    && batchesSinceLastSpill >= SPILL_BATCH_GROUP_SIZE)) {
              if (firstSpillBatchCount == 0) {
                firstSpillBatchCount = batchGroups.size();
              }
              if (spilledBatchGroups.size() > firstSpillBatchCount / 2) {
                logger.info("Merging spills");
                final BatchGroup merged = mergeAndSpill(spilledBatchGroups);
                if (merged != null) {
                  spilledBatchGroups.addFirst(merged);
                }
              }
              final BatchGroup merged = mergeAndSpill(batchGroups);
              if (merged != null) { // make sure we don't add null to spilledBatchGroups
                spilledBatchGroups.add(merged);
                batchesSinceLastSpill = 0;
              }
            }
            success = true;
          } finally {
            if (!success) {
              rbd.clear();
            }
          }
          break;
        case OUT_OF_MEMORY:
          logger.debug("received OUT_OF_MEMORY, trying to spill");
          if (batchesSinceLastSpill > 2) {
            final BatchGroup merged = mergeAndSpill(batchGroups);
            if (merged != null) {
              spilledBatchGroups.add(merged);
              batchesSinceLastSpill = 0;
            }
          } else {
            logger.debug("not enough batches to spill, sending OUT_OF_MEMORY downstream");
            return IterOutcome.OUT_OF_MEMORY;
          }
          break;
        default:
          throw new UnsupportedOperationException();
      }
    }

    if (totalCount == 0) {
      return IterOutcome.NONE;
    }
    if (spillCount == 0) {
      if (builder != null) {
        builder.clear();
        builder.close();
      }
      builder = new SortRecordBatchBuilder(oAllocator);

      for (BatchGroup group : batchGroups) {
        RecordBatchData rbd = new RecordBatchData(group.getContainer(), oAllocator);
        rbd.setSv2(group.getSv2());
        builder.add(rbd);
      }

      builder.build(context, container);
      sv4 = builder.getSv4();
      mSorter = createNewMSorter();
      mSorter.setup(context, oAllocator, getSelectionVector4(), this.container);

      // For testing memory-leak purposes, inject an exception after mSorter finishes setup
      injector.injectUnchecked(context.getExecutionControls(), INTERRUPTION_AFTER_SETUP);

      mSorter.sort(this.container);

      // sort may have prematurely exited due to shouldContinue() returning false.
      if (!context.shouldContinue()) {
        return IterOutcome.STOP;
      }

      // For testing memory-leak purposes, inject an exception after mSorter finishes sorting
      injector.injectUnchecked(context.getExecutionControls(), INTERRUPTION_AFTER_SORT);

      sv4 = mSorter.getSV4();
      container.buildSchema(SelectionVectorMode.FOUR_BYTE);
    } else { // some batches were spilled
      final BatchGroup merged = mergeAndSpill(batchGroups);
      if (merged != null) {
        spilledBatchGroups.add(merged);
      }
      batchGroups.addAll(spilledBatchGroups);
      // no need to clean up spilledBatchGroups, all its batches are in batchGroups now
      spilledBatchGroups = null;

      logger.warn(
          "Starting to merge. {} batch groups. Current allocated memory: {}",
          batchGroups.size(),
          oAllocator.getAllocatedMemory());
      VectorContainer hyperBatch = constructHyperBatch(batchGroups);
      createCopier(hyperBatch, batchGroups, container, false);

      int estimatedRecordSize = 0;
      for (VectorWrapper<?> w : batchGroups.get(0)) {
        try {
          estimatedRecordSize += TypeHelper.getSize(w.getField().getType());
        } catch (UnsupportedOperationException e) {
          estimatedRecordSize += 50;
        }
      }
      targetRecordCount =
          Math.min(MAX_BATCH_SIZE, Math.max(1, COPIER_BATCH_MEM_LIMIT / estimatedRecordSize));
      int count = copier.next(targetRecordCount);
      container.buildSchema(SelectionVectorMode.NONE);
      container.setRecordCount(count);
    }

    return IterOutcome.OK_NEW_SCHEMA;
  } catch (SchemaChangeException ex) {
    kill(false);
    context.fail(
        UserException.unsupportedError(ex)
            .message("Sort doesn't currently support sorts with changing schemas")
            .build(logger));
    return IterOutcome.STOP;
  } catch (ClassTransformationException | IOException ex) {
    kill(false);
    context.fail(ex);
    return IterOutcome.STOP;
  } catch (UnsupportedOperationException e) {
    throw new RuntimeException(e);
  }
}
private IterOutcome doWork()
    throws ClassTransformationException, IOException, SchemaChangeException {
  if (allocationVectors != null) {
    for (ValueVector v : allocationVectors) {
      v.clear();
    }
  }
  allocationVectors = Lists.newArrayList();
  transfers.clear();

  final ClassGenerator<UnionAller> cg =
      CodeGenerator.getRoot(UnionAller.TEMPLATE_DEFINITION, context.getFunctionRegistry());
  int index = 0;
  for (VectorWrapper<?> vw : current) {
    ValueVector vvIn = vw.getValueVector();
    // get the original input column names
    SchemaPath inputPath = vvIn.getField().getPath();
    // get the renamed column names
    SchemaPath outputPath = outputFields.get(index).getPath();

    final ErrorCollector collector = new ErrorCollectorImpl();
    // According to the input data names, MinorTypes, and DataModes, choose whether to
    //   transfer directly,
    //   rename columns, or
    //   cast data types (MinorType or DataMode)
    if (hasSameTypeAndMode(outputFields.get(index), vw.getValueVector().getField())) {
      // Transfer column
      if (outputFields.get(index).getPath().equals(inputPath)) {
        final LogicalExpression expr =
            ExpressionTreeMaterializer.materialize(
                inputPath, current, collector, context.getFunctionRegistry());
        if (collector.hasErrors()) {
          throw new SchemaChangeException(
              String.format(
                  "Failure while trying to materialize incoming schema. Errors:\n %s.",
                  collector.toErrorString()));
        }

        ValueVectorReadExpression vectorRead = (ValueVectorReadExpression) expr;
        ValueVector vvOut =
            container.addOrGet(MaterializedField.create(outputPath, vectorRead.getMajorType()));
        TransferPair tp = vvIn.makeTransferPair(vvOut);
        transfers.add(tp);
        // Copy data in order to rename the column
      } else {
        final LogicalExpression expr =
            ExpressionTreeMaterializer.materialize(
                inputPath, current, collector, context.getFunctionRegistry());
        if (collector.hasErrors()) {
          throw new SchemaChangeException(
              String.format(
                  "Failure while trying to materialize incoming schema. Errors:\n %s.",
                  collector.toErrorString()));
        }

        MaterializedField outputField = MaterializedField.create(outputPath, expr.getMajorType());
        ValueVector vv = container.addOrGet(outputField, callBack);
        allocationVectors.add(vv);
        TypedFieldId fid = container.getValueVectorId(outputField.getPath());
        ValueVectorWriteExpression write = new ValueVectorWriteExpression(fid, expr, true);
        cg.addExpr(write);
      }
      // Cast is necessary
    } else {
      LogicalExpression expr =
          ExpressionTreeMaterializer.materialize(
              inputPath, current, collector, context.getFunctionRegistry());
      if (collector.hasErrors()) {
        throw new SchemaChangeException(
            String.format(
                "Failure while trying to materialize incoming schema. Errors:\n %s.",
                collector.toErrorString()));
      }

      // If the input's DataMode is required and the output's DataMode is not required,
      // cast to the one with the least restriction
      if (vvIn.getField().getType().getMode() == DataMode.REQUIRED
          && outputFields.get(index).getType().getMode() != DataMode.REQUIRED) {
        expr =
            ExpressionTreeMaterializer.convertToNullableType(
                expr,
                vvIn.getField().getType().getMinorType(),
                context.getFunctionRegistry(),
                collector);
        if (collector.hasErrors()) {
          throw new SchemaChangeException(
              String.format(
                  "Failure while trying to materialize incoming schema. Errors:\n %s.",
                  collector.toErrorString()));
        }
      }

      // If the two inputs' MinorTypes are different,
      // insert a cast before the Union operation
      if (vvIn.getField().getType().getMinorType()
          != outputFields.get(index).getType().getMinorType()) {
        expr =
            ExpressionTreeMaterializer.addCastExpression(
                expr,
                outputFields.get(index).getType(),
                context.getFunctionRegistry(),
                collector);
        if (collector.hasErrors()) {
          throw new SchemaChangeException(
              String.format(
                  "Failure while trying to materialize incoming schema. Errors:\n %s.",
                  collector.toErrorString()));
        }
      }

      final MaterializedField outputField =
          MaterializedField.create(outputPath, expr.getMajorType());
      ValueVector vector = container.addOrGet(outputField, callBack);
      allocationVectors.add(vector);
      TypedFieldId fid = container.getValueVectorId(outputField.getPath());

      boolean useSetSafe = !(vector instanceof FixedWidthVector);
      ValueVectorWriteExpression write = new ValueVectorWriteExpression(fid, expr, useSetSafe);
      cg.addExpr(write);
    }
    ++index;
  }

  unionall = context.getImplementationClass(cg.getCodeGenerator());
  unionall.setup(context, current, this, transfers);

  if (!schemaAvailable) {
    container.buildSchema(BatchSchema.SelectionVectorMode.NONE);
    schemaAvailable = true;
  }

  if (!doAlloc()) {
    return IterOutcome.OUT_OF_MEMORY;
  }

  recordCount = unionall.unionRecords(0, current.getRecordCount(), 0);
  setValueCount(recordCount);
  return IterOutcome.OK;
}
@Override
public IterOutcome innerNext() {
  container.zeroVectors();

  // if we got IterOutcome.NONE while getting partition vectors, and there are no batches on the
  // queue, then we are done
  if (upstreamNone && (batchQueue == null || batchQueue.size() == 0)) {
    return IterOutcome.NONE;
  }

  // if there are batches on the queue, process them first, rather than calling incoming.next()
  if (batchQueue != null && batchQueue.size() > 0) {
    VectorContainer vc = batchQueue.poll();
    recordCount = vc.getRecordCount();
    try {
      // Must set up a new schema each time, because ValueVectors are not reused between
      // containers in queue
      setupNewSchema(vc);
    } catch (SchemaChangeException ex) {
      kill(false);
      logger.error("Failure during query", ex);
      context.fail(ex);
      return IterOutcome.STOP;
    }
    doWork(vc);
    vc.zeroVectors();
    return IterOutcome.OK_NEW_SCHEMA;
  }

  // Reaching this point, either this is the first iteration, or there are no batches left on
  // the queue and there are more incoming
  IterOutcome upstream = next(incoming);

  if (this.first && upstream == IterOutcome.OK) {
    throw new RuntimeException("Invalid state: First batch should have OK_NEW_SCHEMA");
  }

  // If this is the first iteration, we need to generate the partition vectors before we can
  // proceed
  if (this.first && upstream == IterOutcome.OK_NEW_SCHEMA) {
    if (!getPartitionVectors()) {
      cleanup();
      return IterOutcome.STOP;
    }

    batchQueue = new LinkedBlockingQueue<>(this.sampledIncomingBatches);
    first = false;

    // Now that we have the partition vectors, we immediately process the first batch on the
    // queue
    VectorContainer vc = batchQueue.poll();
    try {
      setupNewSchema(vc);
    } catch (SchemaChangeException ex) {
      kill(false);
      logger.error("Failure during query", ex);
      context.fail(ex);
      return IterOutcome.STOP;
    }
    doWork(vc);
    vc.zeroVectors();
    recordCount = vc.getRecordCount();
    return IterOutcome.OK_NEW_SCHEMA;
  }

  // Now that all the batches on the queue are processed, we begin processing the incoming
  // batches. For the first one we need to generate a new schema, even if the outcome is
  // IterOutcome.OK. After that we can reuse the schema.
  if (!this.startedUnsampledBatches) {
    this.startedUnsampledBatches = true;
    if (upstream == IterOutcome.OK) {
      upstream = IterOutcome.OK_NEW_SCHEMA;
    }
  }
  switch (upstream) {
    case NONE:
    case NOT_YET:
    case STOP:
      cleanup();
      recordCount = 0;
      return upstream;
    case OK_NEW_SCHEMA:
      try {
        setupNewSchema(incoming);
      } catch (SchemaChangeException ex) {
        kill(false);
        logger.error("Failure during query", ex);
        context.fail(ex);
        return IterOutcome.STOP;
      }
      // fall through.
    case OK:
      doWork(incoming);
      recordCount = incoming.getRecordCount();
      return upstream; // change if upstream changed, otherwise normal.
    default:
      throw new UnsupportedOperationException();
  }
}
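/**
 * Collects up to recordsToSample records from the incoming stream, sorts them by the configured
 * orderings, projects every Nth record (N = recordsSampled / (samplingFactor * partitions)) into
 * a sample container, and publishes that sample to the distributed cache under mapKey. The
 * sampled batches themselves are held back for later processing by innerNext().
 */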
private boolean saveSamples()
    throws SchemaChangeException, ClassTransformationException, IOException {
  recordsSampled = 0;
  IterOutcome upstream;

  // Start collecting batches until recordsToSample records have been collected
  SortRecordBatchBuilder builder =
      new SortRecordBatchBuilder(oContext.getAllocator(), MAX_SORT_BYTES);
  builder.add(incoming);

  recordsSampled += incoming.getRecordCount();

  outer:
  while (recordsSampled < recordsToSample) {
    upstream = next(incoming);
    switch (upstream) {
      case NONE:
      case NOT_YET:
      case STOP:
        upstreamNone = true;
        break outer;
      default:
        // fall through
    }
    builder.add(incoming);
    recordsSampled += incoming.getRecordCount();
    if (upstream == IterOutcome.NONE) {
      break;
    }
  }
  VectorContainer sortedSamples = new VectorContainer();
  builder.build(context, sortedSamples);

  // Sort the records according to the orderings given in the configuration
  Sorter sorter = SortBatch.createNewSorter(context, popConfig.getOrderings(), sortedSamples);
  SelectionVector4 sv4 = builder.getSv4();
  sorter.setup(context, sv4, sortedSamples);
  sorter.sort(sv4, sortedSamples);

  // Project every Nth record to a new vector container, where
  // N = recordsSampled / (samplingFactor * partitions). Uses the expressions from the Orderings
  // to populate each column. There is one column for each Ordering in popConfig.orderings.
  VectorContainer containerToCache = new VectorContainer();
  List<ValueVector> localAllocationVectors = Lists.newArrayList();
  SampleCopier copier =
      getCopier(
          sv4, sortedSamples, containerToCache, popConfig.getOrderings(), localAllocationVectors);
  int allocationSize = 50;
  while (true) {
    for (ValueVector vv : localAllocationVectors) {
      AllocationHelper.allocate(vv, samplingFactor * partitions, allocationSize);
    }
    if (copier.copyRecords(
        recordsSampled / (samplingFactor * partitions), 0, samplingFactor * partitions)) {
      break;
    } else {
      containerToCache.zeroVectors();
      allocationSize *= 2;
    }
  }
  for (VectorWrapper<?> vw : containerToCache) {
    vw.getValueVector().getMutator().setValueCount(copier.getOutputRecords());
  }
  containerToCache.setRecordCount(copier.getOutputRecords());

  // Get a distributed multimap handle from the distributed cache, put the vectors from the new
  // vector container into a serializable wrapper object, and then add it to the distributed map
  WritableBatch batch =
      WritableBatch.getBatchNoHVWrap(containerToCache.getRecordCount(), containerToCache, false);
  CachedVectorContainer sampleToSave = new CachedVectorContainer(batch, context.getAllocator());

  mmap.put(mapKey, sampleToSave);
  this.sampledIncomingBatches = builder.getHeldRecordBatches();

  builder.clear();
  batch.clear();
  containerToCache.clear();
  sampleToSave.clear();
  return true;
}