/** * Creates a copier that does a project for every Nth record from a VectorContainer incoming into * VectorContainer outgoing. Each Ordering in orderings generates a column, and evaluation of the * expression associated with each Ordering determines the value of each column. These records * will later be sorted based on the values in each column, in the same order as the orderings. * * @param sv4 * @param incoming * @param outgoing * @param orderings * @return * @throws SchemaChangeException */ private SampleCopier getCopier( SelectionVector4 sv4, VectorContainer incoming, VectorContainer outgoing, List<Ordering> orderings, List<ValueVector> localAllocationVectors) throws SchemaChangeException { final ErrorCollector collector = new ErrorCollectorImpl(); final ClassGenerator<SampleCopier> cg = CodeGenerator.getRoot(SampleCopier.TEMPLATE_DEFINITION, context.getFunctionRegistry()); int i = 0; for (Ordering od : orderings) { final LogicalExpression expr = ExpressionTreeMaterializer.materialize( od.getExpr(), incoming, collector, context.getFunctionRegistry()); SchemaPath schemaPath = SchemaPath.getSimplePath("f" + i++); TypeProtos.MajorType.Builder builder = TypeProtos.MajorType.newBuilder() .mergeFrom(expr.getMajorType()) .clearMode() .setMode(TypeProtos.DataMode.REQUIRED); TypeProtos.MajorType newType = builder.build(); MaterializedField outputField = MaterializedField.create(schemaPath, newType); if (collector.hasErrors()) { throw new SchemaChangeException( String.format( "Failure while trying to materialize incoming schema. 
Errors:\n %s.", collector.toErrorString())); } ValueVector vector = TypeHelper.getNewVector(outputField, oContext.getAllocator()); localAllocationVectors.add(vector); TypedFieldId fid = outgoing.add(vector); ValueVectorWriteExpression write = new ValueVectorWriteExpression(fid, expr, true); HoldingContainer hc = cg.addExpr(write); cg.getEvalBlock()._if(hc.getValue().eq(JExpr.lit(0)))._then()._return(JExpr.FALSE); } cg.rotateBlock(); cg.getEvalBlock()._return(JExpr.TRUE); outgoing.buildSchema(BatchSchema.SelectionVectorMode.NONE); try { SampleCopier sampleCopier = context.getImplementationClass(cg); sampleCopier.setupCopier(context, sv4, incoming, outgoing); return sampleCopier; } catch (ClassTransformationException | IOException e) { throw new SchemaChangeException(e); } }
/**
 * Builds a hyper-batch container over the given batch groups: for every field of {@code schema}
 * it collects that field's value vector from each group (in list order) and adds the resulting
 * array to a new container, whose schema is then built with {@code
 * SelectionVectorMode.FOUR_BYTE}.
 *
 * @param batchGroupList batch groups to combine; each must expose every field of {@code schema}
 * @return the newly created container holding one vector array per schema field
 */
private VectorContainer constructHyperBatch(List<BatchGroup> batchGroupList) {
  final VectorContainer hyperContainer = new VectorContainer();
  final int groupCount = batchGroupList.size();

  for (MaterializedField column : schema) {
    // The lookup path depends only on the field, so resolve it once per column.
    final SchemaPath columnPath = SchemaPath.getSimplePath(column.getPath());
    final ValueVector[] columnVectors = new ValueVector[groupCount];

    int slot = 0;
    for (BatchGroup group : batchGroupList) {
      columnVectors[slot] =
          group
              .getValueAccessorById(
                  column.getValueClass(), group.getValueVectorId(columnPath).getFieldIds())
              .getValueVector();
      slot++;
    }
    hyperContainer.add(columnVectors);
  }

  hyperContainer.buildSchema(BatchSchema.SelectionVectorMode.FOUR_BYTE);
  return hyperContainer;
}
/**
 * Rebuilds the schema of {@code container}, using {@code SelectionVectorMode.NONE}.
 *
 * @throws SchemaChangeException declared by the overridden method; nothing visible in this body
 *     throws it directly
 */
@Override
public void setNewSchema() throws SchemaChangeException {
  container.buildSchema(SelectionVectorMode.NONE);
}
public BatchGroup mergeAndSpill(LinkedList<BatchGroup> batchGroups) throws SchemaChangeException { logger.debug("Copier allocator current allocation {}", copierAllocator.getAllocatedMemory()); logger.debug( "mergeAndSpill: starting total size in memory = {}", oAllocator.getAllocatedMemory()); VectorContainer outputContainer = new VectorContainer(); List<BatchGroup> batchGroupList = Lists.newArrayList(); int batchCount = batchGroups.size(); for (int i = 0; i < batchCount / 2; i++) { if (batchGroups.size() == 0) { break; } BatchGroup batch = batchGroups.pollLast(); assert batch != null : "Encountered a null batch during merge and spill operation"; batchGroupList.add(batch); } if (batchGroupList.size() == 0) { return null; } int estimatedRecordSize = 0; for (VectorWrapper<?> w : batchGroupList.get(0)) { try { estimatedRecordSize += TypeHelper.getSize(w.getField().getType()); } catch (UnsupportedOperationException e) { estimatedRecordSize += 50; } } int targetRecordCount = Math.max(1, COPIER_BATCH_MEM_LIMIT / estimatedRecordSize); VectorContainer hyperBatch = constructHyperBatch(batchGroupList); createCopier(hyperBatch, batchGroupList, outputContainer, true); int count = copier.next(targetRecordCount); assert count > 0; logger.debug( "mergeAndSpill: estimated record size = {}, target record count = {}", estimatedRecordSize, targetRecordCount); // 1 output container is kept in memory, so we want to hold on to it and transferClone // allows keeping ownership VectorContainer c1 = VectorContainer.getTransferClone(outputContainer, oContext); c1.buildSchema(BatchSchema.SelectionVectorMode.NONE); c1.setRecordCount(count); String spillDir = dirs.next(); Path currSpillPath = new Path(Joiner.on("/").join(spillDir, fileName)); currSpillDirs.add(currSpillPath); String outputFile = Joiner.on("/").join(currSpillPath, spillCount++); try { fs.deleteOnExit(currSpillPath); } catch (IOException e) { // since this is meant to be used in a batches's spilling, we don't propagate the 
exception logger.warn("Unable to mark spill directory " + currSpillPath + " for deleting on exit", e); } stats.setLongStat(Metric.SPILL_COUNT, spillCount); BatchGroup newGroup = new BatchGroup(c1, fs, outputFile, oContext); try (AutoCloseable a = AutoCloseables.all(batchGroupList)) { logger.info("Merging and spilling to {}", outputFile); while ((count = copier.next(targetRecordCount)) > 0) { outputContainer.buildSchema(BatchSchema.SelectionVectorMode.NONE); outputContainer.setRecordCount(count); // note that addBatch also clears the outputContainer newGroup.addBatch(outputContainer); } injector.injectChecked( context.getExecutionControls(), INTERRUPTION_WHILE_SPILLING, IOException.class); newGroup.closeOutputStream(); } catch (Throwable e) { // we only need to cleanup newGroup if spill failed try { AutoCloseables.close(e, newGroup); } catch (Throwable t) { /* close() may hit the same IO issue; just ignore */ } throw UserException.resourceError(e) .message("External Sort encountered an error while spilling to disk") .addContext(e.getMessage() /* more detail */) .build(logger); } finally { hyperBatch.clear(); } logger.debug("mergeAndSpill: final total size in memory = {}", oAllocator.getAllocatedMemory()); logger.info("Completed spilling to {}", outputFile); return newGroup; }