/** * Sets up projection that will transfer all of the columns in batch, and also populate the * partition column based on which partition a record falls into in the partition table * * @param batch * @throws SchemaChangeException */ protected void setupNewSchema(VectorAccessible batch) throws SchemaChangeException { container.clear(); final ErrorCollector collector = new ErrorCollectorImpl(); final List<TransferPair> transfers = Lists.newArrayList(); final ClassGenerator<OrderedPartitionProjector> cg = CodeGenerator.getRoot( OrderedPartitionProjector.TEMPLATE_DEFINITION, context.getFunctionRegistry()); for (VectorWrapper<?> vw : batch) { TransferPair tp = vw.getValueVector().getTransferPair(); transfers.add(tp); container.add(tp.getTo()); } cg.setMappingSet(mainMapping); int count = 0; for (Ordering od : popConfig.getOrderings()) { final LogicalExpression expr = ExpressionTreeMaterializer.materialize( od.getExpr(), batch, collector, context.getFunctionRegistry()); if (collector.hasErrors()) throw new SchemaChangeException( "Failure while materializing expression. 
" + collector.toErrorString()); cg.setMappingSet(incomingMapping); ClassGenerator.HoldingContainer left = cg.addExpr(expr, false); cg.setMappingSet(partitionMapping); ClassGenerator.HoldingContainer right = cg.addExpr( new ValueVectorReadExpression(new TypedFieldId(expr.getMajorType(), count++)), false); cg.setMappingSet(mainMapping); LogicalExpression fh = FunctionGenerationHelper.getComparator(left, right, context.getFunctionRegistry()); ClassGenerator.HoldingContainer out = cg.addExpr(fh, false); JConditional jc = cg.getEvalBlock()._if(out.getValue().ne(JExpr.lit(0))); if (od.getDirection() == Direction.ASCENDING) { jc._then()._return(out.getValue()); } else { jc._then()._return(out.getValue().minus()); } } cg.getEvalBlock()._return(JExpr.lit(0)); container.add(this.partitionKeyVector); container.buildSchema(batch.getSchema().getSelectionVectorMode()); try { this.projector = context.getImplementationClass(cg); projector.setup( context, batch, this, transfers, partitionVectors, partitions, popConfig.getRef()); } catch (ClassTransformationException | IOException e) { throw new SchemaChangeException("Failure while attempting to load generated class", e); } }
private MSorter createNewMSorter( FragmentContext context, List<Ordering> orderings, VectorAccessible batch, MappingSet mainMapping, MappingSet leftMapping, MappingSet rightMapping) throws ClassTransformationException, IOException, SchemaChangeException { CodeGenerator<MSorter> cg = CodeGenerator.get( MSorter.TEMPLATE_DEFINITION, context.getFunctionRegistry(), context.getOptions()); ClassGenerator<MSorter> g = cg.getRoot(); g.setMappingSet(mainMapping); for (Ordering od : orderings) { // first, we rewrite the evaluation stack for each side of the comparison. ErrorCollector collector = new ErrorCollectorImpl(); final LogicalExpression expr = ExpressionTreeMaterializer.materialize( od.getExpr(), batch, collector, context.getFunctionRegistry()); if (collector.hasErrors()) { throw new SchemaChangeException( "Failure while materializing expression. " + collector.toErrorString()); } g.setMappingSet(leftMapping); HoldingContainer left = g.addExpr(expr, ClassGenerator.BlkCreateMode.FALSE); g.setMappingSet(rightMapping); HoldingContainer right = g.addExpr(expr, ClassGenerator.BlkCreateMode.FALSE); g.setMappingSet(mainMapping); // next we wrap the two comparison sides and add the expression block for the comparison. LogicalExpression fh = FunctionGenerationHelper.getOrderingComparator( od.nullsSortHigh(), left, right, context.getFunctionRegistry()); HoldingContainer out = g.addExpr(fh, ClassGenerator.BlkCreateMode.FALSE); JConditional jc = g.getEvalBlock()._if(out.getValue().ne(JExpr.lit(0))); if (od.getDirection() == Direction.ASCENDING) { jc._then()._return(out.getValue()); } else { jc._then()._return(out.getValue().minus()); } g.rotateBlock(); } g.rotateBlock(); g.getEvalBlock()._return(JExpr.lit(0)); return context.getImplementationClass(cg); }
/** * Creates a copier that does a project for every Nth record from a VectorContainer incoming into * VectorContainer outgoing. Each Ordering in orderings generates a column, and evaluation of the * expression associated with each Ordering determines the value of each column. These records * will later be sorted based on the values in each column, in the same order as the orderings. * * @param sv4 * @param incoming * @param outgoing * @param orderings * @return * @throws SchemaChangeException */ private SampleCopier getCopier( SelectionVector4 sv4, VectorContainer incoming, VectorContainer outgoing, List<Ordering> orderings, List<ValueVector> localAllocationVectors) throws SchemaChangeException { final ErrorCollector collector = new ErrorCollectorImpl(); final ClassGenerator<SampleCopier> cg = CodeGenerator.getRoot(SampleCopier.TEMPLATE_DEFINITION, context.getFunctionRegistry()); int i = 0; for (Ordering od : orderings) { final LogicalExpression expr = ExpressionTreeMaterializer.materialize( od.getExpr(), incoming, collector, context.getFunctionRegistry()); SchemaPath schemaPath = SchemaPath.getSimplePath("f" + i++); TypeProtos.MajorType.Builder builder = TypeProtos.MajorType.newBuilder() .mergeFrom(expr.getMajorType()) .clearMode() .setMode(TypeProtos.DataMode.REQUIRED); TypeProtos.MajorType newType = builder.build(); MaterializedField outputField = MaterializedField.create(schemaPath, newType); if (collector.hasErrors()) { throw new SchemaChangeException( String.format( "Failure while trying to materialize incoming schema. 
Errors:\n %s.", collector.toErrorString())); } ValueVector vector = TypeHelper.getNewVector(outputField, oContext.getAllocator()); localAllocationVectors.add(vector); TypedFieldId fid = outgoing.add(vector); ValueVectorWriteExpression write = new ValueVectorWriteExpression(fid, expr, true); HoldingContainer hc = cg.addExpr(write); cg.getEvalBlock()._if(hc.getValue().eq(JExpr.lit(0)))._then()._return(JExpr.FALSE); } cg.rotateBlock(); cg.getEvalBlock()._return(JExpr.TRUE); outgoing.buildSchema(BatchSchema.SelectionVectorMode.NONE); try { SampleCopier sampleCopier = context.getImplementationClass(cg); sampleCopier.setupCopier(context, sv4, incoming, outgoing); return sampleCopier; } catch (ClassTransformationException | IOException e) { throw new SchemaChangeException(e); } }
private void generateComparisons(ClassGenerator<?> g, VectorAccessible batch) throws SchemaChangeException { g.setMappingSet(MAIN_MAPPING); for (Ordering od : popConfig.getOrderings()) { // first, we rewrite the evaluation stack for each side of the comparison. ErrorCollector collector = new ErrorCollectorImpl(); final LogicalExpression expr = ExpressionTreeMaterializer.materialize( od.getExpr(), batch, collector, context.getFunctionRegistry()); if (collector.hasErrors()) { throw new SchemaChangeException( "Failure while materializing expression. " + collector.toErrorString()); } g.setMappingSet(LEFT_MAPPING); HoldingContainer left = g.addExpr(expr, ClassGenerator.BlkCreateMode.FALSE); g.setMappingSet(RIGHT_MAPPING); HoldingContainer right = g.addExpr(expr, ClassGenerator.BlkCreateMode.FALSE); g.setMappingSet(MAIN_MAPPING); // next we wrap the two comparison sides and add the expression block for the comparison. LogicalExpression fh = FunctionGenerationHelper.getOrderingComparator( od.nullsSortHigh(), left, right, context.getFunctionRegistry()); HoldingContainer out = g.addExpr(fh, ClassGenerator.BlkCreateMode.FALSE); JConditional jc = g.getEvalBlock()._if(out.getValue().ne(JExpr.lit(0))); if (od.getDirection() == Direction.ASCENDING) { jc._then()._return(out.getValue()); } else { jc._then()._return(out.getValue().minus()); } g.rotateBlock(); } g.rotateBlock(); g.getEvalBlock()._return(JExpr.lit(0)); }
/**
 * Builds a candidate partition table from the collected samples and offers it to the
 * distributed table map.
 *
 * <p>Steps: gather all sample containers stored under {@code mapKey}, sort them on columns
 * {@code f0}, {@code f1}, ... (one per configured ordering, keeping each ordering's direction),
 * copy every Nth sorted record into a candidate table, and publish that table under
 * {@code mapKey + "final"} with {@code putIfAbsent}.
 *
 * <p>All intermediate containers are released in a {@code finally} block, so they are cleared
 * even when building, sorting, code generation, copying or publishing throws.
 *
 * @throws SchemaChangeException propagated from sorter or copier creation and setup
 * @throws ClassTransformationException propagated from sorter creation
 * @throws IOException propagated from sorter creation
 */
private void buildTable() throws SchemaChangeException, ClassTransformationException, IOException {
  final SortRecordBatchBuilder containerBuilder =
      new SortRecordBatchBuilder(context.getAllocator(), MAX_SORT_BYTES);
  final VectorContainer allSamplesContainer = new VectorContainer();
  final VectorContainer candidatePartitionTable = new VectorContainer();
  // Stays null until the candidate table has been wrapped; guarded in the finally block.
  CachedVectorContainer wrap = null;

  try {
    // Get all samples from distributed map
    for (CachedVectorContainer w : mmap.get(mapKey)) {
      containerBuilder.add(w.get());
    }
    containerBuilder.build(context, allSamplesContainer);

    // The sample columns are addressed as f0, f1, ... in ordering order.
    final List<Ordering> orderDefs = Lists.newArrayList();
    int i = 0;
    for (Ordering od : popConfig.getOrderings()) {
      final SchemaPath sp = SchemaPath.getSimplePath("f" + i++);
      orderDefs.add(new Ordering(od.getDirection(), new FieldReference(sp)));
    }

    // Sort the incoming samples.
    final SelectionVector4 newSv4 = containerBuilder.getSv4();
    final Sorter sorter = SortBatch.createNewSorter(context, orderDefs, allSamplesContainer);
    sorter.setup(context, newSv4, allSamplesContainer);
    sorter.sort(newSv4, allSamplesContainer);

    // Copy every Nth record from the samples into a candidate partition table, where N =
    // totalSampledRecords/partitions.
    // Attempt to push this to the distributed map. Only the first candidate to get pushed will
    // be used.
    final List<ValueVector> localAllocationVectors = Lists.newArrayList();
    final SampleCopier copier =
        getCopier(
            newSv4,
            allSamplesContainer,
            candidatePartitionTable,
            orderDefs,
            localAllocationVectors);

    int allocationSize = 50;
    while (true) {
      for (ValueVector vv : localAllocationVectors) {
        AllocationHelper.allocate(vv, samplingFactor * partitions, allocationSize);
      }
      final int skipRecords = containerBuilder.getSv4().getTotalCount() / partitions;
      if (copier.copyRecords(skipRecords, skipRecords, partitions - 1)) {
        assert copier.getOutputRecords() == partitions - 1
            : String.format(
                "output records: %d partitions: %d", copier.getOutputRecords(), partitions);
        for (VectorWrapper<?> vw : candidatePartitionTable) {
          vw.getValueVector().getMutator().setValueCount(copier.getOutputRecords());
        }
        break;
      } else {
        // The copy did not succeed with this allocation: reset the vectors and retry with
        // twice the allocation size.
        candidatePartitionTable.zeroVectors();
        allocationSize *= 2;
      }
    }

    candidatePartitionTable.setRecordCount(copier.getOutputRecords());
    final WritableBatch batch =
        WritableBatch.getBatchNoHVWrap(
            candidatePartitionTable.getRecordCount(), candidatePartitionTable, false);
    wrap = new CachedVectorContainer(batch, context.getDrillbitContext().getAllocator());
    tableMap.putIfAbsent(mapKey + "final", wrap, 1, TimeUnit.MINUTES);
  } finally {
    // Release everything on both the success and the failure path, in the original order.
    candidatePartitionTable.clear();
    allSamplesContainer.clear();
    containerBuilder.clear();
    if (wrap != null) {
      wrap.clear();
    }
  }
}