private void createCopier(VectorAccessible batch, List<BatchGroup> batchGroupList,
    VectorContainer outputContainer, boolean spilling) throws SchemaChangeException {
  try {
    if (copier == null) {
      // Generate and compile the PriorityQueueCopier on first use; reuse the compiled
      // instance afterwards and just close its previous state.
      CodeGenerator<PriorityQueueCopier> cg = CodeGenerator.get(
          PriorityQueueCopier.TEMPLATE_DEFINITION,
          context.getFunctionRegistry(),
          context.getOptions());
      ClassGenerator<PriorityQueueCopier> g = cg.getRoot();
      generateComparisons(g, batch);

      g.setMappingSet(COPIER_MAPPING_SET);
      CopyUtil.generateCopies(g, batch, true);
      g.setMappingSet(MAIN_MAPPING);
      copier = context.getImplementationClass(cg);
    } else {
      copier.close();
    }

    // When spilling to disk, allocate the output vectors from the dedicated copier allocator;
    // otherwise use the operator's allocator.
    BufferAllocator allocator = spilling ? copierAllocator : oAllocator;
    for (VectorWrapper<?> i : batch) {
      ValueVector v = TypeHelper.getNewVector(i.getField(), allocator);
      outputContainer.add(v);
    }
    copier.setup(context, allocator, batch, batchGroupList, outputContainer);
  } catch (ClassTransformationException | IOException e) {
    throw new RuntimeException(e);
  }
}
/**
 * This method is called when the first batch comes in. Incoming batches are collected until a
 * threshold is met. At that point, the records in the batches are sorted and sampled, and the
 * sampled records are stored in the distributed cache. Once a sufficient fraction of the
 * fragments have shared their samples, each fragment grabs all the samples, sorts all the
 * records, builds a partition table, and attempts to push the partition table to the distributed
 * cache. Whichever table gets pushed first becomes the table used by all fragments for
 * partitioning.
 *
 * @return true if successful, false otherwise
 */
private boolean getPartitionVectors() {
  try {
    if (!saveSamples()) {
      return false;
    }

    CachedVectorContainer finalTable = null;

    long val = minorFragmentSampleCount.incrementAndGet();
    logger.debug("Incremented mfsc, got {}", val);

    final long fragmentsBeforeProceed =
        (long) Math.ceil(sendingMajorFragmentWidth * completionFactor);
    final String finalTableKey = mapKey + "final";

    if (val == fragmentsBeforeProceed) {
      // We crossed the barrier: build the partition table and read it back.
      buildTable();
      finalTable = tableMap.get(finalTableKey);
    } else {
      // Wait until a sufficient number of fragments have submitted samples, or give up after
      // roughly one second (100 polls at 10 ms) and build the table ourselves.
      // TODO: this should be polling.
      if (val < fragmentsBeforeProceed) {
        Thread.sleep(10);
      }
      for (int i = 0; i < 100 && finalTable == null; i++) {
        finalTable = tableMap.get(finalTableKey);
        if (finalTable != null) {
          break;
        }
        Thread.sleep(10);
      }
      if (finalTable == null) {
        buildTable();
      }
      finalTable = tableMap.get(finalTableKey);
    }

    Preconditions.checkState(finalTable != null);

    // Extract vectors from the wrapper and add them to the partition vectors. These vectors
    // are used for partitioning in the rest of this operator.
    for (VectorWrapper<?> w : finalTable.get()) {
      partitionVectors.add(w.getValueVector());
    }

  } catch (ClassTransformationException | IOException | SchemaChangeException
      | InterruptedException ex) {
    kill(false);
    logger.error("Failure while building final partition table.", ex);
    context.fail(ex);
    return false;
  }
  return true;
}
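/*
 * Illustrative sketch only (not part of the operator): shows how the fragmentsBeforeProceed
 * barrier above behaves. The values chosen for sendingMajorFragmentWidth and completionFactor
 * are hypothetical; in the operator they come from the fragment plan and configuration.
 */
public final class BarrierMathSketch {
  public static void main(String[] args) {
    int sendingMajorFragmentWidth = 10;  // hypothetical number of sending minor fragments
    double completionFactor = 0.75;      // hypothetical fraction that must submit samples

    long fragmentsBeforeProceed = (long) Math.ceil(sendingMajorFragmentWidth * completionFactor);
    System.out.println("fragmentsBeforeProceed = " + fragmentsBeforeProceed); // prints 8

    // The fragment whose increment lands exactly on the threshold builds the partition table;
    // earlier arrivals poll the distributed cache, later ones simply read the published table.
    for (long val = 1; val <= sendingMajorFragmentWidth; val++) {
      String role = val == fragmentsBeforeProceed ? "builds table"
          : val < fragmentsBeforeProceed ? "waits and polls" : "reads table";
      System.out.println("increment " + val + " -> " + role);
    }
  }
}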
/**
 * Creates a copier that projects every Nth record from a VectorContainer incoming into
 * VectorContainer outgoing. Each Ordering in orderings generates a column, and evaluation of the
 * expression associated with each Ordering determines the value of each column. These records
 * will later be sorted based on the values in each column, in the same order as the orderings.
 *
 * @param sv4 selection vector identifying the sampled records to copy
 * @param incoming container holding the records to copy from
 * @param outgoing container that receives the generated ordering columns
 * @param orderings sort expressions, one per generated output column
 * @param localAllocationVectors receives the newly allocated output vectors
 * @return the compiled SampleCopier
 * @throws SchemaChangeException if the incoming schema cannot be materialized
 */
private SampleCopier getCopier(SelectionVector4 sv4, VectorContainer incoming,
    VectorContainer outgoing, List<Ordering> orderings,
    List<ValueVector> localAllocationVectors) throws SchemaChangeException {
  final ErrorCollector collector = new ErrorCollectorImpl();
  final ClassGenerator<SampleCopier> cg =
      CodeGenerator.getRoot(SampleCopier.TEMPLATE_DEFINITION, context.getFunctionRegistry());

  int i = 0;
  for (Ordering od : orderings) {
    final LogicalExpression expr = ExpressionTreeMaterializer.materialize(
        od.getExpr(), incoming, collector, context.getFunctionRegistry());
    SchemaPath schemaPath = SchemaPath.getSimplePath("f" + i++);
    TypeProtos.MajorType.Builder builder = TypeProtos.MajorType.newBuilder()
        .mergeFrom(expr.getMajorType())
        .clearMode()
        .setMode(TypeProtos.DataMode.REQUIRED);
    TypeProtos.MajorType newType = builder.build();
    MaterializedField outputField = MaterializedField.create(schemaPath, newType);
    if (collector.hasErrors()) {
      throw new SchemaChangeException(String.format(
          "Failure while trying to materialize incoming schema. Errors:\n %s.",
          collector.toErrorString()));
    }

    ValueVector vector = TypeHelper.getNewVector(outputField, oContext.getAllocator());
    localAllocationVectors.add(vector);
    TypedFieldId fid = outgoing.add(vector);
    ValueVectorWriteExpression write = new ValueVectorWriteExpression(fid, expr, true);
    HoldingContainer hc = cg.addExpr(write);
    cg.getEvalBlock()._if(hc.getValue().eq(JExpr.lit(0)))._then()._return(JExpr.FALSE);
  }
  cg.rotateBlock();
  cg.getEvalBlock()._return(JExpr.TRUE);
  outgoing.buildSchema(BatchSchema.SelectionVectorMode.NONE);
  try {
    SampleCopier sampleCopier = context.getImplementationClass(cg);
    sampleCopier.setupCopier(context, sv4, incoming, outgoing);
    return sampleCopier;
  } catch (ClassTransformationException | IOException e) {
    throw new SchemaChangeException(e);
  }
}
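/*
 * Minimal sketch (an assumption for illustration, not a Drill API) of the "every Nth record"
 * sampling that the generated SampleCopier performs, written against plain arrays instead of
 * value vectors. recordCount and sampleCount are hypothetical inputs.
 */
static int[] sampleEveryNth(int recordCount, int sampleCount) {
  int step = Math.max(1, recordCount / sampleCount);
  int[] sampled = new int[Math.min(sampleCount, recordCount)];
  for (int i = 0; i < sampled.length; i++) {
    sampled[i] = i * step; // index of a record whose sort-key columns would be copied
  }
  return sampled;
}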
private VectorContainer constructHyperBatch(List<BatchGroup> batchGroupList) {
  // For each field in the schema, gather the corresponding vector from every batch group so
  // the merge can address records across groups through a four-byte selection vector.
  VectorContainer cont = new VectorContainer();
  for (MaterializedField field : schema) {
    ValueVector[] vectors = new ValueVector[batchGroupList.size()];
    int i = 0;
    for (BatchGroup group : batchGroupList) {
      vectors[i++] = group.getValueAccessorById(
          field.getValueClass(),
          group.getValueVectorId(SchemaPath.getSimplePath(field.getPath())).getFieldIds())
          .getValueVector();
    }
    cont.add(vectors);
  }
  cont.buildSchema(BatchSchema.SelectionVectorMode.FOUR_BYTE);
  return cont;
}
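/*
 * Sketch of how a four-byte selection vector entry addresses the hyper batch constructed
 * above: the upper 16 bits pick the batch (one value vector per BatchGroup) and the lower
 * 16 bits pick the record within that batch. These helpers are illustrative assumptions,
 * not Drill APIs.
 */
static int batchIndex(int sv4Entry) {
  return sv4Entry >>> 16;   // which BatchGroup's vector to read
}

static int recordIndex(int sv4Entry) {
  return sv4Entry & 0xFFFF; // offset within that batch's vector
}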
public void addField(ValueVector vector) {
  container.add(vector);
  fieldVectorMap.put(vector.getField(), vector);
}