private VectorContainer getBatch() throws IOException { assert fs != null; assert path != null; if (inputStream == null) { inputStream = fs.open(path); } VectorAccessibleSerializable vas = new VectorAccessibleSerializable(allocator); Stopwatch watch = Stopwatch.createStarted(); vas.readFromStream(inputStream); VectorContainer c = vas.get(); if (schema != null) { c = SchemaUtil.coerceContainer(c, schema, context); } // logger.debug("Took {} us to read {} records", watch.elapsed(TimeUnit.MICROSECONDS), // c.getRecordCount()); spilledBatches--; currentContainer.zeroVectors(); Iterator<VectorWrapper<?>> wrapperIterator = c.iterator(); for (VectorWrapper w : currentContainer) { TransferPair pair = wrapperIterator.next().getValueVector().makeTransferPair(w.getValueVector()); pair.transfer(); } currentContainer.setRecordCount(c.getRecordCount()); c.zeroVectors(); return c; }
/** * Sets up projection that will transfer all of the columns in batch, and also populate the * partition column based on which partition a record falls into in the partition table * * @param batch * @throws SchemaChangeException */ protected void setupNewSchema(VectorAccessible batch) throws SchemaChangeException { container.clear(); final ErrorCollector collector = new ErrorCollectorImpl(); final List<TransferPair> transfers = Lists.newArrayList(); final ClassGenerator<OrderedPartitionProjector> cg = CodeGenerator.getRoot( OrderedPartitionProjector.TEMPLATE_DEFINITION, context.getFunctionRegistry()); for (VectorWrapper<?> vw : batch) { TransferPair tp = vw.getValueVector().getTransferPair(); transfers.add(tp); container.add(tp.getTo()); } cg.setMappingSet(mainMapping); int count = 0; for (Ordering od : popConfig.getOrderings()) { final LogicalExpression expr = ExpressionTreeMaterializer.materialize( od.getExpr(), batch, collector, context.getFunctionRegistry()); if (collector.hasErrors()) throw new SchemaChangeException( "Failure while materializing expression. " + collector.toErrorString()); cg.setMappingSet(incomingMapping); ClassGenerator.HoldingContainer left = cg.addExpr(expr, false); cg.setMappingSet(partitionMapping); ClassGenerator.HoldingContainer right = cg.addExpr( new ValueVectorReadExpression(new TypedFieldId(expr.getMajorType(), count++)), false); cg.setMappingSet(mainMapping); LogicalExpression fh = FunctionGenerationHelper.getComparator(left, right, context.getFunctionRegistry()); ClassGenerator.HoldingContainer out = cg.addExpr(fh, false); JConditional jc = cg.getEvalBlock()._if(out.getValue().ne(JExpr.lit(0))); if (od.getDirection() == Direction.ASCENDING) { jc._then()._return(out.getValue()); } else { jc._then()._return(out.getValue().minus()); } } cg.getEvalBlock()._return(JExpr.lit(0)); container.add(this.partitionKeyVector); container.buildSchema(batch.getSchema().getSelectionVectorMode()); try { this.projector = context.getImplementationClass(cg); projector.setup( context, batch, this, transfers, partitionVectors, partitions, popConfig.getRef()); } catch (ClassTransformationException | IOException e) { throw new SchemaChangeException("Failure while attempting to load generated class", e); } }
private FieldWriter promoteToUnion() { String name = vector.getField().getLastName(); TransferPair tp = vector.getTransferPair( vector.getField().getType().getMinorType().name().toLowerCase(), vector.getAllocator()); tp.transfer(); if (parentContainer != null) { unionVector = parentContainer.addOrGet(name, Types.optional(MinorType.UNION), UnionVector.class); } else if (listVector != null) { unionVector = listVector.promoteToUnion(); } unionVector.addVector(tp.getTo()); writer = new UnionWriter(unionVector); writer.setPosition(idx()); for (int i = 0; i < idx(); i++) { unionVector.getMutator().setType(i, vector.getField().getType().getMinorType()); } vector = null; state = State.UNION; return writer; }
private void doTransfers() { for (TransferPair t : transfers) { t.transfer(); } }