/**
   * Sets up projection that will transfer all of the columns in batch, and also populate the
   * partition column based on which partition a record falls into in the partition table
   *
   * @param batch
   * @throws SchemaChangeException
   */
  protected void setupNewSchema(VectorAccessible batch) throws SchemaChangeException {
    container.clear();
    final ErrorCollector collector = new ErrorCollectorImpl();
    final List<TransferPair> transfers = Lists.newArrayList();

    final ClassGenerator<OrderedPartitionProjector> cg =
        CodeGenerator.getRoot(
            OrderedPartitionProjector.TEMPLATE_DEFINITION, context.getFunctionRegistry());

    for (VectorWrapper<?> vw : batch) {
      TransferPair tp = vw.getValueVector().getTransferPair();
      transfers.add(tp);
      container.add(tp.getTo());
    }

    cg.setMappingSet(mainMapping);

    int count = 0;
    for (Ordering od : popConfig.getOrderings()) {
      final LogicalExpression expr =
          ExpressionTreeMaterializer.materialize(
              od.getExpr(), batch, collector, context.getFunctionRegistry());
      if (collector.hasErrors())
        throw new SchemaChangeException(
            "Failure while materializing expression. " + collector.toErrorString());
      cg.setMappingSet(incomingMapping);
      ClassGenerator.HoldingContainer left = cg.addExpr(expr, false);
      cg.setMappingSet(partitionMapping);
      ClassGenerator.HoldingContainer right =
          cg.addExpr(
              new ValueVectorReadExpression(new TypedFieldId(expr.getMajorType(), count++)), false);
      cg.setMappingSet(mainMapping);

      LogicalExpression fh =
          FunctionGenerationHelper.getComparator(left, right, context.getFunctionRegistry());
      ClassGenerator.HoldingContainer out = cg.addExpr(fh, false);
      JConditional jc = cg.getEvalBlock()._if(out.getValue().ne(JExpr.lit(0)));

      if (od.getDirection() == Direction.ASCENDING) {
        jc._then()._return(out.getValue());
      } else {
        jc._then()._return(out.getValue().minus());
      }
    }

    cg.getEvalBlock()._return(JExpr.lit(0));

    container.add(this.partitionKeyVector);
    container.buildSchema(batch.getSchema().getSelectionVectorMode());

    try {
      this.projector = context.getImplementationClass(cg);
      projector.setup(
          context, batch, this, transfers, partitionVectors, partitions, popConfig.getRef());
    } catch (ClassTransformationException | IOException e) {
      throw new SchemaChangeException("Failure while attempting to load generated class", e);
    }
  }
Example #2
0
  private MSorter createNewMSorter(
      FragmentContext context,
      List<Ordering> orderings,
      VectorAccessible batch,
      MappingSet mainMapping,
      MappingSet leftMapping,
      MappingSet rightMapping)
      throws ClassTransformationException, IOException, SchemaChangeException {
    CodeGenerator<MSorter> cg =
        CodeGenerator.get(
            MSorter.TEMPLATE_DEFINITION, context.getFunctionRegistry(), context.getOptions());
    ClassGenerator<MSorter> g = cg.getRoot();
    g.setMappingSet(mainMapping);

    for (Ordering od : orderings) {
      // first, we rewrite the evaluation stack for each side of the comparison.
      ErrorCollector collector = new ErrorCollectorImpl();
      final LogicalExpression expr =
          ExpressionTreeMaterializer.materialize(
              od.getExpr(), batch, collector, context.getFunctionRegistry());
      if (collector.hasErrors()) {
        throw new SchemaChangeException(
            "Failure while materializing expression. " + collector.toErrorString());
      }
      g.setMappingSet(leftMapping);
      HoldingContainer left = g.addExpr(expr, ClassGenerator.BlkCreateMode.FALSE);
      g.setMappingSet(rightMapping);
      HoldingContainer right = g.addExpr(expr, ClassGenerator.BlkCreateMode.FALSE);
      g.setMappingSet(mainMapping);

      // next we wrap the two comparison sides and add the expression block for the comparison.
      LogicalExpression fh =
          FunctionGenerationHelper.getOrderingComparator(
              od.nullsSortHigh(), left, right, context.getFunctionRegistry());
      HoldingContainer out = g.addExpr(fh, ClassGenerator.BlkCreateMode.FALSE);
      JConditional jc = g.getEvalBlock()._if(out.getValue().ne(JExpr.lit(0)));

      if (od.getDirection() == Direction.ASCENDING) {
        jc._then()._return(out.getValue());
      } else {
        jc._then()._return(out.getValue().minus());
      }
      g.rotateBlock();
    }

    g.rotateBlock();
    g.getEvalBlock()._return(JExpr.lit(0));

    return context.getImplementationClass(cg);
  }
  /**
   * Creates a copier that does a project for every Nth record from a VectorContainer incoming into
   * VectorContainer outgoing. Each Ordering in orderings generates a column, and evaluation of the
   * expression associated with each Ordering determines the value of each column. These records
   * will later be sorted based on the values in each column, in the same order as the orderings.
   *
   * @param sv4
   * @param incoming
   * @param outgoing
   * @param orderings
   * @return
   * @throws SchemaChangeException
   */
  private SampleCopier getCopier(
      SelectionVector4 sv4,
      VectorContainer incoming,
      VectorContainer outgoing,
      List<Ordering> orderings,
      List<ValueVector> localAllocationVectors)
      throws SchemaChangeException {
    final ErrorCollector collector = new ErrorCollectorImpl();
    final ClassGenerator<SampleCopier> cg =
        CodeGenerator.getRoot(SampleCopier.TEMPLATE_DEFINITION, context.getFunctionRegistry());

    int i = 0;
    for (Ordering od : orderings) {
      final LogicalExpression expr =
          ExpressionTreeMaterializer.materialize(
              od.getExpr(), incoming, collector, context.getFunctionRegistry());
      SchemaPath schemaPath = SchemaPath.getSimplePath("f" + i++);
      TypeProtos.MajorType.Builder builder =
          TypeProtos.MajorType.newBuilder()
              .mergeFrom(expr.getMajorType())
              .clearMode()
              .setMode(TypeProtos.DataMode.REQUIRED);
      TypeProtos.MajorType newType = builder.build();
      MaterializedField outputField = MaterializedField.create(schemaPath, newType);
      if (collector.hasErrors()) {
        throw new SchemaChangeException(
            String.format(
                "Failure while trying to materialize incoming schema.  Errors:\n %s.",
                collector.toErrorString()));
      }

      ValueVector vector = TypeHelper.getNewVector(outputField, oContext.getAllocator());
      localAllocationVectors.add(vector);
      TypedFieldId fid = outgoing.add(vector);
      ValueVectorWriteExpression write = new ValueVectorWriteExpression(fid, expr, true);
      HoldingContainer hc = cg.addExpr(write);
      cg.getEvalBlock()._if(hc.getValue().eq(JExpr.lit(0)))._then()._return(JExpr.FALSE);
    }
    cg.rotateBlock();
    cg.getEvalBlock()._return(JExpr.TRUE);
    outgoing.buildSchema(BatchSchema.SelectionVectorMode.NONE);
    try {
      SampleCopier sampleCopier = context.getImplementationClass(cg);
      sampleCopier.setupCopier(context, sv4, incoming, outgoing);
      return sampleCopier;
    } catch (ClassTransformationException | IOException e) {
      throw new SchemaChangeException(e);
    }
  }
Example #4
0
  private void generateComparisons(ClassGenerator<?> g, VectorAccessible batch)
      throws SchemaChangeException {
    g.setMappingSet(MAIN_MAPPING);

    for (Ordering od : popConfig.getOrderings()) {
      // first, we rewrite the evaluation stack for each side of the comparison.
      ErrorCollector collector = new ErrorCollectorImpl();
      final LogicalExpression expr =
          ExpressionTreeMaterializer.materialize(
              od.getExpr(), batch, collector, context.getFunctionRegistry());
      if (collector.hasErrors()) {
        throw new SchemaChangeException(
            "Failure while materializing expression. " + collector.toErrorString());
      }
      g.setMappingSet(LEFT_MAPPING);
      HoldingContainer left = g.addExpr(expr, ClassGenerator.BlkCreateMode.FALSE);
      g.setMappingSet(RIGHT_MAPPING);
      HoldingContainer right = g.addExpr(expr, ClassGenerator.BlkCreateMode.FALSE);
      g.setMappingSet(MAIN_MAPPING);

      // next we wrap the two comparison sides and add the expression block for the comparison.
      LogicalExpression fh =
          FunctionGenerationHelper.getOrderingComparator(
              od.nullsSortHigh(), left, right, context.getFunctionRegistry());
      HoldingContainer out = g.addExpr(fh, ClassGenerator.BlkCreateMode.FALSE);
      JConditional jc = g.getEvalBlock()._if(out.getValue().ne(JExpr.lit(0)));

      if (od.getDirection() == Direction.ASCENDING) {
        jc._then()._return(out.getValue());
      } else {
        jc._then()._return(out.getValue().minus());
      }
      g.rotateBlock();
    }

    g.rotateBlock();
    g.getEvalBlock()._return(JExpr.lit(0));
  }
  private void buildTable()
      throws SchemaChangeException, ClassTransformationException, IOException {

    // Get all samples from distributed map

    SortRecordBatchBuilder containerBuilder =
        new SortRecordBatchBuilder(context.getAllocator(), MAX_SORT_BYTES);
    for (CachedVectorContainer w : mmap.get(mapKey)) {
      containerBuilder.add(w.get());
    }
    VectorContainer allSamplesContainer = new VectorContainer();
    containerBuilder.build(context, allSamplesContainer);

    List<Ordering> orderDefs = Lists.newArrayList();
    int i = 0;
    for (Ordering od : popConfig.getOrderings()) {
      SchemaPath sp = SchemaPath.getSimplePath("f" + i++);
      orderDefs.add(new Ordering(od.getDirection(), new FieldReference(sp)));
    }

    // sort the data incoming samples.
    SelectionVector4 newSv4 = containerBuilder.getSv4();
    Sorter sorter = SortBatch.createNewSorter(context, orderDefs, allSamplesContainer);
    sorter.setup(context, newSv4, allSamplesContainer);
    sorter.sort(newSv4, allSamplesContainer);

    // Copy every Nth record from the samples into a candidate partition table, where N =
    // totalSampledRecords/partitions
    // Attempt to push this to the distributed map. Only the first candidate to get pushed will be
    // used.
    VectorContainer candidatePartitionTable = new VectorContainer();
    SampleCopier copier = null;
    List<ValueVector> localAllocationVectors = Lists.newArrayList();
    copier =
        getCopier(
            newSv4,
            allSamplesContainer,
            candidatePartitionTable,
            orderDefs,
            localAllocationVectors);
    int allocationSize = 50;
    while (true) {
      for (ValueVector vv : localAllocationVectors) {
        AllocationHelper.allocate(vv, samplingFactor * partitions, allocationSize);
      }
      int skipRecords = containerBuilder.getSv4().getTotalCount() / partitions;
      if (copier.copyRecords(skipRecords, skipRecords, partitions - 1)) {
        assert copier.getOutputRecords() == partitions - 1
            : String.format(
                "output records: %d partitions: %d", copier.getOutputRecords(), partitions);
        for (VectorWrapper<?> vw : candidatePartitionTable) {
          vw.getValueVector().getMutator().setValueCount(copier.getOutputRecords());
        }
        break;
      } else {
        candidatePartitionTable.zeroVectors();
        allocationSize *= 2;
      }
    }
    candidatePartitionTable.setRecordCount(copier.getOutputRecords());
    WritableBatch batch =
        WritableBatch.getBatchNoHVWrap(
            candidatePartitionTable.getRecordCount(), candidatePartitionTable, false);
    CachedVectorContainer wrap =
        new CachedVectorContainer(batch, context.getDrillbitContext().getAllocator());
    tableMap.putIfAbsent(mapKey + "final", wrap, 1, TimeUnit.MINUTES);

    candidatePartitionTable.clear();
    allSamplesContainer.clear();
    containerBuilder.clear();
    wrap.clear();
  }