Ejemplo n.º 1
0
 /**
  * Builds the special outgoing batch used when a straight aggregate (one with no group-by
  * expression) receives empty input.
  *
  * <p>Every vector in the container is allocated for {@code SPECIAL_BATCH_COUNT} values and has
  * its value count set to the same number. Nullable vectors are otherwise left alone so their
  * output reads as NULL. Required vectors are expected to be fixed-width (the only case noted for
  * aggregates is count(), whose output is a BigIntVector) and are zeroed explicitly, because
  * buffers are not zeroed when they are allocated.
  *
  * @throws DrillRuntimeException if a required vector is not a {@link FixedWidthVector}
  */
 private void constructSpecialBatch() {
   // Tracks the position of the current vector so the matching expression can be reported.
   int position = 0;
   for (final VectorWrapper<?> wrapper : container) {
     final ValueVector vector = wrapper.getValueVector();
     AllocationHelper.allocateNew(vector, SPECIAL_BATCH_COUNT);
     vector.getMutator().setValueCount(SPECIAL_BATCH_COUNT);

     final boolean required =
         vector.getField().getType().getMode() == TypeProtos.DataMode.REQUIRED;
     if (required) {
       if (!(vector instanceof FixedWidthVector)) {
         /*
          * A required vector of variable length is not expected here. The record count has
          * already been set and the buffer has not been cleared, so fail loudly instead of
          * emitting whatever the buffer happens to contain.
          */
         throw new DrillRuntimeException(
             "FixedWidth vectors is the expected output vector type. "
                 + "Corresponding expression: "
                 + popConfig.getExprs()[position].toString());
       }
       // Required fixed-width output (count()): make the value read as zero.
       ((FixedWidthVector) vector).zeroVector();
     }
     position++;
   }
   container.setRecordCount(SPECIAL_BATCH_COUNT);
   recordCount = SPECIAL_BATCH_COUNT;
 }
 /**
  * Processes one batch: allocates the partition-key vector for the batch's record count, asks the
  * projector to project that many records, and then sets the same value count on every vector in
  * the container.
  *
  * @param batch the batch whose record count drives allocation and projection
  */
 protected void doWork(VectorAccessible batch) {
   final int rowCount = batch.getRecordCount();
   // NOTE(review): the literal 50 is passed straight to AllocationHelper.allocate; its exact
   // meaning (presumably a per-value size hint) is not visible here - confirm against the helper.
   AllocationHelper.allocate(partitionKeyVector, rowCount, 50);
   projector.projectRecords(rowCount, 0);
   for (VectorWrapper<?> wrapper : container) {
     wrapper.getValueVector().getMutator().setValueCount(rowCount);
   }
 }
 /**
  * Allocates every vector in {@code vectorMap} via {@code AllocationHelper.allocate}, sized by
  * {@code recordsPerBatch}.
  *
  * <p>A {@link NullPointerException} raised during allocation is reported as an
  * {@code OutOfMemoryException}. The original exception is attached as the cause so the failing
  * allocation site remains visible in the stack trace.
  *
  * @param vectorMap the vectors to allocate; only the map's values are used
  * @throws OutOfMemoryException if allocation fails with a {@link NullPointerException}
  */
 @Override
 public void allocate(Map<String, ValueVector> vectorMap) throws OutOfMemoryException {
   try {
     for (final ValueVector v : vectorMap.values()) {
       // NOTE(review): 50 and 10 are forwarded unchanged; presumably size hints - confirm
       // against AllocationHelper.allocate.
       AllocationHelper.allocate(v, recordsPerBatch, 50, 10);
     }
   } catch (NullPointerException e) {
     // Keep the NPE as the cause instead of discarding it.
     throw new OutOfMemoryException(e);
   }
 }
 /**
  * Attempts to allocate every vector in {@code allocationVectors} for the record count of
  * {@code current}.
  *
  * <p>Stops at the first failure; vectors allocated before that point are not released here.
  *
  * @return {@code true} if all allocations succeeded, {@code false} as soon as one of them throws
  *     {@code OutOfMemoryException}
  */
 private boolean doAlloc() {
   try {
     for (ValueVector vector : allocationVectors) {
       AllocationHelper.allocateNew(vector, current.getRecordCount());
     }
     return true;
   } catch (OutOfMemoryException ex) {
     // Out of memory is an expected outcome here and is signalled through the return value.
     return false;
   }
 }
  /**
   * Builds a candidate partition table from all samples stored under {@code mapKey} and offers
   * it to {@code tableMap}.
   *
   * <p>Steps: gather every cached sample container into one container, sort it by orderings that
   * mirror {@code popConfig.getOrderings()} but reference the columns {@code f0}, {@code f1}, ...,
   * copy every Nth record (N = total sampled records / partitions) into the candidate table, and
   * publish it under {@code mapKey + "final"} with {@code putIfAbsent}, so only the first
   * candidate pushed is used.
   *
   * <p>The temporary containers are cleared in a {@code finally} block so they are released even
   * when building, sorting, copying or publishing fails.
   *
   * @throws SchemaChangeException declared by the sorter/copier setup this method calls
   * @throws ClassTransformationException declared by the sorter/copier setup this method calls
   * @throws IOException declared by the sorter/copier setup this method calls
   */
  private void buildTable()
      throws SchemaChangeException, ClassTransformationException, IOException {

    SortRecordBatchBuilder containerBuilder =
        new SortRecordBatchBuilder(context.getAllocator(), MAX_SORT_BYTES);
    VectorContainer allSamplesContainer = new VectorContainer();
    VectorContainer candidatePartitionTable = new VectorContainer();
    CachedVectorContainer wrap = null;
    try {
      // Get all samples from distributed map
      for (CachedVectorContainer w : mmap.get(mapKey)) {
        containerBuilder.add(w.get());
      }
      containerBuilder.build(context, allSamplesContainer);

      // Re-target each configured ordering at the sample columns f0, f1, ...
      List<Ordering> orderDefs = Lists.newArrayList();
      int i = 0;
      for (Ordering od : popConfig.getOrderings()) {
        SchemaPath sp = SchemaPath.getSimplePath("f" + i++);
        orderDefs.add(new Ordering(od.getDirection(), new FieldReference(sp)));
      }

      // sort the data incoming samples.
      SelectionVector4 newSv4 = containerBuilder.getSv4();
      Sorter sorter = SortBatch.createNewSorter(context, orderDefs, allSamplesContainer);
      sorter.setup(context, newSv4, allSamplesContainer);
      sorter.sort(newSv4, allSamplesContainer);

      // Copy every Nth record from the samples into a candidate partition table, where N =
      // totalSampledRecords/partitions
      // Attempt to push this to the distributed map. Only the first candidate to get pushed will
      // be used.
      List<ValueVector> localAllocationVectors = Lists.newArrayList();
      SampleCopier copier =
          getCopier(
              newSv4,
              allSamplesContainer,
              candidatePartitionTable,
              orderDefs,
              localAllocationVectors);
      int allocationSize = 50;
      // Retry the copy with a doubled allocation size until it succeeds.
      while (true) {
        for (ValueVector vv : localAllocationVectors) {
          AllocationHelper.allocate(vv, samplingFactor * partitions, allocationSize);
        }
        int skipRecords = containerBuilder.getSv4().getTotalCount() / partitions;
        if (copier.copyRecords(skipRecords, skipRecords, partitions - 1)) {
          assert copier.getOutputRecords() == partitions - 1
              : String.format(
                  "output records: %d partitions: %d", copier.getOutputRecords(), partitions);
          for (VectorWrapper<?> vw : candidatePartitionTable) {
            vw.getValueVector().getMutator().setValueCount(copier.getOutputRecords());
          }
          break;
        } else {
          candidatePartitionTable.zeroVectors();
          allocationSize *= 2;
        }
      }
      candidatePartitionTable.setRecordCount(copier.getOutputRecords());
      WritableBatch batch =
          WritableBatch.getBatchNoHVWrap(
              candidatePartitionTable.getRecordCount(), candidatePartitionTable, false);
      wrap = new CachedVectorContainer(batch, context.getDrillbitContext().getAllocator());
      tableMap.putIfAbsent(mapKey + "final", wrap, 1, TimeUnit.MINUTES);
    } finally {
      // Same release order as before, but now also executed when an exception is propagating.
      candidatePartitionTable.clear();
      allSamplesContainer.clear();
      containerBuilder.clear();
      if (wrap != null) {
        wrap.clear();
      }
    }
  }
  /**
   * Collects sample records from {@code incoming}, sorts them, projects every Nth record into a
   * new container and stores that container in the distributed multimap under {@code mapKey}.
   *
   * <p>Batches are collected until at least {@code recordsToSample} records have been seen or the
   * upstream reports {@code NONE}, {@code NOT_YET} or {@code STOP}; in that case
   * {@code upstreamNone} is set. The batches held by the builder are kept in
   * {@code sampledIncomingBatches}.
   *
   * <p>The builder, the writable batch and the temporary containers are cleared in a
   * {@code finally} block so they are released even when sorting, copying or publishing fails.
   *
   * @return always {@code true}
   * @throws SchemaChangeException declared by the sorter/copier setup this method calls
   * @throws ClassTransformationException declared by the sorter/copier setup this method calls
   * @throws IOException declared by the sorter/copier setup this method calls
   */
  private boolean saveSamples()
      throws SchemaChangeException, ClassTransformationException, IOException {
    recordsSampled = 0;
    IterOutcome upstream;

    // Start collecting batches until recordsToSample records have been collected

    SortRecordBatchBuilder builder =
        new SortRecordBatchBuilder(oContext.getAllocator(), MAX_SORT_BYTES);
    VectorContainer containerToCache = new VectorContainer();
    WritableBatch batch = null;
    CachedVectorContainer sampleToSave = null;
    try {
      builder.add(incoming);

      recordsSampled += incoming.getRecordCount();

      outer:
      while (recordsSampled < recordsToSample) {
        upstream = next(incoming);
        switch (upstream) {
          case NONE:
          case NOT_YET:
          case STOP:
            upstreamNone = true;
            break outer;
          default:
            // fall through
        }
        // NONE has already left the loop above, so no further end-of-stream check is needed.
        builder.add(incoming);
        recordsSampled += incoming.getRecordCount();
      }
      VectorContainer sortedSamples = new VectorContainer();
      builder.build(context, sortedSamples);

      // Sort the records according the orderings given in the configuration

      Sorter sorter = SortBatch.createNewSorter(context, popConfig.getOrderings(), sortedSamples);
      SelectionVector4 sv4 = builder.getSv4();
      sorter.setup(context, sv4, sortedSamples);
      sorter.sort(sv4, sortedSamples);

      // Project every Nth record to a new vector container, where
      // N = recordsSampled / (samplingFactor * partitions). The expressions from the Orderings
      // populate each column; there is one column for each Ordering in popConfig.orderings.

      List<ValueVector> localAllocationVectors = Lists.newArrayList();
      SampleCopier copier =
          getCopier(
              sv4,
              sortedSamples,
              containerToCache,
              popConfig.getOrderings(),
              localAllocationVectors);
      int allocationSize = 50;
      // Retry the copy with a doubled allocation size until it succeeds.
      while (true) {
        for (ValueVector vv : localAllocationVectors) {
          AllocationHelper.allocate(vv, samplingFactor * partitions, allocationSize);
        }
        if (copier.copyRecords(
            recordsSampled / (samplingFactor * partitions), 0, samplingFactor * partitions)) {
          break;
        } else {
          containerToCache.zeroVectors();
          allocationSize *= 2;
        }
      }
      for (VectorWrapper<?> vw : containerToCache) {
        vw.getValueVector().getMutator().setValueCount(copier.getOutputRecords());
      }
      containerToCache.setRecordCount(copier.getOutputRecords());

      // Get a distributed multimap handle from the distributed cache, put the vectors from the
      // new vector container into a serializable wrapper object, and add it to the distributed
      // map.

      batch =
          WritableBatch.getBatchNoHVWrap(
              containerToCache.getRecordCount(), containerToCache, false);
      sampleToSave = new CachedVectorContainer(batch, context.getAllocator());

      mmap.put(mapKey, sampleToSave);
      this.sampledIncomingBatches = builder.getHeldRecordBatches();
    } finally {
      // Same release order as before, but now also executed when an exception is propagating.
      builder.clear();
      if (batch != null) {
        batch.clear();
      }
      containerToCache.clear();
      if (sampleToSave != null) {
        sampleToSave.clear();
      }
    }
    return true;
  }