コード例 #1
0
  /**
   * This method is called when the first batch comes in. Incoming batches are collected until a
   * threshold is met. At that point, the records in the batches are sorted and sampled, and the
   * sampled records are stored in the distributed cache. Once a sufficient fraction of the
   * fragments have shared their samples, each fragment grabs all the samples, sorts all the
   * records, builds a partition table, and attempts to push the partition table to the distributed
   * cache. Whichever table gets pushed first becomes the table used by all fragments for
   * partitioning.
   *
   * @return True is successful. False if failed.
   */
  private boolean getPartitionVectors() {

    try {

      if (!saveSamples()) {
        return false;
      }

      CachedVectorContainer finalTable = null;

      long val = minorFragmentSampleCount.incrementAndGet();
      logger.debug("Incremented mfsc, got {}", val);

      final long fragmentsBeforeProceed =
          (long) Math.ceil(sendingMajorFragmentWidth * completionFactor);
      final String finalTableKey = mapKey + "final";

      if (val == fragmentsBeforeProceed) { // we crossed the barrier, build table and get data.
        buildTable();
        finalTable = tableMap.get(finalTableKey);
      } else {
        // Wait until sufficient number of fragments have submitted samples, or proceed after xx ms
        // passed
        // TODO: this should be polling.

        if (val < fragmentsBeforeProceed) Thread.sleep(10);
        for (int i = 0; i < 100 && finalTable == null; i++) {
          finalTable = tableMap.get(finalTableKey);
          if (finalTable != null) {
            break;
          }
          Thread.sleep(10);
        }
        if (finalTable == null) {
          buildTable();
        }
        finalTable = tableMap.get(finalTableKey);
      }

      Preconditions.checkState(finalTable != null);

      // Extract vectors from the wrapper, and add to partition vectors. These vectors will be used
      // for partitioning in
      // the rest of this operator
      for (VectorWrapper<?> w : finalTable.get()) {
        partitionVectors.add(w.getValueVector());
      }
    } catch (ClassTransformationException
        | IOException
        | SchemaChangeException
        | InterruptedException ex) {
      kill(false);
      logger.error("Failure while building final partition table.", ex);
      context.fail(ex);
      return false;
    }
    return true;
  }
コード例 #2
0
  private void buildTable()
      throws SchemaChangeException, ClassTransformationException, IOException {

    // Get all samples from distributed map

    SortRecordBatchBuilder containerBuilder =
        new SortRecordBatchBuilder(context.getAllocator(), MAX_SORT_BYTES);
    for (CachedVectorContainer w : mmap.get(mapKey)) {
      containerBuilder.add(w.get());
    }
    VectorContainer allSamplesContainer = new VectorContainer();
    containerBuilder.build(context, allSamplesContainer);

    List<Ordering> orderDefs = Lists.newArrayList();
    int i = 0;
    for (Ordering od : popConfig.getOrderings()) {
      SchemaPath sp = SchemaPath.getSimplePath("f" + i++);
      orderDefs.add(new Ordering(od.getDirection(), new FieldReference(sp)));
    }

    // sort the data incoming samples.
    SelectionVector4 newSv4 = containerBuilder.getSv4();
    Sorter sorter = SortBatch.createNewSorter(context, orderDefs, allSamplesContainer);
    sorter.setup(context, newSv4, allSamplesContainer);
    sorter.sort(newSv4, allSamplesContainer);

    // Copy every Nth record from the samples into a candidate partition table, where N =
    // totalSampledRecords/partitions
    // Attempt to push this to the distributed map. Only the first candidate to get pushed will be
    // used.
    VectorContainer candidatePartitionTable = new VectorContainer();
    SampleCopier copier = null;
    List<ValueVector> localAllocationVectors = Lists.newArrayList();
    copier =
        getCopier(
            newSv4,
            allSamplesContainer,
            candidatePartitionTable,
            orderDefs,
            localAllocationVectors);
    int allocationSize = 50;
    while (true) {
      for (ValueVector vv : localAllocationVectors) {
        AllocationHelper.allocate(vv, samplingFactor * partitions, allocationSize);
      }
      int skipRecords = containerBuilder.getSv4().getTotalCount() / partitions;
      if (copier.copyRecords(skipRecords, skipRecords, partitions - 1)) {
        assert copier.getOutputRecords() == partitions - 1
            : String.format(
                "output records: %d partitions: %d", copier.getOutputRecords(), partitions);
        for (VectorWrapper<?> vw : candidatePartitionTable) {
          vw.getValueVector().getMutator().setValueCount(copier.getOutputRecords());
        }
        break;
      } else {
        candidatePartitionTable.zeroVectors();
        allocationSize *= 2;
      }
    }
    candidatePartitionTable.setRecordCount(copier.getOutputRecords());
    WritableBatch batch =
        WritableBatch.getBatchNoHVWrap(
            candidatePartitionTable.getRecordCount(), candidatePartitionTable, false);
    CachedVectorContainer wrap =
        new CachedVectorContainer(batch, context.getDrillbitContext().getAllocator());
    tableMap.putIfAbsent(mapKey + "final", wrap, 1, TimeUnit.MINUTES);

    candidatePartitionTable.clear();
    allSamplesContainer.clear();
    containerBuilder.clear();
    wrap.clear();
  }