Exemplo n.º 1
0
  private void createCopier(
      VectorAccessible batch,
      List<BatchGroup> batchGroupList,
      VectorContainer outputContainer,
      boolean spilling)
      throws SchemaChangeException {
    try {
      if (copier == null) {
        CodeGenerator<PriorityQueueCopier> cg =
            CodeGenerator.get(
                PriorityQueueCopier.TEMPLATE_DEFINITION,
                context.getFunctionRegistry(),
                context.getOptions());
        ClassGenerator<PriorityQueueCopier> g = cg.getRoot();

        generateComparisons(g, batch);

        g.setMappingSet(COPIER_MAPPING_SET);
        CopyUtil.generateCopies(g, batch, true);
        g.setMappingSet(MAIN_MAPPING);
        copier = context.getImplementationClass(cg);
      } else {
        copier.close();
      }

      BufferAllocator allocator = spilling ? copierAllocator : oAllocator;
      for (VectorWrapper<?> i : batch) {
        ValueVector v = TypeHelper.getNewVector(i.getField(), allocator);
        outputContainer.add(v);
      }
      copier.setup(context, allocator, batch, batchGroupList, outputContainer);
    } catch (ClassTransformationException | IOException e) {
      throw new RuntimeException(e);
    }
  }
  /**
   * This method is called when the first batch comes in. Incoming batches are collected until a
   * threshold is met. At that point, the records in the batches are sorted and sampled, and the
   * sampled records are stored in the distributed cache. Once a sufficient fraction of the
   * fragments have shared their samples, each fragment grabs all the samples, sorts all the
   * records, builds a partition table, and attempts to push the partition table to the distributed
   * cache. Whichever table gets pushed first becomes the table used by all fragments for
   * partitioning.
   *
   * @return True is successful. False if failed.
   */
  private boolean getPartitionVectors() {

    try {

      if (!saveSamples()) {
        return false;
      }

      CachedVectorContainer finalTable = null;

      long val = minorFragmentSampleCount.incrementAndGet();
      logger.debug("Incremented mfsc, got {}", val);

      final long fragmentsBeforeProceed =
          (long) Math.ceil(sendingMajorFragmentWidth * completionFactor);
      final String finalTableKey = mapKey + "final";

      if (val == fragmentsBeforeProceed) { // we crossed the barrier, build table and get data.
        buildTable();
        finalTable = tableMap.get(finalTableKey);
      } else {
        // Wait until sufficient number of fragments have submitted samples, or proceed after xx ms
        // passed
        // TODO: this should be polling.

        if (val < fragmentsBeforeProceed) Thread.sleep(10);
        for (int i = 0; i < 100 && finalTable == null; i++) {
          finalTable = tableMap.get(finalTableKey);
          if (finalTable != null) {
            break;
          }
          Thread.sleep(10);
        }
        if (finalTable == null) {
          buildTable();
        }
        finalTable = tableMap.get(finalTableKey);
      }

      Preconditions.checkState(finalTable != null);

      // Extract vectors from the wrapper, and add to partition vectors. These vectors will be used
      // for partitioning in
      // the rest of this operator
      for (VectorWrapper<?> w : finalTable.get()) {
        partitionVectors.add(w.getValueVector());
      }
    } catch (ClassTransformationException
        | IOException
        | SchemaChangeException
        | InterruptedException ex) {
      kill(false);
      logger.error("Failure while building final partition table.", ex);
      context.fail(ex);
      return false;
    }
    return true;
  }
  /**
   * Creates a copier that does a project for every Nth record from a VectorContainer incoming into
   * VectorContainer outgoing. Each Ordering in orderings generates a column, and evaluation of the
   * expression associated with each Ordering determines the value of each column. These records
   * will later be sorted based on the values in each column, in the same order as the orderings.
   *
   * @param sv4
   * @param incoming
   * @param outgoing
   * @param orderings
   * @return
   * @throws SchemaChangeException
   */
  private SampleCopier getCopier(
      SelectionVector4 sv4,
      VectorContainer incoming,
      VectorContainer outgoing,
      List<Ordering> orderings,
      List<ValueVector> localAllocationVectors)
      throws SchemaChangeException {
    final ErrorCollector collector = new ErrorCollectorImpl();
    final ClassGenerator<SampleCopier> cg =
        CodeGenerator.getRoot(SampleCopier.TEMPLATE_DEFINITION, context.getFunctionRegistry());

    int i = 0;
    for (Ordering od : orderings) {
      final LogicalExpression expr =
          ExpressionTreeMaterializer.materialize(
              od.getExpr(), incoming, collector, context.getFunctionRegistry());
      SchemaPath schemaPath = SchemaPath.getSimplePath("f" + i++);
      TypeProtos.MajorType.Builder builder =
          TypeProtos.MajorType.newBuilder()
              .mergeFrom(expr.getMajorType())
              .clearMode()
              .setMode(TypeProtos.DataMode.REQUIRED);
      TypeProtos.MajorType newType = builder.build();
      MaterializedField outputField = MaterializedField.create(schemaPath, newType);
      if (collector.hasErrors()) {
        throw new SchemaChangeException(
            String.format(
                "Failure while trying to materialize incoming schema.  Errors:\n %s.",
                collector.toErrorString()));
      }

      ValueVector vector = TypeHelper.getNewVector(outputField, oContext.getAllocator());
      localAllocationVectors.add(vector);
      TypedFieldId fid = outgoing.add(vector);
      ValueVectorWriteExpression write = new ValueVectorWriteExpression(fid, expr, true);
      HoldingContainer hc = cg.addExpr(write);
      cg.getEvalBlock()._if(hc.getValue().eq(JExpr.lit(0)))._then()._return(JExpr.FALSE);
    }
    cg.rotateBlock();
    cg.getEvalBlock()._return(JExpr.TRUE);
    outgoing.buildSchema(BatchSchema.SelectionVectorMode.NONE);
    try {
      SampleCopier sampleCopier = context.getImplementationClass(cg);
      sampleCopier.setupCopier(context, sv4, incoming, outgoing);
      return sampleCopier;
    } catch (ClassTransformationException | IOException e) {
      throw new SchemaChangeException(e);
    }
  }
Exemplo n.º 4
0
 private VectorContainer constructHyperBatch(List<BatchGroup> batchGroupList) {
   VectorContainer cont = new VectorContainer();
   for (MaterializedField field : schema) {
     ValueVector[] vectors = new ValueVector[batchGroupList.size()];
     int i = 0;
     for (BatchGroup group : batchGroupList) {
       vectors[i++] =
           group
               .getValueAccessorById(
                   field.getValueClass(),
                   group.getValueVectorId(SchemaPath.getSimplePath(field.getPath())).getFieldIds())
               .getValueVector();
     }
     cont.add(vectors);
   }
   cont.buildSchema(BatchSchema.SelectionVectorMode.FOUR_BYTE);
   return cont;
 }
 public void addField(ValueVector vector) {
   container.add(vector);
   fieldVectorMap.put(vector.getField(), vector);
 }