Java RecordBatch Beispiele, org.apache.drill.exec.record.RecordBatch Java Beispiele

Beispiel #1

0

Datei anzeigen

Datei: OrderedPartitionRecordBatch.java Projekt: kameshb/incubator-drill

 @Override
 public void cleanup() {
   incoming.cleanup();
   super.cleanup();
   this.partitionVectors.clear();
   this.partitionKeyVector.clear();
 }

Beispiel #2

0

Datei anzeigen

Datei: StreamingAggBatch.java Projekt: akshatthakar/drill

  private void getIndex(ClassGenerator<StreamingAggregator> g) {
    switch (incoming.getSchema().getSelectionVectorMode()) {
      case FOUR_BYTE:
        {
          JVar var = g.declareClassField("sv4_", g.getModel()._ref(SelectionVector4.class));
          g.getBlock("setupInterior")
              .assign(var, JExpr.direct("incoming").invoke("getSelectionVector4"));
          g.getBlock("getVectorIndex")._return(var.invoke("get").arg(JExpr.direct("recordIndex")));
          ;
          return;
        }
      case NONE:
        {
          g.getBlock("getVectorIndex")._return(JExpr.direct("recordIndex"));
          ;
          return;
        }
      case TWO_BYTE:
        {
          JVar var = g.declareClassField("sv2_", g.getModel()._ref(SelectionVector2.class));
          g.getBlock("setupInterior")
              .assign(var, JExpr.direct("incoming").invoke("getSelectionVector2"));
          g.getBlock("getVectorIndex")
              ._return(var.invoke("getIndex").arg(JExpr.direct("recordIndex")));
          ;
          return;
        }

      default:
        throw new IllegalStateException();
    }
  }

Beispiel #3

0

Datei anzeigen

Datei: ExternalSortBatch.java Projekt: laurentgo/drill

 private SelectionVector2 newSV2() throws OutOfMemoryException, InterruptedException {
   SelectionVector2 sv2 = new SelectionVector2(oAllocator);
   if (!sv2.allocateNewSafe(incoming.getRecordCount())) {
     try {
       final BatchGroup merged = mergeAndSpill(batchGroups);
       if (merged != null) {
         spilledBatchGroups.add(merged);
       } else {
         throw UserException.memoryError(
                 "Unable to allocate sv2 for %d records, and not enough batchGroups to spill.",
                 incoming.getRecordCount())
             .addContext("batchGroups.size", batchGroups.size())
             .addContext("spilledBatchGroups.size", spilledBatchGroups.size())
             .addContext("allocated memory", oAllocator.getAllocatedMemory())
             .addContext("allocator limit", oAllocator.getLimit())
             .build(logger);
       }
     } catch (SchemaChangeException e) {
       throw new RuntimeException(e);
     }
     int waitTime = 1;
     while (true) {
       try {
         Thread.sleep(waitTime * 1000);
       } catch (final InterruptedException e) {
         if (!context.shouldContinue()) {
           throw e;
         }
       }
       waitTime *= 2;
       if (sv2.allocateNewSafe(incoming.getRecordCount())) {
         break;
       }
       if (waitTime >= 32) {
         throw new OutOfMemoryException("Unable to allocate sv2 buffer after repeated attempts");
       }
     }
   }
   for (int i = 0; i < incoming.getRecordCount(); i++) {
     sv2.setIndex(i, (char) i);
   }
   sv2.setRecordCount(incoming.getRecordCount());
   return sv2;
 }

Beispiel #4

0

Datei anzeigen

Datei: UnionAllRecordBatch.java Projekt: hnfgns/incubator-drill

 private boolean doAlloc() {
   for (ValueVector v : allocationVectors) {
     try {
       AllocationHelper.allocateNew(v, current.getRecordCount());
     } catch (OutOfMemoryException ex) {
       return false;
     }
   }
   return true;
 }

Beispiel #5

0

Datei anzeigen

Datei: FilterTemplate.java Projekt: JoaoVasques/incubator-drill

  @Override
  public void setup(
      FragmentContext context, RecordBatch incoming, RecordBatch outgoing, TransferPair[] transfers)
      throws SchemaChangeException {
    this.transfers = transfers;
    this.outgoingSelectionVector = outgoing.getSelectionVector2();
    this.svMode = incoming.getSchema().getSelectionVectorMode();

    switch (svMode) {
      case NONE:
        break;
      case TWO_BYTE:
        this.incomingSelectionVector = incoming.getSelectionVector2();
        break;
      default:
        throw new UnsupportedOperationException();
    }
    doSetup(context, incoming, outgoing);
  }

Beispiel #6

0

Datei anzeigen

Datei: ExpressionTreeMaterializer.java Projekt: jweaver/incubator-drill

 @Override
 public LogicalExpression visitSchemaPath(SchemaPath path) {
   //      logger.debug("Visiting schema path {}", path);
   TypedFieldId tfId = batch.getValueVectorId(path);
   if (tfId == null) {
     logger.warn("Unable to find value vector of path {}, returning null instance.", path);
     return NullExpression.INSTANCE;
   } else {
     return new ValueVectorReadExpression(tfId);
   }
 }

Beispiel #7

0

Datei anzeigen

Datei: CopierTemplate4.java Projekt: krystaln/incubator-drill

 @Override
 public void setupRemover(
     FragmentContext context,
     RecordBatch incoming,
     RecordBatch outgoing,
     VectorAllocator[] allocators)
     throws SchemaChangeException {
   this.allocators = allocators;
   this.incoming = incoming;
   this.sv4 = incoming.getSelectionVector4();
   doSetup(context, incoming, outgoing);
 }

Beispiel #8

0

Datei anzeigen

Datei: StreamingAggBatch.java Projekt: akshatthakar/drill

 /**
  * Creates a new Aggregator based on the current schema. If setup fails, this method is
  * responsible for cleaning up and informing the context of the failure state, as well is
  * informing the upstream operators.
  *
  * @return true if the aggregator was setup successfully. false if there was a failure.
  */
 private boolean createAggregator() {
   logger.debug("Creating new aggregator.");
   try {
     stats.startSetup();
     this.aggregator = createAggregatorInternal();
     return true;
   } catch (SchemaChangeException | ClassTransformationException | IOException ex) {
     context.fail(ex);
     container.clear();
     incoming.kill(false);
     return false;
   } finally {
     stats.stopSetup();
   }
 }

Beispiel #9

0

Datei anzeigen

Datei: UnionAllRecordBatch.java Projekt: hnfgns/incubator-drill

      public IterOutcome nextBatch() {
        if (upstream == IterOutcome.NONE) {
          throw new IllegalStateException(String.format("Unknown state %s.", upstream));
        }

        if (upstream == IterOutcome.NOT_YET) {
          upstream = unionAllRecordBatch.next(recordBatch);

          return upstream;
        } else {
          do {
            upstream = unionAllRecordBatch.next(recordBatch);
          } while (upstream == IterOutcome.OK && recordBatch.getRecordCount() == 0);

          return upstream;
        }
      }

Beispiel #10

0

Datei anzeigen

Datei: StreamingAggBatch.java Projekt: akshatthakar/drill

 @Override
 protected void killIncoming(boolean sendUpstream) {
   incoming.kill(sendUpstream);
 }

Beispiel #11

0

Datei anzeigen

Datei: StreamingAggBatch.java Projekt: akshatthakar/drill

  @Override
  public IterOutcome innerNext() {

    // if a special batch has been sent, we have no data in the incoming so exit early
    if (specialBatchSent) {
      return IterOutcome.NONE;
    }

    // this is only called on the first batch. Beyond this, the aggregator manages batches.
    if (aggregator == null || first) {
      IterOutcome outcome;
      if (first && incoming.getRecordCount() > 0) {
        first = false;
        outcome = IterOutcome.OK_NEW_SCHEMA;
      } else {
        outcome = next(incoming);
      }
      logger.debug("Next outcome of {}", outcome);
      switch (outcome) {
        case NONE:
          if (first && popConfig.getKeys().length == 0) {
            // if we have a straight aggregate and empty input batch, we need to handle it in a
            // different way
            constructSpecialBatch();
            first = false;
            // set state to indicate the fact that we have sent a special batch and input is empty
            specialBatchSent = true;
            return IterOutcome.OK;
          }
        case OUT_OF_MEMORY:
        case NOT_YET:
        case STOP:
          return outcome;
        case OK_NEW_SCHEMA:
          if (!createAggregator()) {
            done = true;
            return IterOutcome.STOP;
          }
          break;
        case OK:
          break;
        default:
          throw new IllegalStateException(String.format("unknown outcome %s", outcome));
      }
    }

    AggOutcome out = aggregator.doWork();
    recordCount = aggregator.getOutputCount();
    logger.debug("Aggregator response {}, records {}", out, aggregator.getOutputCount());
    switch (out) {
      case CLEANUP_AND_RETURN:
        if (!first) {
          container.zeroVectors();
        }
        done = true;
        // fall through
      case RETURN_OUTCOME:
        IterOutcome outcome = aggregator.getOutcome();
        if (outcome == IterOutcome.NONE && first) {
          first = false;
          done = true;
          return IterOutcome.OK_NEW_SCHEMA;
        } else if (outcome == IterOutcome.OK && first) {
          outcome = IterOutcome.OK_NEW_SCHEMA;
        } else if (outcome != IterOutcome.OUT_OF_MEMORY) {
          first = false;
        }
        return outcome;
      case UPDATE_AGGREGATOR:
        context.fail(
            UserException.unsupportedError()
                .message("Streaming aggregate does not support schema changes")
                .build(logger));
        close();
        killIncoming(false);
        return IterOutcome.STOP;
      default:
        throw new IllegalStateException(String.format("Unknown state %s.", out));
    }
  }

Beispiel #12

0

Datei anzeigen

Datei: ExternalSortBatch.java Projekt: laurentgo/drill

  @Override
  public IterOutcome innerNext() {
    if (schema != null) {
      if (spillCount == 0) {
        return (getSelectionVector4().next()) ? IterOutcome.OK : IterOutcome.NONE;
      } else {
        Stopwatch w = Stopwatch.createStarted();
        int count = copier.next(targetRecordCount);
        if (count > 0) {
          long t = w.elapsed(TimeUnit.MICROSECONDS);
          logger.debug("Took {} us to merge {} records", t, count);
          container.setRecordCount(count);
          return IterOutcome.OK;
        } else {
          logger.debug("copier returned 0 records");
          return IterOutcome.NONE;
        }
      }
    }

    int totalCount = 0;
    int totalBatches = 0; // total number of batches received so far

    try {
      container.clear();
      outer:
      while (true) {
        IterOutcome upstream;
        if (first) {
          upstream = IterOutcome.OK_NEW_SCHEMA;
        } else {
          upstream = next(incoming);
        }
        if (upstream == IterOutcome.OK && sorter == null) {
          upstream = IterOutcome.OK_NEW_SCHEMA;
        }
        switch (upstream) {
          case NONE:
            if (first) {
              return upstream;
            }
            break outer;
          case NOT_YET:
            throw new UnsupportedOperationException();
          case STOP:
            return upstream;
          case OK_NEW_SCHEMA:
          case OK:
            VectorContainer convertedBatch;
            // only change in the case that the schema truly changes.  Artificial schema changes are
            // ignored.
            if (upstream == IterOutcome.OK_NEW_SCHEMA && !incoming.getSchema().equals(schema)) {
              if (schema != null) {
                if (unionTypeEnabled) {
                  this.schema = SchemaUtil.mergeSchemas(schema, incoming.getSchema());
                } else {
                  throw new SchemaChangeException(
                      "Schema changes not supported in External Sort. Please enable Union type");
                }
              } else {
                schema = incoming.getSchema();
              }
              convertedBatch = SchemaUtil.coerceContainer(incoming, schema, oContext);
              for (BatchGroup b : batchGroups) {
                b.setSchema(schema);
              }
              for (BatchGroup b : spilledBatchGroups) {
                b.setSchema(schema);
              }
              this.sorter = createNewSorter(context, convertedBatch);
            } else {
              convertedBatch = SchemaUtil.coerceContainer(incoming, schema, oContext);
            }
            if (first) {
              first = false;
            }
            if (convertedBatch.getRecordCount() == 0) {
              for (VectorWrapper<?> w : convertedBatch) {
                w.clear();
              }
              break;
            }
            SelectionVector2 sv2;
            if (incoming.getSchema().getSelectionVectorMode()
                == BatchSchema.SelectionVectorMode.TWO_BYTE) {
              sv2 = incoming.getSelectionVector2().clone();
            } else {
              try {
                sv2 = newSV2();
              } catch (InterruptedException e) {
                return IterOutcome.STOP;
              } catch (OutOfMemoryException e) {
                throw new OutOfMemoryException(e);
              }
            }

            int count = sv2.getCount();
            totalCount += count;
            totalBatches++;
            sorter.setup(context, sv2, convertedBatch);
            sorter.sort(sv2);
            RecordBatchData rbd = new RecordBatchData(convertedBatch, oAllocator);
            boolean success = false;
            try {
              rbd.setSv2(sv2);
              batchGroups.add(new BatchGroup(rbd.getContainer(), rbd.getSv2(), oContext));
              if (peakNumBatches < batchGroups.size()) {
                peakNumBatches = batchGroups.size();
                stats.setLongStat(Metric.PEAK_BATCHES_IN_MEMORY, peakNumBatches);
              }

              batchesSinceLastSpill++;
              if ( // If we haven't spilled so far, do we have enough memory for MSorter if this
              // turns out to be the last incoming batch?
              (spillCount == 0 && !hasMemoryForInMemorySort(totalCount))
                  ||
                  // If we haven't spilled so far, make sure we don't exceed the maximum number of
                  // batches SV4 can address
                  (spillCount == 0 && totalBatches > Character.MAX_VALUE)
                  ||
                  // TODO(DRILL-4438) - consider setting this threshold more intelligently,
                  // lowering caused a failing low memory condition (test in
                  // BasicPhysicalOpUnitTest)
                  // to complete successfully (although it caused perf decrease as there was more
                  // spilling)

                  // current memory used is more than 95% of memory usage limit of this operator
                  (oAllocator.getAllocatedMemory() > .95 * oAllocator.getLimit())
                  ||
                  // Number of incoming batches (BatchGroups) exceed the limit and number of
                  // incoming batches accumulated
                  // since the last spill exceed the defined limit
                  (batchGroups.size() > SPILL_THRESHOLD
                      && batchesSinceLastSpill >= SPILL_BATCH_GROUP_SIZE)) {

                if (firstSpillBatchCount == 0) {
                  firstSpillBatchCount = batchGroups.size();
                }

                if (spilledBatchGroups.size() > firstSpillBatchCount / 2) {
                  logger.info("Merging spills");
                  final BatchGroup merged = mergeAndSpill(spilledBatchGroups);
                  if (merged != null) {
                    spilledBatchGroups.addFirst(merged);
                  }
                }
                final BatchGroup merged = mergeAndSpill(batchGroups);
                if (merged != null) { // make sure we don't add null to spilledBatchGroups
                  spilledBatchGroups.add(merged);
                  batchesSinceLastSpill = 0;
                }
              }
              success = true;
            } finally {
              if (!success) {
                rbd.clear();
              }
            }
            break;
          case OUT_OF_MEMORY:
            logger.debug("received OUT_OF_MEMORY, trying to spill");
            if (batchesSinceLastSpill > 2) {
              final BatchGroup merged = mergeAndSpill(batchGroups);
              if (merged != null) {
                spilledBatchGroups.add(merged);
                batchesSinceLastSpill = 0;
              }
            } else {
              logger.debug("not enough batches to spill, sending OUT_OF_MEMORY downstream");
              return IterOutcome.OUT_OF_MEMORY;
            }
            break;
          default:
            throw new UnsupportedOperationException();
        }
      }

      if (totalCount == 0) {
        return IterOutcome.NONE;
      }
      if (spillCount == 0) {

        if (builder != null) {
          builder.clear();
          builder.close();
        }
        builder = new SortRecordBatchBuilder(oAllocator);

        for (BatchGroup group : batchGroups) {
          RecordBatchData rbd = new RecordBatchData(group.getContainer(), oAllocator);
          rbd.setSv2(group.getSv2());
          builder.add(rbd);
        }

        builder.build(context, container);
        sv4 = builder.getSv4();
        mSorter = createNewMSorter();
        mSorter.setup(context, oAllocator, getSelectionVector4(), this.container);

        // For testing memory-leak purpose, inject exception after mSorter finishes setup
        injector.injectUnchecked(context.getExecutionControls(), INTERRUPTION_AFTER_SETUP);
        mSorter.sort(this.container);

        // sort may have prematurely exited due to should continue returning false.
        if (!context.shouldContinue()) {
          return IterOutcome.STOP;
        }

        // For testing memory-leak purpose, inject exception after mSorter finishes sorting
        injector.injectUnchecked(context.getExecutionControls(), INTERRUPTION_AFTER_SORT);
        sv4 = mSorter.getSV4();

        container.buildSchema(SelectionVectorMode.FOUR_BYTE);
      } else { // some batches were spilled
        final BatchGroup merged = mergeAndSpill(batchGroups);
        if (merged != null) {
          spilledBatchGroups.add(merged);
        }
        batchGroups.addAll(spilledBatchGroups);
        spilledBatchGroups =
            null; // no need to cleanup spilledBatchGroups, all it's batches are in batchGroups now

        logger.warn(
            "Starting to merge. {} batch groups. Current allocated memory: {}",
            batchGroups.size(),
            oAllocator.getAllocatedMemory());
        VectorContainer hyperBatch = constructHyperBatch(batchGroups);
        createCopier(hyperBatch, batchGroups, container, false);

        int estimatedRecordSize = 0;
        for (VectorWrapper<?> w : batchGroups.get(0)) {
          try {
            estimatedRecordSize += TypeHelper.getSize(w.getField().getType());
          } catch (UnsupportedOperationException e) {
            estimatedRecordSize += 50;
          }
        }
        targetRecordCount =
            Math.min(MAX_BATCH_SIZE, Math.max(1, COPIER_BATCH_MEM_LIMIT / estimatedRecordSize));
        int count = copier.next(targetRecordCount);
        container.buildSchema(SelectionVectorMode.NONE);
        container.setRecordCount(count);
      }

      return IterOutcome.OK_NEW_SCHEMA;

    } catch (SchemaChangeException ex) {
      kill(false);
      context.fail(
          UserException.unsupportedError(ex)
              .message("Sort doesn't currently support sorts with changing schemas")
              .build(logger));
      return IterOutcome.STOP;
    } catch (ClassTransformationException | IOException ex) {
      kill(false);
      context.fail(ex);
      return IterOutcome.STOP;
    } catch (UnsupportedOperationException e) {
      throw new RuntimeException(e);
    }
  }

Beispiel #13

0

Datei anzeigen

Datei: UnionAllRecordBatch.java Projekt: hnfgns/incubator-drill

  private IterOutcome doWork()
      throws ClassTransformationException, IOException, SchemaChangeException {
    if (allocationVectors != null) {
      for (ValueVector v : allocationVectors) {
        v.clear();
      }
    }

    allocationVectors = Lists.newArrayList();
    transfers.clear();

    final ClassGenerator<UnionAller> cg =
        CodeGenerator.getRoot(UnionAller.TEMPLATE_DEFINITION, context.getFunctionRegistry());
    int index = 0;
    for (VectorWrapper<?> vw : current) {
      ValueVector vvIn = vw.getValueVector();
      // get the original input column names
      SchemaPath inputPath = vvIn.getField().getPath();
      // get the renamed column names
      SchemaPath outputPath = outputFields.get(index).getPath();

      final ErrorCollector collector = new ErrorCollectorImpl();
      // According to input data names, Minortypes, Datamodes, choose to
      // transfer directly,
      // rename columns or
      // cast data types (Minortype or DataMode)
      if (hasSameTypeAndMode(outputFields.get(index), vw.getValueVector().getField())) {
        // Transfer column
        if (outputFields.get(index).getPath().equals(inputPath)) {
          final LogicalExpression expr =
              ExpressionTreeMaterializer.materialize(
                  inputPath, current, collector, context.getFunctionRegistry());
          if (collector.hasErrors()) {
            throw new SchemaChangeException(
                String.format(
                    "Failure while trying to materialize incoming schema.  Errors:\n %s.",
                    collector.toErrorString()));
          }

          ValueVectorReadExpression vectorRead = (ValueVectorReadExpression) expr;
          ValueVector vvOut =
              container.addOrGet(MaterializedField.create(outputPath, vectorRead.getMajorType()));
          TransferPair tp = vvIn.makeTransferPair(vvOut);
          transfers.add(tp);
          // Copy data in order to rename the column
        } else {
          final LogicalExpression expr =
              ExpressionTreeMaterializer.materialize(
                  inputPath, current, collector, context.getFunctionRegistry());
          if (collector.hasErrors()) {
            throw new SchemaChangeException(
                String.format(
                    "Failure while trying to materialize incoming schema.  Errors:\n %s.",
                    collector.toErrorString()));
          }

          MaterializedField outputField = MaterializedField.create(outputPath, expr.getMajorType());
          ValueVector vv = container.addOrGet(outputField, callBack);
          allocationVectors.add(vv);
          TypedFieldId fid = container.getValueVectorId(outputField.getPath());
          ValueVectorWriteExpression write = new ValueVectorWriteExpression(fid, expr, true);
          cg.addExpr(write);
        }
        // Cast is necessary
      } else {
        LogicalExpression expr =
            ExpressionTreeMaterializer.materialize(
                inputPath, current, collector, context.getFunctionRegistry());
        if (collector.hasErrors()) {
          throw new SchemaChangeException(
              String.format(
                  "Failure while trying to materialize incoming schema.  Errors:\n %s.",
                  collector.toErrorString()));
        }

        // If the inputs' DataMode is required and the outputs' DataMode is not required
        // cast to the one with the least restriction
        if (vvIn.getField().getType().getMode() == DataMode.REQUIRED
            && outputFields.get(index).getType().getMode() != DataMode.REQUIRED) {
          expr =
              ExpressionTreeMaterializer.convertToNullableType(
                  expr,
                  vvIn.getField().getType().getMinorType(),
                  context.getFunctionRegistry(),
                  collector);
          if (collector.hasErrors()) {
            throw new SchemaChangeException(
                String.format(
                    "Failure while trying to materialize incoming schema.  Errors:\n %s.",
                    collector.toErrorString()));
          }
        }

        // If two inputs' MinorTypes are different,
        // Insert a cast before the Union operation
        if (vvIn.getField().getType().getMinorType()
            != outputFields.get(index).getType().getMinorType()) {
          expr =
              ExpressionTreeMaterializer.addCastExpression(
                  expr,
                  outputFields.get(index).getType(),
                  context.getFunctionRegistry(),
                  collector);
          if (collector.hasErrors()) {
            throw new SchemaChangeException(
                String.format(
                    "Failure while trying to materialize incoming schema.  Errors:\n %s.",
                    collector.toErrorString()));
          }
        }

        final MaterializedField outputField =
            MaterializedField.create(outputPath, expr.getMajorType());
        ValueVector vector = container.addOrGet(outputField, callBack);
        allocationVectors.add(vector);
        TypedFieldId fid = container.getValueVectorId(outputField.getPath());

        boolean useSetSafe = !(vector instanceof FixedWidthVector);
        ValueVectorWriteExpression write = new ValueVectorWriteExpression(fid, expr, useSetSafe);
        cg.addExpr(write);
      }
      ++index;
    }

    unionall = context.getImplementationClass(cg.getCodeGenerator());
    unionall.setup(context, current, this, transfers);

    if (!schemaAvailable) {
      container.buildSchema(BatchSchema.SelectionVectorMode.NONE);
      schemaAvailable = true;
    }

    if (!doAlloc()) {
      return IterOutcome.OUT_OF_MEMORY;
    }

    recordCount = unionall.unionRecords(0, current.getRecordCount(), 0);
    setValueCount(recordCount);
    return IterOutcome.OK;
  }

Beispiel #14

0

Datei anzeigen

Datei: OrderedPartitionRecordBatch.java Projekt: kameshb/incubator-drill

  @Override
  public IterOutcome innerNext() {
    container.zeroVectors();

    // if we got IterOutcome.NONE while getting partition vectors, and there are no batches on the
    // queue, then we are
    // done
    if (upstreamNone && (batchQueue == null || batchQueue.size() == 0)) return IterOutcome.NONE;

    // if there are batches on the queue, process them first, rather than calling incoming.next()
    if (batchQueue != null && batchQueue.size() > 0) {
      VectorContainer vc = batchQueue.poll();
      recordCount = vc.getRecordCount();
      try {

        // Must set up a new schema each time, because ValueVectors are not reused between
        // containers in queue
        setupNewSchema(vc);
      } catch (SchemaChangeException ex) {
        kill(false);
        logger.error("Failure during query", ex);
        context.fail(ex);
        return IterOutcome.STOP;
      }
      doWork(vc);
      vc.zeroVectors();
      return IterOutcome.OK_NEW_SCHEMA;
    }

    // Reaching this point, either this is the first iteration, or there are no batches left on the
    // queue and there are
    // more incoming
    IterOutcome upstream = next(incoming);

    if (this.first && upstream == IterOutcome.OK) {
      throw new RuntimeException("Invalid state: First batch should have OK_NEW_SCHEMA");
    }

    // If this is the first iteration, we need to generate the partition vectors before we can
    // proceed
    if (this.first && upstream == IterOutcome.OK_NEW_SCHEMA) {
      if (!getPartitionVectors()) {
        cleanup();
        return IterOutcome.STOP;
      }

      batchQueue = new LinkedBlockingQueue<>(this.sampledIncomingBatches);
      first = false;

      // Now that we have the partition vectors, we immediately process the first batch on the queue
      VectorContainer vc = batchQueue.poll();
      try {
        setupNewSchema(vc);
      } catch (SchemaChangeException ex) {
        kill(false);
        logger.error("Failure during query", ex);
        context.fail(ex);
        return IterOutcome.STOP;
      }
      doWork(vc);
      vc.zeroVectors();
      recordCount = vc.getRecordCount();
      return IterOutcome.OK_NEW_SCHEMA;
    }

    // if this now that all the batches on the queue are processed, we begin processing the incoming
    // batches. For the
    // first one
    // we need to generate a new schema, even if the outcome is IterOutcome.OK After that we can
    // reuse the schema.
    if (this.startedUnsampledBatches == false) {
      this.startedUnsampledBatches = true;
      if (upstream == IterOutcome.OK) upstream = IterOutcome.OK_NEW_SCHEMA;
    }
    switch (upstream) {
      case NONE:
      case NOT_YET:
      case STOP:
        cleanup();
        recordCount = 0;
        return upstream;
      case OK_NEW_SCHEMA:
        try {
          setupNewSchema(incoming);
        } catch (SchemaChangeException ex) {
          kill(false);
          logger.error("Failure during query", ex);
          context.fail(ex);
          return IterOutcome.STOP;
        }
        // fall through.
      case OK:
        doWork(incoming);
        recordCount = incoming.getRecordCount();
        return upstream; // change if upstream changed, otherwise normal.
      default:
        throw new UnsupportedOperationException();
    }
  }

Beispiel #15

0

Datei anzeigen

Datei: OrderedPartitionRecordBatch.java Projekt: kameshb/incubator-drill

  private boolean saveSamples()
      throws SchemaChangeException, ClassTransformationException, IOException {
    recordsSampled = 0;
    IterOutcome upstream;

    // Start collecting batches until recordsToSample records have been collected

    SortRecordBatchBuilder builder =
        new SortRecordBatchBuilder(oContext.getAllocator(), MAX_SORT_BYTES);
    builder.add(incoming);

    recordsSampled += incoming.getRecordCount();

    outer:
    while (recordsSampled < recordsToSample) {
      upstream = next(incoming);
      switch (upstream) {
        case NONE:
        case NOT_YET:
        case STOP:
          upstreamNone = true;
          break outer;
        default:
          // fall through
      }
      builder.add(incoming);
      recordsSampled += incoming.getRecordCount();
      if (upstream == IterOutcome.NONE) break;
    }
    VectorContainer sortedSamples = new VectorContainer();
    builder.build(context, sortedSamples);

    // Sort the records according the orderings given in the configuration

    Sorter sorter = SortBatch.createNewSorter(context, popConfig.getOrderings(), sortedSamples);
    SelectionVector4 sv4 = builder.getSv4();
    sorter.setup(context, sv4, sortedSamples);
    sorter.sort(sv4, sortedSamples);

    // Project every Nth record to a new vector container, where N = recordsSampled/(samplingFactor
    // * partitions).
    // Uses the
    // the expressions from the Orderings to populate each column. There is one column for each
    // Ordering in
    // popConfig.orderings.

    VectorContainer containerToCache = new VectorContainer();
    List<ValueVector> localAllocationVectors = Lists.newArrayList();
    SampleCopier copier =
        getCopier(
            sv4, sortedSamples, containerToCache, popConfig.getOrderings(), localAllocationVectors);
    int allocationSize = 50;
    while (true) {
      for (ValueVector vv : localAllocationVectors) {
        AllocationHelper.allocate(vv, samplingFactor * partitions, allocationSize);
      }
      if (copier.copyRecords(
          recordsSampled / (samplingFactor * partitions), 0, samplingFactor * partitions)) {
        break;
      } else {
        containerToCache.zeroVectors();
        allocationSize *= 2;
      }
    }
    for (VectorWrapper<?> vw : containerToCache) {
      vw.getValueVector().getMutator().setValueCount(copier.getOutputRecords());
    }
    containerToCache.setRecordCount(copier.getOutputRecords());

    // Get a distributed multimap handle from the distributed cache, and put the vectors from the
    // new vector container
    // into a serializable wrapper object, and then add to distributed map

    WritableBatch batch =
        WritableBatch.getBatchNoHVWrap(containerToCache.getRecordCount(), containerToCache, false);
    CachedVectorContainer sampleToSave = new CachedVectorContainer(batch, context.getAllocator());

    mmap.put(mapKey, sampleToSave);
    this.sampledIncomingBatches = builder.getHeldRecordBatches();
    builder.clear();
    batch.clear();
    containerToCache.clear();
    sampleToSave.clear();
    return true;
  }