Пример #1
0
 @Override
 public void buildSchema() throws SchemaChangeException {
   IterOutcome outcome = next(incoming);
   switch (outcome) {
     case OK:
     case OK_NEW_SCHEMA:
       for (VectorWrapper<?> w : incoming) {
         ValueVector v = container.addOrGet(w.getField());
         if (v instanceof AbstractContainerVector) {
           w.getValueVector().makeTransferPair(v); // Can we remove this hack?
           v.clear();
         }
         v.allocateNew(); // Can we remove this? - SVR fails with NPE (TODO)
       }
       container.buildSchema(SelectionVectorMode.NONE);
       container.setRecordCount(0);
       break;
     case STOP:
       state = BatchState.STOP;
       break;
     case OUT_OF_MEMORY:
       state = BatchState.OUT_OF_MEMORY;
       break;
     case NONE:
       state = BatchState.DONE;
       break;
     default:
       break;
   }
 }
Пример #2
0
  public void testCommon(String[] expectedResults, String physicalPlan, String resourceFile)
      throws Exception {
    try (RemoteServiceSet serviceSet = RemoteServiceSet.getLocalServiceSet();
        Drillbit bit = new Drillbit(CONFIG, serviceSet);
        DrillClient client = new DrillClient(CONFIG, serviceSet.getCoordinator())) {

      // run query.
      bit.run();
      client.connect();
      List<QueryDataBatch> results =
          client.runQuery(
              org.apache.drill.exec.proto.UserBitShared.QueryType.PHYSICAL,
              Files.toString(FileUtils.getResourceAsFile(physicalPlan), Charsets.UTF_8)
                  .replace("#{TEST_FILE}", resourceFile));

      RecordBatchLoader batchLoader = new RecordBatchLoader(bit.getContext().getAllocator());

      QueryDataBatch batch = results.get(0);
      assertTrue(batchLoader.load(batch.getHeader().getDef(), batch.getData()));

      int i = 0;
      for (VectorWrapper<?> v : batchLoader) {

        ValueVector.Accessor accessor = v.getValueVector().getAccessor();
        System.out.println(accessor.getObject(0));
        assertEquals(expectedResults[i++], accessor.getObject(0).toString());
      }

      batchLoader.clear();
      for (QueryDataBatch b : results) {
        b.release();
      }
    }
  }
Пример #3
0
 /**
  * Method is invoked when we have a straight aggregate (no group by expression) and our input is
  * empty. In this case we construct an outgoing batch with record count as 1. For the nullable
  * vectors we don't set anything as we want the output to be NULL. For the required vectors (only
  * for count()) we set the value to be zero since we don't zero out our buffers initially while
  * allocating them.
  */
 private void constructSpecialBatch() {
   int exprIndex = 0;
   for (final VectorWrapper<?> vw : container) {
     final ValueVector vv = vw.getValueVector();
     AllocationHelper.allocateNew(vv, SPECIAL_BATCH_COUNT);
     vv.getMutator().setValueCount(SPECIAL_BATCH_COUNT);
     if (vv.getField().getType().getMode() == TypeProtos.DataMode.REQUIRED) {
       if (vv instanceof FixedWidthVector) {
         /*
          * The only case we should have a required vector in the aggregate is for count function whose output is
          * always a FixedWidthVector (BigIntVector). Zero out the vector.
          */
         ((FixedWidthVector) vv).zeroVector();
       } else {
         /*
          * If we are in this else block it means that we have a required vector which is of variable length. We
          * should not be here, raising an error since we have set the record count to be 1 and not cleared the
          * buffer
          */
         throw new DrillRuntimeException(
             "FixedWidth vectors is the expected output vector type. "
                 + "Corresponding expression: "
                 + popConfig.getExprs()[exprIndex].toString());
       }
     }
     exprIndex++;
   }
   container.setRecordCount(SPECIAL_BATCH_COUNT);
   recordCount = SPECIAL_BATCH_COUNT;
 }
Пример #4
0
  private void createCopier(
      VectorAccessible batch,
      List<BatchGroup> batchGroupList,
      VectorContainer outputContainer,
      boolean spilling)
      throws SchemaChangeException {
    try {
      if (copier == null) {
        CodeGenerator<PriorityQueueCopier> cg =
            CodeGenerator.get(
                PriorityQueueCopier.TEMPLATE_DEFINITION,
                context.getFunctionRegistry(),
                context.getOptions());
        ClassGenerator<PriorityQueueCopier> g = cg.getRoot();

        generateComparisons(g, batch);

        g.setMappingSet(COPIER_MAPPING_SET);
        CopyUtil.generateCopies(g, batch, true);
        g.setMappingSet(MAIN_MAPPING);
        copier = context.getImplementationClass(cg);
      } else {
        copier.close();
      }

      BufferAllocator allocator = spilling ? copierAllocator : oAllocator;
      for (VectorWrapper<?> i : batch) {
        ValueVector v = TypeHelper.getNewVector(i.getField(), allocator);
        outputContainer.add(v);
      }
      copier.setup(context, allocator, batch, batchGroupList, outputContainer);
    } catch (ClassTransformationException | IOException e) {
      throw new RuntimeException(e);
    }
  }
Пример #5
0
 private VectorContainer getBatch() throws IOException {
   assert fs != null;
   assert path != null;
   if (inputStream == null) {
     inputStream = fs.open(path);
   }
   VectorAccessibleSerializable vas = new VectorAccessibleSerializable(allocator);
   Stopwatch watch = Stopwatch.createStarted();
   vas.readFromStream(inputStream);
   VectorContainer c = vas.get();
   if (schema != null) {
     c = SchemaUtil.coerceContainer(c, schema, context);
   }
   //    logger.debug("Took {} us to read {} records", watch.elapsed(TimeUnit.MICROSECONDS),
   // c.getRecordCount());
   spilledBatches--;
   currentContainer.zeroVectors();
   Iterator<VectorWrapper<?>> wrapperIterator = c.iterator();
   for (VectorWrapper w : currentContainer) {
     TransferPair pair =
         wrapperIterator.next().getValueVector().makeTransferPair(w.getValueVector());
     pair.transfer();
   }
   currentContainer.setRecordCount(c.getRecordCount());
   c.zeroVectors();
   return c;
 }
 @Override
 public void removeAllFields() {
   for (VectorWrapper<?> vw : container) {
     vw.clear();
   }
   container.clear();
   fieldVectorMap.clear();
 }
  /**
   * This method is called when the first batch comes in. Incoming batches are collected until a
   * threshold is met. At that point, the records in the batches are sorted and sampled, and the
   * sampled records are stored in the distributed cache. Once a sufficient fraction of the
   * fragments have shared their samples, each fragment grabs all the samples, sorts all the
   * records, builds a partition table, and attempts to push the partition table to the distributed
   * cache. Whichever table gets pushed first becomes the table used by all fragments for
   * partitioning.
   *
   * @return True is successful. False if failed.
   */
  private boolean getPartitionVectors() {

    try {

      if (!saveSamples()) {
        return false;
      }

      CachedVectorContainer finalTable = null;

      long val = minorFragmentSampleCount.incrementAndGet();
      logger.debug("Incremented mfsc, got {}", val);

      final long fragmentsBeforeProceed =
          (long) Math.ceil(sendingMajorFragmentWidth * completionFactor);
      final String finalTableKey = mapKey + "final";

      if (val == fragmentsBeforeProceed) { // we crossed the barrier, build table and get data.
        buildTable();
        finalTable = tableMap.get(finalTableKey);
      } else {
        // Wait until sufficient number of fragments have submitted samples, or proceed after xx ms
        // passed
        // TODO: this should be polling.

        if (val < fragmentsBeforeProceed) Thread.sleep(10);
        for (int i = 0; i < 100 && finalTable == null; i++) {
          finalTable = tableMap.get(finalTableKey);
          if (finalTable != null) {
            break;
          }
          Thread.sleep(10);
        }
        if (finalTable == null) {
          buildTable();
        }
        finalTable = tableMap.get(finalTableKey);
      }

      Preconditions.checkState(finalTable != null);

      // Extract vectors from the wrapper, and add to partition vectors. These vectors will be used
      // for partitioning in
      // the rest of this operator
      for (VectorWrapper<?> w : finalTable.get()) {
        partitionVectors.add(w.getValueVector());
      }
    } catch (ClassTransformationException
        | IOException
        | SchemaChangeException
        | InterruptedException ex) {
      kill(false);
      logger.error("Failure while building final partition table.", ex);
      context.fail(ex);
      return false;
    }
    return true;
  }
  /**
   * Sets up projection that will transfer all of the columns in batch, and also populate the
   * partition column based on which partition a record falls into in the partition table
   *
   * @param batch
   * @throws SchemaChangeException
   */
  protected void setupNewSchema(VectorAccessible batch) throws SchemaChangeException {
    container.clear();
    final ErrorCollector collector = new ErrorCollectorImpl();
    final List<TransferPair> transfers = Lists.newArrayList();

    final ClassGenerator<OrderedPartitionProjector> cg =
        CodeGenerator.getRoot(
            OrderedPartitionProjector.TEMPLATE_DEFINITION, context.getFunctionRegistry());

    for (VectorWrapper<?> vw : batch) {
      TransferPair tp = vw.getValueVector().getTransferPair();
      transfers.add(tp);
      container.add(tp.getTo());
    }

    cg.setMappingSet(mainMapping);

    int count = 0;
    for (Ordering od : popConfig.getOrderings()) {
      final LogicalExpression expr =
          ExpressionTreeMaterializer.materialize(
              od.getExpr(), batch, collector, context.getFunctionRegistry());
      if (collector.hasErrors())
        throw new SchemaChangeException(
            "Failure while materializing expression. " + collector.toErrorString());
      cg.setMappingSet(incomingMapping);
      ClassGenerator.HoldingContainer left = cg.addExpr(expr, false);
      cg.setMappingSet(partitionMapping);
      ClassGenerator.HoldingContainer right =
          cg.addExpr(
              new ValueVectorReadExpression(new TypedFieldId(expr.getMajorType(), count++)), false);
      cg.setMappingSet(mainMapping);

      LogicalExpression fh =
          FunctionGenerationHelper.getComparator(left, right, context.getFunctionRegistry());
      ClassGenerator.HoldingContainer out = cg.addExpr(fh, false);
      JConditional jc = cg.getEvalBlock()._if(out.getValue().ne(JExpr.lit(0)));

      if (od.getDirection() == Direction.ASCENDING) {
        jc._then()._return(out.getValue());
      } else {
        jc._then()._return(out.getValue().minus());
      }
    }

    cg.getEvalBlock()._return(JExpr.lit(0));

    container.add(this.partitionKeyVector);
    container.buildSchema(batch.getSchema().getSelectionVectorMode());

    try {
      this.projector = context.getImplementationClass(cg);
      projector.setup(
          context, batch, this, transfers, partitionVectors, partitions, popConfig.getRef());
    } catch (ClassTransformationException | IOException e) {
      throw new SchemaChangeException("Failure while attempting to load generated class", e);
    }
  }
 protected void doWork(VectorAccessible batch) {
   int recordCount = batch.getRecordCount();
   AllocationHelper.allocate(partitionKeyVector, recordCount, 50);
   projector.projectRecords(recordCount, 0);
   for (VectorWrapper<?> v : container) {
     ValueVector.Mutator m = v.getValueVector().getMutator();
     m.setValueCount(recordCount);
   }
 }
Пример #10
0
  @Override
  public void copyRecords() {
    //    logger.debug("Copying records.");
    final int recordCount = sv4.getCount();
    allocateVectors(recordCount);
    int outgoingPosition = 0;
    for (int svIndex = 0; svIndex < sv4.getCount(); svIndex++, outgoingPosition++) {
      int deRefIndex = sv4.get(svIndex);
      doEval(deRefIndex, outgoingPosition);
    }

    for (VectorWrapper<?> v : incoming) {
      v.clear();
    }
  }
  @Test
  public void twoBitTwoExchange() throws Exception {
    RemoteServiceSet serviceSet = RemoteServiceSet.getLocalServiceSet();

    try (Drillbit bit1 = new Drillbit(CONFIG, serviceSet);
        Drillbit bit2 = new Drillbit(CONFIG, serviceSet);
        DrillClient client = new DrillClient(CONFIG, serviceSet.getCoordinator()); ) {

      bit1.run();
      bit2.run();
      client.connect();
      List<QueryResultBatch> results =
          client.runQuery(
              org.apache.drill.exec.proto.UserBitShared.QueryType.PHYSICAL,
              Files.toString(
                  FileUtils.getResourceAsFile("/mergerecv/merging_receiver.json"), Charsets.UTF_8));
      int count = 0;
      RecordBatchLoader batchLoader = new RecordBatchLoader(client.getAllocator());
      // print the results
      for (QueryResultBatch b : results) {
        count += b.getHeader().getRowCount();
        for (int valueIdx = 0; valueIdx < b.getHeader().getRowCount(); valueIdx++) {
          List<Object> row = Lists.newArrayList();
          batchLoader.load(b.getHeader().getDef(), b.getData());
          for (VectorWrapper<?> vw : batchLoader)
            row.add(
                vw.getValueVector().getField().toExpr()
                    + ":"
                    + vw.getValueVector().getAccessor().getObject(valueIdx));
          for (Object cell : row) {
            if (cell == null) {
              System.out.print("<null>    ");
              continue;
            }
            int len = cell.toString().length();
            System.out.print(cell + " ");
            for (int i = 0; i < (30 - len); ++i) System.out.print(" ");
          }
          System.out.println();
        }
        b.release();
        batchLoader.clear();
      }
      assertEquals(200, count);
    }
  }
  @Test
  public void testMultipleProvidersMixedSizes() throws Exception {
    RemoteServiceSet serviceSet = RemoteServiceSet.getLocalServiceSet();

    try (Drillbit bit1 = new Drillbit(CONFIG, serviceSet);
        Drillbit bit2 = new Drillbit(CONFIG, serviceSet);
        DrillClient client = new DrillClient(CONFIG, serviceSet.getCoordinator()); ) {

      bit1.run();
      bit2.run();
      client.connect();
      List<QueryResultBatch> results =
          client.runQuery(
              org.apache.drill.exec.proto.UserBitShared.QueryType.PHYSICAL,
              Files.toString(
                  FileUtils.getResourceAsFile("/mergerecv/multiple_providers.json"),
                  Charsets.UTF_8));
      int count = 0;
      RecordBatchLoader batchLoader = new RecordBatchLoader(client.getAllocator());
      // print the results
      Long lastBlueValue = null;
      for (QueryResultBatch b : results) {
        count += b.getHeader().getRowCount();
        for (int valueIdx = 0; valueIdx < b.getHeader().getRowCount(); valueIdx++) {
          List<Object> row = Lists.newArrayList();
          batchLoader.load(b.getHeader().getDef(), b.getData());
          for (VectorWrapper vw : batchLoader) {
            row.add(
                vw.getValueVector().getField().toExpr()
                    + ":"
                    + vw.getValueVector().getAccessor().getObject(valueIdx));
            if (vw.getValueVector()
                .getField()
                .getAsSchemaPath()
                .getRootSegment()
                .getPath()
                .equals("blue")) {
              // assert order is ascending
              if (((Long) vw.getValueVector().getAccessor().getObject(valueIdx)).longValue() == 0)
                continue; // ignore initial 0's from sort
              if (lastBlueValue != null)
                assertTrue(
                    ((Long) vw.getValueVector().getAccessor().getObject(valueIdx)).longValue()
                        >= ((Long) lastBlueValue).longValue());
              lastBlueValue = (Long) vw.getValueVector().getAccessor().getObject(valueIdx);
            }
          }
          for (Object cell : row) {
            int len = cell.toString().length();
            System.out.print(cell + " ");
            for (int i = 0; i < (30 - len); ++i) System.out.print(" ");
          }
          System.out.println();
        }
        b.release();
        batchLoader.clear();
      }
      assertEquals(400, count);
    }
  }
  @Test
  @Ignore
  public void testParseParquetPhysicalPlan() throws Exception {
    RemoteServiceSet serviceSet = RemoteServiceSet.getLocalServiceSet();
    DrillConfig config = DrillConfig.create();

    try (Drillbit bit1 = new Drillbit(config, serviceSet);
        DrillClient client = new DrillClient(config, serviceSet.getCoordinator()); ) {
      bit1.run();
      client.connect();
      List<QueryDataBatch> results =
          client.runQuery(
              org.apache.drill.exec.proto.UserBitShared.QueryType.PHYSICAL,
              Resources.toString(Resources.getResource(fileName), Charsets.UTF_8));
      RecordBatchLoader loader = new RecordBatchLoader(bit1.getContext().getAllocator());
      int count = 0;
      for (QueryDataBatch b : results) {
        System.out.println(String.format("Got %d results", b.getHeader().getRowCount()));
        count += b.getHeader().getRowCount();
        loader.load(b.getHeader().getDef(), b.getData());
        for (VectorWrapper vw : loader) {
          System.out.print(vw.getValueVector().getField().toExpr() + ": ");
          ValueVector vv = vw.getValueVector();
          for (int i = 0; i < vv.getAccessor().getValueCount(); i++) {
            Object o = vv.getAccessor().getObject(i);
            if (o instanceof byte[]) {
              System.out.print(" [" + new String((byte[]) o) + "]");
            } else {
              System.out.print(" [" + vv.getAccessor().getObject(i) + "]");
            }
            //            break;
          }
          System.out.println();
        }
        loader.clear();
        b.release();
      }
      client.close();
      System.out.println(String.format("Got %d total results", count));
    }
  }
Пример #14
0
  @Override
  public void buildSchema() throws SchemaChangeException {
    IterOutcome outcome = next(incoming);
    switch (outcome) {
      case NONE:
        state = BatchState.DONE;
        container.buildSchema(SelectionVectorMode.NONE);
        return;
      case OUT_OF_MEMORY:
        state = BatchState.OUT_OF_MEMORY;
        return;
      case STOP:
        state = BatchState.STOP;
        return;
    }

    if (!createAggregator()) {
      state = BatchState.DONE;
    }
    for (final VectorWrapper<?> w : container) {
      w.getValueVector().allocateNew();
    }
  }
Пример #15
0
 @Override
 public void dataArrived(QueryDataBatch result, ConnectionThrottle throttle) {
   try {
     final int rows = result.getHeader().getRowCount();
     if (result.hasData()) {
       RecordBatchLoader loader = null;
       try {
         loader = new RecordBatchLoader(allocator);
         loader.load(result.getHeader().getDef(), result.getData());
         // TODO:  Clean:  DRILL-2933:  That load(...) no longer throws
         // SchemaChangeException, so check/clean catch clause below.
         for (int i = 0; i < loader.getSchema().getFieldCount(); ++i) {
           columns.add(loader.getSchema().getColumn(i).getPath());
         }
         for (int i = 0; i < rows; ++i) {
           final Map<String, String> record = Maps.newHashMap();
           for (VectorWrapper<?> vw : loader) {
             final String field = vw.getValueVector().getMetadata().getNamePart().getName();
             final ValueVector.Accessor accessor = vw.getValueVector().getAccessor();
             final Object value = i < accessor.getValueCount() ? accessor.getObject(i) : null;
             final String display = value == null ? null : value.toString();
             record.put(field, display);
           }
           results.add(record);
         }
       } finally {
         if (loader != null) {
           loader.clear();
         }
       }
     }
   } catch (SchemaChangeException e) {
     throw new RuntimeException(e);
   } finally {
     result.release();
   }
 }
  private IterOutcome doWork()
      throws ClassTransformationException, IOException, SchemaChangeException {
    if (allocationVectors != null) {
      for (ValueVector v : allocationVectors) {
        v.clear();
      }
    }

    allocationVectors = Lists.newArrayList();
    transfers.clear();

    final ClassGenerator<UnionAller> cg =
        CodeGenerator.getRoot(UnionAller.TEMPLATE_DEFINITION, context.getFunctionRegistry());
    int index = 0;
    for (VectorWrapper<?> vw : current) {
      ValueVector vvIn = vw.getValueVector();
      // get the original input column names
      SchemaPath inputPath = vvIn.getField().getPath();
      // get the renamed column names
      SchemaPath outputPath = outputFields.get(index).getPath();

      final ErrorCollector collector = new ErrorCollectorImpl();
      // According to input data names, Minortypes, Datamodes, choose to
      // transfer directly,
      // rename columns or
      // cast data types (Minortype or DataMode)
      if (hasSameTypeAndMode(outputFields.get(index), vw.getValueVector().getField())) {
        // Transfer column
        if (outputFields.get(index).getPath().equals(inputPath)) {
          final LogicalExpression expr =
              ExpressionTreeMaterializer.materialize(
                  inputPath, current, collector, context.getFunctionRegistry());
          if (collector.hasErrors()) {
            throw new SchemaChangeException(
                String.format(
                    "Failure while trying to materialize incoming schema.  Errors:\n %s.",
                    collector.toErrorString()));
          }

          ValueVectorReadExpression vectorRead = (ValueVectorReadExpression) expr;
          ValueVector vvOut =
              container.addOrGet(MaterializedField.create(outputPath, vectorRead.getMajorType()));
          TransferPair tp = vvIn.makeTransferPair(vvOut);
          transfers.add(tp);
          // Copy data in order to rename the column
        } else {
          final LogicalExpression expr =
              ExpressionTreeMaterializer.materialize(
                  inputPath, current, collector, context.getFunctionRegistry());
          if (collector.hasErrors()) {
            throw new SchemaChangeException(
                String.format(
                    "Failure while trying to materialize incoming schema.  Errors:\n %s.",
                    collector.toErrorString()));
          }

          MaterializedField outputField = MaterializedField.create(outputPath, expr.getMajorType());
          ValueVector vv = container.addOrGet(outputField, callBack);
          allocationVectors.add(vv);
          TypedFieldId fid = container.getValueVectorId(outputField.getPath());
          ValueVectorWriteExpression write = new ValueVectorWriteExpression(fid, expr, true);
          cg.addExpr(write);
        }
        // Cast is necessary
      } else {
        LogicalExpression expr =
            ExpressionTreeMaterializer.materialize(
                inputPath, current, collector, context.getFunctionRegistry());
        if (collector.hasErrors()) {
          throw new SchemaChangeException(
              String.format(
                  "Failure while trying to materialize incoming schema.  Errors:\n %s.",
                  collector.toErrorString()));
        }

        // If the inputs' DataMode is required and the outputs' DataMode is not required
        // cast to the one with the least restriction
        if (vvIn.getField().getType().getMode() == DataMode.REQUIRED
            && outputFields.get(index).getType().getMode() != DataMode.REQUIRED) {
          expr =
              ExpressionTreeMaterializer.convertToNullableType(
                  expr,
                  vvIn.getField().getType().getMinorType(),
                  context.getFunctionRegistry(),
                  collector);
          if (collector.hasErrors()) {
            throw new SchemaChangeException(
                String.format(
                    "Failure while trying to materialize incoming schema.  Errors:\n %s.",
                    collector.toErrorString()));
          }
        }

        // If two inputs' MinorTypes are different,
        // Insert a cast before the Union operation
        if (vvIn.getField().getType().getMinorType()
            != outputFields.get(index).getType().getMinorType()) {
          expr =
              ExpressionTreeMaterializer.addCastExpression(
                  expr,
                  outputFields.get(index).getType(),
                  context.getFunctionRegistry(),
                  collector);
          if (collector.hasErrors()) {
            throw new SchemaChangeException(
                String.format(
                    "Failure while trying to materialize incoming schema.  Errors:\n %s.",
                    collector.toErrorString()));
          }
        }

        final MaterializedField outputField =
            MaterializedField.create(outputPath, expr.getMajorType());
        ValueVector vector = container.addOrGet(outputField, callBack);
        allocationVectors.add(vector);
        TypedFieldId fid = container.getValueVectorId(outputField.getPath());

        boolean useSetSafe = !(vector instanceof FixedWidthVector);
        ValueVectorWriteExpression write = new ValueVectorWriteExpression(fid, expr, useSetSafe);
        cg.addExpr(write);
      }
      ++index;
    }

    unionall = context.getImplementationClass(cg.getCodeGenerator());
    unionall.setup(context, current, this, transfers);

    if (!schemaAvailable) {
      container.buildSchema(BatchSchema.SelectionVectorMode.NONE);
      schemaAvailable = true;
    }

    if (!doAlloc()) {
      return IterOutcome.OUT_OF_MEMORY;
    }

    recordCount = unionall.unionRecords(0, current.getRecordCount(), 0);
    setValueCount(recordCount);
    return IterOutcome.OK;
  }
  private void buildTable()
      throws SchemaChangeException, ClassTransformationException, IOException {

    // Get all samples from distributed map

    SortRecordBatchBuilder containerBuilder =
        new SortRecordBatchBuilder(context.getAllocator(), MAX_SORT_BYTES);
    for (CachedVectorContainer w : mmap.get(mapKey)) {
      containerBuilder.add(w.get());
    }
    VectorContainer allSamplesContainer = new VectorContainer();
    containerBuilder.build(context, allSamplesContainer);

    List<Ordering> orderDefs = Lists.newArrayList();
    int i = 0;
    for (Ordering od : popConfig.getOrderings()) {
      SchemaPath sp = SchemaPath.getSimplePath("f" + i++);
      orderDefs.add(new Ordering(od.getDirection(), new FieldReference(sp)));
    }

    // sort the data incoming samples.
    SelectionVector4 newSv4 = containerBuilder.getSv4();
    Sorter sorter = SortBatch.createNewSorter(context, orderDefs, allSamplesContainer);
    sorter.setup(context, newSv4, allSamplesContainer);
    sorter.sort(newSv4, allSamplesContainer);

    // Copy every Nth record from the samples into a candidate partition table, where N =
    // totalSampledRecords/partitions
    // Attempt to push this to the distributed map. Only the first candidate to get pushed will be
    // used.
    VectorContainer candidatePartitionTable = new VectorContainer();
    SampleCopier copier = null;
    List<ValueVector> localAllocationVectors = Lists.newArrayList();
    copier =
        getCopier(
            newSv4,
            allSamplesContainer,
            candidatePartitionTable,
            orderDefs,
            localAllocationVectors);
    int allocationSize = 50;
    while (true) {
      for (ValueVector vv : localAllocationVectors) {
        AllocationHelper.allocate(vv, samplingFactor * partitions, allocationSize);
      }
      int skipRecords = containerBuilder.getSv4().getTotalCount() / partitions;
      if (copier.copyRecords(skipRecords, skipRecords, partitions - 1)) {
        assert copier.getOutputRecords() == partitions - 1
            : String.format(
                "output records: %d partitions: %d", copier.getOutputRecords(), partitions);
        for (VectorWrapper<?> vw : candidatePartitionTable) {
          vw.getValueVector().getMutator().setValueCount(copier.getOutputRecords());
        }
        break;
      } else {
        candidatePartitionTable.zeroVectors();
        allocationSize *= 2;
      }
    }
    candidatePartitionTable.setRecordCount(copier.getOutputRecords());
    WritableBatch batch =
        WritableBatch.getBatchNoHVWrap(
            candidatePartitionTable.getRecordCount(), candidatePartitionTable, false);
    CachedVectorContainer wrap =
        new CachedVectorContainer(batch, context.getDrillbitContext().getAllocator());
    tableMap.putIfAbsent(mapKey + "final", wrap, 1, TimeUnit.MINUTES);

    candidatePartitionTable.clear();
    allSamplesContainer.clear();
    containerBuilder.clear();
    wrap.clear();
  }
Пример #18
0
  @Override
  public IterOutcome innerNext() {
    if (schema != null) {
      if (spillCount == 0) {
        return (getSelectionVector4().next()) ? IterOutcome.OK : IterOutcome.NONE;
      } else {
        Stopwatch w = Stopwatch.createStarted();
        int count = copier.next(targetRecordCount);
        if (count > 0) {
          long t = w.elapsed(TimeUnit.MICROSECONDS);
          logger.debug("Took {} us to merge {} records", t, count);
          container.setRecordCount(count);
          return IterOutcome.OK;
        } else {
          logger.debug("copier returned 0 records");
          return IterOutcome.NONE;
        }
      }
    }

    int totalCount = 0;
    int totalBatches = 0; // total number of batches received so far

    try {
      container.clear();
      outer:
      while (true) {
        IterOutcome upstream;
        if (first) {
          upstream = IterOutcome.OK_NEW_SCHEMA;
        } else {
          upstream = next(incoming);
        }
        if (upstream == IterOutcome.OK && sorter == null) {
          upstream = IterOutcome.OK_NEW_SCHEMA;
        }
        switch (upstream) {
          case NONE:
            if (first) {
              return upstream;
            }
            break outer;
          case NOT_YET:
            throw new UnsupportedOperationException();
          case STOP:
            return upstream;
          case OK_NEW_SCHEMA:
          case OK:
            VectorContainer convertedBatch;
            // only change in the case that the schema truly changes.  Artificial schema changes are
            // ignored.
            if (upstream == IterOutcome.OK_NEW_SCHEMA && !incoming.getSchema().equals(schema)) {
              if (schema != null) {
                if (unionTypeEnabled) {
                  this.schema = SchemaUtil.mergeSchemas(schema, incoming.getSchema());
                } else {
                  throw new SchemaChangeException(
                      "Schema changes not supported in External Sort. Please enable Union type");
                }
              } else {
                schema = incoming.getSchema();
              }
              convertedBatch = SchemaUtil.coerceContainer(incoming, schema, oContext);
              for (BatchGroup b : batchGroups) {
                b.setSchema(schema);
              }
              for (BatchGroup b : spilledBatchGroups) {
                b.setSchema(schema);
              }
              this.sorter = createNewSorter(context, convertedBatch);
            } else {
              convertedBatch = SchemaUtil.coerceContainer(incoming, schema, oContext);
            }
            if (first) {
              first = false;
            }
            if (convertedBatch.getRecordCount() == 0) {
              for (VectorWrapper<?> w : convertedBatch) {
                w.clear();
              }
              break;
            }
            SelectionVector2 sv2;
            if (incoming.getSchema().getSelectionVectorMode()
                == BatchSchema.SelectionVectorMode.TWO_BYTE) {
              sv2 = incoming.getSelectionVector2().clone();
            } else {
              try {
                sv2 = newSV2();
              } catch (InterruptedException e) {
                return IterOutcome.STOP;
              } catch (OutOfMemoryException e) {
                throw new OutOfMemoryException(e);
              }
            }

            int count = sv2.getCount();
            totalCount += count;
            totalBatches++;
            sorter.setup(context, sv2, convertedBatch);
            sorter.sort(sv2);
            RecordBatchData rbd = new RecordBatchData(convertedBatch, oAllocator);
            boolean success = false;
            try {
              rbd.setSv2(sv2);
              batchGroups.add(new BatchGroup(rbd.getContainer(), rbd.getSv2(), oContext));
              if (peakNumBatches < batchGroups.size()) {
                peakNumBatches = batchGroups.size();
                stats.setLongStat(Metric.PEAK_BATCHES_IN_MEMORY, peakNumBatches);
              }

              batchesSinceLastSpill++;
              if ( // If we haven't spilled so far, do we have enough memory for MSorter if this
              // turns out to be the last incoming batch?
              (spillCount == 0 && !hasMemoryForInMemorySort(totalCount))
                  ||
                  // If we haven't spilled so far, make sure we don't exceed the maximum number of
                  // batches SV4 can address
                  (spillCount == 0 && totalBatches > Character.MAX_VALUE)
                  ||
                  // TODO(DRILL-4438) - consider setting this threshold more intelligently,
                  // lowering caused a failing low memory condition (test in
                  // BasicPhysicalOpUnitTest)
                  // to complete successfully (although it caused perf decrease as there was more
                  // spilling)

                  // current memory used is more than 95% of memory usage limit of this operator
                  (oAllocator.getAllocatedMemory() > .95 * oAllocator.getLimit())
                  ||
                  // Number of incoming batches (BatchGroups) exceed the limit and number of
                  // incoming batches accumulated
                  // since the last spill exceed the defined limit
                  (batchGroups.size() > SPILL_THRESHOLD
                      && batchesSinceLastSpill >= SPILL_BATCH_GROUP_SIZE)) {

                if (firstSpillBatchCount == 0) {
                  firstSpillBatchCount = batchGroups.size();
                }

                if (spilledBatchGroups.size() > firstSpillBatchCount / 2) {
                  logger.info("Merging spills");
                  final BatchGroup merged = mergeAndSpill(spilledBatchGroups);
                  if (merged != null) {
                    spilledBatchGroups.addFirst(merged);
                  }
                }
                final BatchGroup merged = mergeAndSpill(batchGroups);
                if (merged != null) { // make sure we don't add null to spilledBatchGroups
                  spilledBatchGroups.add(merged);
                  batchesSinceLastSpill = 0;
                }
              }
              success = true;
            } finally {
              if (!success) {
                rbd.clear();
              }
            }
            break;
          case OUT_OF_MEMORY:
            logger.debug("received OUT_OF_MEMORY, trying to spill");
            if (batchesSinceLastSpill > 2) {
              final BatchGroup merged = mergeAndSpill(batchGroups);
              if (merged != null) {
                spilledBatchGroups.add(merged);
                batchesSinceLastSpill = 0;
              }
            } else {
              logger.debug("not enough batches to spill, sending OUT_OF_MEMORY downstream");
              return IterOutcome.OUT_OF_MEMORY;
            }
            break;
          default:
            throw new UnsupportedOperationException();
        }
      }

      if (totalCount == 0) {
        return IterOutcome.NONE;
      }
      if (spillCount == 0) {

        if (builder != null) {
          builder.clear();
          builder.close();
        }
        builder = new SortRecordBatchBuilder(oAllocator);

        for (BatchGroup group : batchGroups) {
          RecordBatchData rbd = new RecordBatchData(group.getContainer(), oAllocator);
          rbd.setSv2(group.getSv2());
          builder.add(rbd);
        }

        builder.build(context, container);
        sv4 = builder.getSv4();
        mSorter = createNewMSorter();
        mSorter.setup(context, oAllocator, getSelectionVector4(), this.container);

        // For testing memory-leak purpose, inject exception after mSorter finishes setup
        injector.injectUnchecked(context.getExecutionControls(), INTERRUPTION_AFTER_SETUP);
        mSorter.sort(this.container);

        // sort may have prematurely exited due to should continue returning false.
        if (!context.shouldContinue()) {
          return IterOutcome.STOP;
        }

        // For testing memory-leak purpose, inject exception after mSorter finishes sorting
        injector.injectUnchecked(context.getExecutionControls(), INTERRUPTION_AFTER_SORT);
        sv4 = mSorter.getSV4();

        container.buildSchema(SelectionVectorMode.FOUR_BYTE);
      } else { // some batches were spilled
        final BatchGroup merged = mergeAndSpill(batchGroups);
        if (merged != null) {
          spilledBatchGroups.add(merged);
        }
        batchGroups.addAll(spilledBatchGroups);
        spilledBatchGroups =
            null; // no need to cleanup spilledBatchGroups, all it's batches are in batchGroups now

        logger.warn(
            "Starting to merge. {} batch groups. Current allocated memory: {}",
            batchGroups.size(),
            oAllocator.getAllocatedMemory());
        VectorContainer hyperBatch = constructHyperBatch(batchGroups);
        createCopier(hyperBatch, batchGroups, container, false);

        int estimatedRecordSize = 0;
        for (VectorWrapper<?> w : batchGroups.get(0)) {
          try {
            estimatedRecordSize += TypeHelper.getSize(w.getField().getType());
          } catch (UnsupportedOperationException e) {
            estimatedRecordSize += 50;
          }
        }
        targetRecordCount =
            Math.min(MAX_BATCH_SIZE, Math.max(1, COPIER_BATCH_MEM_LIMIT / estimatedRecordSize));
        int count = copier.next(targetRecordCount);
        container.buildSchema(SelectionVectorMode.NONE);
        container.setRecordCount(count);
      }

      return IterOutcome.OK_NEW_SCHEMA;

    } catch (SchemaChangeException ex) {
      kill(false);
      context.fail(
          UserException.unsupportedError(ex)
              .message("Sort doesn't currently support sorts with changing schemas")
              .build(logger));
      return IterOutcome.STOP;
    } catch (ClassTransformationException | IOException ex) {
      kill(false);
      context.fail(ex);
      return IterOutcome.STOP;
    } catch (UnsupportedOperationException e) {
      throw new RuntimeException(e);
    }
  }
Пример #19
0
  public BatchGroup mergeAndSpill(LinkedList<BatchGroup> batchGroups) throws SchemaChangeException {
    logger.debug("Copier allocator current allocation {}", copierAllocator.getAllocatedMemory());
    logger.debug(
        "mergeAndSpill: starting total size in memory = {}", oAllocator.getAllocatedMemory());
    VectorContainer outputContainer = new VectorContainer();
    List<BatchGroup> batchGroupList = Lists.newArrayList();
    int batchCount = batchGroups.size();
    for (int i = 0; i < batchCount / 2; i++) {
      if (batchGroups.size() == 0) {
        break;
      }
      BatchGroup batch = batchGroups.pollLast();
      assert batch != null : "Encountered a null batch during merge and spill operation";
      batchGroupList.add(batch);
    }

    if (batchGroupList.size() == 0) {
      return null;
    }
    int estimatedRecordSize = 0;
    for (VectorWrapper<?> w : batchGroupList.get(0)) {
      try {
        estimatedRecordSize += TypeHelper.getSize(w.getField().getType());
      } catch (UnsupportedOperationException e) {
        estimatedRecordSize += 50;
      }
    }
    int targetRecordCount = Math.max(1, COPIER_BATCH_MEM_LIMIT / estimatedRecordSize);
    VectorContainer hyperBatch = constructHyperBatch(batchGroupList);
    createCopier(hyperBatch, batchGroupList, outputContainer, true);

    int count = copier.next(targetRecordCount);
    assert count > 0;

    logger.debug(
        "mergeAndSpill: estimated record size = {}, target record count = {}",
        estimatedRecordSize,
        targetRecordCount);

    // 1 output container is kept in memory, so we want to hold on to it and transferClone
    // allows keeping ownership
    VectorContainer c1 = VectorContainer.getTransferClone(outputContainer, oContext);
    c1.buildSchema(BatchSchema.SelectionVectorMode.NONE);
    c1.setRecordCount(count);

    String spillDir = dirs.next();
    Path currSpillPath = new Path(Joiner.on("/").join(spillDir, fileName));
    currSpillDirs.add(currSpillPath);
    String outputFile = Joiner.on("/").join(currSpillPath, spillCount++);
    try {
      fs.deleteOnExit(currSpillPath);
    } catch (IOException e) {
      // since this is meant to be used in a batches's spilling, we don't propagate the exception
      logger.warn("Unable to mark spill directory " + currSpillPath + " for deleting on exit", e);
    }
    stats.setLongStat(Metric.SPILL_COUNT, spillCount);
    BatchGroup newGroup = new BatchGroup(c1, fs, outputFile, oContext);
    try (AutoCloseable a = AutoCloseables.all(batchGroupList)) {
      logger.info("Merging and spilling to {}", outputFile);
      while ((count = copier.next(targetRecordCount)) > 0) {
        outputContainer.buildSchema(BatchSchema.SelectionVectorMode.NONE);
        outputContainer.setRecordCount(count);
        // note that addBatch also clears the outputContainer
        newGroup.addBatch(outputContainer);
      }
      injector.injectChecked(
          context.getExecutionControls(), INTERRUPTION_WHILE_SPILLING, IOException.class);
      newGroup.closeOutputStream();
    } catch (Throwable e) {
      // we only need to cleanup newGroup if spill failed
      try {
        AutoCloseables.close(e, newGroup);
      } catch (Throwable t) {
        /* close() may hit the same IO issue; just ignore */
      }
      throw UserException.resourceError(e)
          .message("External Sort encountered an error while spilling to disk")
          .addContext(e.getMessage() /* more detail */)
          .build(logger);
    } finally {
      hyperBatch.clear();
    }
    logger.debug("mergeAndSpill: final total size in memory = {}", oAllocator.getAllocatedMemory());
    logger.info("Completed spilling to {}", outputFile);
    return newGroup;
  }
 // This method is used by inner class to clear the current record batch
 private void clearCurrentRecordBatch() {
   for (VectorWrapper<?> v : current) {
     v.clear();
   }
 }
  private boolean saveSamples()
      throws SchemaChangeException, ClassTransformationException, IOException {
    recordsSampled = 0;
    IterOutcome upstream;

    // Start collecting batches until recordsToSample records have been collected

    SortRecordBatchBuilder builder =
        new SortRecordBatchBuilder(oContext.getAllocator(), MAX_SORT_BYTES);
    builder.add(incoming);

    recordsSampled += incoming.getRecordCount();

    outer:
    while (recordsSampled < recordsToSample) {
      upstream = next(incoming);
      switch (upstream) {
        case NONE:
        case NOT_YET:
        case STOP:
          upstreamNone = true;
          break outer;
        default:
          // fall through
      }
      builder.add(incoming);
      recordsSampled += incoming.getRecordCount();
      if (upstream == IterOutcome.NONE) break;
    }
    VectorContainer sortedSamples = new VectorContainer();
    builder.build(context, sortedSamples);

    // Sort the records according the orderings given in the configuration

    Sorter sorter = SortBatch.createNewSorter(context, popConfig.getOrderings(), sortedSamples);
    SelectionVector4 sv4 = builder.getSv4();
    sorter.setup(context, sv4, sortedSamples);
    sorter.sort(sv4, sortedSamples);

    // Project every Nth record to a new vector container, where N = recordsSampled/(samplingFactor
    // * partitions).
    // Uses the
    // the expressions from the Orderings to populate each column. There is one column for each
    // Ordering in
    // popConfig.orderings.

    VectorContainer containerToCache = new VectorContainer();
    List<ValueVector> localAllocationVectors = Lists.newArrayList();
    SampleCopier copier =
        getCopier(
            sv4, sortedSamples, containerToCache, popConfig.getOrderings(), localAllocationVectors);
    int allocationSize = 50;
    while (true) {
      for (ValueVector vv : localAllocationVectors) {
        AllocationHelper.allocate(vv, samplingFactor * partitions, allocationSize);
      }
      if (copier.copyRecords(
          recordsSampled / (samplingFactor * partitions), 0, samplingFactor * partitions)) {
        break;
      } else {
        containerToCache.zeroVectors();
        allocationSize *= 2;
      }
    }
    for (VectorWrapper<?> vw : containerToCache) {
      vw.getValueVector().getMutator().setValueCount(copier.getOutputRecords());
    }
    containerToCache.setRecordCount(copier.getOutputRecords());

    // Get a distributed multimap handle from the distributed cache, and put the vectors from the
    // new vector container
    // into a serializable wrapper object, and then add to distributed map

    WritableBatch batch =
        WritableBatch.getBatchNoHVWrap(containerToCache.getRecordCount(), containerToCache, false);
    CachedVectorContainer sampleToSave = new CachedVectorContainer(batch, context.getAllocator());

    mmap.put(mapKey, sampleToSave);
    this.sampledIncomingBatches = builder.getHeldRecordBatches();
    builder.clear();
    batch.clear();
    containerToCache.clear();
    sampleToSave.clear();
    return true;
  }