Example #1
  @Override
  public void setup(OperatorContext context, OutputMutator output) throws ExecutionSetupException {
    this.output = output;
    this.context = context;
    try {
      KuduTable table = client.openTable(scanSpec.getTableName());

      KuduScannerBuilder builder = client.newScannerBuilder(table);
      if (!isStarQuery()) {
        List<String> colNames = Lists.newArrayList();
        for (SchemaPath p : this.getColumns()) {
          colNames.add(p.getAsUnescapedPath());
        }
        builder.setProjectedColumnNames(colNames);
      }

      context.getStats().startWait();
      try {
        scanner =
            builder
                .lowerBoundPartitionKeyRaw(scanSpec.getStartKey())
                .exclusiveUpperBoundPartitionKeyRaw(scanSpec.getEndKey())
                .build();
      } finally {
        context.getStats().stopWait();
      }
    } catch (Exception e) {
      throw new ExecutionSetupException(e);
    }
  }
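Two details of this setup are worth noting: column projection is pushed down to Kudu for non-star queries, and the blocking scanner build is bracketed with startWait()/stopWait() so the time is attributed as wait time in the operator stats. A minimal sketch of that try/finally accounting idiom, using a hypothetical WaitStats interface rather than Drill's OperatorStats:

  // WaitStats is a hypothetical stand-in for the stats object used above.
  interface WaitStats {
    void startWait();
    void stopWait();
  }

  final class WaitTracking {
    // Guarantees stopWait() runs even when the blocking call throws.
    static <T> T timedWait(WaitStats stats, java.util.concurrent.Callable<T> blockingCall)
        throws Exception {
      stats.startWait();
      try {
        return blockingCall.call();
      } finally {
        stats.stopWait();
      }
    }
  }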
Example #2
  /**
   * Creates a copy of a record batch, converting any fields as necessary to coerce it into the
   * provided schema.
   *
   * @param in the record batch to copy
   * @param toSchema the schema the copied batch should conform to
   * @param context operator context used to allocate any new vectors
   * @return a new container holding the coerced copy of the batch
   */
  public static VectorContainer coerceContainer(
      VectorAccessible in, BatchSchema toSchema, OperatorContext context) {
    int recordCount = in.getRecordCount();
    Map<SchemaPath, ValueVector> vectorMap = Maps.newHashMap();
    for (VectorWrapper w : in) {
      ValueVector v = w.getValueVector();
      vectorMap.put(v.getField().getPath(), v);
    }

    VectorContainer c = new VectorContainer(context);

    for (MaterializedField field : toSchema) {
      ValueVector v = vectorMap.remove(field.getPath());
      if (v != null) {
        int valueCount = v.getAccessor().getValueCount();
        TransferPair tp = v.getTransferPair();
        tp.transfer();
        if (v.getField().getType().getMinorType().equals(field.getType().getMinorType())) {
          if (field.getType().getMinorType() == MinorType.UNION) {
            UnionVector u = (UnionVector) tp.getTo();
            for (MinorType t : field.getType().getSubTypeList()) {
              if (u.getField().getType().getSubTypeList().contains(t)) {
                continue;
              }
              u.addSubType(t);
            }
          }
          c.add(tp.getTo());
        } else {
          ValueVector newVector = TypeHelper.getNewVector(field, context.getAllocator());
          Preconditions.checkState(
              field.getType().getMinorType() == MinorType.UNION,
              "Can only convert vector to Union vector");
          UnionVector u = (UnionVector) newVector;
          u.addVector(tp.getTo());
          MinorType type = v.getField().getType().getMinorType();
          for (int i = 0; i < valueCount; i++) {
            u.getMutator().setType(i, type);
          }
          for (MinorType t : field.getType().getSubTypeList()) {
            if (u.getField().getType().getSubTypeList().contains(t)) {
              continue;
            }
            u.addSubType(t);
          }
          u.getMutator().setValueCount(valueCount);
          c.add(u);
        }
      } else {
        v = TypeHelper.getNewVector(field, context.getAllocator());
        v.allocateNew();
        v.getMutator().setValueCount(recordCount);
        c.add(v);
      }
    }
    c.buildSchema(in.getSchema().getSelectionVectorMode());
    c.setRecordCount(recordCount);
    Preconditions.checkState(vectorMap.size() == 0, "Leftover vector from incoming batch");
    return c;
  }
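Stripped of the vector machinery, coerceContainer does a three-way match by column name: columns present in both the batch and the target schema are transferred (with promotion to a UNION vector when the minor types disagree), columns only in the target schema are materialized as fresh empty vectors, and columns left over from the input are an error. A hedged stand-in sketch of that flow (Column is hypothetical, and the UNION promotion is omitted):

  import java.util.ArrayList;
  import java.util.HashMap;
  import java.util.List;
  import java.util.Map;

  final class CoerceSketch {
    // Hypothetical stand-in for a named value vector.
    static final class Column {
      final String name;
      Column(String name) { this.name = name; }
    }

    static List<Column> coerce(List<Column> in, List<String> targetNames) {
      Map<String, Column> byName = new HashMap<>();
      for (Column c : in) {
        byName.put(c.name, c);
      }
      List<Column> out = new ArrayList<>();
      for (String name : targetNames) {
        Column have = byName.remove(name);
        // transfer the matching column, or materialize an empty placeholder
        out.add(have != null ? have : new Column(name));
      }
      if (!byName.isEmpty()) {
        throw new IllegalStateException("Leftover vector from incoming batch");
      }
      return out;
    }
  }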
Example #3
 @Override
 public int next() {
   int rowCount = 0;
   try {
     while (iterator == null || !iterator.hasNext()) {
       if (!scanner.hasMoreRows()) {
         iterator = null;
         return 0;
       }
       context.getStats().startWait();
       try {
         iterator = scanner.nextRows();
       } finally {
         context.getStats().stopWait();
       }
     }
     for (; rowCount < TARGET_RECORD_COUNT && iterator.hasNext(); rowCount++) {
       addRowResult(iterator.next(), rowCount);
     }
   } catch (Exception ex) {
     throw new RuntimeException(ex);
   }
   for (ProjectedColumnInfo pci : projectedCols) {
     pci.vv.getMutator().setValueCount(rowCount);
   }
   return rowCount;
 }
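The control flow in next() is a standard paged-read pattern: refill the row iterator whenever it runs dry, return 0 only once the scanner itself is exhausted, and otherwise copy at most TARGET_RECORD_COUNT rows per call so downstream batches stay bounded. A generic sketch under those assumptions, with PagedSource as a hypothetical stand-in for the Kudu scanner:

  import java.util.Iterator;
  import java.util.function.ObjIntConsumer;

  // Hypothetical stand-in for the Kudu scanner above.
  interface PagedSource<T> {
    boolean hasMorePages();
    Iterator<T> nextPage();
  }

  final class BatchingReader<T> {
    private final PagedSource<T> source;
    private final int targetRecordCount;
    private Iterator<T> iterator;

    BatchingReader(PagedSource<T> source, int targetRecordCount) {
      this.source = source;
      this.targetRecordCount = targetRecordCount;
    }

    // Mirrors next() above: refill when the iterator runs dry, stop at the batch
    // target or when the source is exhausted, and report how many rows were added.
    int next(ObjIntConsumer<T> addRow) {
      int rowCount = 0;
      while (iterator == null || !iterator.hasNext()) {
        if (!source.hasMorePages()) {
          iterator = null;
          return 0;
        }
        iterator = source.nextPage();
      }
      for (; rowCount < targetRecordCount && iterator.hasNext(); rowCount++) {
        addRow.accept(iterator.next(), rowCount);
      }
      return rowCount;
    }
  }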
Example #4
 public BatchGroup(
     VectorContainer container, FileSystem fs, String path, OperatorContext context) {
   currentContainer = container;
   this.fs = fs;
   this.path = new Path(path);
   this.allocator = context.getAllocator();
   this.context = context;
 }
  /**
   * Performs the initial setup required for the record reader. Initializes the input stream,
   * the handling of the output record batch, and the actual reader to be used.
   *
   * @param context operator context from which buffers will be allocated and managed
   * @param outputMutator used to create the schema in the output record batch
   * @throws ExecutionSetupException if the reader could not be set up
   */
  @Override
  public void setup(OperatorContext context, OutputMutator outputMutator)
      throws ExecutionSetupException {

    oContext = context;
    readBuffer = context.getManagedBuffer(READ_BUFFER);
    whitespaceBuffer = context.getManagedBuffer(WHITE_SPACE_BUFFER);

    // setup Output, Input, and Reader
    try {
      TextOutput output = null;
      TextInput input = null;
      InputStream stream = null;

      // setup Output using OutputMutator
      if (settings.isHeaderExtractionEnabled()) {
        // extract header and use that to setup a set of VarCharVectors
        String[] fieldNames = extractHeader();
        output = new FieldVarCharOutput(outputMutator, fieldNames, getColumns(), isStarQuery());
      } else {
        // simply use RepeatedVarCharVector
        output = new RepeatedVarCharOutput(outputMutator, getColumns(), isStarQuery());
      }

      // setup Input using InputStream
      stream = dfs.openPossiblyCompressedStream(split.getPath());
      input =
          new TextInput(
              settings, stream, readBuffer, split.getStart(), split.getStart() + split.getLength());

      // setup Reader using Input and Output
      reader = new TextReader(settings, input, output, whitespaceBuffer);
      reader.start();

    } catch (SchemaChangeException | IOException e) {
      throw new ExecutionSetupException(
          String.format("Failure while setting up text reader for file %s", split.getPath()), e);
    } catch (IllegalArgumentException e) {
      throw UserException.dataReadError(e)
          .addContext("File Path", split.getPath().toString())
          .build(logger);
    }
  }
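Note how the TextInput is bounded to [split.getStart(), split.getStart() + split.getLength()): each reader parses only its slice of the file even though the stream covers the whole file. A simplified, hypothetical sketch of that split-bounding idea (BoundedInput is a stand-in, not a Drill class):

  import java.io.IOException;
  import java.io.InputStream;

  final class BoundedInput {
    private final InputStream in;
    private long remaining;

    BoundedInput(InputStream in, long start, long length) throws IOException {
      this.in = in;
      long skipped = in.skip(start);     // position at the start of this split
      if (skipped != start) {
        throw new IOException("could not seek to split start");
      }
      this.remaining = length;
    }

    int read() throws IOException {
      if (remaining <= 0) {
        return -1;                       // past the end of the split
      }
      int b = in.read();
      if (b >= 0) {
        remaining--;
      }
      return b;
    }
  }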
  /**
   * Extracts the header from the text file. Currently the header is assumed to be the first line
   * when headerExtractionEnabled is set to true. TODO: enhance to support more common header
   * patterns.
   *
   * @return field name strings
   */
  private String[] extractHeader()
      throws SchemaChangeException, IOException, ExecutionSetupException {
    assert (settings.isHeaderExtractionEnabled());
    assert (oContext != null);

    // never skip the first line here, even if skipFirstLine is set: the header is what we read
    settings.setSkipFirstLine(false);

    // setup Output using OutputMutator
    // we should use a separate output mutator to avoid reshaping query output with header data
    HeaderOutputMutator hOutputMutator = new HeaderOutputMutator();
    TextOutput hOutput = new RepeatedVarCharOutput(hOutputMutator, getColumns(), true);
    this.allocate(hOutputMutator.fieldVectorMap);

    // setup Input using InputStream
    // we should read the file header irrespective of the split given to this reader
    InputStream hStream = dfs.openPossiblyCompressedStream(split.getPath());
    TextInput hInput =
        new TextInput(
            settings, hStream, oContext.getManagedBuffer(READ_BUFFER), 0, split.getLength());

    // setup Reader using Input and Output
    this.reader =
        new TextReader(settings, hInput, hOutput, oContext.getManagedBuffer(WHITE_SPACE_BUFFER));
    reader.start();

    // extract first row only
    reader.parseNext();

    // grab the field names from output
    String[] fieldNames = ((RepeatedVarCharOutput) hOutput).getTextOutput();

    // cleanup and set to skip the first line next time we read input
    reader.close();
    hOutputMutator.close();
    settings.setSkipFirstLine(true);

    return fieldNames;
  }
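Stripped of the vector plumbing, the header pass amounts to: read the first line of the file from offset 0 (regardless of which split this reader owns), split it into field names through a throwaway output so the query's real output schema is untouched, and only then enable first-line skipping for the data pass. A simplified hypothetical sketch:

  import java.io.BufferedReader;
  import java.io.IOException;
  import java.io.Reader;

  final class HeaderSketch {
    // fileFromStart must be positioned at offset 0, independent of this reader's split.
    static String[] extractHeader(Reader fileFromStart, String delimiterRegex) throws IOException {
      try (BufferedReader r = new BufferedReader(fileFromStart)) {
        String firstLine = r.readLine();   // header assumed to be the first line
        if (firstLine == null) {
          return new String[0];            // empty file: no header
        }
        return firstLine.split(delimiterRegex);
      }
    }
  }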
Example #7
 public DrillBuf reallocIfNeeded(int size) {
   if (this.capacity() >= size) {
     return this;
   }
   if (context != null) {
     return context.replace(this, size);
   } else if (fContext != null) {
     return fContext.replace(this, size);
   } else if (bufManager != null) {
     return bufManager.replace(this, size);
   } else {
     throw new UnsupportedOperationException(
         "Realloc is only available in the context of an operator's UDFs");
   }
 }
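reallocIfNeeded is a grow-or-delegate idiom: it is a no-op when the buffer already has enough capacity, and otherwise asks the first non-null owning context to replace the buffer, failing if no owner exists. A minimal sketch of the same shape, with hypothetical Buffer and Reallocator stand-ins for DrillBuf and its owners:

  interface Reallocator {
    Buffer replace(Buffer old, int newSize);
  }

  final class Buffer {
    final int capacity;

    Buffer(int capacity) { this.capacity = capacity; }

    Buffer reallocIfNeeded(int size, Reallocator... owners) {
      if (capacity >= size) {
        return this;                        // already big enough: no-op
      }
      for (Reallocator owner : owners) {
        if (owner != null) {
          return owner.replace(this, size); // first non-null owner handles the swap
        }
      }
      throw new UnsupportedOperationException("no owning context to realloc from");
    }
  }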
  private void updateStats() {

    operatorContext
        .getStats()
        .setLongStat(Metric.NUM_DICT_PAGE_LOADS, parquetReaderStats.numDictPageLoads.longValue());
    operatorContext
        .getStats()
        .setLongStat(Metric.NUM_DATA_PAGE_lOADS, parquetReaderStats.numDataPageLoads.longValue());
    operatorContext
        .getStats()
        .setLongStat(
            Metric.NUM_DATA_PAGES_DECODED, parquetReaderStats.numDataPagesDecoded.longValue());
    operatorContext
        .getStats()
        .setLongStat(
            Metric.NUM_DICT_PAGES_DECOMPRESSED,
            parquetReaderStats.numDictPagesDecompressed.longValue());
    operatorContext
        .getStats()
        .setLongStat(
            Metric.NUM_DATA_PAGES_DECOMPRESSED,
            parquetReaderStats.numDataPagesDecompressed.longValue());
    operatorContext
        .getStats()
        .setLongStat(
            Metric.TOTAL_DICT_PAGE_READ_BYTES,
            parquetReaderStats.totalDictPageReadBytes.longValue());
    operatorContext
        .getStats()
        .setLongStat(
            Metric.TOTAL_DATA_PAGE_READ_BYTES,
            parquetReaderStats.totalDataPageReadBytes.longValue());
    operatorContext
        .getStats()
        .setLongStat(
            Metric.TOTAL_DICT_DECOMPRESSED_BYTES,
            parquetReaderStats.totalDictDecompressedBytes.longValue());
    operatorContext
        .getStats()
        .setLongStat(
            Metric.TOTAL_DATA_DECOMPRESSED_BYTES,
            parquetReaderStats.totalDataDecompressedBytes.longValue());
    operatorContext
        .getStats()
        .setLongStat(Metric.TIME_DICT_PAGE_LOADS, parquetReaderStats.timeDictPageLoads.longValue());
    operatorContext
        .getStats()
        .setLongStat(Metric.TIME_DATA_PAGE_LOADS, parquetReaderStats.timeDataPageLoads.longValue());
    operatorContext
        .getStats()
        .setLongStat(
            Metric.TIME_DATA_PAGE_DECODE, parquetReaderStats.timeDataPageDecode.longValue());
    operatorContext
        .getStats()
        .setLongStat(
            Metric.TIME_DICT_PAGE_DECODE, parquetReaderStats.timeDictPageDecode.longValue());
    operatorContext
        .getStats()
        .setLongStat(
            Metric.TIME_DICT_PAGES_DECOMPRESSED,
            parquetReaderStats.timeDictPagesDecompressed.longValue());
    operatorContext
        .getStats()
        .setLongStat(
            Metric.TIME_DATA_PAGES_DECOMPRESSED,
            parquetReaderStats.timeDataPagesDecompressed.longValue());
    operatorContext
        .getStats()
        .setLongStat(Metric.TIME_DISK_SCAN_WAIT, parquetReaderStats.timeDiskScanWait.longValue());
    operatorContext
        .getStats()
        .setLongStat(Metric.TIME_DISK_SCAN, parquetReaderStats.timeDiskScan.longValue());
  }
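Every line of updateStats repeats the same setLongStat call with a different metric/counter pair, which suggests a data-driven alternative: register the pairs once and publish them in a loop. A hypothetical sketch (StatSink and the registration API are stand-ins, not Drill types; the counters mirror the AtomicLong fields used above):

  import java.util.LinkedHashMap;
  import java.util.Map;
  import java.util.concurrent.atomic.AtomicLong;

  final class StatsPublisher {
    // Hypothetical sink; setLongStat mirrors the stats call used above.
    interface StatSink {
      void setLongStat(String metric, long value);
    }

    private final Map<String, AtomicLong> counters = new LinkedHashMap<>();

    void register(String metric, AtomicLong counter) {
      counters.put(metric, counter);
    }

    void publish(StatSink sink) {
      // one loop instead of one hand-written call per metric
      counters.forEach((metric, counter) -> sink.setLongStat(metric, counter.longValue()));
    }
  }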
Example #9
  @Test
  public void testAllocators() throws Exception {
    // Setup a drillbit (initializes a root allocator)
    final DrillConfig config = DrillConfig.create(TEST_CONFIGURATIONS);
    final RemoteServiceSet serviceSet = RemoteServiceSet.getLocalServiceSet();
    final Drillbit bit = new Drillbit(config, serviceSet);
    bit.run();
    final DrillbitContext bitContext = bit.getContext();
    FunctionImplementationRegistry functionRegistry =
        bitContext.getFunctionImplementationRegistry();
    StoragePluginRegistry storageRegistry = new StoragePluginRegistry(bitContext);

    // Create a few Fragment Contexts

    BitControl.PlanFragment.Builder pfBuilder1 = BitControl.PlanFragment.newBuilder();
    pfBuilder1.setMemInitial(1500000);
    BitControl.PlanFragment pf1 = pfBuilder1.build();
    BitControl.PlanFragment.Builder pfBuilder2 = BitControl.PlanFragment.newBuilder();
    pfBuilder2.setMemInitial(500000);
    BitControl.PlanFragment pf2 = pfBuilder2.build();

    FragmentContext fragmentContext1 = new FragmentContext(bitContext, pf1, null, functionRegistry);
    FragmentContext fragmentContext2 = new FragmentContext(bitContext, pf2, null, functionRegistry);

    // Get a few physical operators. Easiest way is to read a physical plan.
    PhysicalPlanReader planReader =
        new PhysicalPlanReader(
            config,
            config.getMapper(),
            CoordinationProtos.DrillbitEndpoint.getDefaultInstance(),
            storageRegistry);
    PhysicalPlan plan =
        planReader.readPhysicalPlan(
            Files.toString(FileUtils.getResourceAsFile(planFile), Charsets.UTF_8));
    List<PhysicalOperator> physicalOperators = plan.getSortedOperators();
    Iterator<PhysicalOperator> physicalOperatorIterator = physicalOperators.iterator();

    PhysicalOperator physicalOperator1 = physicalOperatorIterator.next();
    PhysicalOperator physicalOperator2 = physicalOperatorIterator.next();
    PhysicalOperator physicalOperator3 = physicalOperatorIterator.next();
    PhysicalOperator physicalOperator4 = physicalOperatorIterator.next();
    PhysicalOperator physicalOperator5 = physicalOperatorIterator.next();
    PhysicalOperator physicalOperator6 = physicalOperatorIterator.next();

    // Create some bogus Operator profile defs and stats to create operator contexts
    OpProfileDef def;
    OperatorStats stats;

    // Use some bogus operator type to create a new operator context.
    def =
        new OpProfileDef(
            physicalOperator1.getOperatorId(),
            UserBitShared.CoreOperatorType.MOCK_SUB_SCAN_VALUE,
            OperatorContext.getChildCount(physicalOperator1));
    stats = fragmentContext1.getStats().getOperatorStats(def, fragmentContext1.getAllocator());

    // Add a couple of Operator Contexts
    // Initial allocation = 1000000 bytes for all operators
    OperatorContext oContext11 = fragmentContext1.newOperatorContext(physicalOperator1, true);
    DrillBuf b11 = oContext11.getAllocator().buffer(1000000);

    OperatorContext oContext12 =
        fragmentContext1.newOperatorContext(physicalOperator2, stats, true);
    DrillBuf b12 = oContext12.getAllocator().buffer(500000);

    OperatorContext oContext21 = fragmentContext1.newOperatorContext(physicalOperator3, true);

    def =
        new OpProfileDef(
            physicalOperator4.getOperatorId(),
            UserBitShared.CoreOperatorType.TEXT_WRITER_VALUE,
            OperatorContext.getChildCount(physicalOperator4));
    stats = fragmentContext2.getStats().getOperatorStats(def, fragmentContext2.getAllocator());
    OperatorContext oContext22 =
        fragmentContext2.newOperatorContext(physicalOperator4, stats, true);
    DrillBuf b22 = oContext22.getAllocator().buffer(2000000);

    // New Fragment begins
    BitControl.PlanFragment.Builder pfBuilder3 = BitControl.PlanFragment.newBuilder();
    pfBuilder3.setMemInitial(1000000);
    BitControl.PlanFragment pf3 = pfBuilder3.build();

    FragmentContext fragmentContext3 = new FragmentContext(bitContext, pf3, null, functionRegistry);

    // New fragment starts an operator that allocates an amount within the limit
    def =
        new OpProfileDef(
            physicalOperator5.getOperatorId(),
            UserBitShared.CoreOperatorType.UNION_VALUE,
            OperatorContext.getChildCount(physicalOperator5));
    stats = fragmentContext3.getStats().getOperatorStats(def, fragmentContext3.getAllocator());
    OperatorContext oContext31 =
        fragmentContext3.newOperatorContext(physicalOperator5, stats, true);

    DrillBuf b31a = oContext31.getAllocator().buffer(200000);

    // Previously running operator completes
    b22.release();
    ((AutoCloseable) oContext22).close();

    // Fragment 3 asks for more and fails
    boolean outOfMem = false;
    try {
      DrillBuf b31b = oContext31.getAllocator().buffer(4400000);
      if (b31b != null) {
        b31b.release();
      } else {
        outOfMem = true;
      }
    } catch (Exception e) {
      outOfMem = true;
    }
    assertTrue(outOfMem);

    // Operator is Exempt from Fragment limits. Fragment 3 asks for more and succeeds
    outOfMem = false;
    OperatorContext oContext32 = fragmentContext3.newOperatorContext(physicalOperator6, false);
    DrillBuf b32 = null;
    try {
      b32 = oContext32.getAllocator().buffer(4400000);
    } catch (Exception e) {
      outOfMem = true;
    } finally {
      if (b32 != null) {
        b32.release();
      } else {
        outOfMem = true;
      }
      closeOp(oContext32);
    }
    assertFalse(outOfMem);

    b11.release();
    closeOp(oContext11);
    b12.release();
    closeOp(oContext12);
    closeOp(oContext21);
    b31a.release();
    closeOp(oContext31);

    fragmentContext1.close();
    fragmentContext2.close();
    fragmentContext3.close();

    bit.close();
    serviceSet.close();
  }
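The two out-of-memory checks in this test share the same try/catch/null-check shape, so a small helper could express the probe once. A hedged sketch (the IntFunction is a hypothetical stand-in for wrapping oContext.getAllocator()::buffer as an AutoCloseable):

  import java.util.function.IntFunction;

  final class AllocationProbe {
    // True when the allocation fails, either by throwing or by yielding null.
    static boolean allocationFails(IntFunction<AutoCloseable> allocate, int size) {
      try (AutoCloseable buf = allocate.apply(size)) {
        return buf == null;
      } catch (Exception e) {
        return true;
      }
    }
  }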