@Override
public void setup(OperatorContext context, OutputMutator output) throws ExecutionSetupException {
  this.output = output;
  this.context = context;
  try {
    KuduTable table = client.openTable(scanSpec.getTableName());
    KuduScannerBuilder builder = client.newScannerBuilder(table);
    if (!isStarQuery()) {
      List<String> colNames = Lists.newArrayList();
      for (SchemaPath p : this.getColumns()) {
        colNames.add(p.getAsUnescapedPath());
      }
      builder.setProjectedColumnNames(colNames);
    }
    context.getStats().startWait();
    try {
      scanner = builder
          .lowerBoundPartitionKeyRaw(scanSpec.getStartKey())
          .exclusiveUpperBoundPartitionKeyRaw(scanSpec.getEndKey())
          .build();
    } finally {
      context.getStats().stopWait();
    }
  } catch (Exception e) {
    throw new ExecutionSetupException(e);
  }
}
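// A minimal standalone sketch of the scan pattern above using the Kudu Java client directly
// (not Drill code). The master address, table name, and projected columns are placeholders.
import java.util.Arrays;
import org.apache.kudu.client.KuduClient;
import org.apache.kudu.client.KuduScanner;
import org.apache.kudu.client.KuduTable;
import org.apache.kudu.client.RowResult;
import org.apache.kudu.client.RowResultIterator;

public class KuduScanSketch {
  public static void main(String[] args) throws Exception {
    try (KuduClient client = new KuduClient.KuduClientBuilder("kudu-master:7051").build()) {
      KuduTable table = client.openTable("example_table");
      // Project only the needed columns, mirroring the non-star-query branch in setup().
      KuduScanner scanner = client.newScannerBuilder(table)
          .setProjectedColumnNames(Arrays.asList("id", "name"))
          .build();
      while (scanner.hasMoreRows()) {
        RowResultIterator rows = scanner.nextRows(); // one server round trip per call
        while (rows.hasNext()) {
          RowResult row = rows.next();
          System.out.println(row.rowToString());
        }
      }
      scanner.close();
    }
  }
}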
/**
 * Creates a copy of a record batch, converting any fields as necessary to coerce it into the
 * provided schema.
 *
 * @param in the incoming batch to copy
 * @param toSchema the schema the returned container must conform to
 * @param context operator context used to allocate any new vectors
 * @return a new container whose fields match {@code toSchema}
 */
public static VectorContainer coerceContainer(
    VectorAccessible in, BatchSchema toSchema, OperatorContext context) {
  int recordCount = in.getRecordCount();
  Map<SchemaPath, ValueVector> vectorMap = Maps.newHashMap();
  for (VectorWrapper w : in) {
    ValueVector v = w.getValueVector();
    vectorMap.put(v.getField().getPath(), v);
  }

  VectorContainer c = new VectorContainer(context);

  for (MaterializedField field : toSchema) {
    ValueVector v = vectorMap.remove(field.getPath());
    if (v != null) {
      int valueCount = v.getAccessor().getValueCount();
      TransferPair tp = v.getTransferPair();
      tp.transfer();
      if (v.getField().getType().getMinorType().equals(field.getType().getMinorType())) {
        if (field.getType().getMinorType() == MinorType.UNION) {
          UnionVector u = (UnionVector) tp.getTo();
          for (MinorType t : field.getType().getSubTypeList()) {
            if (u.getField().getType().getSubTypeList().contains(t)) {
              continue;
            }
            u.addSubType(t);
          }
        }
        c.add(tp.getTo());
      } else {
        ValueVector newVector = TypeHelper.getNewVector(field, context.getAllocator());
        Preconditions.checkState(
            field.getType().getMinorType() == MinorType.UNION,
            "Can only convert vector to Union vector");
        UnionVector u = (UnionVector) newVector;
        u.addVector(tp.getTo());
        MinorType type = v.getField().getType().getMinorType();
        for (int i = 0; i < valueCount; i++) {
          u.getMutator().setType(i, type);
        }
        for (MinorType t : field.getType().getSubTypeList()) {
          if (u.getField().getType().getSubTypeList().contains(t)) {
            continue;
          }
          u.addSubType(t);
        }
        u.getMutator().setValueCount(valueCount);
        c.add(u);
      }
    } else {
      // Field is absent from the incoming batch: add an empty vector of the right type.
      v = TypeHelper.getNewVector(field, context.getAllocator());
      v.allocateNew();
      v.getMutator().setValueCount(recordCount);
      c.add(v);
    }
  }
  c.buildSchema(in.getSchema().getSelectionVectorMode());
  c.setRecordCount(recordCount);
  Preconditions.checkState(vectorMap.size() == 0, "Leftover vector from incoming batch");
  return c;
}
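// A hedged call-site sketch for coerceContainer; 'incoming', 'targetSchema', and 'oContext'
// are assumed to be in scope and are illustrative, not part of the method above.
VectorContainer coerced = coerceContainer(incoming, targetSchema, oContext);
// Fields present in 'incoming' are transferred (and widened to a UNION vector when the
// target schema asks for one); fields absent from 'incoming' come back as freshly
// allocated empty vectors sized to the batch's record count.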
@Override
public int next() {
  int rowCount = 0;
  try {
    while (iterator == null || !iterator.hasNext()) {
      if (!scanner.hasMoreRows()) {
        iterator = null;
        return 0;
      }
      context.getStats().startWait();
      try {
        iterator = scanner.nextRows();
      } finally {
        context.getStats().stopWait();
      }
    }
    for (; rowCount < TARGET_RECORD_COUNT && iterator.hasNext(); rowCount++) {
      addRowResult(iterator.next(), rowCount);
    }
  } catch (Exception ex) {
    throw new RuntimeException(ex);
  }
  for (ProjectedColumnInfo pci : projectedCols) {
    pci.vv.getMutator().setValueCount(rowCount);
  }
  return rowCount;
}
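// The refill-then-drain control flow in next(), generalized into a self-contained sketch:
// fetch a new iterator whenever the current one is exhausted, and stop once 'target' items
// have been delivered or the source reports no more batches. All names are illustrative,
// and unlike next() this version also refills mid-call when a batch runs dry.
import java.util.Iterator;
import java.util.function.BooleanSupplier;
import java.util.function.Consumer;
import java.util.function.Supplier;

final class BatchDrain {
  static <T> int drain(BooleanSupplier hasMoreBatches, Supplier<Iterator<T>> nextBatch,
                       int target, Consumer<T> sink) {
    Iterator<T> it = null;
    int count = 0;
    while (count < target) {
      if (it == null || !it.hasNext()) {
        if (!hasMoreBatches.getAsBoolean()) {
          break;              // source exhausted: return a short (possibly zero) count
        }
        it = nextBatch.get(); // blocking fetch, analogous to scanner.nextRows() above
        continue;             // a freshly fetched batch may itself be empty
      }
      sink.accept(it.next());
      count++;
    }
    return count;
  }
}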
public BatchGroup(
    VectorContainer container, FileSystem fs, String path, OperatorContext context) {
  currentContainer = container;
  this.fs = fs;
  this.path = new Path(path);
  this.allocator = context.getAllocator();
  this.context = context;
}
/**
 * Performs the initial setup required for the record reader. Initializes the input stream,
 * handling of the output record batch, and the actual reader to be used.
 *
 * @param context operator context from which buffers will be allocated and managed
 * @param outputMutator used to create the schema in the output record batch
 * @throws ExecutionSetupException
 */
@Override
public void setup(OperatorContext context, OutputMutator outputMutator)
    throws ExecutionSetupException {
  oContext = context;
  readBuffer = context.getManagedBuffer(READ_BUFFER);
  whitespaceBuffer = context.getManagedBuffer(WHITE_SPACE_BUFFER);

  // setup Output, Input, and Reader
  try {
    TextOutput output = null;
    TextInput input = null;
    InputStream stream = null;

    // setup Output using OutputMutator
    if (settings.isHeaderExtractionEnabled()) {
      // extract the header and use it to set up a set of VarCharVectors
      String[] fieldNames = extractHeader();
      output = new FieldVarCharOutput(outputMutator, fieldNames, getColumns(), isStarQuery());
    } else {
      // simply use a RepeatedVarCharVector
      output = new RepeatedVarCharOutput(outputMutator, getColumns(), isStarQuery());
    }

    // setup Input using InputStream
    stream = dfs.openPossiblyCompressedStream(split.getPath());
    input = new TextInput(
        settings, stream, readBuffer, split.getStart(), split.getStart() + split.getLength());

    // setup Reader using Input and Output
    reader = new TextReader(settings, input, output, whitespaceBuffer);
    reader.start();
  } catch (SchemaChangeException | IOException e) {
    throw new ExecutionSetupException(
        String.format("Failure while setting up text reader for file %s", split.getPath()), e);
  } catch (IllegalArgumentException e) {
    throw UserException.dataReadError(e)
        .addContext("File Path", split.getPath().toString())
        .build(logger);
  }
}
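// What a "possibly compressed" open typically amounts to, sketched with the plain Hadoop
// codec factory; Drill's DrillFileSystem layers its own bookkeeping on the same idea, so
// treat this as an illustration rather than its implementation.
import java.io.IOException;
import java.io.InputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;

final class CompressedOpenSketch {
  // Opens a file, decompressing transparently when the extension maps to a known codec.
  static InputStream open(FileSystem fs, Path path, Configuration conf) throws IOException {
    InputStream raw = fs.open(path);
    CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(path);
    return codec == null ? raw : codec.createInputStream(raw);
  }
}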
/**
 * Extracts the header from the text file. Currently the header is assumed to be the first
 * line when headerExtractionEnabled is set to true.
 * TODO: enhance to support more common header patterns
 *
 * @return field name strings
 */
private String[] extractHeader()
    throws SchemaChangeException, IOException, ExecutionSetupException {
  assert (settings.isHeaderExtractionEnabled());
  assert (oContext != null);

  // don't skip the header in case skipFirstLine is set to true
  settings.setSkipFirstLine(false);

  // setup Output using OutputMutator
  // we should use a separate output mutator to avoid reshaping query output with header data
  HeaderOutputMutator hOutputMutator = new HeaderOutputMutator();
  TextOutput hOutput = new RepeatedVarCharOutput(hOutputMutator, getColumns(), true);
  this.allocate(hOutputMutator.fieldVectorMap);

  // setup Input using InputStream
  // we should read the file header irrespective of the split given to this reader
  InputStream hStream = dfs.openPossiblyCompressedStream(split.getPath());
  TextInput hInput = new TextInput(
      settings, hStream, oContext.getManagedBuffer(READ_BUFFER), 0, split.getLength());

  // setup Reader using Input and Output
  this.reader =
      new TextReader(settings, hInput, hOutput, oContext.getManagedBuffer(WHITE_SPACE_BUFFER));
  reader.start();

  // extract the first row only
  reader.parseNext();

  // grab the field names from output
  String[] fieldNames = ((RepeatedVarCharOutput) hOutput).getTextOutput();

  // clean up and set to skip the first line next time we read input
  reader.close();
  hOutputMutator.close();
  settings.setSkipFirstLine(true);
  return fieldNames;
}
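// The "first line is the header" assumption, reduced to a self-contained sketch that skips
// Drill's vector machinery entirely; the path and delimiter parameters are placeholders.
import java.io.BufferedReader;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.regex.Pattern;

final class HeaderSketch {
  static String[] readHeader(String path, char delimiter) throws IOException {
    try (BufferedReader r = Files.newBufferedReader(Paths.get(path))) {
      String first = r.readLine();
      if (first == null) {
        return new String[0]; // empty file: no header to extract
      }
      return first.split(Pattern.quote(String.valueOf(delimiter)));
    }
  }
}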
public DrillBuf reallocIfNeeded(int size) {
  if (this.capacity() >= size) {
    return this;
  }
  if (context != null) {
    return context.replace(this, size);
  } else if (fContext != null) {
    return fContext.replace(this, size);
  } else if (bufManager != null) {
    return bufManager.replace(this, size);
  } else {
    throw new UnsupportedOperationException(
        "Realloc is only available in the context of an operator's UDFs");
  }
}
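// Hypothetical call site for reallocIfNeeded; 'buf', 'requiredBytes', and 'payload' are
// illustrative names. The key point: a *different* buffer may be returned, so the result
// must be reassigned; writing through the old reference after a realloc targets a stale buffer.
buf = buf.reallocIfNeeded(requiredBytes);
buf.setBytes(0, payload);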
private void updateStats() {
  final OperatorStats stats = operatorContext.getStats();
  stats.setLongStat(Metric.NUM_DICT_PAGE_LOADS, parquetReaderStats.numDictPageLoads.longValue());
  // lowercase 'l' below mirrors the Metric constant's declared name
  stats.setLongStat(Metric.NUM_DATA_PAGE_lOADS, parquetReaderStats.numDataPageLoads.longValue());
  stats.setLongStat(Metric.NUM_DATA_PAGES_DECODED, parquetReaderStats.numDataPagesDecoded.longValue());
  stats.setLongStat(Metric.NUM_DICT_PAGES_DECOMPRESSED, parquetReaderStats.numDictPagesDecompressed.longValue());
  stats.setLongStat(Metric.NUM_DATA_PAGES_DECOMPRESSED, parquetReaderStats.numDataPagesDecompressed.longValue());
  stats.setLongStat(Metric.TOTAL_DICT_PAGE_READ_BYTES, parquetReaderStats.totalDictPageReadBytes.longValue());
  stats.setLongStat(Metric.TOTAL_DATA_PAGE_READ_BYTES, parquetReaderStats.totalDataPageReadBytes.longValue());
  stats.setLongStat(Metric.TOTAL_DICT_DECOMPRESSED_BYTES, parquetReaderStats.totalDictDecompressedBytes.longValue());
  stats.setLongStat(Metric.TOTAL_DATA_DECOMPRESSED_BYTES, parquetReaderStats.totalDataDecompressedBytes.longValue());
  stats.setLongStat(Metric.TIME_DICT_PAGE_LOADS, parquetReaderStats.timeDictPageLoads.longValue());
  stats.setLongStat(Metric.TIME_DATA_PAGE_LOADS, parquetReaderStats.timeDataPageLoads.longValue());
  stats.setLongStat(Metric.TIME_DATA_PAGE_DECODE, parquetReaderStats.timeDataPageDecode.longValue());
  stats.setLongStat(Metric.TIME_DICT_PAGE_DECODE, parquetReaderStats.timeDictPageDecode.longValue());
  stats.setLongStat(Metric.TIME_DICT_PAGES_DECOMPRESSED, parquetReaderStats.timeDictPagesDecompressed.longValue());
  stats.setLongStat(Metric.TIME_DATA_PAGES_DECOMPRESSED, parquetReaderStats.timeDataPagesDecompressed.longValue());
  stats.setLongStat(Metric.TIME_DISK_SCAN_WAIT, parquetReaderStats.timeDiskScanWait.longValue());
  stats.setLongStat(Metric.TIME_DISK_SCAN, parquetReaderStats.timeDiskScan.longValue());
}
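// The repetition above invites a table-driven form. A hedged sketch, assuming the counters
// are AtomicLong-like (they expose longValue()) and that setLongStat accepts the Metric
// enum; the map layout here is illustrative, not Drill's API.
private void updateStatsTableDriven() {
  java.util.Map<Metric, java.util.concurrent.atomic.AtomicLong> entries =
      new java.util.LinkedHashMap<>();
  entries.put(Metric.NUM_DICT_PAGE_LOADS, parquetReaderStats.numDictPageLoads);
  entries.put(Metric.NUM_DATA_PAGE_lOADS, parquetReaderStats.numDataPageLoads);
  // ... remaining metric/counter pairs registered the same way ...
  for (java.util.Map.Entry<Metric, java.util.concurrent.atomic.AtomicLong> e :
      entries.entrySet()) {
    operatorContext.getStats().setLongStat(e.getKey(), e.getValue().longValue());
  }
}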
@Test
public void testAllocators() throws Exception {
  // Setup a drillbit (initializes a root allocator)
  final DrillConfig config = DrillConfig.create(TEST_CONFIGURATIONS);
  final RemoteServiceSet serviceSet = RemoteServiceSet.getLocalServiceSet();
  final Drillbit bit = new Drillbit(config, serviceSet);
  bit.run();
  final DrillbitContext bitContext = bit.getContext();
  FunctionImplementationRegistry functionRegistry =
      bitContext.getFunctionImplementationRegistry();
  StoragePluginRegistry storageRegistry = new StoragePluginRegistry(bitContext);

  // Create a few Fragment Contexts
  BitControl.PlanFragment.Builder pfBuilder1 = BitControl.PlanFragment.newBuilder();
  pfBuilder1.setMemInitial(1500000);
  BitControl.PlanFragment pf1 = pfBuilder1.build();
  BitControl.PlanFragment.Builder pfBuilder2 = BitControl.PlanFragment.newBuilder();
  pfBuilder2.setMemInitial(500000);
  BitControl.PlanFragment pf2 = pfBuilder2.build();

  FragmentContext fragmentContext1 = new FragmentContext(bitContext, pf1, null, functionRegistry);
  FragmentContext fragmentContext2 = new FragmentContext(bitContext, pf2, null, functionRegistry);

  // Get a few physical operators. Easiest way is to read a physical plan.
  PhysicalPlanReader planReader = new PhysicalPlanReader(
      config,
      config.getMapper(),
      CoordinationProtos.DrillbitEndpoint.getDefaultInstance(),
      storageRegistry);
  PhysicalPlan plan = planReader.readPhysicalPlan(
      Files.toString(FileUtils.getResourceAsFile(planFile), Charsets.UTF_8));
  List<PhysicalOperator> physicalOperators = plan.getSortedOperators();
  Iterator<PhysicalOperator> physicalOperatorIterator = physicalOperators.iterator();

  PhysicalOperator physicalOperator1 = physicalOperatorIterator.next();
  PhysicalOperator physicalOperator2 = physicalOperatorIterator.next();
  PhysicalOperator physicalOperator3 = physicalOperatorIterator.next();
  PhysicalOperator physicalOperator4 = physicalOperatorIterator.next();
  PhysicalOperator physicalOperator5 = physicalOperatorIterator.next();
  PhysicalOperator physicalOperator6 = physicalOperatorIterator.next();

  // Create some bogus Operator profile defs and stats to create operator contexts
  OpProfileDef def;
  OperatorStats stats;

  // Use some bogus operator type to create a new operator context.
  def = new OpProfileDef(
      physicalOperator1.getOperatorId(),
      UserBitShared.CoreOperatorType.MOCK_SUB_SCAN_VALUE,
      OperatorContext.getChildCount(physicalOperator1));
  stats = fragmentContext1.getStats().getOperatorStats(def, fragmentContext1.getAllocator());

  // Add a couple of Operator Contexts
  // Initial allocation = 1000000 bytes for all operators
  OperatorContext oContext11 = fragmentContext1.newOperatorContext(physicalOperator1, true);
  DrillBuf b11 = oContext11.getAllocator().buffer(1000000);

  OperatorContext oContext12 =
      fragmentContext1.newOperatorContext(physicalOperator2, stats, true);
  DrillBuf b12 = oContext12.getAllocator().buffer(500000);

  OperatorContext oContext21 = fragmentContext1.newOperatorContext(physicalOperator3, true);

  def = new OpProfileDef(
      physicalOperator4.getOperatorId(),
      UserBitShared.CoreOperatorType.TEXT_WRITER_VALUE,
      OperatorContext.getChildCount(physicalOperator4));
  stats = fragmentContext2.getStats().getOperatorStats(def, fragmentContext2.getAllocator());
  OperatorContext oContext22 =
      fragmentContext2.newOperatorContext(physicalOperator4, stats, true);
  DrillBuf b22 = oContext22.getAllocator().buffer(2000000);

  // New Fragment begins
  BitControl.PlanFragment.Builder pfBuilder3 = BitControl.PlanFragment.newBuilder();
  pfBuilder3.setMemInitial(1000000);
  BitControl.PlanFragment pf3 = pfBuilder3.build();

  FragmentContext fragmentContext3 =
      new FragmentContext(bitContext, pf3, null, functionRegistry);

  // New fragment starts an operator that allocates an amount within the limit
  def = new OpProfileDef(
      physicalOperator5.getOperatorId(),
      UserBitShared.CoreOperatorType.UNION_VALUE,
      OperatorContext.getChildCount(physicalOperator5));
  stats = fragmentContext3.getStats().getOperatorStats(def, fragmentContext3.getAllocator());
  OperatorContext oContext31 =
      fragmentContext3.newOperatorContext(physicalOperator5, stats, true);

  DrillBuf b31a = oContext31.getAllocator().buffer(200000);

  // Previously running operator completes
  b22.release();
  ((AutoCloseable) oContext22).close();

  // Fragment 3 asks for more and fails
  boolean outOfMem = false;
  try {
    DrillBuf b31b = oContext31.getAllocator().buffer(4400000);
    if (b31b != null) {
      b31b.release();
    } else {
      outOfMem = true;
    }
  } catch (Exception e) {
    outOfMem = true;
  }
  assertEquals(true, outOfMem);

  // Operator is exempt from fragment limits. Fragment 3 asks for more and succeeds
  outOfMem = false;
  OperatorContext oContext32 = fragmentContext3.newOperatorContext(physicalOperator6, false);
  DrillBuf b32 = null;
  try {
    b32 = oContext32.getAllocator().buffer(4400000);
  } catch (Exception e) {
    outOfMem = true;
  } finally {
    if (b32 != null) {
      b32.release();
    } else {
      outOfMem = true;
    }
    closeOp(oContext32);
  }
  assertEquals(false, outOfMem);

  b11.release();
  closeOp(oContext11);
  b12.release();
  closeOp(oContext12);
  closeOp(oContext21);
  b31a.release();
  closeOp(oContext31);

  fragmentContext1.close();
  fragmentContext2.close();
  fragmentContext3.close();

  bit.close();
  serviceSet.close();
}
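// The closeOp helper used throughout the test is not shown in this excerpt; a minimal
// sketch consistent with the explicit ((AutoCloseable) oContext22).close() cast above:
private static void closeOp(OperatorContext c) throws Exception {
  ((AutoCloseable) c).close();
}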