/** Initialization when invoked from QL. */
@Override
public void initialize(HiveConf conf, QueryPlan queryPlan, DriverContext driverContext) {
  super.initialize(conf, queryPlan, driverContext);
  job = new JobConf(conf, ExecDriver.class);

  // NOTE: initialize() is only called in non-local mode.
  // In non-local mode we need to move the SessionState files and jars into the JobConf.
  // In local mode, MapRedTask sets up the JobConf instead.
  //
  // "tmpfiles" and "tmpjars" are set by ExecDriver.execute(), which is called in both
  // local and non-local mode.
  String addedFiles = Utilities.getResourceFiles(job, SessionState.ResourceType.FILE);
  if (StringUtils.isNotBlank(addedFiles)) {
    HiveConf.setVar(job, ConfVars.HIVEADDEDFILES, addedFiles);
  }
  String addedJars = Utilities.getResourceFiles(job, SessionState.ResourceType.JAR);
  if (StringUtils.isNotBlank(addedJars)) {
    HiveConf.setVar(job, ConfVars.HIVEADDEDJARS, addedJars);
  }
  String addedArchives = Utilities.getResourceFiles(job, SessionState.ResourceType.ARCHIVE);
  if (StringUtils.isNotBlank(addedArchives)) {
    HiveConf.setVar(job, ConfVars.HIVEADDEDARCHIVES, addedArchives);
  }
  conf.stripHiddenConfigurations(job);
  this.jobExecHelper = new HadoopJobExecHelper(job, console, this, this);
}
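// --- Hedged sketch (not part of the original code) ---------------------------------------
// A minimal standalone illustration of the conf.stripHiddenConfigurations(job) step above,
// assuming the default hidden list covers the metastore password key used here; the class
// name and the chosen key are illustrative assumptions, not taken from the snippet.
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.mapred.JobConf;

public class StripHiddenSketch {
  public static void main(String[] args) {
    HiveConf conf = new HiveConf();
    conf.set("javax.jdo.option.ConnectionPassword", "secret");

    JobConf job = new JobConf(conf);
    conf.stripHiddenConfigurations(job);

    // If the key is on the hidden list, its value should now be blanked or removed
    // before the JobConf is handed off to MapReduce.
    System.out.println("stripped value: [" + job.get("javax.jdo.option.ConnectionPassword") + "]");
  }
}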
/**
 * Handle partial scan command.
 *
 * <p>It is composed of PartialScanTask followed by StatsTask.
 */
private void handlePartialScanCommand(
    TableScanOperator tableScan,
    ParseContext parseContext,
    StatsWork statsWork,
    GenTezProcContext context,
    Task<StatsWork> statsTask)
    throws SemanticException {
  String aggregationKey = tableScan.getConf().getStatsAggPrefix();
  StringBuilder aggregationKeyBuffer = new StringBuilder(aggregationKey);
  List<Path> inputPaths =
      GenMapRedUtils.getInputPathsForPartialScan(tableScan, aggregationKeyBuffer);
  aggregationKey = aggregationKeyBuffer.toString();

  // scan work
  PartialScanWork scanWork = new PartialScanWork(inputPaths);
  scanWork.setMapperCannotSpanPartns(true);
  scanWork.setAggKey(aggregationKey);
  scanWork.setStatsTmpDir(tableScan.getConf().getTmpStatsDir(), parseContext.getConf());

  // stats work
  statsWork.setPartialScanAnalyzeCommand(true);

  // partial scan task
  DriverContext driverCxt = new DriverContext();
  Task<PartialScanWork> partialScanTask = TaskFactory.get(scanWork, parseContext.getConf());
  partialScanTask.initialize(parseContext.getConf(), null, driverCxt);
  partialScanTask.setWork(scanWork);
  statsWork.setSourceTask(partialScanTask);

  // task dependency
  context.rootTasks.remove(context.currentTask);
  context.rootTasks.add(partialScanTask);
  partialScanTask.addDependentTask(statsTask);
}
/**
 * Handle partial scan command. It is composed of PartialScanTask followed by StatsTask.
 *
 * @param op the table scan operator being analyzed
 * @param ctx the MapReduce processing context
 * @param parseCtx the parse context
 * @param currTask the current root task
 * @param statsWork the stats work to mark as a partial scan analyze command
 * @param statsTask the stats task that depends on the partial scan task
 * @throws SemanticException
 */
private void handlePartialScanCommand(
    TableScanOperator op,
    GenMRProcContext ctx,
    ParseContext parseCtx,
    Task<? extends Serializable> currTask,
    StatsWork statsWork,
    Task<StatsWork> statsTask)
    throws SemanticException {
  String aggregationKey = op.getConf().getStatsAggPrefix();
  StringBuffer aggregationKeyBuffer = new StringBuffer(aggregationKey);
  List<Path> inputPaths = GenMapRedUtils.getInputPathsForPartialScan(op, aggregationKeyBuffer);
  aggregationKey = aggregationKeyBuffer.toString();

  // scan work
  PartialScanWork scanWork = new PartialScanWork(inputPaths);
  scanWork.setMapperCannotSpanPartns(true);
  scanWork.setAggKey(aggregationKey);

  // stats work
  statsWork.setPartialScanAnalyzeCommand(true);

  // partial scan task
  DriverContext driverCxt = new DriverContext();
  Task<PartialScanWork> psTask = TaskFactory.get(scanWork, parseCtx.getConf());
  psTask.initialize(parseCtx.getConf(), null, driverCxt);
  psTask.setWork(scanWork);

  // task dependency
  ctx.getRootTasks().remove(currTask);
  ctx.getRootTasks().add(psTask);
  psTask.addDependentTask(statsTask);
  List<Task<? extends Serializable>> parentTasks = new ArrayList<Task<? extends Serializable>>();
  parentTasks.add(psTask);
  statsTask.setParentTasks(parentTasks);
}
@Override
public void initialize(
    HiveConf conf, QueryPlan queryPlan, DriverContext ctx, CompilationOpContext opContext) {
  super.initialize(conf, queryPlan, ctx, opContext);
  work.initializeForFetch(opContext);

  try {
    // Create a file system handle
    JobConf job = new JobConf(conf);

    Operator<?> source = work.getSource();
    if (source instanceof TableScanOperator) {
      TableScanOperator ts = (TableScanOperator) source;
      // push down projections
      ColumnProjectionUtils.appendReadColumns(
          job, ts.getNeededColumnIDs(), ts.getNeededColumns());
      // push down filters
      HiveInputFormat.pushFilters(job, ts);
    }
    sink = work.getSink();
    fetch = new FetchOperator(work, job, source, getVirtualColumns(source));
    source.initialize(conf, new ObjectInspector[] {fetch.getOutputObjectInspector()});
    totalRows = 0;
    ExecMapper.setDone(false);
  } catch (Exception e) {
    // Bail out ungracefully - we should never hit
    // this here - but would have hit it in SemanticAnalyzer
    LOG.error(StringUtils.stringifyException(e));
    throw new RuntimeException(e);
  }
}
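// --- Hedged sketch (not part of the original code) ---------------------------------------
// A minimal illustration of the projection pushdown performed above, mirroring the
// ColumnProjectionUtils.appendReadColumns(job, ids, names) call. The column ids/names and
// the conf key names printed below are assumptions about the Hive version in use.
import java.util.Arrays;

import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
import org.apache.hadoop.mapred.JobConf;

public class ProjectionPushdownSketch {
  public static void main(String[] args) {
    JobConf job = new JobConf();

    // Pretend the TableScanOperator only needs columns 0 and 2, named "id" and "name".
    ColumnProjectionUtils.appendReadColumns(job, Arrays.asList(0, 2), Arrays.asList("id", "name"));

    // Input formats and readers consult these properties to skip unneeded columns.
    System.out.println("read column ids:   " + job.get("hive.io.file.readcolumn.ids"));
    System.out.println("read column names: " + job.get("hive.io.file.readcolumn.names"));
  }
}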
@Override
public void initialize(
    HiveConf conf, QueryPlan queryPlan, DriverContext driverContext,
    CompilationOpContext opContext) {
  super.initialize(conf, queryPlan, driverContext, opContext);
  job = new JobConf(conf, ExecDriver.class);
  execContext = new ExecMapperContext(job);
  // we don't use the HadoopJobExecHooks for local tasks
  this.jobExecHelper = new HadoopJobExecHelper(job, console, this, null);
}
@Override
public void initialize(HiveConf conf, QueryPlan queryPlan, DriverContext driverContext) {
  super.initialize(conf, queryPlan, driverContext);
  job = new JobConf(conf, BlockMergeTask.class);
  jobExecHelper = new HadoopJobExecHelper(job, this.console, this, this);
}