private void startForward(boolean inputFileChangeSenstive, String bigTableBucket)
    throws Exception {
  for (Operator<?> source : work.getAliasToWork().values()) {
    source.reset();
  }
  if (inputFileChangeSenstive) {
    execContext.setCurrentBigBucketFile(bigTableBucket);
  }
  for (Map.Entry<String, FetchOperator> entry : fetchOperators.entrySet()) {
    String alias = entry.getKey();
    FetchOperator fetchOp = entry.getValue();

    if (inputFileChangeSenstive) {
      fetchOp.clearFetchContext();
      setUpFetchOpContext(fetchOp, alias, bigTableBucket);
    }

    // get the root operator
    Operator<? extends OperatorDesc> forwardOp = work.getAliasToWork().get(alias);
    // walk through the operator tree
    while (!forwardOp.getDone()) {
      InspectableObject row = fetchOp.getNextRow();
      if (row == null) {
        break;
      }
      forwardOp.process(row.o, 0);
    }
    forwardOp.flush();
  }
  for (Operator<?> source : work.getAliasToWork().values()) {
    source.close(false);
  }
}
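// A usage sketch, not part of the original source: one way the two modes of
// startForward could be driven. The helper name and the "bigTableBucketFiles"
// parameter are hypothetical placeholders.
private void startForwardSketch(List<String> bigTableBucketFiles) throws Exception {
  if (bigTableBucketFiles == null || bigTableBucketFiles.isEmpty()) {
    // plain case: a single pass over every fetch operator
    startForward(false, null);
  } else {
    // bucket map join case: reset and re-run the pipeline once per big-table bucket file
    for (String bucketFile : bigTableBucketFiles) {
      startForward(true, bucketFile);
    }
  }
}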
private void handleSampling(DriverContext context, MapWork mWork, JobConf job, HiveConf conf)
    throws Exception {
  assert mWork.getAliasToWork().keySet().size() == 1;

  String alias = mWork.getAliases().get(0);
  Operator<?> topOp = mWork.getAliasToWork().get(alias);
  PartitionDesc partDesc = mWork.getAliasToPartnInfo().get(alias);

  ArrayList<String> paths = mWork.getPaths();
  ArrayList<PartitionDesc> parts = mWork.getPartitionDescs();

  List<Path> inputPaths = new ArrayList<Path>(paths.size());
  for (String path : paths) {
    inputPaths.add(new Path(path));
  }

  Path tmpPath = context.getCtx().getExternalTmpPath(inputPaths.get(0));
  Path partitionFile = new Path(tmpPath, ".partitions");
  ShimLoader.getHadoopShims().setTotalOrderPartitionFile(job, partitionFile);
  PartitionKeySampler sampler = new PartitionKeySampler();

  if (mWork.getSamplingType() == MapWork.SAMPLING_ON_PREV_MR) {
    console.printInfo("Use sampling data created in previous MR");
    // merge the sampling data produced by the previous MR job and build partition keys for the total sort
    for (Path path : inputPaths) {
      FileSystem fs = path.getFileSystem(job);
      for (FileStatus status : fs.globStatus(new Path(path, ".sampling*"))) {
        sampler.addSampleFile(status.getPath(), job);
      }
    }
  } else if (mWork.getSamplingType() == MapWork.SAMPLING_ON_START) {
    console.printInfo("Creating sampling data..");
    assert topOp instanceof TableScanOperator;
    TableScanOperator ts = (TableScanOperator) topOp;

    FetchWork fetchWork;
    if (!partDesc.isPartitioned()) {
      assert paths.size() == 1;
      fetchWork = new FetchWork(inputPaths.get(0), partDesc.getTableDesc());
    } else {
      fetchWork = new FetchWork(inputPaths, parts, partDesc.getTableDesc());
    }
    fetchWork.setSource(ts);

    // random sampling
    FetchOperator fetcher = PartitionKeySampler.createSampler(fetchWork, conf, job, ts);
    try {
      ts.initialize(conf, new ObjectInspector[] {fetcher.getOutputObjectInspector()});
      OperatorUtils.setChildrenCollector(ts.getChildOperators(), sampler);
      while (fetcher.pushRow()) { }
    } finally {
      fetcher.clearFetchContext();
    }
  } else {
    throw new IllegalArgumentException("Invalid sampling type " + mWork.getSamplingType());
  }
  sampler.writePartitionKeys(partitionFile, conf, job);
}
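// A call-site sketch, not from the original source: handleSampling is only
// worth attempting when the map work requests sampling (assumed here to be a
// positive sampling type) and the job has more than one reducer. The helper
// name and the fallback behaviour are hypothetical.
private void maybeSampleForTotalSort(DriverContext context, MapWork mWork, JobConf job,
    HiveConf conf) {
  if (mWork.getSamplingType() > 0 && job.getNumReduceTasks() > 1) {
    try {
      // writes the ".partitions" file consumed by the total-order partitioner
      handleSampling(context, mWork, job, conf);
    } catch (Exception e) {
      // hypothetical fallback: report and continue with default partitioning
      console.printInfo("Sampling failed, falling back to default partitioning: " + e.getMessage());
    }
  }
}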
private void initializeOperators(Map<FetchOperator, JobConf> fetchOpJobConfMap)
    throws HiveException {
  for (Map.Entry<String, Operator<? extends OperatorDesc>> entry : work.getAliasToWork().entrySet()) {
    LOG.debug("initializeOperators: " + entry.getKey() + ", children = "
        + entry.getValue().getChildOperators());
  }
  // create a fetch operator for each alias; these are used to initialize all the operators
  for (Map.Entry<String, FetchWork> entry : work.getAliasToFetchWork().entrySet()) {
    if (entry.getValue() == null) {
      continue;
    }
    JobConf jobClone = new JobConf(job);

    TableScanOperator ts = (TableScanOperator) work.getAliasToWork().get(entry.getKey());
    // push down projections
    ColumnProjectionUtils.appendReadColumns(
        jobClone, ts.getNeededColumnIDs(), ts.getNeededColumns());
    // push down filters
    HiveInputFormat.pushFilters(jobClone, ts);

    // create a fetch operator
    FetchOperator fetchOp = new FetchOperator(entry.getValue(), jobClone);
    fetchOpJobConfMap.put(fetchOp, jobClone);
    fetchOperators.put(entry.getKey(), fetchOp);
    l4j.info("fetchoperator for " + entry.getKey() + " created");
  }

  // initialize all forward operators
  for (Map.Entry<String, FetchOperator> entry : fetchOperators.entrySet()) {
    // get the forward op
    String alias = entry.getKey();
    Operator<? extends OperatorDesc> forwardOp = work.getAliasToWork().get(alias);

    // put the exec context into all the operators
    forwardOp.passExecContext(execContext);
    // all the operators need to be initialized before processing
    FetchOperator fetchOp = entry.getValue();
    JobConf jobConf = fetchOpJobConfMap.get(fetchOp);

    if (jobConf == null) {
      jobConf = job;
    }
    // initialize the forward operator with the fetch operator's output object inspector
    ObjectInspector objectInspector = fetchOp.getOutputObjectInspector();
    forwardOp.initialize(jobConf, new ObjectInspector[] {objectInspector});
    l4j.info("fetchoperator for " + entry.getKey() + " initialized");
  }
}
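// A sketch, not from the original source, of why each alias gets its own
// JobConf clone: projections and filters pushed for one table scan must not
// leak into another alias's scan, so the shared "job" is never mutated
// directly. The helper name and the two TableScanOperator parameters are
// hypothetical.
private void projectionIsolationSketch(TableScanOperator tsA, TableScanOperator tsB)
    throws HiveException {
  JobConf cloneA = new JobConf(job);
  ColumnProjectionUtils.appendReadColumns(cloneA, tsA.getNeededColumnIDs(), tsA.getNeededColumns());
  HiveInputFormat.pushFilters(cloneA, tsA);

  JobConf cloneB = new JobConf(job);
  ColumnProjectionUtils.appendReadColumns(cloneB, tsB.getNeededColumnIDs(), tsB.getNeededColumns());
  HiveInputFormat.pushFilters(cloneB, tsB);

  // cloneA and cloneB now carry alias-specific read columns and filter
  // expressions, while "job" itself stays untouched
}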
private void setUpFetchOpContext(FetchOperator fetchOp, String alias, String currentInputFile)
    throws Exception {
  BucketMapJoinContext bucketMatcherCxt = this.work.getBucketMapjoinContext();
  Class<? extends BucketMatcher> bucketMatcherCls = bucketMatcherCxt.getBucketMatcherClass();
  BucketMatcher bucketMatcher = ReflectionUtils.newInstance(bucketMatcherCls, null);
  bucketMatcher.setAliasBucketFileNameMapping(bucketMatcherCxt.getAliasBucketFileNameMapping());

  List<Path> aliasFiles = bucketMatcher.getAliasBucketFiles(currentInputFile,
      bucketMatcherCxt.getMapJoinBigTableAlias(), alias);
  fetchOp.setupContext(aliasFiles);
}
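// An illustration, with hypothetical alias and file names, of the mapping
// setUpFetchOpContext resolves for a bucket map join: each big-table bucket
// file pairs with the matching bucket file(s) of every small-table alias, e.g.
//
//   big-table file 000000_0  ->  { "b": [b/000000_0], "c": [c/000000_0] }
//   big-table file 000001_0  ->  { "b": [b/000001_0], "c": [c/000001_0] }
//
// The resolved files are installed on the alias's FetchOperator, which is why
// startForward clears the fetch context and calls this method whenever the
// current big-table bucket file changes.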