/**
 * Builds the partition-key file that the job uses for total-order partitioning.
 *
 * <p>The keys come either from {@code .sampling*} files found under each input path
 * ({@code MapWork.SAMPLING_ON_PREV_MR}) or from rows fetched through the single table scan
 * of the map work ({@code MapWork.SAMPLING_ON_START}). The resulting keys are written to a
 * {@code .partitions} file under an external tmp path derived from the first input path.
 *
 * @param context query context used to obtain the external tmp path
 * @param mWork map work; must contain exactly one alias (checked by assertion)
 * @param job job configuration that receives the partition file setting
 * @throws IllegalArgumentException if the sampling type is neither of the two supported values
 * @throws Exception on any failure while reading samples or writing the partition keys
 */
private void handleSampling(Context context, MapWork mWork, JobConf job) throws Exception {
  assert mWork.getAliasToWork().keySet().size() == 1;

  String soleAlias = mWork.getAliases().get(0);
  Operator<?> rootOp = mWork.getAliasToWork().get(soleAlias);
  PartitionDesc aliasPartDesc = mWork.getAliasToPartnInfo().get(soleAlias);

  ArrayList<String> paths = mWork.getPaths();
  ArrayList<PartitionDesc> parts = mWork.getPartitionDescs();

  List<Path> inputPaths = new ArrayList<Path>(paths.size());
  for (String rawPath : paths) {
    inputPaths.add(new Path(rawPath));
  }

  Path partitionFile =
      new Path(context.getExternalTmpPath(inputPaths.get(0)), ".partitions");
  ShimLoader.getHadoopShims().setTotalOrderPartitionFile(job, partitionFile);

  PartitionKeySampler sampler = new PartitionKeySampler();

  if (mWork.getSamplingType() == MapWork.SAMPLING_ON_PREV_MR) {
    console.printInfo("Use sampling data created in previous MR");
    // Merge the sample files left behind by the previous MR job; they become the
    // source of the partition keys for the total sort.
    for (Path inputPath : inputPaths) {
      FileSystem fs = inputPath.getFileSystem(job);
      FileStatus[] sampleFiles = fs.globStatus(new Path(inputPath, ".sampling*"));
      for (FileStatus sampleFile : sampleFiles) {
        sampler.addSampleFile(sampleFile.getPath(), job);
      }
    }
  } else if (mWork.getSamplingType() == MapWork.SAMPLING_ON_START) {
    console.printInfo("Creating sampling data..");
    assert rootOp instanceof TableScanOperator;
    TableScanOperator ts = (TableScanOperator) rootOp;

    FetchWork fetchWork;
    if (aliasPartDesc.isPartitioned()) {
      fetchWork = new FetchWork(inputPaths, parts, aliasPartDesc.getTableDesc());
    } else {
      assert paths.size() == 1;
      fetchWork = new FetchWork(inputPaths.get(0), aliasPartDesc.getTableDesc());
    }
    fetchWork.setSource(ts);

    // Random sampling: push rows through the table scan with the sampler as collector.
    FetchOperator fetcher = PartitionKeySampler.createSampler(fetchWork, job, ts);
    try {
      ObjectInspector[] inspectors =
          new ObjectInspector[] {fetcher.getOutputObjectInspector()};
      ts.initialize(job, inspectors);
      OperatorUtils.setChildrenCollector(ts.getChildOperators(), sampler);
      boolean moreRows;
      do {
        moreRows = fetcher.pushRow();
      } while (moreRows);
    } finally {
      fetcher.clearFetchContext();
    }
  } else {
    throw new IllegalArgumentException("Invalid sampling type " + mWork.getSamplingType());
  }

  sampler.writePartitionKeys(partitionFile, job);
}
/** * Given a Hive Configuration object - generate a command line fragment for passing such * configuration information to ExecDriver. */ public static String generateCmdLine(HiveConf hconf, Context ctx) throws IOException { HiveConf tempConf = new HiveConf(); Path hConfFilePath = new Path(ctx.getLocalTmpPath(), JOBCONF_FILENAME); OutputStream out = null; Properties deltaP = hconf.getChangedProperties(); boolean hadoopLocalMode = ShimLoader.getHadoopShims().isLocalMode(hconf); String hadoopSysDir = "mapred.system.dir"; String hadoopWorkDir = "mapred.local.dir"; for (Object one : deltaP.keySet()) { String oneProp = (String) one; if (hadoopLocalMode && (oneProp.equals(hadoopSysDir) || oneProp.equals(hadoopWorkDir))) { continue; } tempConf.set(oneProp, hconf.get(oneProp)); } // Multiple concurrent local mode job submissions can cause collisions in // working dirs and system dirs // Workaround is to rename map red working dir to a temp dir in such cases if (hadoopLocalMode) { tempConf.set(hadoopSysDir, hconf.get(hadoopSysDir) + "/" + Utilities.randGen.nextInt()); tempConf.set(hadoopWorkDir, hconf.get(hadoopWorkDir) + "/" + Utilities.randGen.nextInt()); } try { out = FileSystem.getLocal(hconf).create(hConfFilePath); tempConf.writeXml(out); } finally { if (out != null) { out.close(); } } return " -jobconffile " + hConfFilePath.toString(); }
/** * Process the FileSink operator to generate a MoveTask if necessary. * * @param nd current FileSink operator * @param stack parent operators * @param opProcCtx * @param chDir whether the operator should be first output to a tmp dir and then merged to the * final dir later * @return the final file name to which the FileSinkOperator should store. * @throws SemanticException */ private String processFS(Node nd, Stack<Node> stack, NodeProcessorCtx opProcCtx, boolean chDir) throws SemanticException { // Is it the dummy file sink after the mapjoin FileSinkOperator fsOp = (FileSinkOperator) nd; if ((fsOp.getParentOperators().size() == 1) && (fsOp.getParentOperators().get(0) instanceof MapJoinOperator)) { return null; } GenMRProcContext ctx = (GenMRProcContext) opProcCtx; List<FileSinkOperator> seenFSOps = ctx.getSeenFileSinkOps(); if (seenFSOps == null) { seenFSOps = new ArrayList<FileSinkOperator>(); } if (!seenFSOps.contains(fsOp)) { seenFSOps.add(fsOp); } ctx.setSeenFileSinkOps(seenFSOps); Task<? extends Serializable> currTask = ctx.getCurrTask(); // If the directory needs to be changed, send the new directory String dest = null; if (chDir) { dest = fsOp.getConf().getDirName(); // generate the temporary file // it must be on the same file system as the current destination ParseContext parseCtx = ctx.getParseCtx(); Context baseCtx = parseCtx.getContext(); String tmpDir = baseCtx.getExternalTmpFileURI((new Path(dest)).toUri()); fsOp.getConf().setDirName(tmpDir); } Task<? extends Serializable> mvTask = null; if (!chDir) { mvTask = findMoveTask(ctx.getMvTask(), fsOp); } Operator<? extends Serializable> currTopOp = ctx.getCurrTopOp(); String currAliasId = ctx.getCurrAliasId(); HashMap<Operator<? extends Serializable>, Task<? extends Serializable>> opTaskMap = ctx.getOpTaskMap(); List<Operator<? extends Serializable>> seenOps = ctx.getSeenOps(); List<Task<? 
extends Serializable>> rootTasks = ctx.getRootTasks(); // Set the move task to be dependent on the current task if (mvTask != null) { currTask.addDependentTask(mvTask); } // In case of multi-table insert, the path to alias mapping is needed for // all the sources. Since there is no // reducer, treat it as a plan with null reducer // If it is a map-only job, the task needs to be processed if (currTopOp != null) { Task<? extends Serializable> mapTask = opTaskMap.get(null); if (mapTask == null) { assert (!seenOps.contains(currTopOp)); seenOps.add(currTopOp); GenMapRedUtils.setTaskPlan( currAliasId, currTopOp, (MapredWork) currTask.getWork(), false, ctx); opTaskMap.put(null, currTask); rootTasks.add(currTask); } else { if (!seenOps.contains(currTopOp)) { seenOps.add(currTopOp); GenMapRedUtils.setTaskPlan( currAliasId, currTopOp, (MapredWork) mapTask.getWork(), false, ctx); } // mapTask and currTask should be merged by and join/union operator // (e.g., GenMRUnion1j) which has multiple topOps. assert mapTask == currTask : "mapTask.id = " + mapTask.getId() + "; currTask.id = " + currTask.getId(); } return dest; } UnionOperator currUnionOp = ctx.getCurrUnionOp(); if (currUnionOp != null) { opTaskMap.put(null, currTask); GenMapRedUtils.initUnionPlan(ctx, currTask, false); return dest; } AbstractMapJoinOperator<? extends MapJoinDesc> currMapJoinOp = ctx.getCurrMapJoinOp(); if (currMapJoinOp != null) { opTaskMap.put(null, currTask); GenMRMapJoinCtx mjCtx = ctx.getMapJoinCtx(currMapJoinOp); MapredWork plan = (MapredWork) currTask.getWork(); String taskTmpDir = mjCtx.getTaskTmpDir(); TableDesc tt_desc = mjCtx.getTTDesc(); assert plan.getPathToAliases().get(taskTmpDir) == null; plan.getPathToAliases().put(taskTmpDir, new ArrayList<String>()); plan.getPathToAliases().get(taskTmpDir).add(taskTmpDir); plan.getPathToPartitionInfo().put(taskTmpDir, new PartitionDesc(tt_desc, null)); plan.getAliasToWork().put(taskTmpDir, mjCtx.getRootMapJoinOp()); return dest; } return dest; }
@SuppressWarnings("nls") /** * Merge the tasks - by creating a temporary file between them. * * @param op reduce sink operator being processed * @param oldTask the parent task * @param task the child task * @param opProcCtx context * @param setReducer does the reducer needs to be set * @param pos position of the parent */ public static void splitTasks( Operator<? extends Serializable> op, Task<? extends Serializable> parentTask, Task<? extends Serializable> childTask, GenMRProcContext opProcCtx, boolean setReducer, boolean local, int posn) throws SemanticException { childTask.getWork(); Operator<? extends Serializable> currTopOp = opProcCtx.getCurrTopOp(); ParseContext parseCtx = opProcCtx.getParseCtx(); parentTask.addDependentTask(childTask); // Root Task cannot depend on any other task, therefore childTask cannot be // a root Task List<Task<? extends Serializable>> rootTasks = opProcCtx.getRootTasks(); if (rootTasks.contains(childTask)) { rootTasks.remove(childTask); } // generate the temporary file Context baseCtx = parseCtx.getContext(); String taskTmpDir = baseCtx.getMRTmpFileURI(); Operator<? extends Serializable> parent = op.getParentOperators().get(posn); TableDesc tt_desc = PlanUtils.getIntermediateFileTableDesc( PlanUtils.getFieldSchemasFromRowSchema(parent.getSchema(), "temporarycol")); // Create a file sink operator for this file name boolean compressIntermediate = parseCtx.getConf().getBoolVar(HiveConf.ConfVars.COMPRESSINTERMEDIATE); FileSinkDesc desc = new FileSinkDesc(taskTmpDir, tt_desc, compressIntermediate); if (compressIntermediate) { desc.setCompressCodec(parseCtx.getConf().getVar(HiveConf.ConfVars.COMPRESSINTERMEDIATECODEC)); desc.setCompressType(parseCtx.getConf().getVar(HiveConf.ConfVars.COMPRESSINTERMEDIATETYPE)); } Operator<? extends Serializable> fs_op = putOpInsertMap(OperatorFactory.get(desc, parent.getSchema()), null, parseCtx); // replace the reduce child with this operator List<Operator<? 
extends Serializable>> childOpList = parent.getChildOperators(); for (int pos = 0; pos < childOpList.size(); pos++) { if (childOpList.get(pos) == op) { childOpList.set(pos, fs_op); break; } } List<Operator<? extends Serializable>> parentOpList = new ArrayList<Operator<? extends Serializable>>(); parentOpList.add(parent); fs_op.setParentOperators(parentOpList); // create a dummy tableScan operator on top of op // TableScanOperator is implicitly created here for each MapOperator RowResolver rowResolver = opProcCtx.getParseCtx().getOpParseCtx().get(parent).getRowResolver(); Operator<? extends Serializable> ts_op = putOpInsertMap( OperatorFactory.get(TableScanDesc.class, parent.getSchema()), rowResolver, parseCtx); childOpList = new ArrayList<Operator<? extends Serializable>>(); childOpList.add(op); ts_op.setChildOperators(childOpList); op.getParentOperators().set(posn, ts_op); Map<Operator<? extends Serializable>, GenMapRedCtx> mapCurrCtx = opProcCtx.getMapCurrCtx(); mapCurrCtx.put(ts_op, new GenMapRedCtx(childTask, null, null)); String streamDesc = taskTmpDir; MapredWork cplan = (MapredWork) childTask.getWork(); if (setReducer) { Operator<? extends Serializable> reducer = op.getChildOperators().get(0); if (reducer.getClass() == JoinOperator.class) { String origStreamDesc; streamDesc = "$INTNAME"; origStreamDesc = streamDesc; int pos = 0; while (cplan.getAliasToWork().get(streamDesc) != null) { streamDesc = origStreamDesc.concat(String.valueOf(++pos)); } } // TODO: Allocate work to remove the temporary files and make that // dependent on the redTask if (reducer.getClass() == JoinOperator.class) { cplan.setNeedsTagging(true); } } // Add the path to alias mapping setTaskPlan(taskTmpDir, streamDesc, ts_op, cplan, local, tt_desc); // This can be cleaned up as a function table in future if (op instanceof AbstractMapJoinOperator<?>) { AbstractMapJoinOperator<? extends MapJoinDesc> mjOp = (AbstractMapJoinOperator<? 
extends MapJoinDesc>) op; opProcCtx.setCurrMapJoinOp(mjOp); GenMRMapJoinCtx mjCtx = opProcCtx.getMapJoinCtx(mjOp); if (mjCtx == null) { mjCtx = new GenMRMapJoinCtx(taskTmpDir, tt_desc, ts_op, null); } else { mjCtx.setTaskTmpDir(taskTmpDir); mjCtx.setTTDesc(tt_desc); mjCtx.setRootMapJoinOp(ts_op); } opProcCtx.setMapJoinCtx(mjOp, mjCtx); opProcCtx.getMapCurrCtx().put(parent, new GenMapRedCtx(childTask, null, null)); setupBucketMapJoinInfo(cplan, mjOp, false); } currTopOp = null; String currAliasId = null; opProcCtx.setCurrTopOp(currTopOp); opProcCtx.setCurrAliasId(currAliasId); opProcCtx.setCurrTask(childTask); }
public int executeInChildVM(DriverContext driverContext) { // execute in child jvm try { // generate the cmd line to run in the child jvm Context ctx = driverContext.getCtx(); String hiveJar = conf.getJar(); String hadoopExec = conf.getVar(HiveConf.ConfVars.HADOOPBIN); conf.setVar( ConfVars.HIVEADDEDJARS, Utilities.getResourceFiles(conf, SessionState.ResourceType.JAR)); // write out the plan to a local file Path planPath = new Path(ctx.getLocalTmpPath(), "plan.xml"); MapredLocalWork plan = getWork(); LOG.info("Generating plan file " + planPath.toString()); OutputStream out = null; try { out = FileSystem.getLocal(conf).create(planPath); SerializationUtilities.serializePlan(plan, out); out.close(); out = null; } finally { IOUtils.closeQuietly(out); } String isSilent = "true".equalsIgnoreCase(System.getProperty("test.silent")) ? "-nolog" : ""; String jarCmd; jarCmd = hiveJar + " " + ExecDriver.class.getName(); String hiveConfArgs = ExecDriver.generateCmdLine(conf, ctx); String cmdLine = hadoopExec + " jar " + jarCmd + " -localtask -plan " + planPath.toString() + " " + isSilent + " " + hiveConfArgs; String workDir = (new File(".")).getCanonicalPath(); String files = Utilities.getResourceFiles(conf, SessionState.ResourceType.FILE); if (!files.isEmpty()) { cmdLine = cmdLine + " -files " + files; workDir = ctx.getLocalTmpPath().toUri().getPath(); if (!(new File(workDir)).mkdir()) { throw new IOException("Cannot create tmp working dir: " + workDir); } for (String f : StringUtils.split(files, ',')) { Path p = new Path(f); String target = p.toUri().getPath(); String link = workDir + Path.SEPARATOR + p.getName(); if (FileUtil.symLink(target, link) != 0) { throw new IOException("Cannot link to added file: " + target + " from: " + link); } } } // Inherit Java system variables String hadoopOpts; StringBuilder sb = new StringBuilder(); Properties p = System.getProperties(); for (String element : HIVE_SYS_PROP) { if (p.containsKey(element)) { sb.append(" -D" + element + "=" + 
p.getProperty(element)); } } hadoopOpts = sb.toString(); // Inherit the environment variables String[] env; Map<String, String> variables = new HashMap<String, String>(System.getenv()); // The user can specify the hadoop memory // if ("local".equals(conf.getVar(HiveConf.ConfVars.HADOOPJT))) { // if we are running in local mode - then the amount of memory used // by the child jvm can no longer default to the memory used by the // parent jvm // int hadoopMem = conf.getIntVar(HiveConf.ConfVars.HIVEHADOOPMAXMEM); int hadoopMem = conf.getIntVar(HiveConf.ConfVars.HIVEHADOOPMAXMEM); if (hadoopMem == 0) { // remove env var that would default child jvm to use parent's memory // as default. child jvm would use default memory for a hadoop client variables.remove(HADOOP_MEM_KEY); } else { // user specified the memory for local mode hadoop run console.printInfo(" set heap size\t" + hadoopMem + "MB"); variables.put(HADOOP_MEM_KEY, String.valueOf(hadoopMem)); } // } else { // nothing to do - we are not running in local mode - only submitting // the job via a child process. in this case it's appropriate that the // child jvm use the same memory as the parent jvm // } // Set HADOOP_USER_NAME env variable for child process, so that // it also runs with hadoop permissions for the user the job is running as // This will be used by hadoop only in unsecure(/non kerberos) mode String endUserName = Utils.getUGI().getShortUserName(); LOG.debug("setting HADOOP_USER_NAME\t" + endUserName); variables.put("HADOOP_USER_NAME", endUserName); if (variables.containsKey(HADOOP_OPTS_KEY)) { variables.put(HADOOP_OPTS_KEY, variables.get(HADOOP_OPTS_KEY) + hadoopOpts); } else { variables.put(HADOOP_OPTS_KEY, hadoopOpts); } // For Windows OS, we need to pass HIVE_HADOOP_CLASSPATH Java parameter while starting // Hiveserver2 using "-hiveconf hive.hadoop.classpath=%HIVE_LIB%". This is to combine path(s). 
if (HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_HADOOP_CLASSPATH) != null) { if (variables.containsKey("HADOOP_CLASSPATH")) { variables.put( "HADOOP_CLASSPATH", variables.get("HADOOP_CLASSPATH") + ";" + HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_HADOOP_CLASSPATH)); } else { variables.put( "HADOOP_CLASSPATH", HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_HADOOP_CLASSPATH)); } } if (variables.containsKey(MapRedTask.HIVE_DEBUG_RECURSIVE)) { MapRedTask.configureDebugVariablesForChildJVM(variables); } if (UserGroupInformation.isSecurityEnabled() && UserGroupInformation.isLoginKeytabBased()) { // If kerberos security is enabled, and HS2 doAs is enabled, // then additional params need to be set so that the command is run as // intended user secureDoAs = new SecureCmdDoAs(conf); secureDoAs.addEnv(variables); } // If HIVE_LOCAL_TASK_CHILD_OPTS is set, child VM environment setting // HADOOP_CLIENT_OPTS will be replaced with HIVE_LOCAL_TASK_CHILD_OPTS. // HADOOP_OPTS is updated too since HADOOP_CLIENT_OPTS is appended // to HADOOP_OPTS in most cases. This way, the local task JVM can // have different settings from those of HiveServer2. 
if (variables.containsKey(HIVE_LOCAL_TASK_CHILD_OPTS_KEY)) { String childOpts = variables.get(HIVE_LOCAL_TASK_CHILD_OPTS_KEY); if (childOpts == null) { childOpts = ""; } String clientOpts = variables.put(HADOOP_CLIENT_OPTS, childOpts); String tmp = variables.get(HADOOP_OPTS_KEY); if (tmp != null && !StringUtils.isBlank(clientOpts)) { tmp = tmp.replace(clientOpts, childOpts); variables.put(HADOOP_OPTS_KEY, tmp); } } env = new String[variables.size()]; int pos = 0; for (Map.Entry<String, String> entry : variables.entrySet()) { String name = entry.getKey(); String value = entry.getValue(); env[pos++] = name + "=" + value; LOG.debug("Setting env: " + env[pos - 1]); } LOG.info("Executing: " + cmdLine); // Run ExecDriver in another JVM executor = Runtime.getRuntime().exec(cmdLine, env, new File(workDir)); CachingPrintStream errPrintStream = new CachingPrintStream(System.err); StreamPrinter outPrinter = new StreamPrinter(executor.getInputStream(), null, System.out); StreamPrinter errPrinter = new StreamPrinter(executor.getErrorStream(), null, errPrintStream); outPrinter.start(); errPrinter.start(); int exitVal = jobExecHelper.progressLocal(executor, getId()); // wait for stream threads to finish outPrinter.join(); errPrinter.join(); if (exitVal != 0) { LOG.error("Execution failed with exit status: " + exitVal); if (SessionState.get() != null) { SessionState.get().addLocalMapRedErrors(getId(), errPrintStream.getOutput()); } } else { LOG.info("Execution completed successfully"); } return exitVal; } catch (Exception e) { LOG.error("Exception: " + e, e); return (1); } finally { if (secureDoAs != null) { secureDoAs.close(); } } }
/** * Process the FileSink operator to generate a MoveTask if necessary. * * @param fsOp current FileSink operator * @param stack parent operators * @param opProcCtx * @param chDir whether the operator should be first output to a tmp dir and then merged to the * final dir later * @return the final file name to which the FileSinkOperator should store. * @throws SemanticException */ private String processFS( FileSinkOperator fsOp, Stack<Node> stack, NodeProcessorCtx opProcCtx, boolean chDir) throws SemanticException { GenMRProcContext ctx = (GenMRProcContext) opProcCtx; List<FileSinkOperator> seenFSOps = ctx.getSeenFileSinkOps(); if (seenFSOps == null) { seenFSOps = new ArrayList<FileSinkOperator>(); } if (!seenFSOps.contains(fsOp)) { seenFSOps.add(fsOp); } ctx.setSeenFileSinkOps(seenFSOps); Task<? extends Serializable> currTask = ctx.getCurrTask(); // If the directory needs to be changed, send the new directory String dest = null; if (chDir) { dest = fsOp.getConf().getFinalDirName(); // generate the temporary file // it must be on the same file system as the current destination ParseContext parseCtx = ctx.getParseCtx(); Context baseCtx = parseCtx.getContext(); String tmpDir = baseCtx.getExternalTmpFileURI((new Path(dest)).toUri()); FileSinkDesc fileSinkDesc = fsOp.getConf(); // Change all the linked file sink descriptors if (fileSinkDesc.isLinkedFileSink()) { for (FileSinkDesc fsConf : fileSinkDesc.getLinkedFileSinkDesc()) { String fileName = Utilities.getFileNameFromDirName(fsConf.getDirName()); fsConf.setParentDir(tmpDir); fsConf.setDirName(tmpDir + Path.SEPARATOR + fileName); } } else { fileSinkDesc.setDirName(tmpDir); } } Task<MoveWork> mvTask = null; if (!chDir) { mvTask = findMoveTask(ctx.getMvTask(), fsOp); } Operator<? extends OperatorDesc> currTopOp = ctx.getCurrTopOp(); String currAliasId = ctx.getCurrAliasId(); HashMap<Operator<? extends OperatorDesc>, Task<? extends Serializable>> opTaskMap = ctx.getOpTaskMap(); List<Operator<? 
extends OperatorDesc>> seenOps = ctx.getSeenOps(); List<Task<? extends Serializable>> rootTasks = ctx.getRootTasks(); // Set the move task to be dependent on the current task if (mvTask != null) { addDependentMoveTasks(ctx, mvTask, currTask); } // In case of multi-table insert, the path to alias mapping is needed for // all the sources. Since there is no // reducer, treat it as a plan with null reducer // If it is a map-only job, the task needs to be processed if (currTopOp != null) { Task<? extends Serializable> mapTask = opTaskMap.get(null); if (mapTask == null) { if (!seenOps.contains(currTopOp)) { seenOps.add(currTopOp); GenMapRedUtils.setTaskPlan( currAliasId, currTopOp, (MapredWork) currTask.getWork(), false, ctx); } opTaskMap.put(null, currTask); if (!rootTasks.contains(currTask) && (currTask.getParentTasks() == null || currTask.getParentTasks().isEmpty())) { rootTasks.add(currTask); } } else { if (!seenOps.contains(currTopOp)) { seenOps.add(currTopOp); GenMapRedUtils.setTaskPlan( currAliasId, currTopOp, (MapredWork) mapTask.getWork(), false, ctx); } else { UnionOperator currUnionOp = ctx.getCurrUnionOp(); if (currUnionOp != null) { opTaskMap.put(null, currTask); ctx.setCurrTopOp(null); GenMapRedUtils.initUnionPlan(ctx, currUnionOp, currTask, false); return dest; } } // mapTask and currTask should be merged by and join/union operator // (e.g., GenMRUnion1) which has multiple topOps. // assert mapTask == currTask : "mapTask.id = " + mapTask.getId() // + "; currTask.id = " + currTask.getId(); } return dest; } UnionOperator currUnionOp = ctx.getCurrUnionOp(); if (currUnionOp != null) { opTaskMap.put(null, currTask); GenMapRedUtils.initUnionPlan(ctx, currUnionOp, currTask, false); return dest; } return dest; }
/** Execute a query plan using Hadoop. */ @SuppressWarnings({"deprecation", "unchecked"}) @Override public int execute(DriverContext driverContext) { IOPrepareCache ioPrepareCache = IOPrepareCache.get(); ioPrepareCache.clear(); boolean success = true; Context ctx = driverContext.getCtx(); boolean ctxCreated = false; Path emptyScratchDir; MapWork mWork = work.getMapWork(); ReduceWork rWork = work.getReduceWork(); try { if (ctx == null) { ctx = new Context(job); ctxCreated = true; } emptyScratchDir = ctx.getMRTmpPath(); FileSystem fs = emptyScratchDir.getFileSystem(job); fs.mkdirs(emptyScratchDir); } catch (IOException e) { e.printStackTrace(); console.printError( "Error launching map-reduce job", "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e)); return 5; } HiveFileFormatUtils.prepareJobOutput(job); // See the javadoc on HiveOutputFormatImpl and HadoopShims.prepareJobOutput() job.setOutputFormat(HiveOutputFormatImpl.class); job.setMapperClass(ExecMapper.class); job.setMapOutputKeyClass(HiveKey.class); job.setMapOutputValueClass(BytesWritable.class); try { String partitioner = HiveConf.getVar(job, ConfVars.HIVEPARTITIONER); job.setPartitionerClass(JavaUtils.loadClass(partitioner)); } catch (ClassNotFoundException e) { throw new RuntimeException(e.getMessage(), e); } if (mWork.getNumMapTasks() != null) { job.setNumMapTasks(mWork.getNumMapTasks().intValue()); } if (mWork.getMaxSplitSize() != null) { HiveConf.setLongVar( job, HiveConf.ConfVars.MAPREDMAXSPLITSIZE, mWork.getMaxSplitSize().longValue()); } if (mWork.getMinSplitSize() != null) { HiveConf.setLongVar( job, HiveConf.ConfVars.MAPREDMINSPLITSIZE, mWork.getMinSplitSize().longValue()); } if (mWork.getMinSplitSizePerNode() != null) { HiveConf.setLongVar( job, HiveConf.ConfVars.MAPREDMINSPLITSIZEPERNODE, mWork.getMinSplitSizePerNode().longValue()); } if (mWork.getMinSplitSizePerRack() != null) { HiveConf.setLongVar( job, HiveConf.ConfVars.MAPREDMINSPLITSIZEPERRACK, 
mWork.getMinSplitSizePerRack().longValue()); } job.setNumReduceTasks(rWork != null ? rWork.getNumReduceTasks().intValue() : 0); job.setReducerClass(ExecReducer.class); // set input format information if necessary setInputAttributes(job); // Turn on speculative execution for reducers boolean useSpeculativeExecReducers = HiveConf.getBoolVar(job, HiveConf.ConfVars.HIVESPECULATIVEEXECREDUCERS); HiveConf.setBoolVar( job, HiveConf.ConfVars.HADOOPSPECULATIVEEXECREDUCERS, useSpeculativeExecReducers); String inpFormat = HiveConf.getVar(job, HiveConf.ConfVars.HIVEINPUTFORMAT); if (mWork.isUseBucketizedHiveInputFormat()) { inpFormat = BucketizedHiveInputFormat.class.getName(); } LOG.info("Using " + inpFormat); try { job.setInputFormat(JavaUtils.loadClass(inpFormat)); } catch (ClassNotFoundException e) { throw new RuntimeException(e.getMessage(), e); } // No-Op - we don't really write anything here .. job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); // Transfer HIVEAUXJARS and HIVEADDEDJARS to "tmpjars" so hadoop understands // it String auxJars = HiveConf.getVar(job, HiveConf.ConfVars.HIVEAUXJARS); String addedJars = HiveConf.getVar(job, HiveConf.ConfVars.HIVEADDEDJARS); if (StringUtils.isNotBlank(auxJars) || StringUtils.isNotBlank(addedJars)) { String allJars = StringUtils.isNotBlank(auxJars) ? (StringUtils.isNotBlank(addedJars) ? 
addedJars + "," + auxJars : auxJars) : addedJars; LOG.info("adding libjars: " + allJars); initializeFiles("tmpjars", allJars); } // Transfer HIVEADDEDFILES to "tmpfiles" so hadoop understands it String addedFiles = HiveConf.getVar(job, HiveConf.ConfVars.HIVEADDEDFILES); if (StringUtils.isNotBlank(addedFiles)) { initializeFiles("tmpfiles", addedFiles); } int returnVal = 0; boolean noName = StringUtils.isEmpty(HiveConf.getVar(job, HiveConf.ConfVars.HADOOPJOBNAME)); if (noName) { // This is for a special case to ensure unit tests pass HiveConf.setVar(job, HiveConf.ConfVars.HADOOPJOBNAME, "JOB" + Utilities.randGen.nextInt()); } String addedArchives = HiveConf.getVar(job, HiveConf.ConfVars.HIVEADDEDARCHIVES); // Transfer HIVEADDEDARCHIVES to "tmparchives" so hadoop understands it if (StringUtils.isNotBlank(addedArchives)) { initializeFiles("tmparchives", addedArchives); } try { MapredLocalWork localwork = mWork.getMapRedLocalWork(); if (localwork != null && localwork.hasStagedAlias()) { if (!ShimLoader.getHadoopShims().isLocalMode(job)) { Path localPath = localwork.getTmpPath(); Path hdfsPath = mWork.getTmpHDFSPath(); FileSystem hdfs = hdfsPath.getFileSystem(job); FileSystem localFS = localPath.getFileSystem(job); FileStatus[] hashtableFiles = localFS.listStatus(localPath); int fileNumber = hashtableFiles.length; String[] fileNames = new String[fileNumber]; for (int i = 0; i < fileNumber; i++) { fileNames[i] = hashtableFiles[i].getPath().getName(); } // package and compress all the hashtable files to an archive file String stageId = this.getId(); String archiveFileName = Utilities.generateTarFileName(stageId); localwork.setStageID(stageId); CompressionUtils.tar(localPath.toUri().getPath(), fileNames, archiveFileName); Path archivePath = Utilities.generateTarPath(localPath, stageId); LOG.info("Archive " + hashtableFiles.length + " hash table files to " + archivePath); // upload archive file to hdfs Path hdfsFilePath = Utilities.generateTarPath(hdfsPath, stageId); short 
replication = (short) job.getInt("mapred.submit.replication", 10); hdfs.copyFromLocalFile(archivePath, hdfsFilePath); hdfs.setReplication(hdfsFilePath, replication); LOG.info("Upload 1 archive file from" + archivePath + " to: " + hdfsFilePath); // add the archive file to distributed cache DistributedCache.createSymlink(job); DistributedCache.addCacheArchive(hdfsFilePath.toUri(), job); LOG.info( "Add 1 archive file to distributed cache. Archive file: " + hdfsFilePath.toUri()); } } work.configureJobConf(job); List<Path> inputPaths = Utilities.getInputPaths(job, mWork, emptyScratchDir, ctx, false); Utilities.setInputPaths(job, inputPaths); Utilities.setMapRedWork(job, work, ctx.getMRTmpPath()); if (mWork.getSamplingType() > 0 && rWork != null && job.getNumReduceTasks() > 1) { try { handleSampling(ctx, mWork, job); job.setPartitionerClass(HiveTotalOrderPartitioner.class); } catch (IllegalStateException e) { console.printInfo("Not enough sampling data.. Rolling back to single reducer task"); rWork.setNumReduceTasks(1); job.setNumReduceTasks(1); } catch (Exception e) { LOG.error("Sampling error", e); console.printError( e.toString(), "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e)); rWork.setNumReduceTasks(1); job.setNumReduceTasks(1); } } // remove the pwd from conf file so that job tracker doesn't show this // logs String pwd = HiveConf.getVar(job, HiveConf.ConfVars.METASTOREPWD); if (pwd != null) { HiveConf.setVar(job, HiveConf.ConfVars.METASTOREPWD, "HIVE"); } JobClient jc = new JobClient(job); // make this client wait if job tracker is not behaving well. 
Throttle.checkJobTracker(job, LOG); if (mWork.isGatheringStats() || (rWork != null && rWork.isGatheringStats())) { // initialize stats publishing table StatsPublisher statsPublisher; StatsFactory factory = StatsFactory.newFactory(job); if (factory != null) { statsPublisher = factory.getStatsPublisher(); List<String> statsTmpDir = Utilities.getStatsTmpDirs(mWork, job); if (rWork != null) { statsTmpDir.addAll(Utilities.getStatsTmpDirs(rWork, job)); } StatsCollectionContext sc = new StatsCollectionContext(job); sc.setStatsTmpDirs(statsTmpDir); if (!statsPublisher.init(sc)) { // creating stats table if not exists if (HiveConf.getBoolVar(job, HiveConf.ConfVars.HIVE_STATS_RELIABLE)) { throw new HiveException( ErrorMsg.STATSPUBLISHER_INITIALIZATION_ERROR.getErrorCodedMsg()); } } } } Utilities.createTmpDirs(job, mWork); Utilities.createTmpDirs(job, rWork); SessionState ss = SessionState.get(); if (HiveConf.getVar(job, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez") && ss != null) { TezSessionState session = ss.getTezSession(); TezSessionPoolManager.getInstance().close(session, true); } // Finally SUBMIT the JOB! 
rj = jc.submitJob(job); // replace it back if (pwd != null) { HiveConf.setVar(job, HiveConf.ConfVars.METASTOREPWD, pwd); } returnVal = jobExecHelper.progress(rj, jc, ctx.getHiveTxnManager()); success = (returnVal == 0); } catch (Exception e) { e.printStackTrace(); String mesg = " with exception '" + Utilities.getNameMessage(e) + "'"; if (rj != null) { mesg = "Ended Job = " + rj.getJobID() + mesg; } else { mesg = "Job Submission failed" + mesg; } // Has to use full name to make sure it does not conflict with // org.apache.commons.lang.StringUtils console.printError(mesg, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e)); success = false; returnVal = 1; } finally { Utilities.clearWork(job); try { if (ctxCreated) { ctx.clear(); } if (rj != null) { if (returnVal != 0) { rj.killJob(); } jobID = rj.getID().toString(); } } catch (Exception e) { LOG.warn("Failed while cleaning up ", e); } finally { HadoopJobExecHelper.runningJobs.remove(rj); } } // get the list of Dynamic partition paths try { if (rj != null) { if (mWork.getAliasToWork() != null) { for (Operator<? extends OperatorDesc> op : mWork.getAliasToWork().values()) { op.jobClose(job, success); } } if (rWork != null) { rWork.getReducer().jobClose(job, success); } } } catch (Exception e) { // jobClose needs to execute successfully otherwise fail task if (success) { success = false; returnVal = 3; String mesg = "Job Commit failed with exception '" + Utilities.getNameMessage(e) + "'"; console.printError(mesg, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e)); } } return (returnVal); }
@Override public void acquireLocks(QueryPlan plan, Context ctx, String username) throws LockException { // Make sure we've built the lock manager getLockManager(); // If the lock manager is still null, then it means we aren't using a // lock manager if (lockMgr == null) return; List<HiveLockObj> lockObjects = new ArrayList<HiveLockObj>(); // Sort all the inputs, outputs. // If a lock needs to be acquired on any partition, a read lock needs to be acquired on all // its parents also for (ReadEntity input : plan.getInputs()) { if (!input.needsLock()) { continue; } LOG.debug("Adding " + input.getName() + " to list of lock inputs"); if (input.getType() == ReadEntity.Type.DATABASE) { lockObjects.addAll( getLockObjects(plan, input.getDatabase(), null, null, HiveLockMode.SHARED)); } else if (input.getType() == ReadEntity.Type.TABLE) { lockObjects.addAll(getLockObjects(plan, null, input.getTable(), null, HiveLockMode.SHARED)); } else { lockObjects.addAll( getLockObjects(plan, null, null, input.getPartition(), HiveLockMode.SHARED)); } } for (WriteEntity output : plan.getOutputs()) { HiveLockMode lockMode = getWriteEntityLockMode(output); if (lockMode == null) { continue; } LOG.debug("Adding " + output.getName() + " to list of lock outputs"); List<HiveLockObj> lockObj = null; if (output.getType() == WriteEntity.Type.DATABASE) { lockObjects.addAll(getLockObjects(plan, output.getDatabase(), null, null, lockMode)); } else if (output.getTyp() == WriteEntity.Type.TABLE) { lockObj = getLockObjects(plan, null, output.getTable(), null, lockMode); } else if (output.getTyp() == WriteEntity.Type.PARTITION) { lockObj = getLockObjects(plan, null, null, output.getPartition(), lockMode); } // In case of dynamic queries, it is possible to have incomplete dummy partitions else if (output.getTyp() == WriteEntity.Type.DUMMYPARTITION) { lockObj = getLockObjects(plan, null, null, output.getPartition(), HiveLockMode.SHARED); } if (lockObj != null) { lockObjects.addAll(lockObj); 
ctx.getOutputLockObjects().put(output, lockObj); } } if (lockObjects.isEmpty() && !ctx.isNeedLockMgr()) { return; } dedupLockObjects(lockObjects); List<HiveLock> hiveLocks = lockMgr.lock(lockObjects, false); if (hiveLocks == null) { throw new LockException(ErrorMsg.LOCK_CANNOT_BE_ACQUIRED.getMsg()); } else { ctx.setHiveLocks(hiveLocks); } }
@Override public int execute(DriverContext driverContext) { Context ctx = driverContext.getCtx(); boolean ctxCreated = false; try { if (ctx == null) { ctx = new Context(conf); ctxCreated = true; } // estimate number of reducers setNumberOfReducers(); // auto-determine local mode if allowed if (!ctx.isLocalOnlyExecutionMode() && conf.getBoolVar(HiveConf.ConfVars.LOCALMODEAUTO)) { if (inputSummary == null) { inputSummary = Utilities.getInputSummary(driverContext.getCtx(), work, null); } // set the values of totalInputFileSize and totalInputNumFiles, estimating them // if percentage block sampling is being used estimateInputSize(); // at this point the number of reducers is precisely defined in the plan int numReducers = work.getNumReduceTasks(); if (LOG.isDebugEnabled()) { LOG.debug( "Task: " + getId() + ", Summary: " + totalInputFileSize + "," + totalInputNumFiles + "," + numReducers); } String reason = MapRedTask.isEligibleForLocalMode( conf, numReducers, totalInputFileSize, totalInputNumFiles); if (reason == null) { // clone configuration before modifying it on per-task basis cloneConf(); conf.setVar(HiveConf.ConfVars.HADOOPJT, "local"); console.printInfo("Selecting local mode for task: " + getId()); this.setLocalMode(true); } else { console.printInfo("Cannot run job locally: " + reason); this.setLocalMode(false); } } runningViaChild = "local".equals(conf.getVar(HiveConf.ConfVars.HADOOPJT)) || conf.getBoolVar(HiveConf.ConfVars.SUBMITVIACHILD); if (!runningViaChild) { // we are not running this mapred task via child jvm // so directly invoke ExecDriver return super.execute(driverContext); } // we need to edit the configuration to setup cmdline. 
clone it first cloneConf(); // propagate input format if necessary super.setInputAttributes(conf); // enable assertion String hadoopExec = conf.getVar(HiveConf.ConfVars.HADOOPBIN); String hiveJar = conf.getJar(); String libJarsOption; String addedJars = Utilities.getResourceFiles(conf, SessionState.ResourceType.JAR); conf.setVar(ConfVars.HIVEADDEDJARS, addedJars); String auxJars = conf.getAuxJars(); // Put auxjars and addedjars together into libjars if (StringUtils.isEmpty(addedJars)) { if (StringUtils.isEmpty(auxJars)) { libJarsOption = " "; } else { libJarsOption = " -libjars " + auxJars + " "; } } else { if (StringUtils.isEmpty(auxJars)) { libJarsOption = " -libjars " + addedJars + " "; } else { libJarsOption = " -libjars " + addedJars + "," + auxJars + " "; } } // Generate the hiveConfArgs after potentially adding the jars String hiveConfArgs = generateCmdLine(conf); // write out the plan to a local file Path planPath = new Path(ctx.getLocalTmpFileURI(), "plan.xml"); OutputStream out = FileSystem.getLocal(conf).create(planPath); MapredWork plan = getWork(); LOG.info("Generating plan file " + planPath.toString()); Utilities.serializeMapRedWork(plan, out); String isSilent = "true".equalsIgnoreCase(System.getProperty("test.silent")) ? 
"-nolog" : ""; String jarCmd; if (ShimLoader.getHadoopShims().usesJobShell()) { jarCmd = libJarsOption + hiveJar + " " + ExecDriver.class.getName(); } else { jarCmd = hiveJar + " " + ExecDriver.class.getName() + libJarsOption; } String cmdLine = hadoopExec + " jar " + jarCmd + " -plan " + planPath.toString() + " " + isSilent + " " + hiveConfArgs; String workDir = (new File(".")).getCanonicalPath(); String files = Utilities.getResourceFiles(conf, SessionState.ResourceType.FILE); if (!files.isEmpty()) { cmdLine = cmdLine + " -files " + files; workDir = (new Path(ctx.getLocalTmpFileURI())).toUri().getPath(); if (!(new File(workDir)).mkdir()) { throw new IOException("Cannot create tmp working dir: " + workDir); } for (String f : StringUtils.split(files, ',')) { Path p = new Path(f); String target = p.toUri().getPath(); String link = workDir + Path.SEPARATOR + p.getName(); if (FileUtil.symLink(target, link) != 0) { throw new IOException("Cannot link to added file: " + target + " from: " + link); } } } LOG.info("Executing: " + cmdLine); Process executor = null; // Inherit Java system variables String hadoopOpts; StringBuilder sb = new StringBuilder(); Properties p = System.getProperties(); for (String element : HIVE_SYS_PROP) { if (p.containsKey(element)) { sb.append(" -D" + element + "=" + p.getProperty(element)); } } hadoopOpts = sb.toString(); // Inherit the environment variables String[] env; Map<String, String> variables = new HashMap(System.getenv()); // The user can specify the hadoop memory if ("local".equals(conf.getVar(HiveConf.ConfVars.HADOOPJT))) { // if we are running in local mode - then the amount of memory used // by the child jvm can no longer default to the memory used by the // parent jvm int hadoopMem = conf.getIntVar(HiveConf.ConfVars.HIVEHADOOPMAXMEM); if (hadoopMem == 0) { // remove env var that would default child jvm to use parent's memory // as default. 
child jvm would use default memory for a hadoop client variables.remove(HADOOP_MEM_KEY); } else { // user specified the memory for local mode hadoop run variables.put(HADOOP_MEM_KEY, String.valueOf(hadoopMem)); } } else { // nothing to do - we are not running in local mode - only submitting // the job via a child process. in this case it's appropriate that the // child jvm use the same memory as the parent jvm } if (variables.containsKey(HADOOP_OPTS_KEY)) { variables.put(HADOOP_OPTS_KEY, variables.get(HADOOP_OPTS_KEY) + hadoopOpts); } else { variables.put(HADOOP_OPTS_KEY, hadoopOpts); } if (variables.containsKey(HIVE_DEBUG_RECURSIVE)) { configureDebugVariablesForChildJVM(variables); } env = new String[variables.size()]; int pos = 0; for (Map.Entry<String, String> entry : variables.entrySet()) { String name = entry.getKey(); String value = entry.getValue(); env[pos++] = name + "=" + value; } // Run ExecDriver in another JVM executor = Runtime.getRuntime().exec(cmdLine, env, new File(workDir)); StreamPrinter outPrinter = new StreamPrinter( executor.getInputStream(), null, SessionState.getConsole().getChildOutStream()); StreamPrinter errPrinter = new StreamPrinter( executor.getErrorStream(), null, SessionState.getConsole().getChildErrStream()); outPrinter.start(); errPrinter.start(); int exitVal = jobExecHelper.progressLocal(executor, getId()); if (exitVal != 0) { LOG.error("Execution failed with exit status: " + exitVal); } else { LOG.info("Execution completed successfully"); } return exitVal; } catch (Exception e) { e.printStackTrace(); LOG.error("Exception: " + e.getMessage()); return (1); } finally { try { // creating the context can create a bunch of files. So make // sure to clear it out if (ctxCreated) { ctx.clear(); } } catch (Exception e) { LOG.error("Exception: " + e.getMessage()); } } }
/**
 * Builds the demo panel: a tree of the operators produced by analyzing
 * {@code CommondDemo.command3} on top, and a read-only HTML pane below, joined by a vertical
 * split pane.
 *
 * <p>If parsing or analysis throws, the stack trace is printed and the tree is created from
 * whatever root node had been assigned up to that point (possibly {@code null}).
 */
public OpetatorTreeDemo() {
  super(new GridLayout(1, 0));

  DefaultMutableTreeNode rootNode = null;
  try {
    // Show which configuration resources are visible on the context class path.
    final ClassLoader contextLoader = Thread.currentThread().getContextClassLoader();
    System.out.println(contextLoader.getResource("hive-default.xml"));
    System.out.println(contextLoader.getResource("hive-site.xml"));

    final HiveConf hiveConf = new HiveConf(SessionState.class);
    SessionState.start(new SessionState(hiveConf));

    final Context queryCtx = new Context(hiveConf);
    queryCtx.setTryCount(10);
    queryCtx.setCmd(CommondDemo.command3);
    queryCtx.setHDFSCleanup(true);

    // Parse the command and run semantic analysis to obtain the top operators.
    final ParseDriver parser = new ParseDriver();
    final ASTNode parsed = parser.parse(CommondDemo.command3, queryCtx);
    final ASTNode rootToken = ParseUtils.findRootNonNullToken(parsed);

    final SemanticAnalyzer analyzer =
        (SemanticAnalyzer) SemanticAnalyzerFactory.get(hiveConf, rootToken);
    analyzer.analyze(rootToken, queryCtx);
    analyzer.validate();

    final List<Operator<? extends OperatorDesc>> topOperators =
        new ArrayList<Operator<? extends OperatorDesc>>(analyzer.topOps.values());
    if (topOperators.size() != 1) {
      // Zero or several top operators: hang them all under a synthetic root.
      rootNode = new DefaultMutableTreeNode("root");
      for (Operator<? extends OperatorDesc> topOperator : topOperators) {
        rootNode.add(createNodes(topOperator));
      }
    } else {
      rootNode = createNodes(topOperators.get(0));
    }
  } catch (Exception analysisFailure) {
    analysisFailure.printStackTrace();
  }

  // Single-selection tree that reports selection changes to this panel.
  tree = new JTree(rootNode);
  final TreeSelectionModel selectionModel = tree.getSelectionModel();
  selectionModel.setSelectionMode(TreeSelectionModel.SINGLE_TREE_SELECTION);
  tree.addTreeSelectionListener(this);

  if (playWithLineStyle) {
    System.out.println("line style = " + lineStyle);
    tree.putClientProperty("JTree.lineStyle", lineStyle);
  }

  // Scrollable view of the tree.
  final JScrollPane treeScroller = new JScrollPane(tree);

  // Read-only HTML pane, filled by initHelp(), in its own scrollable view.
  htmlPane = new JEditorPane();
  htmlPane.setEditable(false);
  initHelp();
  final JScrollPane htmlScroller = new JScrollPane(htmlPane);

  // Tree above, HTML below.
  final JSplitPane verticalSplit = new JSplitPane(JSplitPane.VERTICAL_SPLIT);
  verticalSplit.setTopComponent(treeScroller);
  verticalSplit.setBottomComponent(htmlScroller);

  final Dimension smallestPane = new Dimension(100, 50);
  htmlScroller.setMinimumSize(smallestPane);
  treeScroller.setMinimumSize(smallestPane);
  verticalSplit.setDividerLocation(100);
  verticalSplit.setPreferredSize(new Dimension(500, 300));

  // The split pane is the only child of this panel.
  add(verticalSplit);
}
@Override /** start a new map-reduce job to do the merge, almost the same as ExecDriver. */ public int execute(DriverContext driverContext) { HiveConf.setVar(job, HiveConf.ConfVars.HIVEINPUTFORMAT, CombineHiveInputFormat.class.getName()); success = true; ShimLoader.getHadoopShims().setNullOutputFormat(job); job.setMapperClass(work.getMapperClass()); Context ctx = driverContext.getCtx(); boolean ctxCreated = false; try { if (ctx == null) { ctx = new Context(job); ctxCreated = true; } } catch (IOException e) { e.printStackTrace(); console.printError( "Error launching map-reduce job", "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e)); return 5; } job.setMapOutputKeyClass(NullWritable.class); job.setMapOutputValueClass(NullWritable.class); if (work.getNumMapTasks() != null) { job.setNumMapTasks(work.getNumMapTasks()); } // zero reducers job.setNumReduceTasks(0); if (work.getMinSplitSize() != null) { HiveConf.setLongVar( job, HiveConf.ConfVars.MAPREDMINSPLITSIZE, work.getMinSplitSize().longValue()); } if (work.getInputformat() != null) { HiveConf.setVar(job, HiveConf.ConfVars.HIVEINPUTFORMAT, work.getInputformat()); } String inpFormat = HiveConf.getVar(job, HiveConf.ConfVars.HIVEINPUTFORMAT); if ((inpFormat == null) || (!StringUtils.isNotBlank(inpFormat))) { inpFormat = ShimLoader.getHadoopShims().getInputFormatClassName(); } LOG.info("Using " + inpFormat); try { job.setInputFormat((Class<? 
extends InputFormat>) (Class.forName(inpFormat))); } catch (ClassNotFoundException e) { throw new RuntimeException(e.getMessage()); } String outputPath = this.work.getOutputDir(); Path tempOutPath = Utilities.toTempPath(new Path(outputPath)); try { FileSystem fs = tempOutPath.getFileSystem(job); if (!fs.exists(tempOutPath)) { fs.mkdirs(tempOutPath); } } catch (IOException e) { console.printError("Can't make path " + outputPath + " : " + e.getMessage()); return 6; } RCFileBlockMergeOutputFormat.setMergeOutputPath(job, new Path(outputPath)); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(NullWritable.class); HiveConf.setBoolVar( job, HiveConf.ConfVars.HIVEMERGECURRENTJOBHASDYNAMICPARTITIONS, work.hasDynamicPartitions()); int returnVal = 0; RunningJob rj = null; boolean noName = StringUtils.isEmpty(HiveConf.getVar(job, HiveConf.ConfVars.HADOOPJOBNAME)); String jobName = null; if (noName && this.getQueryPlan() != null) { int maxlen = conf.getIntVar(HiveConf.ConfVars.HIVEJOBNAMELENGTH); jobName = Utilities.abbreviate(this.getQueryPlan().getQueryStr(), maxlen - 6); } if (noName) { // This is for a special case to ensure unit tests pass HiveConf.setVar( job, HiveConf.ConfVars.HADOOPJOBNAME, jobName != null ? jobName : "JOB" + Utilities.randGen.nextInt()); } try { addInputPaths(job, work); Utilities.setMapRedWork(job, work, ctx.getMRTmpFileURI()); // remove the pwd from conf file so that job tracker doesn't show this // logs String pwd = HiveConf.getVar(job, HiveConf.ConfVars.METASTOREPWD); if (pwd != null) { HiveConf.setVar(job, HiveConf.ConfVars.METASTOREPWD, "HIVE"); } JobClient jc = new JobClient(job); String addedJars = Utilities.getResourceFiles(job, SessionState.ResourceType.JAR); if (!addedJars.isEmpty()) { job.set("tmpjars", addedJars); } // make this client wait if job trcker is not behaving well. Throttle.checkJobTracker(job, LOG); // Finally SUBMIT the JOB! 
rj = jc.submitJob(job); returnVal = jobExecHelper.progress(rj, jc); success = (returnVal == 0); } catch (Exception e) { e.printStackTrace(); String mesg = " with exception '" + Utilities.getNameMessage(e) + "'"; if (rj != null) { mesg = "Ended Job = " + rj.getJobID() + mesg; } else { mesg = "Job Submission failed" + mesg; } // Has to use full name to make sure it does not conflict with // org.apache.commons.lang.StringUtils console.printError(mesg, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e)); success = false; returnVal = 1; } finally { try { if (ctxCreated) { ctx.clear(); } if (rj != null) { if (returnVal != 0) { rj.killJob(); } HadoopJobExecHelper.runningJobKillURIs.remove(rj.getJobID()); jobID = rj.getID().toString(); } RCFileMergeMapper.jobClose(outputPath, success, job, console); } catch (Exception e) { } } return (returnVal); }