/** * Localizes files, archives and jars the user has instructed us to provide on the cluster as * resources for execution. * * @param conf * @return List<LocalResource> local resources to add to execution * @throws IOException when hdfs operation fails * @throws LoginException when getDefaultDestDir fails with the same exception */ public List<LocalResource> localizeTempFilesFromConf(String hdfsDirPathStr, Configuration conf) throws IOException, LoginException { List<LocalResource> tmpResources = new ArrayList<LocalResource>(); String addedFiles = Utilities.getResourceFiles(conf, SessionState.ResourceType.FILE); if (StringUtils.isNotBlank(addedFiles)) { HiveConf.setVar(conf, ConfVars.HIVEADDEDFILES, addedFiles); } String addedJars = Utilities.getResourceFiles(conf, SessionState.ResourceType.JAR); if (StringUtils.isNotBlank(addedJars)) { HiveConf.setVar(conf, ConfVars.HIVEADDEDJARS, addedJars); } String addedArchives = Utilities.getResourceFiles(conf, SessionState.ResourceType.ARCHIVE); if (StringUtils.isNotBlank(addedArchives)) { HiveConf.setVar(conf, ConfVars.HIVEADDEDARCHIVES, addedArchives); } String auxJars = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEAUXJARS); // need to localize the additional jars and files // we need the directory on hdfs to which we shall put all these files String allFiles = auxJars + "," + addedJars + "," + addedFiles + "," + addedArchives; addTempFiles(conf, tmpResources, hdfsDirPathStr, allFiles.split(",")); return tmpResources; }
/** Initialization when invoked from QL. */ @Override public void initialize(HiveConf conf, QueryPlan queryPlan, DriverContext driverContext) { super.initialize(conf, queryPlan, driverContext); job = new JobConf(conf, ExecDriver.class); // NOTE: initialize is only called if it is in non-local mode. // In case it's in non-local mode, we need to move the SessionState files // and jars to jobConf. // In case it's in local mode, MapRedTask will set the jobConf. // // "tmpfiles" and "tmpjars" are set by the method ExecDriver.execute(), // which will be called by both local and NON-local mode. String addedFiles = Utilities.getResourceFiles(job, SessionState.ResourceType.FILE); if (StringUtils.isNotBlank(addedFiles)) { HiveConf.setVar(job, ConfVars.HIVEADDEDFILES, addedFiles); } String addedJars = Utilities.getResourceFiles(job, SessionState.ResourceType.JAR); if (StringUtils.isNotBlank(addedJars)) { HiveConf.setVar(job, ConfVars.HIVEADDEDJARS, addedJars); } String addedArchives = Utilities.getResourceFiles(job, SessionState.ResourceType.ARCHIVE); if (StringUtils.isNotBlank(addedArchives)) { HiveConf.setVar(job, ConfVars.HIVEADDEDARCHIVES, addedArchives); } conf.stripHiddenConfigurations(job); this.jobExecHelper = new HadoopJobExecHelper(job, console, this, this); }
/**
 * Runs the local task in a separate child JVM: serializes the MapredLocalWork
 * plan to a local "plan.xml", builds a "hadoop jar ... -localtask" command line
 * plus the child environment (memory, HADOOP_OPTS, HADOOP_USER_NAME, optional
 * kerberos doAs and debug settings), launches the process, and streams its
 * stdout/stderr while waiting for completion.
 *
 * @param driverContext driver context supplying the query Context (local tmp paths)
 * @return child process exit status; 0 on success, 1 if an exception is caught here
 */
public int executeInChildVM(DriverContext driverContext) {
  // execute in child jvm
  try {
    // generate the cmd line to run in the child jvm
    Context ctx = driverContext.getCtx();
    String hiveJar = conf.getJar();
    String hadoopExec = conf.getVar(HiveConf.ConfVars.HADOOPBIN);
    conf.setVar(
        ConfVars.HIVEADDEDJARS, Utilities.getResourceFiles(conf, SessionState.ResourceType.JAR));
    // write out the plan to a local file
    Path planPath = new Path(ctx.getLocalTmpPath(), "plan.xml");
    MapredLocalWork plan = getWork();
    LOG.info("Generating plan file " + planPath.toString());
    OutputStream out = null;
    try {
      out = FileSystem.getLocal(conf).create(planPath);
      SerializationUtilities.serializePlan(plan, out);
      out.close();
      // null out so the finally-block closeQuietly is a no-op on the happy path
      out = null;
    } finally {
      IOUtils.closeQuietly(out);
    }

    String isSilent = "true".equalsIgnoreCase(System.getProperty("test.silent")) ? "-nolog" : "";

    String jarCmd;
    jarCmd = hiveJar + " " + ExecDriver.class.getName();
    String hiveConfArgs = ExecDriver.generateCmdLine(conf, ctx);
    String cmdLine =
        hadoopExec
            + " jar "
            + jarCmd
            + " -localtask -plan "
            + planPath.toString()
            + " "
            + isSilent
            + " "
            + hiveConfArgs;

    String workDir = (new File(".")).getCanonicalPath();
    String files = Utilities.getResourceFiles(conf, SessionState.ResourceType.FILE);

    if (!files.isEmpty()) {
      cmdLine = cmdLine + " -files " + files;

      // Run the child from a fresh tmp dir and symlink every added file into
      // it so the child sees them by bare name in its working directory.
      workDir = ctx.getLocalTmpPath().toUri().getPath();

      // NOTE(review): File.mkdir() also returns false when the directory
      // already exists, not only on failure — confirm the local tmp path is
      // always fresh here, otherwise this throws spuriously.
      if (!(new File(workDir)).mkdir()) {
        throw new IOException("Cannot create tmp working dir: " + workDir);
      }

      for (String f : StringUtils.split(files, ',')) {
        Path p = new Path(f);
        String target = p.toUri().getPath();
        String link = workDir + Path.SEPARATOR + p.getName();
        if (FileUtil.symLink(target, link) != 0) {
          throw new IOException("Cannot link to added file: " + target + " from: " + link);
        }
      }
    }

    // Inherit Java system variables
    String hadoopOpts;
    StringBuilder sb = new StringBuilder();
    Properties p = System.getProperties();
    for (String element : HIVE_SYS_PROP) {
      if (p.containsKey(element)) {
        sb.append(" -D" + element + "=" + p.getProperty(element));
      }
    }
    hadoopOpts = sb.toString();

    // Inherit the environment variables
    String[] env;
    Map<String, String> variables = new HashMap<String, String>(System.getenv());

    // The user can specify the hadoop memory
    // if ("local".equals(conf.getVar(HiveConf.ConfVars.HADOOPJT))) {
    // if we are running in local mode - then the amount of memory used
    // by the child jvm can no longer default to the memory used by the
    // parent jvm
    // int hadoopMem = conf.getIntVar(HiveConf.ConfVars.HIVEHADOOPMAXMEM);
    int hadoopMem = conf.getIntVar(HiveConf.ConfVars.HIVEHADOOPMAXMEM);
    if (hadoopMem == 0) {
      // remove env var that would default child jvm to use parent's memory
      // as default. child jvm would use default memory for a hadoop client
      variables.remove(HADOOP_MEM_KEY);
    } else {
      // user specified the memory for local mode hadoop run
      console.printInfo(" set heap size\t" + hadoopMem + "MB");
      variables.put(HADOOP_MEM_KEY, String.valueOf(hadoopMem));
    }
    // } else {
    // nothing to do - we are not running in local mode - only submitting
    // the job via a child process. in this case it's appropriate that the
    // child jvm use the same memory as the parent jvm
    // }

    // Set HADOOP_USER_NAME env variable for child process, so that
    // it also runs with hadoop permissions for the user the job is running as
    // This will be used by hadoop only in unsecure(/non kerberos) mode
    String endUserName = Utils.getUGI().getShortUserName();
    LOG.debug("setting HADOOP_USER_NAME\t" + endUserName);
    variables.put("HADOOP_USER_NAME", endUserName);

    // Append inherited -D system properties to any pre-existing HADOOP_OPTS.
    if (variables.containsKey(HADOOP_OPTS_KEY)) {
      variables.put(HADOOP_OPTS_KEY, variables.get(HADOOP_OPTS_KEY) + hadoopOpts);
    } else {
      variables.put(HADOOP_OPTS_KEY, hadoopOpts);
    }

    // For Windows OS, we need to pass HIVE_HADOOP_CLASSPATH Java parameter while starting
    // Hiveserver2 using "-hiveconf hive.hadoop.classpath=%HIVE_LIB%". This is to combine path(s).
    // NOTE(review): HiveConf.getVar may return "" rather than null for an
    // unset var — confirm this guard actually skips the unset case. The ";"
    // separator below is the Windows path separator, per the comment above.
    if (HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_HADOOP_CLASSPATH) != null) {
      if (variables.containsKey("HADOOP_CLASSPATH")) {
        variables.put(
            "HADOOP_CLASSPATH",
            variables.get("HADOOP_CLASSPATH")
                + ";"
                + HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_HADOOP_CLASSPATH));
      } else {
        variables.put(
            "HADOOP_CLASSPATH", HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_HADOOP_CLASSPATH));
      }
    }

    if (variables.containsKey(MapRedTask.HIVE_DEBUG_RECURSIVE)) {
      MapRedTask.configureDebugVariablesForChildJVM(variables);
    }

    if (UserGroupInformation.isSecurityEnabled() && UserGroupInformation.isLoginKeytabBased()) {
      // If kerberos security is enabled, and HS2 doAs is enabled,
      // then additional params need to be set so that the command is run as
      // intended user
      secureDoAs = new SecureCmdDoAs(conf);
      secureDoAs.addEnv(variables);
    }

    // If HIVE_LOCAL_TASK_CHILD_OPTS is set, child VM environment setting
    // HADOOP_CLIENT_OPTS will be replaced with HIVE_LOCAL_TASK_CHILD_OPTS.
    // HADOOP_OPTS is updated too since HADOOP_CLIENT_OPTS is appended
    // to HADOOP_OPTS in most cases. This way, the local task JVM can
    // have different settings from those of HiveServer2.
    if (variables.containsKey(HIVE_LOCAL_TASK_CHILD_OPTS_KEY)) {
      String childOpts = variables.get(HIVE_LOCAL_TASK_CHILD_OPTS_KEY);
      if (childOpts == null) {
        childOpts = "";
      }
      // Map.put returns the previous value: capture the old HADOOP_CLIENT_OPTS
      // while installing the override in one step.
      String clientOpts = variables.put(HADOOP_CLIENT_OPTS, childOpts);
      String tmp = variables.get(HADOOP_OPTS_KEY);
      if (tmp != null && !StringUtils.isBlank(clientOpts)) {
        tmp = tmp.replace(clientOpts, childOpts);
        variables.put(HADOOP_OPTS_KEY, tmp);
      }
    }

    // Flatten the env map into "NAME=value" entries for Runtime.exec.
    env = new String[variables.size()];
    int pos = 0;
    for (Map.Entry<String, String> entry : variables.entrySet()) {
      String name = entry.getKey();
      String value = entry.getValue();
      env[pos++] = name + "=" + value;
      LOG.debug("Setting env: " + env[pos - 1]);
    }

    LOG.info("Executing: " + cmdLine);

    // Run ExecDriver in another JVM.
    // NOTE(review): the single-String exec variant tokenizes cmdLine on
    // whitespace — paths or conf args containing spaces would be split.
    executor = Runtime.getRuntime().exec(cmdLine, env, new File(workDir));

    CachingPrintStream errPrintStream = new CachingPrintStream(System.err);

    StreamPrinter outPrinter = new StreamPrinter(executor.getInputStream(), null, System.out);
    StreamPrinter errPrinter = new StreamPrinter(executor.getErrorStream(), null, errPrintStream);

    outPrinter.start();
    errPrinter.start();

    int exitVal = jobExecHelper.progressLocal(executor, getId());

    // wait for stream threads to finish
    outPrinter.join();
    errPrinter.join();

    if (exitVal != 0) {
      LOG.error("Execution failed with exit status: " + exitVal);
      if (SessionState.get() != null) {
        SessionState.get().addLocalMapRedErrors(getId(), errPrintStream.getOutput());
      }
    } else {
      LOG.info("Execution completed successfully");
    }

    return exitVal;
  } catch (Exception e) {
    LOG.error("Exception: " + e, e);
    return (1);
  } finally {
    // Release the kerberos doAs helper (if one was created) regardless of outcome.
    if (secureDoAs != null) {
      secureDoAs.close();
    }
  }
}
@Override /** start a new map-reduce job to do the merge, almost the same as ExecDriver. */ public int execute(DriverContext driverContext) { HiveConf.setVar(job, HiveConf.ConfVars.HIVEINPUTFORMAT, CombineHiveInputFormat.class.getName()); success = true; ShimLoader.getHadoopShims().setNullOutputFormat(job); job.setMapperClass(work.getMapperClass()); Context ctx = driverContext.getCtx(); boolean ctxCreated = false; try { if (ctx == null) { ctx = new Context(job); ctxCreated = true; } } catch (IOException e) { e.printStackTrace(); console.printError( "Error launching map-reduce job", "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e)); return 5; } job.setMapOutputKeyClass(NullWritable.class); job.setMapOutputValueClass(NullWritable.class); if (work.getNumMapTasks() != null) { job.setNumMapTasks(work.getNumMapTasks()); } // zero reducers job.setNumReduceTasks(0); if (work.getMinSplitSize() != null) { HiveConf.setLongVar( job, HiveConf.ConfVars.MAPREDMINSPLITSIZE, work.getMinSplitSize().longValue()); } if (work.getInputformat() != null) { HiveConf.setVar(job, HiveConf.ConfVars.HIVEINPUTFORMAT, work.getInputformat()); } String inpFormat = HiveConf.getVar(job, HiveConf.ConfVars.HIVEINPUTFORMAT); if ((inpFormat == null) || (!StringUtils.isNotBlank(inpFormat))) { inpFormat = ShimLoader.getHadoopShims().getInputFormatClassName(); } LOG.info("Using " + inpFormat); try { job.setInputFormat((Class<? 
extends InputFormat>) (Class.forName(inpFormat))); } catch (ClassNotFoundException e) { throw new RuntimeException(e.getMessage()); } String outputPath = this.work.getOutputDir(); Path tempOutPath = Utilities.toTempPath(new Path(outputPath)); try { FileSystem fs = tempOutPath.getFileSystem(job); if (!fs.exists(tempOutPath)) { fs.mkdirs(tempOutPath); } } catch (IOException e) { console.printError("Can't make path " + outputPath + " : " + e.getMessage()); return 6; } RCFileBlockMergeOutputFormat.setMergeOutputPath(job, new Path(outputPath)); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(NullWritable.class); HiveConf.setBoolVar( job, HiveConf.ConfVars.HIVEMERGECURRENTJOBHASDYNAMICPARTITIONS, work.hasDynamicPartitions()); int returnVal = 0; RunningJob rj = null; boolean noName = StringUtils.isEmpty(HiveConf.getVar(job, HiveConf.ConfVars.HADOOPJOBNAME)); String jobName = null; if (noName && this.getQueryPlan() != null) { int maxlen = conf.getIntVar(HiveConf.ConfVars.HIVEJOBNAMELENGTH); jobName = Utilities.abbreviate(this.getQueryPlan().getQueryStr(), maxlen - 6); } if (noName) { // This is for a special case to ensure unit tests pass HiveConf.setVar( job, HiveConf.ConfVars.HADOOPJOBNAME, jobName != null ? jobName : "JOB" + Utilities.randGen.nextInt()); } try { addInputPaths(job, work); Utilities.setMapRedWork(job, work, ctx.getMRTmpFileURI()); // remove the pwd from conf file so that job tracker doesn't show this // logs String pwd = HiveConf.getVar(job, HiveConf.ConfVars.METASTOREPWD); if (pwd != null) { HiveConf.setVar(job, HiveConf.ConfVars.METASTOREPWD, "HIVE"); } JobClient jc = new JobClient(job); String addedJars = Utilities.getResourceFiles(job, SessionState.ResourceType.JAR); if (!addedJars.isEmpty()) { job.set("tmpjars", addedJars); } // make this client wait if job trcker is not behaving well. Throttle.checkJobTracker(job, LOG); // Finally SUBMIT the JOB! 
rj = jc.submitJob(job); returnVal = jobExecHelper.progress(rj, jc); success = (returnVal == 0); } catch (Exception e) { e.printStackTrace(); String mesg = " with exception '" + Utilities.getNameMessage(e) + "'"; if (rj != null) { mesg = "Ended Job = " + rj.getJobID() + mesg; } else { mesg = "Job Submission failed" + mesg; } // Has to use full name to make sure it does not conflict with // org.apache.commons.lang.StringUtils console.printError(mesg, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e)); success = false; returnVal = 1; } finally { try { if (ctxCreated) { ctx.clear(); } if (rj != null) { if (returnVal != 0) { rj.killJob(); } HadoopJobExecHelper.runningJobKillURIs.remove(rj.getJobID()); jobID = rj.getID().toString(); } RCFileMergeMapper.jobClose(outputPath, success, job, console); } catch (Exception e) { } } return (returnVal); }