/** Execute a query plan using Hadoop. */ @SuppressWarnings({"deprecation", "unchecked"}) @Override public int execute(DriverContext driverContext) { IOPrepareCache ioPrepareCache = IOPrepareCache.get(); ioPrepareCache.clear(); boolean success = true; Context ctx = driverContext.getCtx(); boolean ctxCreated = false; Path emptyScratchDir; MapWork mWork = work.getMapWork(); ReduceWork rWork = work.getReduceWork(); try { if (ctx == null) { ctx = new Context(job); ctxCreated = true; } emptyScratchDir = ctx.getMRTmpPath(); FileSystem fs = emptyScratchDir.getFileSystem(job); fs.mkdirs(emptyScratchDir); } catch (IOException e) { e.printStackTrace(); console.printError( "Error launching map-reduce job", "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e)); return 5; } HiveFileFormatUtils.prepareJobOutput(job); // See the javadoc on HiveOutputFormatImpl and HadoopShims.prepareJobOutput() job.setOutputFormat(HiveOutputFormatImpl.class); job.setMapperClass(ExecMapper.class); job.setMapOutputKeyClass(HiveKey.class); job.setMapOutputValueClass(BytesWritable.class); try { String partitioner = HiveConf.getVar(job, ConfVars.HIVEPARTITIONER); job.setPartitionerClass(JavaUtils.loadClass(partitioner)); } catch (ClassNotFoundException e) { throw new RuntimeException(e.getMessage(), e); } if (mWork.getNumMapTasks() != null) { job.setNumMapTasks(mWork.getNumMapTasks().intValue()); } if (mWork.getMaxSplitSize() != null) { HiveConf.setLongVar( job, HiveConf.ConfVars.MAPREDMAXSPLITSIZE, mWork.getMaxSplitSize().longValue()); } if (mWork.getMinSplitSize() != null) { HiveConf.setLongVar( job, HiveConf.ConfVars.MAPREDMINSPLITSIZE, mWork.getMinSplitSize().longValue()); } if (mWork.getMinSplitSizePerNode() != null) { HiveConf.setLongVar( job, HiveConf.ConfVars.MAPREDMINSPLITSIZEPERNODE, mWork.getMinSplitSizePerNode().longValue()); } if (mWork.getMinSplitSizePerRack() != null) { HiveConf.setLongVar( job, HiveConf.ConfVars.MAPREDMINSPLITSIZEPERRACK, mWork.getMinSplitSizePerRack().longValue()); } job.setNumReduceTasks(rWork != null ? rWork.getNumReduceTasks().intValue() : 0); job.setReducerClass(ExecReducer.class); // set input format information if necessary setInputAttributes(job); // Turn on speculative execution for reducers boolean useSpeculativeExecReducers = HiveConf.getBoolVar(job, HiveConf.ConfVars.HIVESPECULATIVEEXECREDUCERS); HiveConf.setBoolVar( job, HiveConf.ConfVars.HADOOPSPECULATIVEEXECREDUCERS, useSpeculativeExecReducers); String inpFormat = HiveConf.getVar(job, HiveConf.ConfVars.HIVEINPUTFORMAT); if (mWork.isUseBucketizedHiveInputFormat()) { inpFormat = BucketizedHiveInputFormat.class.getName(); } LOG.info("Using " + inpFormat); try { job.setInputFormat(JavaUtils.loadClass(inpFormat)); } catch (ClassNotFoundException e) { throw new RuntimeException(e.getMessage(), e); } // No-Op - we don't really write anything here .. job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); // Transfer HIVEAUXJARS and HIVEADDEDJARS to "tmpjars" so hadoop understands // it String auxJars = HiveConf.getVar(job, HiveConf.ConfVars.HIVEAUXJARS); String addedJars = HiveConf.getVar(job, HiveConf.ConfVars.HIVEADDEDJARS); if (StringUtils.isNotBlank(auxJars) || StringUtils.isNotBlank(addedJars)) { String allJars = StringUtils.isNotBlank(auxJars) ? (StringUtils.isNotBlank(addedJars) ? addedJars + "," + auxJars : auxJars) : addedJars; LOG.info("adding libjars: " + allJars); initializeFiles("tmpjars", allJars); } // Transfer HIVEADDEDFILES to "tmpfiles" so hadoop understands it String addedFiles = HiveConf.getVar(job, HiveConf.ConfVars.HIVEADDEDFILES); if (StringUtils.isNotBlank(addedFiles)) { initializeFiles("tmpfiles", addedFiles); } int returnVal = 0; boolean noName = StringUtils.isEmpty(HiveConf.getVar(job, HiveConf.ConfVars.HADOOPJOBNAME)); if (noName) { // This is for a special case to ensure unit tests pass HiveConf.setVar(job, HiveConf.ConfVars.HADOOPJOBNAME, "JOB" + Utilities.randGen.nextInt()); } String addedArchives = HiveConf.getVar(job, HiveConf.ConfVars.HIVEADDEDARCHIVES); // Transfer HIVEADDEDARCHIVES to "tmparchives" so hadoop understands it if (StringUtils.isNotBlank(addedArchives)) { initializeFiles("tmparchives", addedArchives); } try { MapredLocalWork localwork = mWork.getMapRedLocalWork(); if (localwork != null && localwork.hasStagedAlias()) { if (!ShimLoader.getHadoopShims().isLocalMode(job)) { Path localPath = localwork.getTmpPath(); Path hdfsPath = mWork.getTmpHDFSPath(); FileSystem hdfs = hdfsPath.getFileSystem(job); FileSystem localFS = localPath.getFileSystem(job); FileStatus[] hashtableFiles = localFS.listStatus(localPath); int fileNumber = hashtableFiles.length; String[] fileNames = new String[fileNumber]; for (int i = 0; i < fileNumber; i++) { fileNames[i] = hashtableFiles[i].getPath().getName(); } // package and compress all the hashtable files to an archive file String stageId = this.getId(); String archiveFileName = Utilities.generateTarFileName(stageId); localwork.setStageID(stageId); CompressionUtils.tar(localPath.toUri().getPath(), fileNames, archiveFileName); Path archivePath = Utilities.generateTarPath(localPath, stageId); LOG.info("Archive " + hashtableFiles.length + " hash table files to " + archivePath); // upload archive file to hdfs Path hdfsFilePath = Utilities.generateTarPath(hdfsPath, stageId); short replication = (short) job.getInt("mapred.submit.replication", 10); hdfs.copyFromLocalFile(archivePath, hdfsFilePath); hdfs.setReplication(hdfsFilePath, replication); LOG.info("Upload 1 archive file from" + archivePath + " to: " + hdfsFilePath); // add the archive file to distributed cache DistributedCache.createSymlink(job); DistributedCache.addCacheArchive(hdfsFilePath.toUri(), job); LOG.info( "Add 1 archive file to distributed cache. Archive file: " + hdfsFilePath.toUri()); } } work.configureJobConf(job); List<Path> inputPaths = Utilities.getInputPaths(job, mWork, emptyScratchDir, ctx, false); Utilities.setInputPaths(job, inputPaths); Utilities.setMapRedWork(job, work, ctx.getMRTmpPath()); if (mWork.getSamplingType() > 0 && rWork != null && job.getNumReduceTasks() > 1) { try { handleSampling(ctx, mWork, job); job.setPartitionerClass(HiveTotalOrderPartitioner.class); } catch (IllegalStateException e) { console.printInfo("Not enough sampling data.. Rolling back to single reducer task"); rWork.setNumReduceTasks(1); job.setNumReduceTasks(1); } catch (Exception e) { LOG.error("Sampling error", e); console.printError( e.toString(), "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e)); rWork.setNumReduceTasks(1); job.setNumReduceTasks(1); } } // remove the pwd from conf file so that job tracker doesn't show this // logs String pwd = HiveConf.getVar(job, HiveConf.ConfVars.METASTOREPWD); if (pwd != null) { HiveConf.setVar(job, HiveConf.ConfVars.METASTOREPWD, "HIVE"); } JobClient jc = new JobClient(job); // make this client wait if job tracker is not behaving well. Throttle.checkJobTracker(job, LOG); if (mWork.isGatheringStats() || (rWork != null && rWork.isGatheringStats())) { // initialize stats publishing table StatsPublisher statsPublisher; StatsFactory factory = StatsFactory.newFactory(job); if (factory != null) { statsPublisher = factory.getStatsPublisher(); List<String> statsTmpDir = Utilities.getStatsTmpDirs(mWork, job); if (rWork != null) { statsTmpDir.addAll(Utilities.getStatsTmpDirs(rWork, job)); } StatsCollectionContext sc = new StatsCollectionContext(job); sc.setStatsTmpDirs(statsTmpDir); if (!statsPublisher.init(sc)) { // creating stats table if not exists if (HiveConf.getBoolVar(job, HiveConf.ConfVars.HIVE_STATS_RELIABLE)) { throw new HiveException( ErrorMsg.STATSPUBLISHER_INITIALIZATION_ERROR.getErrorCodedMsg()); } } } } Utilities.createTmpDirs(job, mWork); Utilities.createTmpDirs(job, rWork); SessionState ss = SessionState.get(); if (HiveConf.getVar(job, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez") && ss != null) { TezSessionState session = ss.getTezSession(); TezSessionPoolManager.getInstance().close(session, true); } // Finally SUBMIT the JOB! rj = jc.submitJob(job); // replace it back if (pwd != null) { HiveConf.setVar(job, HiveConf.ConfVars.METASTOREPWD, pwd); } returnVal = jobExecHelper.progress(rj, jc, ctx.getHiveTxnManager()); success = (returnVal == 0); } catch (Exception e) { e.printStackTrace(); String mesg = " with exception '" + Utilities.getNameMessage(e) + "'"; if (rj != null) { mesg = "Ended Job = " + rj.getJobID() + mesg; } else { mesg = "Job Submission failed" + mesg; } // Has to use full name to make sure it does not conflict with // org.apache.commons.lang.StringUtils console.printError(mesg, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e)); success = false; returnVal = 1; } finally { Utilities.clearWork(job); try { if (ctxCreated) { ctx.clear(); } if (rj != null) { if (returnVal != 0) { rj.killJob(); } jobID = rj.getID().toString(); } } catch (Exception e) { LOG.warn("Failed while cleaning up ", e); } finally { HadoopJobExecHelper.runningJobs.remove(rj); } } // get the list of Dynamic partition paths try { if (rj != null) { if (mWork.getAliasToWork() != null) { for (Operator<? extends OperatorDesc> op : mWork.getAliasToWork().values()) { op.jobClose(job, success); } } if (rWork != null) { rWork.getReducer().jobClose(job, success); } } } catch (Exception e) { // jobClose needs to execute successfully otherwise fail task if (success) { success = false; returnVal = 3; String mesg = "Job Commit failed with exception '" + Utilities.getNameMessage(e) + "'"; console.printError(mesg, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e)); } } return (returnVal); }
private void run(String[] args) throws Exception { LlapOptionsProcessor optionsProcessor = new LlapOptionsProcessor(); LlapOptions options = optionsProcessor.processOptions(args); if (options == null) { // help return; } Path tmpDir = new Path(options.getDirectory()); if (conf == null) { throw new Exception("Cannot load any configuration to run command"); } FileSystem fs = FileSystem.get(conf); FileSystem lfs = FileSystem.getLocal(conf).getRawFileSystem(); // needed so that the file is actually loaded into configuration. for (String f : NEEDED_CONFIGS) { conf.addResource(f); if (conf.getResource(f) == null) { throw new Exception("Unable to find required config file: " + f); } } for (String f : OPTIONAL_CONFIGS) { conf.addResource(f); } conf.reloadConfiguration(); if (options.getName() != null) { // update service registry configs - caveat: this has nothing to do with the actual settings // as read by the AM // if needed, use --hiveconf llap.daemon.service.hosts=@llap0 to dynamically switch between // instances conf.set(ConfVars.LLAP_DAEMON_SERVICE_HOSTS.varname, "@" + options.getName()); } if (options.getSize() != -1) { if (options.getCache() != -1) { Preconditions.checkArgument( options.getCache() < options.getSize(), "Cache has to be smaller than the container sizing"); } if (options.getXmx() != -1) { Preconditions.checkArgument( options.getXmx() < options.getSize(), "Working memory has to be smaller than the container sizing"); } if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.LLAP_ALLOCATOR_DIRECT)) { Preconditions.checkArgument( options.getXmx() + options.getCache() < options.getSize(), "Working memory + cache has to be smaller than the containing sizing "); } } final long minAlloc = conf.getInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, -1); if (options.getSize() != -1) { final long containerSize = options.getSize() / (1024 * 1024); Preconditions.checkArgument( containerSize >= minAlloc, "Container size should be greater than minimum allocation(%s)", minAlloc + "m"); conf.setLong(ConfVars.LLAP_DAEMON_YARN_CONTAINER_MB.varname, containerSize); } if (options.getExecutors() != -1) { conf.setLong(ConfVars.LLAP_DAEMON_NUM_EXECUTORS.varname, options.getExecutors()); // TODO: vcpu settings - possibly when DRFA works right } if (options.getCache() != -1) { conf.setLong(HiveConf.ConfVars.LLAP_IO_MEMORY_MAX_SIZE.varname, options.getCache()); } if (options.getXmx() != -1) { // Needs more explanation here // Xmx is not the max heap value in JDK8 // You need to subtract 50% of the survivor fraction from this, to get actual usable memory // before it goes into GC conf.setLong( ConfVars.LLAP_DAEMON_MEMORY_PER_INSTANCE_MB.varname, (long) (options.getXmx()) / (1024 * 1024)); } for (Entry<Object, Object> props : options.getConfig().entrySet()) { conf.set((String) props.getKey(), (String) props.getValue()); } URL logger = conf.getResource("llap-daemon-log4j2.properties"); if (null == logger) { throw new Exception("Unable to find required config file: llap-daemon-log4j2.properties"); } Path home = new Path(System.getenv("HIVE_HOME")); Path scripts = new Path(new Path(new Path(home, "scripts"), "llap"), "bin"); if (!lfs.exists(home)) { throw new Exception("Unable to find HIVE_HOME:" + home); } else if (!lfs.exists(scripts)) { LOG.warn("Unable to find llap scripts:" + scripts); } Path libDir = new Path(tmpDir, "lib"); String tezLibs = conf.get("tez.lib.uris"); if (tezLibs == null) { LOG.warn("Missing tez.lib.uris in tez-site.xml"); } if (LOG.isDebugEnabled()) { LOG.debug("Copying tez libs from " + tezLibs); } lfs.mkdirs(libDir); fs.copyToLocalFile(new Path(tezLibs), new Path(libDir, "tez.tar.gz")); CompressionUtils.unTar(new Path(libDir, "tez.tar.gz").toString(), libDir.toString(), true); lfs.delete(new Path(libDir, "tez.tar.gz"), false); lfs.copyFromLocalFile(new Path(Utilities.jarFinderGetJar(LlapInputFormat.class)), libDir); lfs.copyFromLocalFile(new Path(Utilities.jarFinderGetJar(HiveInputFormat.class)), libDir); // copy default aux classes (json/hbase) for (String className : DEFAULT_AUX_CLASSES) { localizeJarForClass(lfs, libDir, className, false); } if (options.getIsHBase()) { try { localizeJarForClass(lfs, libDir, HBASE_SERDE_CLASS, true); Job fakeJob = new Job(new JobConf()); // HBase API is convoluted. TableMapReduceUtil.addDependencyJars(fakeJob); Collection<String> hbaseJars = fakeJob.getConfiguration().getStringCollection("tmpjars"); for (String jarPath : hbaseJars) { if (!jarPath.isEmpty()) { lfs.copyFromLocalFile(new Path(jarPath), libDir); } } } catch (Throwable t) { String err = "Failed to add HBase jars. Use --auxhbase=false to avoid localizing them"; LOG.error(err); System.err.println(err); throw new RuntimeException(t); } } String auxJars = options.getAuxJars(); if (auxJars != null && !auxJars.isEmpty()) { // TODO: transitive dependencies warning? String[] jarPaths = auxJars.split(","); for (String jarPath : jarPaths) { if (!jarPath.isEmpty()) { lfs.copyFromLocalFile(new Path(jarPath), libDir); } } } Path confPath = new Path(tmpDir, "conf"); lfs.mkdirs(confPath); for (String f : NEEDED_CONFIGS) { copyConfig(options, lfs, confPath, f); } for (String f : OPTIONAL_CONFIGS) { try { copyConfig(options, lfs, confPath, f); } catch (Throwable t) { LOG.info("Error getting an optional config " + f + "; ignoring: " + t.getMessage()); } } lfs.copyFromLocalFile(new Path(logger.toString()), confPath); // extract configs for processing by the python fragments in Slider JSONObject configs = new JSONObject(); configs.put( ConfVars.LLAP_DAEMON_YARN_CONTAINER_MB.varname, HiveConf.getIntVar(conf, ConfVars.LLAP_DAEMON_YARN_CONTAINER_MB)); configs.put( HiveConf.ConfVars.LLAP_IO_MEMORY_MAX_SIZE.varname, HiveConf.getLongVar(conf, HiveConf.ConfVars.LLAP_IO_MEMORY_MAX_SIZE)); configs.put( HiveConf.ConfVars.LLAP_ALLOCATOR_DIRECT.varname, HiveConf.getBoolVar(conf, HiveConf.ConfVars.LLAP_ALLOCATOR_DIRECT)); configs.put( ConfVars.LLAP_DAEMON_MEMORY_PER_INSTANCE_MB.varname, HiveConf.getIntVar(conf, ConfVars.LLAP_DAEMON_MEMORY_PER_INSTANCE_MB)); configs.put( ConfVars.LLAP_DAEMON_VCPUS_PER_INSTANCE.varname, HiveConf.getIntVar(conf, ConfVars.LLAP_DAEMON_VCPUS_PER_INSTANCE)); configs.put( ConfVars.LLAP_DAEMON_NUM_EXECUTORS.varname, HiveConf.getIntVar(conf, ConfVars.LLAP_DAEMON_NUM_EXECUTORS)); configs.put( YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, conf.getInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, -1)); configs.put( YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES, conf.getInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES, -1)); FSDataOutputStream os = lfs.create(new Path(tmpDir, "config.json")); OutputStreamWriter w = new OutputStreamWriter(os); configs.write(w); w.close(); os.close(); lfs.close(); fs.close(); if (LOG.isDebugEnabled()) { LOG.debug("Exiting successfully"); } }