private void createBackgroundOperationPool() {
  int poolSize = hiveConf.getIntVar(ConfVars.HIVE_SERVER2_ASYNC_EXEC_THREADS);
  LOG.info("HiveServer2: Background operation thread pool size: " + poolSize);
  int poolQueueSize = hiveConf.getIntVar(ConfVars.HIVE_SERVER2_ASYNC_EXEC_WAIT_QUEUE_SIZE);
  LOG.info("HiveServer2: Background operation thread wait queue size: " + poolQueueSize);
  long keepAliveTime = HiveConf.getTimeVar(
      hiveConf, ConfVars.HIVE_SERVER2_ASYNC_EXEC_KEEPALIVE_TIME, TimeUnit.SECONDS);
  LOG.info(
      "HiveServer2: Background operation thread keepalive time: " + keepAliveTime + " seconds");

  // Create a thread pool with #poolSize threads.
  // Threads terminate when they are idle for more than the keepAliveTime.
  // A bounded blocking queue is used to queue incoming operations, if #operations > poolSize.
  String threadPoolName = "HiveServer2-Background-Pool";
  backgroundOperationPool = new ThreadPoolExecutor(
      poolSize,
      poolSize,
      keepAliveTime,
      TimeUnit.SECONDS,
      new LinkedBlockingQueue<Runnable>(poolQueueSize),
      new ThreadFactoryWithGarbageCleanup(threadPoolName));
  backgroundOperationPool.allowCoreThreadTimeOut(true);

  checkInterval = HiveConf.getTimeVar(
      hiveConf, ConfVars.HIVE_SERVER2_SESSION_CHECK_INTERVAL, TimeUnit.MILLISECONDS);
  sessionTimeout = HiveConf.getTimeVar(
      hiveConf, ConfVars.HIVE_SERVER2_IDLE_SESSION_TIMEOUT, TimeUnit.MILLISECONDS);
}
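// Hedged illustration (not from Hive): a minimal, self-contained sketch of the same
// bounded-queue pool pattern using only java.util.concurrent. The numeric values are
// hypothetical stand-ins for the HiveConf settings read above, not Hive defaults.
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;

class BackgroundPoolSketch {
  public static void main(String[] args) {
    int poolSize = 100;         // hypothetical stand-in for HIVE_SERVER2_ASYNC_EXEC_THREADS
    int poolQueueSize = 100;    // hypothetical stand-in for HIVE_SERVER2_ASYNC_EXEC_WAIT_QUEUE_SIZE
    long keepAliveSeconds = 10; // hypothetical stand-in for HIVE_SERVER2_ASYNC_EXEC_KEEPALIVE_TIME

    // Core size == max size, so the pool never grows past poolSize; extra submissions
    // wait in the bounded queue and are rejected once the queue is full.
    ThreadPoolExecutor pool = new ThreadPoolExecutor(
        poolSize, poolSize, keepAliveSeconds, TimeUnit.SECONDS,
        new LinkedBlockingQueue<Runnable>(poolQueueSize));
    // Let core threads time out too, so an idle pool eventually releases all of its threads.
    pool.allowCoreThreadTimeOut(true);

    pool.execute(() -> System.out.println("background operation"));
    pool.shutdown();
  }
}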
/*
 * Helper to determine the size of the container requested from YARN.
 * Falls back to MapReduce's map task size if the Tez container size isn't set.
 */
private Resource getContainerResource(Configuration conf) {
  int memory = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVETEZCONTAINERSIZE) > 0
      ? HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVETEZCONTAINERSIZE)
      : conf.getInt(MRJobConfig.MAP_MEMORY_MB, MRJobConfig.DEFAULT_MAP_MEMORY_MB);
  int cpus = conf.getInt(MRJobConfig.MAP_CPU_VCORES, MRJobConfig.DEFAULT_MAP_CPU_VCORES);
  return Resource.newInstance(memory, cpus);
}
public MapJoinBytesTableContainer(
    Configuration hconf, MapJoinObjectSerDeContext valCtx, long keyCount, long memUsage)
    throws SerDeException {
  this(
      HiveConf.getFloatVar(hconf, HiveConf.ConfVars.HIVEHASHTABLEKEYCOUNTADJUSTMENT),
      HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHASHTABLETHRESHOLD),
      HiveConf.getFloatVar(hconf, HiveConf.ConfVars.HIVEHASHTABLELOADFACTOR),
      HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHASHTABLEWBSIZE),
      valCtx,
      keyCount,
      memUsage);
}
/**
 * Add the StatsTask as a dependent task of the MoveTask, because the StatsTask will change the
 * Table/Partition metadata. For atomicity, we should not change the metadata before the data is
 * actually in place, which is handled by the MoveTask.
 *
 * @param nd the FileSinkOperator whose results are taken care of by the MoveTask.
 * @param mvTask the MoveTask that moves the FileSinkOperator's results.
 * @param currTask the MapRedTask that the FileSinkOperator belongs to.
 * @param hconf HiveConf
 */
private void addStatsTask(
    FileSinkOperator nd, MoveTask mvTask, Task<? extends Serializable> currTask, HiveConf hconf) {
  MoveWork mvWork = ((MoveTask) mvTask).getWork();
  StatsWork statsWork = null;
  if (mvWork.getLoadTableWork() != null) {
    statsWork = new StatsWork(mvWork.getLoadTableWork());
  } else if (mvWork.getLoadFileWork() != null) {
    statsWork = new StatsWork(mvWork.getLoadFileWork());
  }
  assert statsWork != null : "Error when generating StatsTask";
  statsWork.setStatsReliable(hconf.getBoolVar(ConfVars.HIVE_STATS_RELIABLE));
  MapredWork mrWork = (MapredWork) currTask.getWork();

  // AggKey in StatsWork is used for stats aggregation, while StatsAggPrefix
  // in FileSinkDesc is used for stats publishing. They should be consistent.
  statsWork.setAggKey(((FileSinkOperator) nd).getConf().getStatsAggPrefix());
  Task<? extends Serializable> statsTask = TaskFactory.get(statsWork, hconf);

  // Mark the MapredWork and FileSinkOperator for gathering stats.
  nd.getConf().setGatherStats(true);
  mrWork.setGatheringStats(true);
  nd.getConf().setStatsReliable(hconf.getBoolVar(ConfVars.HIVE_STATS_RELIABLE));
  nd.getConf()
      .setMaxStatsKeyPrefixLength(hconf.getIntVar(ConfVars.HIVE_STATS_KEY_PREFIX_MAX_LENGTH));
  // mrWork.addDestinationTable(nd.getConf().getTableInfo().getTableName());

  // Subscribe to feeds from the MoveTask so that the MoveTask can forward the list
  // of dynamic partitions to the StatsTask.
  mvTask.addDependentTask(statsTask);
  statsTask.subscribeFeed(mvTask);
}
public HiveAuthFactory(HiveConf conf) throws TTransportException {
  this.conf = conf;
  saslMessageLimit = conf.getIntVar(ConfVars.HIVE_THRIFT_SASL_MESSAGE_LIMIT);
  String transTypeStr = conf.getVar(HiveConf.ConfVars.HIVE_SERVER2_TRANSPORT_MODE);
  String authTypeStr = conf.getVar(ConfVars.HIVE_SERVER2_AUTHENTICATION);
  transportType = TransTypes.valueOf(transTypeStr.toUpperCase());
  authType = authTypeStr == null
      ? transportType.getDefaultAuthType()
      : AuthTypes.valueOf(authTypeStr.toUpperCase());
  if (transportType == TransTypes.BINARY
      && authTypeStr.equalsIgnoreCase(AuthTypes.KERBEROS.name())
      && ShimLoader.getHadoopShims().isSecureShimImpl()) {
    saslServer = ShimLoader.getHadoopThriftAuthBridge()
        .createServer(
            conf.getVar(ConfVars.HIVE_SERVER2_KERBEROS_KEYTAB),
            conf.getVar(ConfVars.HIVE_SERVER2_KERBEROS_PRINCIPAL));
    // Start the delegation token manager.
    try {
      saslServer.startDelegationTokenSecretManager(conf, null, ServerMode.HIVESERVER2);
    } catch (Exception e) {
      throw new TTransportException("Failed to start token manager", e);
    }
  } else {
    saslServer = null;
  }
}
@Override
public synchronized void init(HiveConf hiveConf) {
  if (hiveConf.getBoolVar(HiveConf.ConfVars.HIVE_SERVER2_LOGGING_OPERATION_ENABLED)) {
    initOperationLogCapture(
        hiveConf.getVar(HiveConf.ConfVars.HIVE_SERVER2_LOGGING_OPERATION_LEVEL));
  } else {
    LOG.debug("Operation level logging is turned off");
  }
  if (hiveConf.isWebUiQueryInfoCacheEnabled()) {
    historicSqlOperations = new SQLOperationDisplayCache(
        hiveConf.getIntVar(ConfVars.HIVE_SERVER2_WEBUI_MAX_HISTORIC_QUERIES));
  }
  super.init(hiveConf);
}
private void validateFileMetadata() throws IOException {
  if (fileMetadata.getCompressionKind() == CompressionKind.NONE) return;
  int bufferSize = fileMetadata.getCompressionBufferSize();
  int minAllocSize = HiveConf.getIntVar(conf, HiveConf.ConfVars.LLAP_ORC_CACHE_MIN_ALLOC);
  if (bufferSize < minAllocSize) {
    LOG.warn(
        "ORC compression buffer size (" + bufferSize + ") is smaller than LLAP low-level "
            + "cache minimum allocation size (" + minAllocSize + "). Decrease the value for "
            + HiveConf.ConfVars.LLAP_ORC_CACHE_MIN_ALLOC.toString() + " to avoid wasting memory");
  }
}
@Override
public synchronized void init(HiveConf hiveConf) {
  this.hiveConf = hiveConf;
  sessionManager = new SessionManager(hiveServer2);
  defaultFetchRows = hiveConf.getIntVar(ConfVars.HIVE_SERVER2_RESULTSET_DEFAULT_FETCH_SIZE);
  addService(sessionManager);
  // If the Hadoop cluster is secure, do a Kerberos login for the service from the keytab.
  if (UserGroupInformation.isSecurityEnabled()) {
    try {
      HiveAuthFactory.loginFromKeytab(hiveConf);
      this.serviceUGI = Utils.getUGI();
    } catch (IOException e) {
      throw new ServiceException("Unable to login to kerberos with given principal/keytab", e);
    } catch (LoginException e) {
      throw new ServiceException("Unable to login to kerberos with given principal/keytab", e);
    }

    // Also try creating a UGI object for the SPNego principal.
    String principal = hiveConf.getVar(ConfVars.HIVE_SERVER2_SPNEGO_PRINCIPAL);
    String keyTabFile = hiveConf.getVar(ConfVars.HIVE_SERVER2_SPNEGO_KEYTAB);
    if (principal.isEmpty() || keyTabFile.isEmpty()) {
      LOG.info(
          "SPNego httpUGI not created, spNegoPrincipal: " + principal
              + ", keytabFile: " + keyTabFile);
    } else {
      try {
        this.httpUGI = HiveAuthFactory.loginFromSpnegoKeytabAndReturnUGI(hiveConf);
        LOG.info("SPNego httpUGI successfully created.");
      } catch (IOException e) {
        LOG.warn("SPNego httpUGI creation failed: ", e);
      }
    }
  }
  // Creates a connection to HMS and thus *must* occur after the Kerberos login above.
  try {
    applyAuthorizationConfigPolicy(hiveConf);
  } catch (Exception e) {
    throw new RuntimeException(
        "Error applying authorization policy on hive configuration: " + e.getMessage(), e);
  }
  setupBlockedUdfs();
  super.init(hiveConf);
}
public QueryTracker(Configuration conf, String[] localDirsBase) {
  super("QueryTracker");
  this.localDirsBase = localDirsBase;
  try {
    localFs = FileSystem.getLocal(conf);
  } catch (IOException e) {
    throw new RuntimeException("Failed to setup local filesystem instance", e);
  }
  this.defaultDeleteDelaySeconds =
      HiveConf.getTimeVar(conf, ConfVars.LLAP_FILE_CLEANUP_DELAY_SECONDS, TimeUnit.SECONDS);
  int numCleanerThreads =
      HiveConf.getIntVar(conf, ConfVars.LLAP_DAEMON_NUM_FILE_CLEANER_THREADS);
  this.executorService = Executors.newScheduledThreadPool(
      numCleanerThreads,
      new ThreadFactoryBuilder()
          .setDaemon(true)
          .setNameFormat("QueryFileCleaner %d")
          .build());
}
/**
 * Find out if a job can be run in local mode based on its characteristics.
 *
 * @param conf Hive Configuration
 * @param numReducers total number of reducers for this job
 * @param inputLength the size of the input
 * @param inputFileCount the number of files of input
 * @return null if the job is eligible for local mode, the reason otherwise
 */
public static String isEligibleForLocalMode(
    HiveConf conf, int numReducers, long inputLength, long inputFileCount) {
  long maxBytes = conf.getLongVar(HiveConf.ConfVars.LOCALMODEMAXBYTES);
  long maxInputFiles = conf.getIntVar(HiveConf.ConfVars.LOCALMODEMAXINPUTFILES);

  // Check for max input size.
  if (inputLength > maxBytes) {
    return "Input Size (= " + inputLength + ") is larger than "
        + HiveConf.ConfVars.LOCALMODEMAXBYTES.varname + " (= " + maxBytes + ")";
  }

  // Ideally we would like to do this check based on the number of splits.
  // In the absence of an easy way to get the number of splits, do this
  // based on the total number of files (pessimistically assuming that
  // splits equal the number of files in the worst case).
  if (inputFileCount > maxInputFiles) {
    return "Number of Input Files (= " + inputFileCount + ") is larger than "
        + HiveConf.ConfVars.LOCALMODEMAXINPUTFILES.varname + " (= " + maxInputFiles + ")";
  }

  // Since local mode only runs with 1 reducer, make sure that the
  // number of reducers (set by the user or inferred) is <= 1.
  if (numReducers > 1) {
    return "Number of reducers (= " + numReducers + ") is more than 1";
  }

  return null;
}
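// Hedged illustration (not from Hive): a usage sketch of the check above, written as a
// hypothetical helper that would live in the same class as isEligibleForLocalMode so the
// static call resolves. The input sizes and reducer count are made-up example values.
static boolean shouldRunLocally(HiveConf conf) {
  long inputLength = 32L * 1024 * 1024; // 32 MB of input data
  long inputFileCount = 4;              // four input files
  int numReducers = 1;                  // a single reducer keeps the job eligible

  String reason = isEligibleForLocalMode(conf, numReducers, inputLength, inputFileCount);
  if (reason != null) {
    System.out.println("Not eligible for local mode: " + reason);
    return false;
  }
  return true;
}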
public Result invokeInternal(final Object proxy, final Method method, final Object[] args)
    throws Throwable {

  boolean gotNewConnectUrl = false;
  boolean reloadConf = HiveConf.getBoolVar(origConf, HiveConf.ConfVars.HMSHANDLERFORCERELOADCONF);
  long retryInterval =
      HiveConf.getTimeVar(origConf, HiveConf.ConfVars.HMSHANDLERINTERVAL, TimeUnit.MILLISECONDS);
  int retryLimit = HiveConf.getIntVar(origConf, HiveConf.ConfVars.HMSHANDLERATTEMPTS);
  long timeout = HiveConf.getTimeVar(
      origConf, HiveConf.ConfVars.METASTORE_CLIENT_SOCKET_TIMEOUT, TimeUnit.MILLISECONDS);

  Deadline.registerIfNot(timeout);

  if (reloadConf) {
    MetaStoreInit.updateConnectionURL(origConf, getActiveConf(), null, metaStoreInitData);
  }

  int retryCount = 0;
  Throwable caughtException = null;
  while (true) {
    try {
      if (reloadConf || gotNewConnectUrl) {
        baseHandler.setConf(getActiveConf());
      }
      Object object = null;
      boolean isStarted = Deadline.startTimer(method.getName());
      try {
        object = method.invoke(baseHandler, args);
      } finally {
        if (isStarted) {
          Deadline.stopTimer();
        }
      }
      return new Result(object, retryCount);

    } catch (javax.jdo.JDOException e) {
      caughtException = e;
    } catch (UndeclaredThrowableException e) {
      if (e.getCause() != null) {
        if (e.getCause() instanceof javax.jdo.JDOException) {
          // Due to reflection, the JDO exception is wrapped in the UndeclaredThrowableException.
          caughtException = e.getCause();
        } else if (e.getCause() instanceof MetaException
            && e.getCause().getCause() != null
            && e.getCause().getCause() instanceof javax.jdo.JDOException) {
          // The JDOException may be wrapped further in a MetaException.
          caughtException = e.getCause().getCause();
        } else {
          LOG.error(ExceptionUtils.getStackTrace(e.getCause()));
          throw e.getCause();
        }
      } else {
        LOG.error(ExceptionUtils.getStackTrace(e));
        throw e;
      }
    } catch (InvocationTargetException e) {
      if (e.getCause() instanceof javax.jdo.JDOException) {
        // Due to reflection, the JDO exception is wrapped in the InvocationTargetException.
        caughtException = e.getCause();
      } else if (e.getCause() instanceof NoSuchObjectException
          || e.getTargetException().getCause() instanceof NoSuchObjectException) {
        String methodName = method.getName();
        if (!methodName.startsWith("get_database")
            && !methodName.startsWith("get_table")
            && !methodName.startsWith("get_partition")
            && !methodName.startsWith("get_function")) {
          LOG.error(ExceptionUtils.getStackTrace(e.getCause()));
        }
        throw e.getCause();
      } else if (e.getCause() instanceof MetaException && e.getCause().getCause() != null) {
        if (e.getCause().getCause() instanceof javax.jdo.JDOException
            || e.getCause().getCause() instanceof NucleusException) {
          // The JDOException or the NucleusException may be wrapped further in a MetaException.
          caughtException = e.getCause().getCause();
        } else if (e.getCause().getCause() instanceof DeadlineException) {
          // The DeadlineException needs no retry and should be thrown immediately.
          Deadline.clear();
          LOG.error(
              "Error happens in method " + method.getName() + ": "
                  + ExceptionUtils.getStackTrace(e.getCause()));
          throw e.getCause();
        } else {
          LOG.error(ExceptionUtils.getStackTrace(e.getCause()));
          throw e.getCause();
        }
      } else {
        LOG.error(ExceptionUtils.getStackTrace(e.getCause()));
        throw e.getCause();
      }
    }

    if (retryCount >= retryLimit) {
      LOG.error("HMSHandler Fatal error: " + ExceptionUtils.getStackTrace(caughtException));
      // Since returning exceptions with a nested "cause" can be a problem in
      // Thrift, we are stuffing the stack trace into the message itself.
      throw new MetaException(ExceptionUtils.getStackTrace(caughtException));
    }

    assert (retryInterval >= 0);
    retryCount++;
    LOG.error(
        String.format(
                "Retrying HMSHandler after %d ms (attempt %d of %d)",
                retryInterval, retryCount, retryLimit)
            + " with error: " + ExceptionUtils.getStackTrace(caughtException));

    Thread.sleep(retryInterval);

    // If we have a connection error, the JDO connection URL hook might
    // provide us with a new URL to access the datastore.
    String lastUrl = MetaStoreInit.getConnectionURL(getActiveConf());
    gotNewConnectUrl =
        MetaStoreInit.updateConnectionURL(origConf, getActiveConf(), lastUrl, metaStoreInitData);
  }
}
private LlapIoImpl(Configuration conf) throws IOException {
  String ioMode = HiveConf.getVar(conf, HiveConf.ConfVars.LLAP_IO_MEMORY_MODE);
  boolean useLowLevelCache = LlapIoImpl.MODE_CACHE.equalsIgnoreCase(ioMode);
  boolean useAllocOnly = !useLowLevelCache && LlapIoImpl.MODE_ALLOCATOR.equalsIgnoreCase(ioMode);
  LOG.info("Initializing LLAP IO in {} mode", ioMode);

  String displayName = "LlapDaemonCacheMetrics-" + MetricsUtils.getHostName();
  String sessionId = conf.get("llap.daemon.metrics.sessionid");
  this.cacheMetrics = LlapDaemonCacheMetrics.create(displayName, sessionId);

  displayName = "LlapDaemonQueueMetrics-" + MetricsUtils.getHostName();
  int[] intervals =
      conf.getInts(String.valueOf(HiveConf.ConfVars.LLAP_QUEUE_METRICS_PERCENTILE_INTERVALS));
  this.queueMetrics = LlapDaemonQueueMetrics.create(displayName, sessionId, intervals);

  LOG.info(
      "Started llap daemon metrics with displayName: {} sessionId: {}", displayName, sessionId);

  OrcMetadataCache metadataCache = null;
  LowLevelCacheImpl orcCache = null;
  BufferUsageManager bufferManager = null;
  if (useLowLevelCache) {
    // Memory manager uses the cache policy to trigger evictions, so create the policy first.
    boolean useLrfu = HiveConf.getBoolVar(conf, HiveConf.ConfVars.LLAP_USE_LRFU);
    LowLevelCachePolicy cachePolicy =
        useLrfu ? new LowLevelLrfuCachePolicy(conf) : new LowLevelFifoCachePolicy(conf);
    // Allocator uses the memory manager to request memory, so create the manager next.
    LowLevelCacheMemoryManager memManager =
        new LowLevelCacheMemoryManager(conf, cachePolicy, cacheMetrics);
    // Cache uses the allocator to allocate and deallocate; create the allocator, then the caches.
    EvictionAwareAllocator allocator = new BuddyAllocator(conf, memManager, cacheMetrics);
    this.allocator = allocator;
    orcCache = new LowLevelCacheImpl(cacheMetrics, cachePolicy, allocator, true);
    metadataCache = new OrcMetadataCache(memManager, cachePolicy);
    // And finally the cache policy uses the cache to notify it of evictions. The cycle is complete!
    cachePolicy.setEvictionListener(new EvictionDispatcher(orcCache, metadataCache));
    cachePolicy.setParentDebugDumper(orcCache);
    orcCache.init(); // Start the cache threads.
    bufferManager = orcCache; // Cache also serves as the buffer manager.
  } else {
    if (useAllocOnly) {
      LowLevelCacheMemoryManager memManager =
          new LowLevelCacheMemoryManager(conf, null, cacheMetrics);
      allocator = new BuddyAllocator(conf, memManager, cacheMetrics);
    } else {
      allocator = new SimpleAllocator(conf);
    }
    bufferManager = new SimpleBufferManager(allocator, cacheMetrics);
  }

  // IO thread pool. Listening is used for unhandled errors for now (TODO: remove?).
  int numThreads = HiveConf.getIntVar(conf, HiveConf.ConfVars.LLAP_IO_THREADPOOL_SIZE);
  executor = MoreExecutors.listeningDecorator(
      Executors.newFixedThreadPool(
          numThreads,
          new ThreadFactoryBuilder()
              .setNameFormat("IO-Elevator-Thread-%d")
              .setDaemon(true)
              .build()));

  // TODO: this should depend on the input format and be in a map, or something.
  this.cvp = new OrcColumnVectorProducer(
      metadataCache, orcCache, bufferManager, conf, cacheMetrics, queueMetrics);
  LOG.info("LLAP IO initialized");

  registerMXBeans();
}
private String showJobFailDebugInfo() throws IOException {
  console.printError("Error during job, obtaining debugging information...");
  if (!conf.get("mapred.job.tracker", "local").equals("local")) {
    // Show the tracking URL for remotely running jobs.
    console.printError("Job Tracking URL: " + rj.getTrackingURL());
  }
  // Loop to get all task completion events because getTaskCompletionEvents
  // only returns a subset per call.
  TaskInfoGrabber tlg = new TaskInfoGrabber();
  Thread t = new Thread(tlg);
  try {
    t.start();
    t.join(HiveConf.getIntVar(conf, HiveConf.ConfVars.TASKLOG_DEBUG_TIMEOUT));
  } catch (InterruptedException e) {
    console.printError(
        "Timed out trying to finish grabbing task log URLs, " + "some task info may be missing");
  }

  // Remove failures for tasks that succeeded.
  for (String task : successes) {
    failures.remove(task);
  }

  if (failures.keySet().size() == 0) {
    return null;
  }

  // Find the highest failure count.
  computeMaxFailures();

  // Display the error message for tasks with the highest failure count.
  String jtUrl = null;
  try {
    jtUrl = JobTrackerURLResolver.getURL(conf);
  } catch (Exception e) {
    console.printError("Unable to retrieve URL for Hadoop Task logs. " + e.getMessage());
  }

  String msg = null;
  for (String task : failures.keySet()) {
    if (failures.get(task).intValue() == maxFailures) {
      TaskInfo ti = taskIdToInfo.get(task);
      String jobId = ti.getJobId();
      String taskUrl = (jtUrl == null)
          ? null
          : jtUrl + "/taskdetails.jsp?jobid=" + jobId + "&tipid=" + task.toString();

      TaskLogProcessor tlp = new TaskLogProcessor(conf);
      for (String logUrl : ti.getLogUrls()) {
        tlp.addTaskAttemptLogUrl(logUrl);
      }

      if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.JOB_DEBUG_CAPTURE_STACKTRACES)
          && stackTraces != null) {
        if (!stackTraces.containsKey(jobId)) {
          stackTraces.put(jobId, new ArrayList<List<String>>());
        }
        stackTraces.get(jobId).addAll(tlp.getStackTraces());
      }

      if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.SHOW_JOB_FAIL_DEBUG_INFO)) {
        List<ErrorAndSolution> errors = tlp.getErrors();

        StringBuilder sb = new StringBuilder();
        // We use a StringBuilder and then call printError only once, as
        // printError will write to both stderr and the error log file. In
        // situations where both the stderr and the log file output are
        // simultaneously output to a single stream, this will look cleaner.
        sb.append("\n");
        sb.append("Task with the most failures(" + maxFailures + "): \n");
        sb.append("-----\n");
        sb.append("Task ID:\n " + task + "\n\n");
        if (taskUrl != null) {
          sb.append("URL:\n " + taskUrl + "\n");
        }

        for (ErrorAndSolution e : errors) {
          sb.append("\n");
          sb.append("Possible error:\n " + e.getError() + "\n\n");
          sb.append("Solution:\n " + e.getSolution() + "\n");
        }
        sb.append("-----\n");

        sb.append("Diagnostic Messages for this Task:\n");
        String[] diagMesgs = ti.getDiagnosticMesgs();
        for (String mesg : diagMesgs) {
          sb.append(mesg + "\n");
        }
        msg = sb.toString();
        console.printError(msg);
      }

      // Only print out one task because that's good enough for debugging.
      break;
    }
  }
  return msg;
}
private void run(String[] args) throws Exception {
  LlapOptionsProcessor optionsProcessor = new LlapOptionsProcessor();
  LlapOptions options = optionsProcessor.processOptions(args);
  if (options == null) {
    // Help was printed.
    return;
  }

  Path tmpDir = new Path(options.getDirectory());

  if (conf == null) {
    throw new Exception("Cannot load any configuration to run command");
  }

  FileSystem fs = FileSystem.get(conf);
  FileSystem lfs = FileSystem.getLocal(conf).getRawFileSystem();

  // Needed so that the files are actually loaded into the configuration.
  for (String f : NEEDED_CONFIGS) {
    conf.addResource(f);
    if (conf.getResource(f) == null) {
      throw new Exception("Unable to find required config file: " + f);
    }
  }
  for (String f : OPTIONAL_CONFIGS) {
    conf.addResource(f);
  }
  conf.reloadConfiguration();

  if (options.getName() != null) {
    // Update service registry configs. Caveat: this has nothing to do with the actual settings
    // as read by the AM. If needed, use --hiveconf llap.daemon.service.hosts=@llap0 to
    // dynamically switch between instances.
    conf.set(ConfVars.LLAP_DAEMON_SERVICE_HOSTS.varname, "@" + options.getName());
  }

  if (options.getSize() != -1) {
    if (options.getCache() != -1) {
      Preconditions.checkArgument(
          options.getCache() < options.getSize(),
          "Cache has to be smaller than the container sizing");
    }
    if (options.getXmx() != -1) {
      Preconditions.checkArgument(
          options.getXmx() < options.getSize(),
          "Working memory has to be smaller than the container sizing");
    }
    if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.LLAP_ALLOCATOR_DIRECT)) {
      Preconditions.checkArgument(
          options.getXmx() + options.getCache() < options.getSize(),
          "Working memory + cache has to be smaller than the container sizing");
    }
  }

  final long minAlloc = conf.getInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, -1);
  if (options.getSize() != -1) {
    final long containerSize = options.getSize() / (1024 * 1024);
    Preconditions.checkArgument(
        containerSize >= minAlloc,
        "Container size should be greater than minimum allocation(%s)",
        minAlloc + "m");
    conf.setLong(ConfVars.LLAP_DAEMON_YARN_CONTAINER_MB.varname, containerSize);
  }

  if (options.getExecutors() != -1) {
    conf.setLong(ConfVars.LLAP_DAEMON_NUM_EXECUTORS.varname, options.getExecutors());
    // TODO: vcpu settings - possibly when DRFA works right
  }

  if (options.getCache() != -1) {
    conf.setLong(HiveConf.ConfVars.LLAP_IO_MEMORY_MAX_SIZE.varname, options.getCache());
  }

  if (options.getXmx() != -1) {
    // Needs more explanation here: Xmx is not the max heap value in JDK8. You need to subtract
    // 50% of the survivor fraction from this to get the actual usable memory before it goes
    // into GC.
    conf.setLong(
        ConfVars.LLAP_DAEMON_MEMORY_PER_INSTANCE_MB.varname,
        (long) (options.getXmx()) / (1024 * 1024));
  }

  for (Entry<Object, Object> props : options.getConfig().entrySet()) {
    conf.set((String) props.getKey(), (String) props.getValue());
  }

  URL logger = conf.getResource("llap-daemon-log4j2.properties");
  if (null == logger) {
    throw new Exception("Unable to find required config file: llap-daemon-log4j2.properties");
  }

  Path home = new Path(System.getenv("HIVE_HOME"));
  Path scripts = new Path(new Path(new Path(home, "scripts"), "llap"), "bin");

  if (!lfs.exists(home)) {
    throw new Exception("Unable to find HIVE_HOME: " + home);
  } else if (!lfs.exists(scripts)) {
    LOG.warn("Unable to find llap scripts: " + scripts);
  }

  Path libDir = new Path(tmpDir, "lib");

  String tezLibs = conf.get("tez.lib.uris");
  if (tezLibs == null) {
    LOG.warn("Missing tez.lib.uris in tez-site.xml");
  }
  if (LOG.isDebugEnabled()) {
    LOG.debug("Copying tez libs from " + tezLibs);
  }
  lfs.mkdirs(libDir);
  fs.copyToLocalFile(new Path(tezLibs), new Path(libDir, "tez.tar.gz"));
  CompressionUtils.unTar(new Path(libDir, "tez.tar.gz").toString(), libDir.toString(), true);
  lfs.delete(new Path(libDir, "tez.tar.gz"), false);

  lfs.copyFromLocalFile(new Path(Utilities.jarFinderGetJar(LlapInputFormat.class)), libDir);
  lfs.copyFromLocalFile(new Path(Utilities.jarFinderGetJar(HiveInputFormat.class)), libDir);

  // Copy default aux classes (json/hbase).
  for (String className : DEFAULT_AUX_CLASSES) {
    localizeJarForClass(lfs, libDir, className, false);
  }

  if (options.getIsHBase()) {
    try {
      localizeJarForClass(lfs, libDir, HBASE_SERDE_CLASS, true);
      Job fakeJob = new Job(new JobConf()); // HBase API is convoluted.
      TableMapReduceUtil.addDependencyJars(fakeJob);
      Collection<String> hbaseJars = fakeJob.getConfiguration().getStringCollection("tmpjars");
      for (String jarPath : hbaseJars) {
        if (!jarPath.isEmpty()) {
          lfs.copyFromLocalFile(new Path(jarPath), libDir);
        }
      }
    } catch (Throwable t) {
      String err = "Failed to add HBase jars. Use --auxhbase=false to avoid localizing them";
      LOG.error(err);
      System.err.println(err);
      throw new RuntimeException(t);
    }
  }

  String auxJars = options.getAuxJars();
  if (auxJars != null && !auxJars.isEmpty()) {
    // TODO: transitive dependencies warning?
    String[] jarPaths = auxJars.split(",");
    for (String jarPath : jarPaths) {
      if (!jarPath.isEmpty()) {
        lfs.copyFromLocalFile(new Path(jarPath), libDir);
      }
    }
  }

  Path confPath = new Path(tmpDir, "conf");
  lfs.mkdirs(confPath);

  for (String f : NEEDED_CONFIGS) {
    copyConfig(options, lfs, confPath, f);
  }
  for (String f : OPTIONAL_CONFIGS) {
    try {
      copyConfig(options, lfs, confPath, f);
    } catch (Throwable t) {
      LOG.info("Error getting an optional config " + f + "; ignoring: " + t.getMessage());
    }
  }
  lfs.copyFromLocalFile(new Path(logger.toString()), confPath);

  // Extract configs for processing by the Python fragments in Slider.
  JSONObject configs = new JSONObject();

  configs.put(
      ConfVars.LLAP_DAEMON_YARN_CONTAINER_MB.varname,
      HiveConf.getIntVar(conf, ConfVars.LLAP_DAEMON_YARN_CONTAINER_MB));
  configs.put(
      HiveConf.ConfVars.LLAP_IO_MEMORY_MAX_SIZE.varname,
      HiveConf.getLongVar(conf, HiveConf.ConfVars.LLAP_IO_MEMORY_MAX_SIZE));
  configs.put(
      HiveConf.ConfVars.LLAP_ALLOCATOR_DIRECT.varname,
      HiveConf.getBoolVar(conf, HiveConf.ConfVars.LLAP_ALLOCATOR_DIRECT));
  configs.put(
      ConfVars.LLAP_DAEMON_MEMORY_PER_INSTANCE_MB.varname,
      HiveConf.getIntVar(conf, ConfVars.LLAP_DAEMON_MEMORY_PER_INSTANCE_MB));
  configs.put(
      ConfVars.LLAP_DAEMON_VCPUS_PER_INSTANCE.varname,
      HiveConf.getIntVar(conf, ConfVars.LLAP_DAEMON_VCPUS_PER_INSTANCE));
  configs.put(
      ConfVars.LLAP_DAEMON_NUM_EXECUTORS.varname,
      HiveConf.getIntVar(conf, ConfVars.LLAP_DAEMON_NUM_EXECUTORS));
  configs.put(
      YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB,
      conf.getInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, -1));
  configs.put(
      YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES,
      conf.getInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES, -1));

  FSDataOutputStream os = lfs.create(new Path(tmpDir, "config.json"));
  OutputStreamWriter w = new OutputStreamWriter(os);
  configs.write(w);
  w.close();
  os.close();
  lfs.close();
  fs.close();

  if (LOG.isDebugEnabled()) {
    LOG.debug("Exiting successfully");
  }
}
public PTFPartition(HiveConf cfg, SerDe serDe, StructObjectInspector oI) throws HiveException {
  String partitionClass = HiveConf.getVar(cfg, ConfVars.HIVE_PTF_PARTITION_PERSISTENCE_CLASS);
  int partitionMemSize = HiveConf.getIntVar(cfg, ConfVars.HIVE_PTF_PARTITION_PERSISTENT_SIZE);
  init(partitionClass, partitionMemSize, serDe, oI);
}