public static void main(String[] args) throws Exception { Thread.setDefaultUncaughtExceptionHandler(new LlapDaemonUncaughtExceptionHandler()); LlapDaemon llapDaemon = null; try { // Cache settings will need to be setup in llap-daemon-site.xml - since the daemons don't read // hive-site.xml // Ideally, these properties should be part of LlapDameonConf rather than HiveConf LlapConfiguration daemonConf = new LlapConfiguration(); int numExecutors = daemonConf.getInt( LlapConfiguration.LLAP_DAEMON_NUM_EXECUTORS, LlapConfiguration.LLAP_DAEMON_NUM_EXECUTORS_DEFAULT); String[] localDirs = daemonConf.getTrimmedStrings(LlapConfiguration.LLAP_DAEMON_WORK_DIRS); int rpcPort = daemonConf.getInt( LlapConfiguration.LLAP_DAEMON_RPC_PORT, LlapConfiguration.LLAP_DAEMON_RPC_PORT_DEFAULT); int shufflePort = daemonConf.getInt( ShuffleHandler.SHUFFLE_PORT_CONFIG_KEY, ShuffleHandler.DEFAULT_SHUFFLE_PORT); long executorMemoryBytes = daemonConf.getInt( LlapConfiguration.LLAP_DAEMON_MEMORY_PER_INSTANCE_MB, LlapConfiguration.LLAP_DAEMON_MEMORY_PER_INSTANCE_MB_DEFAULT) * 1024l * 1024l; long cacheMemoryBytes = HiveConf.getLongVar(daemonConf, HiveConf.ConfVars.LLAP_ORC_CACHE_MAX_SIZE); boolean isDirectCache = HiveConf.getBoolVar(daemonConf, HiveConf.ConfVars.LLAP_ORC_CACHE_ALLOCATE_DIRECT); boolean llapIoEnabled = HiveConf.getBoolVar(daemonConf, HiveConf.ConfVars.LLAP_IO_ENABLED); llapDaemon = new LlapDaemon( daemonConf, numExecutors, executorMemoryBytes, llapIoEnabled, isDirectCache, cacheMemoryBytes, localDirs, rpcPort, shufflePort); LOG.info("Adding shutdown hook for LlapDaemon"); ShutdownHookManager.addShutdownHook(new CompositeServiceShutdownHook(llapDaemon), 1); llapDaemon.init(daemonConf); llapDaemon.start(); LOG.info("Started LlapDaemon"); // Relying on the RPC threads to keep the service alive. } catch (Throwable t) { // TODO Replace this with a ExceptionHandler / ShutdownHook LOG.warn("Failed to start LLAP Daemon with exception", t); if (llapDaemon != null) { llapDaemon.shutdown(); } System.exit(-1); } }
/** * Add the StatsTask as a dependent task of the MoveTask because StatsTask will change the * Table/Partition metadata. For atomicity, we should not change it before the data is actually * there done by MoveTask. * * @param nd the FileSinkOperator whose results are taken care of by the MoveTask. * @param mvTask The MoveTask that moves the FileSinkOperator's results. * @param currTask The MapRedTask that the FileSinkOperator belongs to. * @param hconf HiveConf */ private void addStatsTask( FileSinkOperator nd, MoveTask mvTask, Task<? extends Serializable> currTask, HiveConf hconf) { MoveWork mvWork = ((MoveTask) mvTask).getWork(); StatsWork statsWork = null; if (mvWork.getLoadTableWork() != null) { statsWork = new StatsWork(mvWork.getLoadTableWork()); } else if (mvWork.getLoadFileWork() != null) { statsWork = new StatsWork(mvWork.getLoadFileWork()); } assert statsWork != null : "Error when genereting StatsTask"; statsWork.setStatsReliable(hconf.getBoolVar(ConfVars.HIVE_STATS_RELIABLE)); MapredWork mrWork = (MapredWork) currTask.getWork(); // AggKey in StatsWork is used for stats aggregation while StatsAggPrefix // in FileSinkDesc is used for stats publishing. They should be consistent. statsWork.setAggKey(((FileSinkOperator) nd).getConf().getStatsAggPrefix()); Task<? extends Serializable> statsTask = TaskFactory.get(statsWork, hconf); // mark the MapredWork and FileSinkOperator for gathering stats nd.getConf().setGatherStats(true); mrWork.setGatheringStats(true); nd.getConf().setStatsReliable(hconf.getBoolVar(ConfVars.HIVE_STATS_RELIABLE)); nd.getConf() .setMaxStatsKeyPrefixLength(hconf.getIntVar(ConfVars.HIVE_STATS_KEY_PREFIX_MAX_LENGTH)); // mrWork.addDestinationTable(nd.getConf().getTableInfo().getTableName()); // subscribe feeds from the MoveTask so that MoveTask can forward the list // of dynamic partition list to the StatsTask mvTask.addDependentTask(statsTask); statsTask.subscribeFeed(mvTask); }
public ThriftHttpServlet( TProcessor processor, TProtocolFactory protocolFactory, String authType, UserGroupInformation serviceUGI, UserGroupInformation httpUGI, HiveAuthFactory hiveAuthFactory) { super(processor, protocolFactory); this.authType = authType; this.serviceUGI = serviceUGI; this.httpUGI = httpUGI; this.hiveAuthFactory = hiveAuthFactory; this.isCookieAuthEnabled = hiveConf.getBoolVar(ConfVars.HIVE_SERVER2_THRIFT_HTTP_COOKIE_AUTH_ENABLED); // Initialize the cookie based authentication related variables. if (isCookieAuthEnabled) { // Generate the signer with secret. String secret = Long.toString(RAN.nextLong()); LOG.debug("Using the random number as the secret for cookie generation " + secret); this.signer = new CookieSigner(secret.getBytes()); this.cookieMaxAge = (int) hiveConf.getTimeVar( ConfVars.HIVE_SERVER2_THRIFT_HTTP_COOKIE_MAX_AGE, TimeUnit.SECONDS); this.cookieDomain = hiveConf.getVar(ConfVars.HIVE_SERVER2_THRIFT_HTTP_COOKIE_DOMAIN); this.cookiePath = hiveConf.getVar(ConfVars.HIVE_SERVER2_THRIFT_HTTP_COOKIE_PATH); // always send secure cookies for SSL mode this.isCookieSecure = hiveConf.getBoolVar(ConfVars.HIVE_SERVER2_USE_SSL); this.isHttpOnlyCookie = hiveConf.getBoolVar(ConfVars.HIVE_SERVER2_THRIFT_HTTP_COOKIE_IS_HTTPONLY); } }
/** * File Sink Operator encountered. * * @param nd the file sink operator encountered * @param opProcCtx context */ public Object process( Node nd, Stack<Node> stack, NodeProcessorCtx opProcCtx, Object... nodeOutputs) throws SemanticException { GenMRProcContext ctx = (GenMRProcContext) opProcCtx; ParseContext parseCtx = ctx.getParseCtx(); boolean chDir = false; Task<? extends Serializable> currTask = ctx.getCurrTask(); FileSinkOperator fsOp = (FileSinkOperator) nd; boolean isInsertTable = // is INSERT OVERWRITE TABLE fsOp.getConf().getTableInfo().getTableName() != null && parseCtx.getQB().getParseInfo().isInsertToTable(); HiveConf hconf = parseCtx.getConf(); // Has the user enabled merging of files for map-only jobs or for all jobs if ((ctx.getMvTask() != null) && (!ctx.getMvTask().isEmpty())) { List<Task<? extends Serializable>> mvTasks = ctx.getMvTask(); // In case of unions or map-joins, it is possible that the file has // already been seen. // So, no need to attempt to merge the files again. if ((ctx.getSeenFileSinkOps() == null) || (!ctx.getSeenFileSinkOps().contains(nd))) { // no need of merging if the move is to a local file system MoveTask mvTask = (MoveTask) findMoveTask(mvTasks, fsOp); if (isInsertTable && hconf.getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) { addStatsTask(fsOp, mvTask, currTask, parseCtx.getConf()); } if ((mvTask != null) && !mvTask.isLocal()) { // There are separate configuration parameters to control whether to // merge for a map-only job // or for a map-reduce job MapredWork currWork = (MapredWork) currTask.getWork(); boolean mergeMapOnly = hconf.getBoolVar(HiveConf.ConfVars.HIVEMERGEMAPFILES) && currWork.getReducer() == null; boolean mergeMapRed = hconf.getBoolVar(HiveConf.ConfVars.HIVEMERGEMAPREDFILES) && currWork.getReducer() != null; if (mergeMapOnly || mergeMapRed) { chDir = true; } } } } String finalName = processFS(nd, stack, opProcCtx, chDir); // need to merge the files in the destination table/partitions if (chDir && (finalName != null)) { createMergeJob((FileSinkOperator) nd, ctx, finalName); } return null; }
/** * create a new plan and return. * * @return the new plan */ public static MapredWork getMapRedWork(HiveConf conf) { MapredWork work = new MapredWork(); // This code has been only added for testing boolean mapperCannotSpanPartns = conf.getBoolVar(HiveConf.ConfVars.HIVE_MAPPER_CANNOT_SPAN_MULTIPLE_PARTITIONS); work.setMapperCannotSpanPartns(mapperCannotSpanPartns); work.setPathToAliases(new LinkedHashMap<String, ArrayList<String>>()); work.setPathToPartitionInfo(new LinkedHashMap<String, PartitionDesc>()); work.setAliasToWork(new LinkedHashMap<String, Operator<? extends Serializable>>()); work.setTagToValueDesc(new ArrayList<TableDesc>()); work.setReducer(null); work.setHadoopSupportsSplittable( conf.getBoolVar(HiveConf.ConfVars.HIVE_COMBINE_INPUT_FORMAT_SUPPORTS_SPLITTABLE)); return work; }
public int processLine(String line) { int lastRet = 0, ret = 0; String command = ""; for (String oneCmd : line.split(";")) { if (StringUtils.endsWith(oneCmd, "\\")) { command += StringUtils.chop(oneCmd) + ";"; continue; } else { command += oneCmd; } if (StringUtils.isBlank(command)) continue; try { ret = processCmd(command); } catch (TException e) { e.printStackTrace(); } catch (HiveException e) { e.printStackTrace(); } command = ""; lastRet = ret; boolean ignoreErrors = HiveConf.getBoolVar(conf, HiveConf.ConfVars.CLIIGNOREERRORS); if (ret != 0 && !ignoreErrors) { return ret; } } return lastRet; }
public boolean getIsSilent() { if (conf != null) { return conf.getBoolVar(HiveConf.ConfVars.HIVESESSIONSILENT); } else { return isSilent; } }
public RowSet getOperationLogRowSet( OperationHandle opHandle, FetchOrientation orientation, long maxRows, HiveConf hConf) throws HiveSQLException { TableSchema tableSchema = new TableSchema(getLogSchema()); RowSet rowSet = RowSetFactory.create(tableSchema, getOperation(opHandle).getProtocolVersion()); if (hConf.getBoolVar(ConfVars.HIVE_SERVER2_LOGGING_OPERATION_ENABLED) == false) { LOG.warn( "Try to get operation log when hive.server2.logging.operation.enabled is false, no log will be returned. "); return rowSet; } // get the OperationLog object from the operation OperationLog operationLog = getOperation(opHandle).getOperationLog(); if (operationLog == null) { throw new HiveSQLException("Couldn't find log associated with operation handle: " + opHandle); } // read logs List<String> logs; try { logs = operationLog.readOperationLog(isFetchFirst(orientation), maxRows); } catch (SQLException e) { throw new HiveSQLException(e.getMessage(), e.getCause()); } // convert logs to RowSet for (String log : logs) { rowSet.addRow(new String[] {log}); } return rowSet; }
public void closeSession(SessionHandle sessionHandle) throws HiveSQLException { HiveSession session = handleToSession.remove(sessionHandle); if (session == null) { throw new HiveSQLException("Session does not exist!"); } session.close(); // Shutdown HiveServer2 if it has been deregistered from ZooKeeper and has no active sessions if (!(hiveServer2 == null) && (hiveConf.getBoolVar(ConfVars.HIVE_SERVER2_SUPPORT_DYNAMIC_SERVICE_DISCOVERY)) && (!hiveServer2.isRegisteredWithZooKeeper())) { // Asynchronously shutdown this instance of HiveServer2, // if there are no active client sessions if (getOpenSessionCount() == 0) { LOG.info( "This instance of HiveServer2 has been removed from the list of server " + "instances available for dynamic service discovery. " + "The last client session has ended - will shutdown now."); Thread shutdownThread = new Thread() { @Override public void run() { hiveServer2.stop(); } }; shutdownThread.start(); } } }
/** Update the options after connection is established in CLI mode. */ public void updateBeeLineOptsFromConf() { if (!beeLine.isBeeLine()) { if (conf == null) { conf = beeLine.getCommands().getHiveConf(false); } setForce(HiveConf.getBoolVar(conf, HiveConf.ConfVars.CLIIGNOREERRORS)); } }
public boolean getShowHeader() { if (beeLine.isBeeLine()) { return showHeader; } else { boolean header; HiveConf conf = beeLine.getCommands().getHiveConf(true); header = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_CLI_PRINT_HEADER); return header; } }
@Override public synchronized void init(HiveConf hiveConf) { this.hiveConf = hiveConf; // Create operation log root directory, if operation logging is enabled if (hiveConf.getBoolVar(ConfVars.HIVE_SERVER2_LOGGING_OPERATION_ENABLED)) { initOperationLogRootDir(); } createBackgroundOperationPool(); addService(operationManager); super.init(hiveConf); }
/* * Helper function to create JobConf for specific ReduceWork. */ private JobConf initializeVertexConf(JobConf baseConf, ReduceWork reduceWork) { JobConf conf = new JobConf(baseConf); conf.set("mapred.reducer.class", ExecReducer.class.getName()); boolean useSpeculativeExecReducers = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVESPECULATIVEEXECREDUCERS); HiveConf.setBoolVar( conf, HiveConf.ConfVars.HADOOPSPECULATIVEEXECREDUCERS, useSpeculativeExecReducers); return conf; }
/** * Create a vertex from a given work object. * * @param conf JobConf to be used to this execution unit * @param work The instance of BaseWork representing the actual work to be performed by this * vertex. * @param scratchDir HDFS scratch dir for this execution unit. * @param list * @param appJarLr Local resource for hive-exec. * @param additionalLr * @param fileSystem FS corresponding to scratchDir and LocalResources * @param ctx This query's context * @return Vertex */ public Vertex createVertex( JobConf conf, BaseWork work, Path scratchDir, LocalResource appJarLr, List<LocalResource> additionalLr, FileSystem fileSystem, Context ctx, boolean hasChildren, TezWork tezWork) throws Exception { Vertex v = null; // simply dispatch the call to the right method for the actual (sub-) type of // BaseWork. if (work instanceof MapWork) { v = createVertex( conf, (MapWork) work, appJarLr, additionalLr, fileSystem, scratchDir, ctx, tezWork); } else if (work instanceof ReduceWork) { v = createVertex( conf, (ReduceWork) work, appJarLr, additionalLr, fileSystem, scratchDir, ctx); } else { // something is seriously wrong if this is happening throw new HiveException(ErrorMsg.GENERIC_ERROR.getErrorCodedMsg()); } // initialize stats publisher if necessary if (work.isGatheringStats()) { StatsPublisher statsPublisher; StatsFactory factory = StatsFactory.newFactory(conf); if (factory != null) { statsPublisher = factory.getStatsPublisher(); if (!statsPublisher.init(conf)) { // creating stats table if not exists if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_RELIABLE)) { throw new HiveException( ErrorMsg.STATSPUBLISHER_INITIALIZATION_ERROR.getErrorCodedMsg()); } } } } // final vertices need to have at least one output if (!hasChildren) { v.addOutput( "out_" + work.getName(), new OutputDescriptor(MROutput.class.getName()) .setUserPayload(MRHelpers.createUserPayloadFromConf(conf))); } return v; }
public static boolean checkAndSetAsyncLogging(final Configuration conf) { final boolean asyncLogging = HiveConf.getBoolVar(conf, ConfVars.HIVE_ASYNC_LOG_ENABLED); if (asyncLogging) { System.setProperty( "Log4jContextSelector", "org.apache.logging.log4j.core.async.AsyncLoggerContextSelector"); // default is ClassLoaderContextSelector which is created during automatic logging // initialization in a static initialization block. // Changing ContextSelector at runtime requires creating new context factory which will // internally create new context selector based on system property. LogManager.setFactory(new Log4jContextFactory()); } return asyncLogging; }
public SessionState(HiveConf conf, String userName) { this.conf = conf; this.userName = userName; isSilent = conf.getBoolVar(HiveConf.ConfVars.HIVESESSIONSILENT); ls = new LineageState(); // Must be deterministic order map for consistent q-test output across Java versions overriddenConfigurations = new LinkedHashMap<String, String>(); overriddenConfigurations.putAll(HiveConf.getConfSystemProperties()); // if there isn't already a session name, go ahead and create it. if (StringUtils.isEmpty(conf.getVar(HiveConf.ConfVars.HIVESESSIONID))) { conf.setVar(HiveConf.ConfVars.HIVESESSIONID, makeSessionId()); } parentLoader = JavaUtils.getClassLoader(); }
@Override public synchronized void init(HiveConf hiveConf) { if (hiveConf.getBoolVar(HiveConf.ConfVars.HIVE_SERVER2_LOGGING_OPERATION_ENABLED)) { initOperationLogCapture( hiveConf.getVar(HiveConf.ConfVars.HIVE_SERVER2_LOGGING_OPERATION_LEVEL)); } else { LOG.debug("Operation level logging is turned off"); } if (hiveConf.isWebUiQueryInfoCacheEnabled()) { historicSqlOperations = new SQLOperationDisplayCache( hiveConf.getIntVar(ConfVars.HIVE_SERVER2_WEBUI_MAX_HISTORIC_QUERIES)); } super.init(hiveConf); }
private void generateJDOFilterOverPartitions( Configuration conf, Table table, Map<String, Object> params, FilterBuilder filterBuilder) throws MetaException { int partitionColumnCount = table.getPartitionKeys().size(); int partitionColumnIndex = getPartColIndexForFilter(table, filterBuilder); if (filterBuilder.hasError()) return; boolean canPushDownIntegral = HiveConf.getBoolVar(conf, HiveConf.ConfVars.METASTORE_INTEGER_JDO_PUSHDOWN); String valueAsString = getJdoFilterPushdownParam( table, partitionColumnIndex, filterBuilder, canPushDownIntegral); if (filterBuilder.hasError()) return; String paramName = PARAM_PREFIX + params.size(); params.put(paramName, valueAsString); boolean isOpEquals = operator == Operator.EQUALS; if (isOpEquals || operator == Operator.NOTEQUALS || operator == Operator.NOTEQUALS2) { makeFilterForEquals( keyName, valueAsString, paramName, params, partitionColumnIndex, partitionColumnCount, isOpEquals, filterBuilder); return; } // get the value for a partition key form MPartition.values (PARTITION_KEY_VALUES) String valString = "values.get(" + partitionColumnIndex + ")"; if (operator == Operator.LIKE) { if (isReverseOrder) { // For LIKE, the value should be on the RHS. filterBuilder.setError( "Value should be on the RHS for LIKE operator : Key <" + keyName + ">"); } // TODO: in all likelihood, this won't actually work. Keep it for backward compat. filterBuilder.append(" " + valString + "." + operator.getJdoOp() + "(" + paramName + ") "); } else { filterBuilder.append( isReverseOrder ? paramName + " " + operator.getJdoOp() + " " + valString : " " + valString + " " + operator.getJdoOp() + " " + paramName); } }
/** * Test setting {@link HiveConf.ConfVars}} config parameter HIVE_SERVER2_ENABLE_DOAS for unsecure * mode */ public void testDoAsSetting() { HiveConf hconf = new HiveConf(); assertTrue( "default value of hive server2 doAs should be true", hconf.getBoolVar(ConfVars.HIVE_SERVER2_ENABLE_DOAS)); CLIService cliService = new CLIService(); cliService.init(hconf); ThriftCLIService tcliService = new ThriftCLIService(cliService); tcliService.init(hconf); TProcessorFactory procFactory = PlainSaslHelper.getPlainProcessorFactory(tcliService); assertEquals( "doAs enabled processor for unsecure mode", procFactory.getProcessor(null).getClass(), TUGIContainingProcessor.class); }
/** * Create a MapReduce job for a particular partition if Hadoop version is pre 0.20, otherwise * create a Map-only job using CombineHiveInputFormat for all partitions. * * @param fsOp The FileSink operator. * @param ctx The MR processing context. * @param finalName the final destination path the merge job should output. * @throws SemanticException */ private void createMergeJob(FileSinkOperator fsOp, GenMRProcContext ctx, String finalName) throws SemanticException { // if the hadoop version support CombineFileInputFormat (version >= 0.20), // create a Map-only job for merge, otherwise create a MapReduce merge job. ParseContext parseCtx = ctx.getParseCtx(); HiveConf conf = parseCtx.getConf(); if (conf.getBoolVar(HiveConf.ConfVars.HIVEMERGEMAPONLY) && Utilities.supportCombineFileInputFormat()) { // create Map-only merge job createMap4Merge(fsOp, ctx, finalName); LOG.info("use CombineHiveInputformat for the merge job"); } else { createMapReduce4Merge(fsOp, ctx, finalName); LOG.info("use HiveInputFormat for the merge job"); } }
public static SplitLocationProvider getSplitLocationProvider(Configuration conf, Logger LOG) throws IOException { boolean useCustomLocations = HiveConf.getBoolVar(conf, HiveConf.ConfVars.LLAP_CLIENT_CONSISTENT_SPLITS); SplitLocationProvider splitLocationProvider; LOG.info("SplitGenerator using llap affinitized locations: " + useCustomLocations); if (useCustomLocations) { LlapRegistryService serviceRegistry; serviceRegistry = LlapRegistryService.getClient(conf); Collection<ServiceInstance> serviceInstances = serviceRegistry.getInstances().getAllInstancesOrdered(true); ArrayList<String> locations = new ArrayList<>(serviceInstances.size()); for (ServiceInstance serviceInstance : serviceInstances) { if (LOG.isDebugEnabled()) { LOG.debug( "Adding " + serviceInstance.getWorkerIdentity() + " with hostname=" + serviceInstance.getHost() + " to list for split locations"); } locations.add(serviceInstance.getHost()); } splitLocationProvider = new HostAffinitySplitLocationProvider(locations); } else { splitLocationProvider = new SplitLocationProvider() { @Override public String[] getLocations(InputSplit split) throws IOException { if (split == null) { return null; } String[] locations = split.getLocations(); if (locations != null && locations.length == 1) { if ("localhost".equals(locations[0])) { return ArrayUtils.EMPTY_STRING_ARRAY; } } return locations; } }; } return splitLocationProvider; }
private FunctionInfo getQualifiedFunctionInfoUnderLock(String qualifiedName) throws SemanticException { FunctionInfo info = mFunctions.get(qualifiedName); if (info != null && info.isBlockedFunction()) { throw new SemanticException("UDF " + qualifiedName + " is not allowed"); } if (!isNative && info != null && info.isDiscarded()) { // the persistent function is discarded. try reload mFunctions.remove(qualifiedName); return null; } // HIVE-6672: In HiveServer2 the JARs for this UDF may have been loaded by a different thread, // and the current thread may not be able to resolve the UDF. Test for this condition // and if necessary load the JARs in this thread. if (isNative && info != null && info.isPersistent()) { return registerToSessionRegistry(qualifiedName, info); } if (info != null || !isNative) { return info; // We have the UDF, or we are in the session registry (or both). } // If we are in the system registry and this feature is enabled, try to get it from metastore. SessionState ss = SessionState.get(); HiveConf conf = (ss == null) ? null : ss.getConf(); if (conf == null || !HiveConf.getBoolVar(conf, ConfVars.HIVE_ALLOW_UDF_LOAD_ON_DEMAND)) { return null; } // This is a little bit weird. We'll do the MS call outside of the lock. Our caller calls us // under lock, so we'd preserve the lock state for them; their finally block will release the // lock correctly. See the comment on the lock field - the locking needs to be reworked. lock.unlock(); try { return getFunctionInfoFromMetastoreNoLock(qualifiedName, conf); } finally { lock.lock(); } }
private LlapIoImpl(Configuration conf) throws IOException { String ioMode = HiveConf.getVar(conf, HiveConf.ConfVars.LLAP_IO_MEMORY_MODE); boolean useLowLevelCache = LlapIoImpl.MODE_CACHE.equalsIgnoreCase(ioMode), useAllocOnly = !useLowLevelCache && LlapIoImpl.MODE_ALLOCATOR.equalsIgnoreCase(ioMode); LOG.info("Initializing LLAP IO in {} mode", ioMode); String displayName = "LlapDaemonCacheMetrics-" + MetricsUtils.getHostName(); String sessionId = conf.get("llap.daemon.metrics.sessionid"); this.cacheMetrics = LlapDaemonCacheMetrics.create(displayName, sessionId); displayName = "LlapDaemonQueueMetrics-" + MetricsUtils.getHostName(); int[] intervals = conf.getInts(String.valueOf(HiveConf.ConfVars.LLAP_QUEUE_METRICS_PERCENTILE_INTERVALS)); this.queueMetrics = LlapDaemonQueueMetrics.create(displayName, sessionId, intervals); LOG.info( "Started llap daemon metrics with displayName: {} sessionId: {}", displayName, sessionId); OrcMetadataCache metadataCache = null; LowLevelCacheImpl orcCache = null; BufferUsageManager bufferManager = null; if (useLowLevelCache) { // Memory manager uses cache policy to trigger evictions, so create the policy first. boolean useLrfu = HiveConf.getBoolVar(conf, HiveConf.ConfVars.LLAP_USE_LRFU); LowLevelCachePolicy cachePolicy = useLrfu ? new LowLevelLrfuCachePolicy(conf) : new LowLevelFifoCachePolicy(conf); // Allocator uses memory manager to request memory, so create the manager next. LowLevelCacheMemoryManager memManager = new LowLevelCacheMemoryManager(conf, cachePolicy, cacheMetrics); // Cache uses allocator to allocate and deallocate, create allocator and then caches. EvictionAwareAllocator allocator = new BuddyAllocator(conf, memManager, cacheMetrics); this.allocator = allocator; orcCache = new LowLevelCacheImpl(cacheMetrics, cachePolicy, allocator, true); metadataCache = new OrcMetadataCache(memManager, cachePolicy); // And finally cache policy uses cache to notify it of eviction. The cycle is complete! cachePolicy.setEvictionListener(new EvictionDispatcher(orcCache, metadataCache)); cachePolicy.setParentDebugDumper(orcCache); orcCache.init(); // Start the cache threads. bufferManager = orcCache; // Cache also serves as buffer manager. } else { if (useAllocOnly) { LowLevelCacheMemoryManager memManager = new LowLevelCacheMemoryManager(conf, null, cacheMetrics); allocator = new BuddyAllocator(conf, memManager, cacheMetrics); } else { allocator = new SimpleAllocator(conf); } bufferManager = new SimpleBufferManager(allocator, cacheMetrics); } // IO thread pool. Listening is used for unhandled errors for now (TODO: remove?) int numThreads = HiveConf.getIntVar(conf, HiveConf.ConfVars.LLAP_IO_THREADPOOL_SIZE); executor = MoreExecutors.listeningDecorator( Executors.newFixedThreadPool( numThreads, new ThreadFactoryBuilder() .setNameFormat("IO-Elevator-Thread-%d") .setDaemon(true) .build())); // TODO: this should depends on input format and be in a map, or something. this.cvp = new OrcColumnVectorProducer( metadataCache, orcCache, bufferManager, conf, cacheMetrics, queueMetrics); LOG.info("LLAP IO initialized"); registerMXBeans(); }
/** * File Sink Operator encountered. * * @param nd the file sink operator encountered * @param opProcCtx context */ public Object process( Node nd, Stack<Node> stack, NodeProcessorCtx opProcCtx, Object... nodeOutputs) throws SemanticException { GenMRProcContext ctx = (GenMRProcContext) opProcCtx; ParseContext parseCtx = ctx.getParseCtx(); boolean chDir = false; Task<? extends Serializable> currTask = ctx.getCurrTask(); FileSinkOperator fsOp = (FileSinkOperator) nd; boolean isInsertTable = // is INSERT OVERWRITE TABLE fsOp.getConf().getTableInfo().getTableName() != null && parseCtx.getQB().getParseInfo().isInsertToTable(); HiveConf hconf = parseCtx.getConf(); // Mark this task as a final map reduce task (ignoring the optional merge task) ((MapredWork) currTask.getWork()).setFinalMapRed(true); // If this file sink desc has been processed due to a linked file sink desc, // use that task Map<FileSinkDesc, Task<? extends Serializable>> fileSinkDescs = ctx.getLinkedFileDescTasks(); if (fileSinkDescs != null) { Task<? extends Serializable> childTask = fileSinkDescs.get(fsOp.getConf()); processLinkedFileDesc(ctx, childTask); return null; } // Has the user enabled merging of files for map-only jobs or for all jobs if ((ctx.getMvTask() != null) && (!ctx.getMvTask().isEmpty())) { List<Task<MoveWork>> mvTasks = ctx.getMvTask(); // In case of unions or map-joins, it is possible that the file has // already been seen. // So, no need to attempt to merge the files again. if ((ctx.getSeenFileSinkOps() == null) || (!ctx.getSeenFileSinkOps().contains(nd))) { // no need of merging if the move is to a local file system MoveTask mvTask = (MoveTask) findMoveTask(mvTasks, fsOp); if (isInsertTable && hconf.getBoolVar(ConfVars.HIVESTATSAUTOGATHER)) { addStatsTask(fsOp, mvTask, currTask, parseCtx.getConf()); } if ((mvTask != null) && !mvTask.isLocal() && fsOp.getConf().canBeMerged()) { if (fsOp.getConf().isLinkedFileSink()) { // If the user has HIVEMERGEMAPREDFILES set to false, the idea was the // number of reducers are few, so the number of files anyway are small. // However, with this optimization, we are increasing the number of files // possibly by a big margin. So, merge aggresively. if (hconf.getBoolVar(ConfVars.HIVEMERGEMAPFILES) || hconf.getBoolVar(ConfVars.HIVEMERGEMAPREDFILES)) { chDir = true; } } else { // There are separate configuration parameters to control whether to // merge for a map-only job // or for a map-reduce job MapredWork currWork = (MapredWork) currTask.getWork(); boolean mergeMapOnly = hconf.getBoolVar(ConfVars.HIVEMERGEMAPFILES) && currWork.getReducer() == null; boolean mergeMapRed = hconf.getBoolVar(ConfVars.HIVEMERGEMAPREDFILES) && currWork.getReducer() != null; if (mergeMapOnly || mergeMapRed) { chDir = true; } } } } } String finalName = processFS(fsOp, stack, opProcCtx, chDir); if (chDir) { // Merge the files in the destination table/partitions by creating Map-only merge job // If underlying data is RCFile or OrcFile a BlockMerge task would be created. LOG.info("using CombineHiveInputformat for the merge job"); createMRWorkForMergingFiles(fsOp, ctx, finalName); } FileSinkDesc fileSinkDesc = fsOp.getConf(); if (fileSinkDesc.isLinkedFileSink()) { Map<FileSinkDesc, Task<? extends Serializable>> linkedFileDescTasks = ctx.getLinkedFileDescTasks(); if (linkedFileDescTasks == null) { linkedFileDescTasks = new HashMap<FileSinkDesc, Task<? extends Serializable>>(); ctx.setLinkedFileDescTasks(linkedFileDescTasks); } // The child tasks may be null in case of a select if ((currTask.getChildTasks() != null) && (currTask.getChildTasks().size() == 1)) { for (FileSinkDesc fileDesc : fileSinkDesc.getLinkedFileSinkDesc()) { linkedFileDescTasks.put(fileDesc, currTask.getChildTasks().get(0)); } } } return null; }
/* * Helper function to create Vertex from MapWork. */ private Vertex createVertex( JobConf conf, MapWork mapWork, LocalResource appJarLr, List<LocalResource> additionalLr, FileSystem fs, Path mrScratchDir, Context ctx, TezWork tezWork) throws Exception { Path tezDir = getTezDir(mrScratchDir); // set up the operator plan Utilities.setMapWork(conf, mapWork, mrScratchDir, false); // create the directories FileSinkOperators need Utilities.createTmpDirs(conf, mapWork); // Tez ask us to call this even if there's no preceding vertex MultiStageMRConfToTezTranslator.translateVertexConfToTez(conf, null); // finally create the vertex Vertex map = null; // use tez to combine splits boolean useTezGroupedSplits = false; int numTasks = -1; Class amSplitGeneratorClass = null; InputSplitInfo inputSplitInfo = null; Class inputFormatClass = conf.getClass("mapred.input.format.class", InputFormat.class); boolean vertexHasCustomInput = false; if (tezWork != null) { for (BaseWork baseWork : tezWork.getParents(mapWork)) { if (tezWork.getEdgeType(baseWork, mapWork) == EdgeType.CUSTOM_EDGE) { vertexHasCustomInput = true; } } } if (vertexHasCustomInput) { useTezGroupedSplits = false; // grouping happens in execution phase. Setting the class to TezGroupedSplitsInputFormat // here would cause pre-mature grouping which would be incorrect. inputFormatClass = HiveInputFormat.class; conf.setClass("mapred.input.format.class", HiveInputFormat.class, InputFormat.class); // mapreduce.tez.input.initializer.serialize.event.payload should be set to false when using // this plug-in to avoid getting a serialized event at run-time. conf.setBoolean("mapreduce.tez.input.initializer.serialize.event.payload", false); } else { // we'll set up tez to combine spits for us iff the input format // is HiveInputFormat if (inputFormatClass == HiveInputFormat.class) { useTezGroupedSplits = true; conf.setClass( "mapred.input.format.class", TezGroupedSplitsInputFormat.class, InputFormat.class); } } if (HiveConf.getBoolVar(conf, ConfVars.HIVE_AM_SPLIT_GENERATION)) { // if we're generating the splits in the AM, we just need to set // the correct plugin. amSplitGeneratorClass = MRInputAMSplitGenerator.class; } else { // client side split generation means we have to compute them now inputSplitInfo = MRHelpers.generateInputSplits( conf, new Path(tezDir, "split_" + mapWork.getName().replaceAll(" ", "_"))); numTasks = inputSplitInfo.getNumTasks(); } byte[] serializedConf = MRHelpers.createUserPayloadFromConf(conf); map = new Vertex( mapWork.getName(), new ProcessorDescriptor(MapTezProcessor.class.getName()).setUserPayload(serializedConf), numTasks, getContainerResource(conf)); Map<String, String> environment = new HashMap<String, String>(); MRHelpers.updateEnvironmentForMRTasks(conf, environment, true); map.setTaskEnvironment(environment); map.setJavaOpts(getContainerJavaOpts(conf)); assert mapWork.getAliasToWork().keySet().size() == 1; String alias = mapWork.getAliasToWork().keySet().iterator().next(); byte[] mrInput = null; if (useTezGroupedSplits) { mrInput = MRHelpers.createMRInputPayloadWithGrouping( serializedConf, HiveInputFormat.class.getName()); } else { mrInput = MRHelpers.createMRInputPayload(serializedConf, null); } map.addInput( alias, new InputDescriptor(MRInputLegacy.class.getName()).setUserPayload(mrInput), amSplitGeneratorClass); Map<String, LocalResource> localResources = new HashMap<String, LocalResource>(); localResources.put(getBaseName(appJarLr), appJarLr); for (LocalResource lr : additionalLr) { localResources.put(getBaseName(lr), lr); } if (inputSplitInfo != null) { // only relevant for client-side split generation map.setTaskLocationsHint(inputSplitInfo.getTaskLocationHints()); MRHelpers.updateLocalResourcesForInputSplits( FileSystem.get(conf), inputSplitInfo, localResources); } map.setTaskLocalResources(localResources); return map; }
/** * @param fsInput The FileSink operator. * @param ctx The MR processing context. * @param finalName the final destination path the merge job should output. * @throws SemanticException * <p>create a Map-only merge job using CombineHiveInputFormat for all partitions with * following operators: MR job J0: ... | v FileSinkOperator_1 (fsInput) | v Merge job J1: | v * TableScan (using CombineHiveInputFormat) (tsMerge) | v FileSinkOperator (fsMerge) * <p>Here the pathToPartitionInfo & pathToAlias will remain the same, which means the paths * do not contain the dynamic partitions (their parent). So after the dynamic partitions are * created (after the first job finished before the moveTask or ConditionalTask start), we * need to change the pathToPartitionInfo & pathToAlias to include the dynamic partition * directories. */ private void createMRWorkForMergingFiles( FileSinkOperator fsInput, GenMRProcContext ctx, String finalName) throws SemanticException { // // 1. create the operator tree // HiveConf conf = ctx.getParseCtx().getConf(); FileSinkDesc fsInputDesc = fsInput.getConf(); // Create a TableScan operator RowSchema inputRS = fsInput.getSchema(); Operator<? extends OperatorDesc> tsMerge = OperatorFactory.get(TableScanDesc.class, inputRS); // Create a FileSink operator TableDesc ts = (TableDesc) fsInputDesc.getTableInfo().clone(); FileSinkDesc fsOutputDesc = new FileSinkDesc(finalName, ts, conf.getBoolVar(ConfVars.COMPRESSRESULT)); FileSinkOperator fsOutput = (FileSinkOperator) OperatorFactory.getAndMakeChild(fsOutputDesc, inputRS, tsMerge); // If the input FileSinkOperator is a dynamic partition enabled, the tsMerge input schema // needs to include the partition column, and the fsOutput should have // a DynamicPartitionCtx to indicate that it needs to dynamically partitioned. DynamicPartitionCtx dpCtx = fsInputDesc.getDynPartCtx(); if (dpCtx != null && dpCtx.getNumDPCols() > 0) { // adding DP ColumnInfo to the RowSchema signature ArrayList<ColumnInfo> signature = inputRS.getSignature(); String tblAlias = fsInputDesc.getTableInfo().getTableName(); LinkedHashMap<String, String> colMap = new LinkedHashMap<String, String>(); StringBuilder partCols = new StringBuilder(); for (String dpCol : dpCtx.getDPColNames()) { ColumnInfo colInfo = new ColumnInfo( dpCol, TypeInfoFactory.stringTypeInfo, // all partition column type should be string tblAlias, true); // partition column is virtual column signature.add(colInfo); colMap.put(dpCol, dpCol); // input and output have the same column name partCols.append(dpCol).append('/'); } partCols.setLength(partCols.length() - 1); // remove the last '/' inputRS.setSignature(signature); // create another DynamicPartitionCtx, which has a different input-to-DP column mapping DynamicPartitionCtx dpCtx2 = new DynamicPartitionCtx(dpCtx); dpCtx2.setInputToDPCols(colMap); fsOutputDesc.setDynPartCtx(dpCtx2); // update the FileSinkOperator to include partition columns fsInputDesc .getTableInfo() .getProperties() .setProperty( org.apache .hadoop .hive .metastore .api .hive_metastoreConstants .META_TABLE_PARTITION_COLUMNS, partCols.toString()); // list of dynamic partition column names } else { // non-partitioned table fsInputDesc .getTableInfo() .getProperties() .remove( org.apache .hadoop .hive .metastore .api .hive_metastoreConstants .META_TABLE_PARTITION_COLUMNS); } // // 2. Constructing a conditional task consisting of a move task and a map reduce task // MoveWork dummyMv = new MoveWork( null, null, null, new LoadFileDesc(fsInputDesc.getFinalDirName(), finalName, true, null, null), false); MapredWork cplan; if (conf.getBoolVar(ConfVars.HIVEMERGERCFILEBLOCKLEVEL) && fsInputDesc.getTableInfo().getInputFileFormatClass().equals(RCFileInputFormat.class)) { // Check if InputFormatClass is valid String inputFormatClass = conf.getVar(ConfVars.HIVEMERGERCFILEINPUTFORMATBLOCKLEVEL); try { Class c = (Class<? extends InputFormat>) Class.forName(inputFormatClass); LOG.info("RCFile format- Using block level merge"); cplan = createBlockMergeTask( fsInputDesc, finalName, dpCtx != null && dpCtx.getNumDPCols() > 0, RCFileMergeMapper.class, RCFileInputFormat.class, RCFileBlockMergeInputFormat.class); } catch (ClassNotFoundException e) { String msg = "Illegal input format class: " + inputFormatClass; throw new SemanticException(msg); } } else if (conf.getBoolVar(ConfVars.HIVEMERGEORCBLOCKLEVEL) && fsInputDesc.getTableInfo().getInputFileFormatClass().equals(OrcInputFormat.class)) { // Check if InputFormatClass is valid String inputFormatClass = conf.getVar(ConfVars.HIVEMERGEORCINPUTFORMATBLOCKLEVEL); try { Class c = (Class<? extends InputFormat>) Class.forName(inputFormatClass); LOG.info("ORCFile format- Using block level merge"); cplan = createBlockMergeTask( fsInputDesc, finalName, dpCtx != null && dpCtx.getNumDPCols() > 0, OrcMergeMapper.class, OrcInputFormat.class, OrcBlockMergeInputFormat.class); } catch (ClassNotFoundException e) { String msg = "Illegal input format class: " + inputFormatClass; throw new SemanticException(msg); } } else { cplan = createMRWorkForMergingFiles(conf, tsMerge, fsInputDesc); // use CombineHiveInputFormat for map-only merging } cplan.setInputformat("org.apache.hadoop.hive.ql.io.CombineHiveInputFormat"); // NOTE: we should gather stats in MR1 rather than MR2 at merge job since we don't // know if merge MR2 will be triggered at execution time ConditionalTask cndTsk = createCondTask(conf, ctx.getCurrTask(), dummyMv, cplan, fsInputDesc.getFinalDirName()); // keep the dynamic partition context in conditional task resolver context ConditionalResolverMergeFilesCtx mrCtx = (ConditionalResolverMergeFilesCtx) cndTsk.getResolverCtx(); mrCtx.setDPCtx(fsInputDesc.getDynPartCtx()); mrCtx.setLbCtx(fsInputDesc.getLbCtx()); // // 3. add the moveTask as the children of the conditional task // linkMoveTask(ctx, fsOutput, cndTsk); }
private String showJobFailDebugInfo() throws IOException { console.printError("Error during job, obtaining debugging information..."); if (!conf.get("mapred.job.tracker", "local").equals("local")) { // Show Tracking URL for remotely running jobs. console.printError("Job Tracking URL: " + rj.getTrackingURL()); } // Loop to get all task completion events because getTaskCompletionEvents // only returns a subset per call TaskInfoGrabber tlg = new TaskInfoGrabber(); Thread t = new Thread(tlg); try { t.start(); t.join(HiveConf.getIntVar(conf, HiveConf.ConfVars.TASKLOG_DEBUG_TIMEOUT)); } catch (InterruptedException e) { console.printError( "Timed out trying to finish grabbing task log URLs, " + "some task info may be missing"); } // Remove failures for tasks that succeeded for (String task : successes) { failures.remove(task); } if (failures.keySet().size() == 0) { return null; } // Find the highest failure count computeMaxFailures(); // Display Error Message for tasks with the highest failure count String jtUrl = null; try { jtUrl = JobTrackerURLResolver.getURL(conf); } catch (Exception e) { console.printError("Unable to retrieve URL for Hadoop Task logs. " + e.getMessage()); } String msg = null; for (String task : failures.keySet()) { if (failures.get(task).intValue() == maxFailures) { TaskInfo ti = taskIdToInfo.get(task); String jobId = ti.getJobId(); String taskUrl = (jtUrl == null) ? null : jtUrl + "/taskdetails.jsp?jobid=" + jobId + "&tipid=" + task.toString(); TaskLogProcessor tlp = new TaskLogProcessor(conf); for (String logUrl : ti.getLogUrls()) { tlp.addTaskAttemptLogUrl(logUrl); } if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.JOB_DEBUG_CAPTURE_STACKTRACES) && stackTraces != null) { if (!stackTraces.containsKey(jobId)) { stackTraces.put(jobId, new ArrayList<List<String>>()); } stackTraces.get(jobId).addAll(tlp.getStackTraces()); } if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.SHOW_JOB_FAIL_DEBUG_INFO)) { List<ErrorAndSolution> errors = tlp.getErrors(); StringBuilder sb = new StringBuilder(); // We use a StringBuilder and then call printError only once as // printError will write to both stderr and the error log file. In // situations where both the stderr and the log file output is // simultaneously output to a single stream, this will look cleaner. sb.append("\n"); sb.append("Task with the most failures(" + maxFailures + "): \n"); sb.append("-----\n"); sb.append("Task ID:\n " + task + "\n\n"); if (taskUrl != null) { sb.append("URL:\n " + taskUrl + "\n"); } for (ErrorAndSolution e : errors) { sb.append("\n"); sb.append("Possible error:\n " + e.getError() + "\n\n"); sb.append("Solution:\n " + e.getSolution() + "\n"); } sb.append("-----\n"); sb.append("Diagnostic Messages for this Task:\n"); String[] diagMesgs = ti.getDiagnosticMesgs(); for (String mesg : diagMesgs) { sb.append(mesg + "\n"); } msg = sb.toString(); console.printError(msg); } // Only print out one task because that's good enough for debugging. break; } } return msg; }
public Result invokeInternal(final Object proxy, final Method method, final Object[] args) throws Throwable { boolean gotNewConnectUrl = false; boolean reloadConf = HiveConf.getBoolVar(origConf, HiveConf.ConfVars.HMSHANDLERFORCERELOADCONF); long retryInterval = HiveConf.getTimeVar(origConf, HiveConf.ConfVars.HMSHANDLERINTERVAL, TimeUnit.MILLISECONDS); int retryLimit = HiveConf.getIntVar(origConf, HiveConf.ConfVars.HMSHANDLERATTEMPTS); long timeout = HiveConf.getTimeVar( origConf, HiveConf.ConfVars.METASTORE_CLIENT_SOCKET_TIMEOUT, TimeUnit.MILLISECONDS); Deadline.registerIfNot(timeout); if (reloadConf) { MetaStoreInit.updateConnectionURL(origConf, getActiveConf(), null, metaStoreInitData); } int retryCount = 0; Throwable caughtException = null; while (true) { try { if (reloadConf || gotNewConnectUrl) { baseHandler.setConf(getActiveConf()); } Object object = null; boolean isStarted = Deadline.startTimer(method.getName()); try { object = method.invoke(baseHandler, args); } finally { if (isStarted) { Deadline.stopTimer(); } } return new Result(object, retryCount); } catch (javax.jdo.JDOException e) { caughtException = e; } catch (UndeclaredThrowableException e) { if (e.getCause() != null) { if (e.getCause() instanceof javax.jdo.JDOException) { // Due to reflection, the jdo exception is wrapped in // invocationTargetException caughtException = e.getCause(); } else if (e.getCause() instanceof MetaException && e.getCause().getCause() != null && e.getCause().getCause() instanceof javax.jdo.JDOException) { // The JDOException may be wrapped further in a MetaException caughtException = e.getCause().getCause(); } else { LOG.error(ExceptionUtils.getStackTrace(e.getCause())); throw e.getCause(); } } else { LOG.error(ExceptionUtils.getStackTrace(e)); throw e; } } catch (InvocationTargetException e) { if (e.getCause() instanceof javax.jdo.JDOException) { // Due to reflection, the jdo exception is wrapped in // invocationTargetException caughtException = e.getCause(); } else if (e.getCause() instanceof NoSuchObjectException || e.getTargetException().getCause() instanceof NoSuchObjectException) { String methodName = method.getName(); if (!methodName.startsWith("get_database") && !methodName.startsWith("get_table") && !methodName.startsWith("get_partition") && !methodName.startsWith("get_function")) { LOG.error(ExceptionUtils.getStackTrace(e.getCause())); } throw e.getCause(); } else if (e.getCause() instanceof MetaException && e.getCause().getCause() != null) { if (e.getCause().getCause() instanceof javax.jdo.JDOException || e.getCause().getCause() instanceof NucleusException) { // The JDOException or the Nucleus Exception may be wrapped further in a MetaException caughtException = e.getCause().getCause(); } else if (e.getCause().getCause() instanceof DeadlineException) { // The Deadline Exception needs no retry and be thrown immediately. Deadline.clear(); LOG.error( "Error happens in method " + method.getName() + ": " + ExceptionUtils.getStackTrace(e.getCause())); throw e.getCause(); } else { LOG.error(ExceptionUtils.getStackTrace(e.getCause())); throw e.getCause(); } } else { LOG.error(ExceptionUtils.getStackTrace(e.getCause())); throw e.getCause(); } } if (retryCount >= retryLimit) { LOG.error("HMSHandler Fatal error: " + ExceptionUtils.getStackTrace(caughtException)); // Since returning exceptions with a nested "cause" can be a problem in // Thrift, we are stuffing the stack trace into the message itself. throw new MetaException(ExceptionUtils.getStackTrace(caughtException)); } assert (retryInterval >= 0); retryCount++; LOG.error( String.format( "Retrying HMSHandler after %d ms (attempt %d of %d)", retryInterval, retryCount, retryLimit) + " with error: " + ExceptionUtils.getStackTrace(caughtException)); Thread.sleep(retryInterval); // If we have a connection error, the JDO connection URL hook might // provide us with a new URL to access the datastore. String lastUrl = MetaStoreInit.getConnectionURL(getActiveConf()); gotNewConnectUrl = MetaStoreInit.updateConnectionURL(origConf, getActiveConf(), lastUrl, metaStoreInitData); } }
@SuppressWarnings("unchecked") public static void main(String[] args) throws IOException, HiveException { String planFileName = null; String jobConfFileName = null; boolean noLog = false; String files = null; boolean localtask = false; try { for (int i = 0; i < args.length; i++) { if (args[i].equals("-plan")) { planFileName = args[++i]; } else if (args[i].equals("-jobconffile")) { jobConfFileName = args[++i]; } else if (args[i].equals("-nolog")) { noLog = true; } else if (args[i].equals("-files")) { files = args[++i]; } else if (args[i].equals("-localtask")) { localtask = true; } } } catch (IndexOutOfBoundsException e) { System.err.println("Missing argument to option"); printUsage(); } JobConf conf; if (localtask) { conf = new JobConf(MapredLocalTask.class); } else { conf = new JobConf(ExecDriver.class); } if (jobConfFileName != null) { conf.addResource(new Path(jobConfFileName)); } if (files != null) { conf.set("tmpfiles", files); } if (UserGroupInformation.isSecurityEnabled()) { String hadoopAuthToken = System.getenv(UserGroupInformation.HADOOP_TOKEN_FILE_LOCATION); if (hadoopAuthToken != null) { conf.set("mapreduce.job.credentials.binary", hadoopAuthToken); } } boolean isSilent = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVESESSIONSILENT); String queryId = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYID, "").trim(); if (queryId.isEmpty()) { queryId = "unknown-" + System.currentTimeMillis(); } System.setProperty(HiveConf.ConfVars.HIVEQUERYID.toString(), queryId); if (noLog) { // If started from main(), and noLog is on, we should not output // any logs. To turn the log on, please set -Dtest.silent=false org.apache.logging.log4j.Logger logger = org.apache.logging.log4j.LogManager.getRootLogger(); NullAppender appender = NullAppender.createNullAppender(); appender.addToLogger(logger.getName(), Level.ERROR); appender.start(); } else { setupChildLog4j(conf); } Logger LOG = LoggerFactory.getLogger(ExecDriver.class.getName()); LogHelper console = new LogHelper(LOG, isSilent); if (planFileName == null) { console.printError("Must specify Plan File Name"); printUsage(); } // print out the location of the log file for the user so // that it's easy to find reason for local mode execution failures for (Appender appender : ((org.apache.logging.log4j.core.Logger) LogManager.getRootLogger()) .getAppenders() .values()) { if (appender instanceof FileAppender) { console.printInfo("Execution log at: " + ((FileAppender) appender).getFileName()); } else if (appender instanceof RollingFileAppender) { console.printInfo("Execution log at: " + ((RollingFileAppender) appender).getFileName()); } } // the plan file should always be in local directory Path p = new Path(planFileName); FileSystem fs = FileSystem.getLocal(conf); InputStream pathData = fs.open(p); // this is workaround for hadoop-17 - libjars are not added to classpath of the // child process. so we add it here explicitly String auxJars = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEAUXJARS); String addedJars = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEADDEDJARS); try { // see also - code in CliDriver.java ClassLoader loader = conf.getClassLoader(); if (StringUtils.isNotBlank(auxJars)) { loader = Utilities.addToClassPath(loader, StringUtils.split(auxJars, ",")); } if (StringUtils.isNotBlank(addedJars)) { loader = Utilities.addToClassPath(loader, StringUtils.split(addedJars, ",")); } conf.setClassLoader(loader); // Also set this to the Thread ContextClassLoader, so new threads will // inherit // this class loader, and propagate into newly created Configurations by // those // new threads. Thread.currentThread().setContextClassLoader(loader); } catch (Exception e) { throw new HiveException(e.getMessage(), e); } int ret; if (localtask) { memoryMXBean = ManagementFactory.getMemoryMXBean(); MapredLocalWork plan = Utilities.deserializePlan(pathData, MapredLocalWork.class, conf); MapredLocalTask ed = new MapredLocalTask(plan, conf, isSilent); ret = ed.executeInProcess(new DriverContext()); } else { MapredWork plan = Utilities.deserializePlan(pathData, MapredWork.class, conf); ExecDriver ed = new ExecDriver(plan, conf, isSilent); ret = ed.execute(new DriverContext()); } if (ret != 0) { System.exit(ret); } }
/** Execute a query plan using Hadoop. */ @SuppressWarnings({"deprecation", "unchecked"}) @Override public int execute(DriverContext driverContext) { IOPrepareCache ioPrepareCache = IOPrepareCache.get(); ioPrepareCache.clear(); boolean success = true; Context ctx = driverContext.getCtx(); boolean ctxCreated = false; Path emptyScratchDir; MapWork mWork = work.getMapWork(); ReduceWork rWork = work.getReduceWork(); try { if (ctx == null) { ctx = new Context(job); ctxCreated = true; } emptyScratchDir = ctx.getMRTmpPath(); FileSystem fs = emptyScratchDir.getFileSystem(job); fs.mkdirs(emptyScratchDir); } catch (IOException e) { e.printStackTrace(); console.printError( "Error launching map-reduce job", "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e)); return 5; } HiveFileFormatUtils.prepareJobOutput(job); // See the javadoc on HiveOutputFormatImpl and HadoopShims.prepareJobOutput() job.setOutputFormat(HiveOutputFormatImpl.class); job.setMapperClass(ExecMapper.class); job.setMapOutputKeyClass(HiveKey.class); job.setMapOutputValueClass(BytesWritable.class); try { String partitioner = HiveConf.getVar(job, ConfVars.HIVEPARTITIONER); job.setPartitionerClass(JavaUtils.loadClass(partitioner)); } catch (ClassNotFoundException e) { throw new RuntimeException(e.getMessage(), e); } if (mWork.getNumMapTasks() != null) { job.setNumMapTasks(mWork.getNumMapTasks().intValue()); } if (mWork.getMaxSplitSize() != null) { HiveConf.setLongVar( job, HiveConf.ConfVars.MAPREDMAXSPLITSIZE, mWork.getMaxSplitSize().longValue()); } if (mWork.getMinSplitSize() != null) { HiveConf.setLongVar( job, HiveConf.ConfVars.MAPREDMINSPLITSIZE, mWork.getMinSplitSize().longValue()); } if (mWork.getMinSplitSizePerNode() != null) { HiveConf.setLongVar( job, HiveConf.ConfVars.MAPREDMINSPLITSIZEPERNODE, mWork.getMinSplitSizePerNode().longValue()); } if (mWork.getMinSplitSizePerRack() != null) { HiveConf.setLongVar( job, HiveConf.ConfVars.MAPREDMINSPLITSIZEPERRACK, mWork.getMinSplitSizePerRack().longValue()); } job.setNumReduceTasks(rWork != null ? rWork.getNumReduceTasks().intValue() : 0); job.setReducerClass(ExecReducer.class); // set input format information if necessary setInputAttributes(job); // Turn on speculative execution for reducers boolean useSpeculativeExecReducers = HiveConf.getBoolVar(job, HiveConf.ConfVars.HIVESPECULATIVEEXECREDUCERS); HiveConf.setBoolVar( job, HiveConf.ConfVars.HADOOPSPECULATIVEEXECREDUCERS, useSpeculativeExecReducers); String inpFormat = HiveConf.getVar(job, HiveConf.ConfVars.HIVEINPUTFORMAT); if (mWork.isUseBucketizedHiveInputFormat()) { inpFormat = BucketizedHiveInputFormat.class.getName(); } LOG.info("Using " + inpFormat); try { job.setInputFormat(JavaUtils.loadClass(inpFormat)); } catch (ClassNotFoundException e) { throw new RuntimeException(e.getMessage(), e); } // No-Op - we don't really write anything here .. job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); // Transfer HIVEAUXJARS and HIVEADDEDJARS to "tmpjars" so hadoop understands // it String auxJars = HiveConf.getVar(job, HiveConf.ConfVars.HIVEAUXJARS); String addedJars = HiveConf.getVar(job, HiveConf.ConfVars.HIVEADDEDJARS); if (StringUtils.isNotBlank(auxJars) || StringUtils.isNotBlank(addedJars)) { String allJars = StringUtils.isNotBlank(auxJars) ? (StringUtils.isNotBlank(addedJars) ? addedJars + "," + auxJars : auxJars) : addedJars; LOG.info("adding libjars: " + allJars); initializeFiles("tmpjars", allJars); } // Transfer HIVEADDEDFILES to "tmpfiles" so hadoop understands it String addedFiles = HiveConf.getVar(job, HiveConf.ConfVars.HIVEADDEDFILES); if (StringUtils.isNotBlank(addedFiles)) { initializeFiles("tmpfiles", addedFiles); } int returnVal = 0; boolean noName = StringUtils.isEmpty(HiveConf.getVar(job, HiveConf.ConfVars.HADOOPJOBNAME)); if (noName) { // This is for a special case to ensure unit tests pass HiveConf.setVar(job, HiveConf.ConfVars.HADOOPJOBNAME, "JOB" + Utilities.randGen.nextInt()); } String addedArchives = HiveConf.getVar(job, HiveConf.ConfVars.HIVEADDEDARCHIVES); // Transfer HIVEADDEDARCHIVES to "tmparchives" so hadoop understands it if (StringUtils.isNotBlank(addedArchives)) { initializeFiles("tmparchives", addedArchives); } try { MapredLocalWork localwork = mWork.getMapRedLocalWork(); if (localwork != null && localwork.hasStagedAlias()) { if (!ShimLoader.getHadoopShims().isLocalMode(job)) { Path localPath = localwork.getTmpPath(); Path hdfsPath = mWork.getTmpHDFSPath(); FileSystem hdfs = hdfsPath.getFileSystem(job); FileSystem localFS = localPath.getFileSystem(job); FileStatus[] hashtableFiles = localFS.listStatus(localPath); int fileNumber = hashtableFiles.length; String[] fileNames = new String[fileNumber]; for (int i = 0; i < fileNumber; i++) { fileNames[i] = hashtableFiles[i].getPath().getName(); } // package and compress all the hashtable files to an archive file String stageId = this.getId(); String archiveFileName = Utilities.generateTarFileName(stageId); localwork.setStageID(stageId); CompressionUtils.tar(localPath.toUri().getPath(), fileNames, archiveFileName); Path archivePath = Utilities.generateTarPath(localPath, stageId); LOG.info("Archive " + hashtableFiles.length + " hash table files to " + archivePath); // upload archive file to hdfs Path hdfsFilePath = Utilities.generateTarPath(hdfsPath, stageId); short replication = (short) job.getInt("mapred.submit.replication", 10); hdfs.copyFromLocalFile(archivePath, hdfsFilePath); hdfs.setReplication(hdfsFilePath, replication); LOG.info("Upload 1 archive file from" + archivePath + " to: " + hdfsFilePath); // add the archive file to distributed cache DistributedCache.createSymlink(job); DistributedCache.addCacheArchive(hdfsFilePath.toUri(), job); LOG.info( "Add 1 archive file to distributed cache. Archive file: " + hdfsFilePath.toUri()); } } work.configureJobConf(job); List<Path> inputPaths = Utilities.getInputPaths(job, mWork, emptyScratchDir, ctx, false); Utilities.setInputPaths(job, inputPaths); Utilities.setMapRedWork(job, work, ctx.getMRTmpPath()); if (mWork.getSamplingType() > 0 && rWork != null && job.getNumReduceTasks() > 1) { try { handleSampling(ctx, mWork, job); job.setPartitionerClass(HiveTotalOrderPartitioner.class); } catch (IllegalStateException e) { console.printInfo("Not enough sampling data.. Rolling back to single reducer task"); rWork.setNumReduceTasks(1); job.setNumReduceTasks(1); } catch (Exception e) { LOG.error("Sampling error", e); console.printError( e.toString(), "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e)); rWork.setNumReduceTasks(1); job.setNumReduceTasks(1); } } // remove the pwd from conf file so that job tracker doesn't show this // logs String pwd = HiveConf.getVar(job, HiveConf.ConfVars.METASTOREPWD); if (pwd != null) { HiveConf.setVar(job, HiveConf.ConfVars.METASTOREPWD, "HIVE"); } JobClient jc = new JobClient(job); // make this client wait if job tracker is not behaving well. Throttle.checkJobTracker(job, LOG); if (mWork.isGatheringStats() || (rWork != null && rWork.isGatheringStats())) { // initialize stats publishing table StatsPublisher statsPublisher; StatsFactory factory = StatsFactory.newFactory(job); if (factory != null) { statsPublisher = factory.getStatsPublisher(); List<String> statsTmpDir = Utilities.getStatsTmpDirs(mWork, job); if (rWork != null) { statsTmpDir.addAll(Utilities.getStatsTmpDirs(rWork, job)); } StatsCollectionContext sc = new StatsCollectionContext(job); sc.setStatsTmpDirs(statsTmpDir); if (!statsPublisher.init(sc)) { // creating stats table if not exists if (HiveConf.getBoolVar(job, HiveConf.ConfVars.HIVE_STATS_RELIABLE)) { throw new HiveException( ErrorMsg.STATSPUBLISHER_INITIALIZATION_ERROR.getErrorCodedMsg()); } } } } Utilities.createTmpDirs(job, mWork); Utilities.createTmpDirs(job, rWork); SessionState ss = SessionState.get(); if (HiveConf.getVar(job, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez") && ss != null) { TezSessionState session = ss.getTezSession(); TezSessionPoolManager.getInstance().close(session, true); } // Finally SUBMIT the JOB! rj = jc.submitJob(job); // replace it back if (pwd != null) { HiveConf.setVar(job, HiveConf.ConfVars.METASTOREPWD, pwd); } returnVal = jobExecHelper.progress(rj, jc, ctx.getHiveTxnManager()); success = (returnVal == 0); } catch (Exception e) { e.printStackTrace(); String mesg = " with exception '" + Utilities.getNameMessage(e) + "'"; if (rj != null) { mesg = "Ended Job = " + rj.getJobID() + mesg; } else { mesg = "Job Submission failed" + mesg; } // Has to use full name to make sure it does not conflict with // org.apache.commons.lang.StringUtils console.printError(mesg, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e)); success = false; returnVal = 1; } finally { Utilities.clearWork(job); try { if (ctxCreated) { ctx.clear(); } if (rj != null) { if (returnVal != 0) { rj.killJob(); } jobID = rj.getID().toString(); } } catch (Exception e) { LOG.warn("Failed while cleaning up ", e); } finally { HadoopJobExecHelper.runningJobs.remove(rj); } } // get the list of Dynamic partition paths try { if (rj != null) { if (mWork.getAliasToWork() != null) { for (Operator<? extends OperatorDesc> op : mWork.getAliasToWork().values()) { op.jobClose(job, success); } } if (rWork != null) { rWork.getReducer().jobClose(job, success); } } } catch (Exception e) { // jobClose needs to execute successfully otherwise fail task if (success) { success = false; returnVal = 3; String mesg = "Job Commit failed with exception '" + Utilities.getNameMessage(e) + "'"; console.printError(mesg, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e)); } } return (returnVal); }