public void generateTestData() throws Exception {
  // remove data from previous runs.
  cleanDir(DB_DIR);
  cleanDir(WH_DIR);

  HiveConf conf = new HiveConf();

  conf.set(
      "javax.jdo.option.ConnectionURL",
      String.format("jdbc:derby:;databaseName=%s;create=true", DB_DIR));
  conf.set(FileSystem.FS_DEFAULT_NAME_KEY, "file:///");
  conf.set("hive.metastore.warehouse.dir", WH_DIR);

  SessionState ss = new SessionState(new HiveConf(SessionState.class));
  SessionState.start(ss);
  hiveDriver = new Driver(conf);

  // generate (key, value) test data
  String testDataFile = generateTestDataFile();

  createTableAndLoadData("default", "kv", testDataFile);
  executeQuery("CREATE DATABASE IF NOT EXISTS db1");
  createTableAndLoadData("db1", "kv_db1", testDataFile);

  ss.close();
}
public void preTest(HiveConf conf) throws Exception {
  if (zooKeeperCluster == null) {
    // create temp dir
    String tmpBaseDir = System.getProperty("test.tmp.dir");
    File tmpDir = Utilities.createTempDir(tmpBaseDir);

    zooKeeperCluster = new MiniZooKeeperCluster();
    zkPort = zooKeeperCluster.startup(tmpDir);
  }

  if (zooKeeper != null) {
    zooKeeper.close();
  }

  int sessionTimeout =
      (int) conf.getTimeVar(
          HiveConf.ConfVars.HIVE_ZOOKEEPER_SESSION_TIMEOUT, TimeUnit.MILLISECONDS);
  zooKeeper =
      new ZooKeeper(
          "localhost:" + zkPort,
          sessionTimeout,
          new Watcher() {
            @Override
            public void process(WatchedEvent arg0) {}
          });

  String zkServer = "localhost";
  conf.set("hive.zookeeper.quorum", zkServer);
  conf.set("hive.zookeeper.client.port", "" + zkPort);
}
public static void main(String[] args) throws Exception {
  Thread.setDefaultUncaughtExceptionHandler(new LlapDaemonUncaughtExceptionHandler());
  LlapDaemon llapDaemon = null;
  try {
    // Cache settings will need to be set up in llap-daemon-site.xml - since the daemons
    // don't read hive-site.xml.
    // Ideally, these properties should be part of LlapDaemonConf rather than HiveConf.
    LlapConfiguration daemonConf = new LlapConfiguration();
    int numExecutors =
        daemonConf.getInt(
            LlapConfiguration.LLAP_DAEMON_NUM_EXECUTORS,
            LlapConfiguration.LLAP_DAEMON_NUM_EXECUTORS_DEFAULT);

    String[] localDirs = daemonConf.getTrimmedStrings(LlapConfiguration.LLAP_DAEMON_WORK_DIRS);
    int rpcPort =
        daemonConf.getInt(
            LlapConfiguration.LLAP_DAEMON_RPC_PORT,
            LlapConfiguration.LLAP_DAEMON_RPC_PORT_DEFAULT);
    int shufflePort =
        daemonConf.getInt(
            ShuffleHandler.SHUFFLE_PORT_CONFIG_KEY, ShuffleHandler.DEFAULT_SHUFFLE_PORT);
    long executorMemoryBytes =
        daemonConf.getInt(
                LlapConfiguration.LLAP_DAEMON_MEMORY_PER_INSTANCE_MB,
                LlapConfiguration.LLAP_DAEMON_MEMORY_PER_INSTANCE_MB_DEFAULT)
            * 1024L
            * 1024L;
    long cacheMemoryBytes =
        HiveConf.getLongVar(daemonConf, HiveConf.ConfVars.LLAP_ORC_CACHE_MAX_SIZE);
    boolean isDirectCache =
        HiveConf.getBoolVar(daemonConf, HiveConf.ConfVars.LLAP_ORC_CACHE_ALLOCATE_DIRECT);
    boolean llapIoEnabled = HiveConf.getBoolVar(daemonConf, HiveConf.ConfVars.LLAP_IO_ENABLED);
    llapDaemon =
        new LlapDaemon(
            daemonConf,
            numExecutors,
            executorMemoryBytes,
            llapIoEnabled,
            isDirectCache,
            cacheMemoryBytes,
            localDirs,
            rpcPort,
            shufflePort);

    LOG.info("Adding shutdown hook for LlapDaemon");
    ShutdownHookManager.addShutdownHook(new CompositeServiceShutdownHook(llapDaemon), 1);

    llapDaemon.init(daemonConf);
    llapDaemon.start();
    LOG.info("Started LlapDaemon");
    // Relying on the RPC threads to keep the service alive.
  } catch (Throwable t) {
    // TODO Replace this with an ExceptionHandler / ShutdownHook
    LOG.warn("Failed to start LLAP Daemon with exception", t);
    if (llapDaemon != null) {
      llapDaemon.shutdown();
    }
    System.exit(-1);
  }
}
private CliSessionState startSessionState() throws IOException {
  HiveConf.setVar(
      conf,
      HiveConf.ConfVars.HIVE_AUTHENTICATOR_MANAGER,
      "org.apache.hadoop.hive.ql.security.HadoopDefaultAuthenticator");

  String execEngine = conf.get("hive.execution.engine");
  conf.set("hive.execution.engine", "mr");
  CliSessionState ss = new CliSessionState(conf);
  assert ss != null;
  ss.in = System.in;
  ss.out = System.out;
  ss.err = System.out;

  SessionState oldSs = SessionState.get();
  if (oldSs != null && clusterType == MiniClusterType.tez) {
    oldSs.close();
  }
  if (oldSs != null && oldSs.out != null && oldSs.out != System.out) {
    oldSs.out.close();
  }
  SessionState.start(ss);

  isSessionStateStarted = true;

  conf.set("hive.execution.engine", execEngine);
  return ss;
}
/**
 * Method to fetch table data.
 *
 * @param table table name
 * @param database database name
 * @return list of rows with their columns joined by commas
 * @throws Exception if any error occurs
 */
private List<String> getTableData(String table, String database) throws Exception {
  HiveConf conf = new HiveConf();
  conf.addResource("hive-site.xml");
  ArrayList<String> results = new ArrayList<String>();
  ArrayList<String> temp = new ArrayList<String>();
  Hive hive = Hive.get(conf);
  org.apache.hadoop.hive.ql.metadata.Table tbl = hive.getTable(database, table);
  FetchWork work;
  if (!tbl.getPartCols().isEmpty()) {
    List<Partition> partitions = hive.getPartitions(tbl);
    List<PartitionDesc> partDesc = new ArrayList<PartitionDesc>();
    List<String> partLocs = new ArrayList<String>();
    for (Partition part : partitions) {
      partLocs.add(part.getLocation());
      partDesc.add(Utilities.getPartitionDesc(part));
    }
    work = new FetchWork(partLocs, partDesc, Utilities.getTableDesc(tbl));
    work.setLimit(100);
  } else {
    work = new FetchWork(tbl.getDataLocation().toString(), Utilities.getTableDesc(tbl));
  }
  FetchTask task = new FetchTask();
  task.setWork(work);
  task.initialize(conf, null, null);
  task.fetch(temp);
  for (String str : temp) {
    results.add(str.replace("\t", ","));
  }
  return results;
}
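/*
 * Usage sketch (not part of the original source): how the fetch helper above might be called.
 * The table and database names are illustrative assumptions, not values taken from the source.
 */
private void printSampleRows() throws Exception {
  // fetch rows of default.kv as comma-separated strings and print them
  List<String> rows = getTableData("kv", "default");
  for (String row : rows) {
    System.out.println(row);
  }
}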
/**
 * When auto-shipping the hive tar (for example when a hive query or pig script is submitted via
 * webhcat), the Hive client is launched on some remote node where Hive has not been installed.
 * We need to pass some properties to that client to make sure it connects to the right
 * Metastore, configures Tez, etc. Here we look for such properties in the hive config, and set
 * a comma-separated list of key values in {@link #HIVE_PROPS_NAME}. Note that the user may
 * choose to set the same keys in HIVE_PROPS_NAME directly, in which case those values should
 * take precedence.
 */
private void handleHiveProperties() {
  HiveConf hiveConf = new HiveConf(); // load hive-site.xml from classpath
  List<String> interestingPropNames =
      Arrays.asList(
          "hive.metastore.uris",
          "hive.metastore.sasl.enabled",
          "hive.metastore.execute.setugi",
          "hive.execution.engine");

  // each item is in "key=value" format
  List<String> webhcatHiveProps = new ArrayList<String>(hiveProps());
  for (String interestingPropName : interestingPropNames) {
    String value = hiveConf.get(interestingPropName);
    if (value != null) {
      boolean found = false;
      for (String whProp : webhcatHiveProps) {
        if (whProp.startsWith(interestingPropName + "=")) {
          found = true;
          break;
        }
      }
      if (!found) {
        webhcatHiveProps.add(interestingPropName + "=" + value);
      }
    }
  }
  StringBuilder hiveProps = new StringBuilder();
  for (String whProp : webhcatHiveProps) {
    // make sure to escape separator char in prop values
    hiveProps.append(hiveProps.length() > 0 ? "," : "").append(StringUtils.escapeString(whProp));
  }
  set(HIVE_PROPS_NAME, hiveProps.toString());
}
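/*
 * Illustration (not part of the original source): HIVE_PROPS_NAME ends up holding an escaped,
 * comma-separated list such as "hive.metastore.uris=thrift://host:9083,hive.execution.engine=tez".
 * A consumer could decode it with the matching Hadoop StringUtils helpers; the method below is
 * hypothetical and only shows the round trip of escapeString().
 */
private static List<String> decodeWebhcatHiveProps(String hivePropsValue) {
  List<String> keyValues = new ArrayList<String>();
  // split() honours the escaping applied by escapeString(); unEscapeString() removes it
  for (String escaped : StringUtils.split(hivePropsValue)) {
    keyValues.add(StringUtils.unEscapeString(escaped));
  }
  return keyValues;
}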
/**
 * Add the StatsTask as a dependent task of the MoveTask because the StatsTask will change the
 * Table/Partition metadata. For atomicity, we should not change the metadata before the data is
 * actually in place, which is done by the MoveTask.
 *
 * @param nd the FileSinkOperator whose results are taken care of by the MoveTask.
 * @param mvTask The MoveTask that moves the FileSinkOperator's results.
 * @param currTask The MapRedTask that the FileSinkOperator belongs to.
 * @param hconf HiveConf
 */
private void addStatsTask(
    FileSinkOperator nd, MoveTask mvTask, Task<? extends Serializable> currTask, HiveConf hconf) {
  MoveWork mvWork = ((MoveTask) mvTask).getWork();
  StatsWork statsWork = null;
  if (mvWork.getLoadTableWork() != null) {
    statsWork = new StatsWork(mvWork.getLoadTableWork());
  } else if (mvWork.getLoadFileWork() != null) {
    statsWork = new StatsWork(mvWork.getLoadFileWork());
  }
  assert statsWork != null : "Error when generating StatsTask";
  statsWork.setStatsReliable(hconf.getBoolVar(ConfVars.HIVE_STATS_RELIABLE));
  MapredWork mrWork = (MapredWork) currTask.getWork();

  // AggKey in StatsWork is used for stats aggregation while StatsAggPrefix
  // in FileSinkDesc is used for stats publishing. They should be consistent.
  statsWork.setAggKey(((FileSinkOperator) nd).getConf().getStatsAggPrefix());
  Task<? extends Serializable> statsTask = TaskFactory.get(statsWork, hconf);

  // mark the MapredWork and FileSinkOperator for gathering stats
  nd.getConf().setGatherStats(true);
  mrWork.setGatheringStats(true);
  nd.getConf().setStatsReliable(hconf.getBoolVar(ConfVars.HIVE_STATS_RELIABLE));
  nd.getConf()
      .setMaxStatsKeyPrefixLength(hconf.getIntVar(ConfVars.HIVE_STATS_KEY_PREFIX_MAX_LENGTH));
  // mrWork.addDestinationTable(nd.getConf().getTableInfo().getTableName());

  // subscribe feeds from the MoveTask so that MoveTask can forward the list
  // of dynamic partitions to the StatsTask
  mvTask.addDependentTask(statsTask);
  statsTask.subscribeFeed(mvTask);
}
// Check if this write entity needs to be skipped
private boolean filterWriteEntity(WriteEntity writeEntity) throws AuthorizationException {
  // skip URI validation for session scratch file URIs
  if (writeEntity.isTempURI()) {
    return true;
  }
  try {
    if (writeEntity.getTyp().equals(Type.DFS_DIR)
        || writeEntity.getTyp().equals(Type.LOCAL_DIR)) {
      HiveConf conf = SessionState.get().getConf();
      String warehouseDir = conf.getVar(ConfVars.METASTOREWAREHOUSE);
      URI scratchURI =
          new URI(
              PathUtils.parseDFSURI(warehouseDir, conf.getVar(HiveConf.ConfVars.SCRATCHDIR)));
      URI requestURI =
          new URI(PathUtils.parseDFSURI(warehouseDir, writeEntity.getLocation().getPath()));
      LOG.debug("scratchURI = " + scratchURI + ", requestURI = " + requestURI);
      if (PathUtils.impliesURI(scratchURI, requestURI)) {
        return true;
      }
      URI localScratchURI =
          new URI(PathUtils.parseLocalURI(conf.getVar(HiveConf.ConfVars.LOCALSCRATCHDIR)));
      URI localRequestURI =
          new URI(PathUtils.parseLocalURI(writeEntity.getLocation().getPath()));
      LOG.debug(
          "localScratchURI = " + localScratchURI + ", localRequestURI = " + localRequestURI);
      if (PathUtils.impliesURI(localScratchURI, localRequestURI)) {
        return true;
      }
    }
  } catch (Exception e) {
    throw new AuthorizationException("Failed to extract uri details", e);
  }
  return false;
}
public HiveAuthFactory(HiveConf conf) throws TTransportException {
  this.conf = conf;
  saslMessageLimit = conf.getIntVar(ConfVars.HIVE_THRIFT_SASL_MESSAGE_LIMIT);
  String transTypeStr = conf.getVar(HiveConf.ConfVars.HIVE_SERVER2_TRANSPORT_MODE);
  String authTypeStr = conf.getVar(ConfVars.HIVE_SERVER2_AUTHENTICATION);
  transportType = TransTypes.valueOf(transTypeStr.toUpperCase());
  authType =
      authTypeStr == null
          ? transportType.getDefaultAuthType()
          : AuthTypes.valueOf(authTypeStr.toUpperCase());
  if (transportType == TransTypes.BINARY
      && authTypeStr.equalsIgnoreCase(AuthTypes.KERBEROS.name())
      && ShimLoader.getHadoopShims().isSecureShimImpl()) {
    saslServer =
        ShimLoader.getHadoopThriftAuthBridge()
            .createServer(
                conf.getVar(ConfVars.HIVE_SERVER2_KERBEROS_KEYTAB),
                conf.getVar(ConfVars.HIVE_SERVER2_KERBEROS_PRINCIPAL));
    // start delegation token manager
    try {
      saslServer.startDelegationTokenSecretManager(conf, null, ServerMode.HIVESERVER2);
    } catch (Exception e) {
      throw new TTransportException("Failed to start token manager", e);
    }
  } else {
    saslServer = null;
  }
}
private HCatClient getHCatClient(URI uri, Configuration conf, String user)
    throws HCatAccessorException {
  final HiveConf hiveConf = new HiveConf(conf, this.getClass());
  String serverURI = getMetastoreConnectURI(uri);
  if (!serverURI.equals("")) {
    hiveConf.set("hive.metastore.local", "false");
  }
  hiveConf.set(HiveConf.ConfVars.METASTOREURIS.varname, serverURI);
  try {
    XLog.getLog(HCatURIHandler.class)
        .info(
            "Creating HCatClient for user [{0}] login_user [{1}] and server [{2}] ",
            user,
            UserGroupInformation.getLoginUser(),
            serverURI);
    // HiveMetastoreClient (hive 0.9) currently does not work if UGI has doAs
    // We are good to connect as the oozie user since listPartitions does not require
    // authorization
    /*
    UserGroupInformation ugi = ugiService.getProxyUser(user);
    return ugi.doAs(new PrivilegedExceptionAction<HCatClient>() {
      public HCatClient run() throws Exception {
        return HCatClient.create(hiveConf);
      }
    });
    */
    return HCatClient.create(hiveConf);
  } catch (HCatException e) {
    throw new HCatAccessorException(ErrorCode.E1501, e);
  } catch (IOException e) {
    throw new HCatAccessorException(ErrorCode.E1501, e);
  }
}
/**
 * Localizes files, archives and jars that the user has instructed us to provide on the cluster
 * as resources for execution.
 *
 * @param hdfsDirPathStr HDFS directory under which the resources are placed
 * @param conf configuration from which the added files, jars and archives are read
 * @return List<LocalResource> local resources to add to execution
 * @throws IOException when an HDFS operation fails
 * @throws LoginException when getDefaultDestDir fails with the same exception
 */
public List<LocalResource> localizeTempFilesFromConf(String hdfsDirPathStr, Configuration conf)
    throws IOException, LoginException {
  List<LocalResource> tmpResources = new ArrayList<LocalResource>();

  String addedFiles = Utilities.getResourceFiles(conf, SessionState.ResourceType.FILE);
  if (StringUtils.isNotBlank(addedFiles)) {
    HiveConf.setVar(conf, ConfVars.HIVEADDEDFILES, addedFiles);
  }
  String addedJars = Utilities.getResourceFiles(conf, SessionState.ResourceType.JAR);
  if (StringUtils.isNotBlank(addedJars)) {
    HiveConf.setVar(conf, ConfVars.HIVEADDEDJARS, addedJars);
  }
  String addedArchives = Utilities.getResourceFiles(conf, SessionState.ResourceType.ARCHIVE);
  if (StringUtils.isNotBlank(addedArchives)) {
    HiveConf.setVar(conf, ConfVars.HIVEADDEDARCHIVES, addedArchives);
  }

  String auxJars = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEAUXJARS);

  // need to localize the additional jars and files
  // we need the directory on hdfs to which we shall put all these files
  String allFiles = auxJars + "," + addedJars + "," + addedFiles + "," + addedArchives;
  addTempFiles(conf, tmpResources, hdfsDirPathStr, allFiles.split(","));
  return tmpResources;
}
private void createBackgroundOperationPool() {
  int poolSize = hiveConf.getIntVar(ConfVars.HIVE_SERVER2_ASYNC_EXEC_THREADS);
  LOG.info("HiveServer2: Background operation thread pool size: " + poolSize);
  int poolQueueSize = hiveConf.getIntVar(ConfVars.HIVE_SERVER2_ASYNC_EXEC_WAIT_QUEUE_SIZE);
  LOG.info("HiveServer2: Background operation thread wait queue size: " + poolQueueSize);
  long keepAliveTime =
      HiveConf.getTimeVar(
          hiveConf, ConfVars.HIVE_SERVER2_ASYNC_EXEC_KEEPALIVE_TIME, TimeUnit.SECONDS);
  LOG.info(
      "HiveServer2: Background operation thread keepalive time: " + keepAliveTime + " seconds");

  // Create a thread pool with #poolSize threads
  // Threads terminate when they are idle for more than the keepAliveTime
  // A bounded blocking queue is used to queue incoming operations, if #operations > poolSize
  String threadPoolName = "HiveServer2-Background-Pool";
  backgroundOperationPool =
      new ThreadPoolExecutor(
          poolSize,
          poolSize,
          keepAliveTime,
          TimeUnit.SECONDS,
          new LinkedBlockingQueue<Runnable>(poolQueueSize),
          new ThreadFactoryWithGarbageCleanup(threadPoolName));
  backgroundOperationPool.allowCoreThreadTimeOut(true);

  checkInterval =
      HiveConf.getTimeVar(
          hiveConf, ConfVars.HIVE_SERVER2_SESSION_CHECK_INTERVAL, TimeUnit.MILLISECONDS);
  sessionTimeout =
      HiveConf.getTimeVar(
          hiveConf, ConfVars.HIVE_SERVER2_IDLE_SESSION_TIMEOUT, TimeUnit.MILLISECONDS);
}
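/*
 * Configuration sketch (not part of the original source): the background pool above is sized
 * entirely from HiveConf. The values below are illustrative, not recommended defaults.
 */
private static void configureBackgroundPool(HiveConf conf) {
  // maximum number of concurrently running async operations
  conf.setIntVar(HiveConf.ConfVars.HIVE_SERVER2_ASYNC_EXEC_THREADS, 100);
  // how many submitted operations may wait when all worker threads are busy
  conf.setIntVar(HiveConf.ConfVars.HIVE_SERVER2_ASYNC_EXEC_WAIT_QUEUE_SIZE, 100);
  // idle worker threads are reclaimed after this interval
  conf.setTimeVar(HiveConf.ConfVars.HIVE_SERVER2_ASYNC_EXEC_KEEPALIVE_TIME, 10, TimeUnit.SECONDS);
}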
@VisibleForTesting
public static String initHiveLog4jCommon(HiveConf conf, ConfVars confVarName)
    throws LogInitializationException {
  if (HiveConf.getVar(conf, confVarName).equals("")) {
    // if the log4j configuration file is not set, or could not be found, use the default setting
    return initHiveLog4jDefault(conf, "", confVarName);
  } else {
    // if the log4j configuration file was found, use the HiveConf property value
    String log4jFileName = HiveConf.getVar(conf, confVarName);
    File log4jConfigFile = new File(log4jFileName);
    boolean fileExists = log4jConfigFile.exists();
    if (!fileExists) {
      // if the file named by the property is not found in the local file system,
      // use the default setting
      return initHiveLog4jDefault(
          conf, "Not able to find conf file: " + log4jConfigFile, confVarName);
    } else {
      // the file named by the property was found in the local file system;
      // use the specified file
      if (confVarName == HiveConf.ConfVars.HIVE_EXEC_LOG4J_FILE) {
        String queryId = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYID);
        if (queryId == null || (queryId = queryId.trim()).isEmpty()) {
          queryId = "unknown-" + System.currentTimeMillis();
        }
        System.setProperty(HiveConf.ConfVars.HIVEQUERYID.toString(), queryId);
      }
      final boolean async = checkAndSetAsyncLogging(conf);
      Configurator.initialize(null, log4jFileName);
      logConfigLocation(conf);
      return "Logging initialized using configuration in " + log4jConfigFile + " Async: " + async;
    }
  }
}
/** Initialization when invoked from QL. */
@Override
public void initialize(HiveConf conf, QueryPlan queryPlan, DriverContext driverContext) {
  super.initialize(conf, queryPlan, driverContext);
  job = new JobConf(conf, ExecDriver.class);

  // NOTE: initialize is only called if it is in non-local mode.
  // In case it's in non-local mode, we need to move the SessionState files
  // and jars to jobConf.
  // In case it's in local mode, MapRedTask will set the jobConf.
  //
  // "tmpfiles" and "tmpjars" are set by the method ExecDriver.execute(),
  // which will be called by both local and NON-local mode.
  String addedFiles = Utilities.getResourceFiles(job, SessionState.ResourceType.FILE);
  if (StringUtils.isNotBlank(addedFiles)) {
    HiveConf.setVar(job, ConfVars.HIVEADDEDFILES, addedFiles);
  }
  String addedJars = Utilities.getResourceFiles(job, SessionState.ResourceType.JAR);
  if (StringUtils.isNotBlank(addedJars)) {
    HiveConf.setVar(job, ConfVars.HIVEADDEDJARS, addedJars);
  }
  String addedArchives = Utilities.getResourceFiles(job, SessionState.ResourceType.ARCHIVE);
  if (StringUtils.isNotBlank(addedArchives)) {
    HiveConf.setVar(job, ConfVars.HIVEADDEDARCHIVES, addedArchives);
  }
  conf.stripHiddenConfigurations(job);
  this.jobExecHelper = new HadoopJobExecHelper(job, console, this, this);
}
public void initConf() throws Exception {
  // Plug verifying metastore in for testing.
  conf.setVar(
      HiveConf.ConfVars.METASTORE_RAW_STORE_IMPL,
      "org.apache.hadoop.hive.metastore.VerifyingObjectStore");

  if (mr != null) {
    assert dfs != null;

    mr.setupConfiguration(conf);

    // set fs.default.name to the uri of mini-dfs
    String dfsUriString =
        WindowsPathUtil.getHdfsUriString(dfs.getFileSystem().getUri().toString());
    conf.setVar(HiveConf.ConfVars.HADOOPFS, dfsUriString);
    // hive.metastore.warehouse.dir needs to be set relative to the mini-dfs
    conf.setVar(
        HiveConf.ConfVars.METASTOREWAREHOUSE,
        (new Path(dfsUriString, "/build/ql/test/data/warehouse/")).toString());
  }

  // Windows paths should be converted after MiniMrShim.setupConfiguration()
  // since setupConfiguration may overwrite configuration values.
  if (Shell.WINDOWS) {
    WindowsPathUtil.convertPathsFromWindowsToHdfs(conf);
  }
}
/**
 * Gets the temporary directory of the given job.
 *
 * @param conf configuration holding the filesystem and scratch directory settings
 * @param isLocal true to resolve the local temporary directory
 * @return the fully qualified path of the job's temporary directory
 */
public static String getJobTmpDir(Configuration conf, boolean isLocal) {
  String fsName = HiveConf.getVar(conf, ConfVars.HADOOPFS);
  if (fsName.endsWith("/")) {
    fsName = fsName.substring(0, fsName.length() - 1);
  }

  return fsName
      + HiveConf.getVar(conf, (isLocal ? ConfVars.LOCALSCRATCHDIR : ConfVars.SCRATCHDIR), "");
}
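/*
 * Usage sketch (not part of the original source): resolving both temporary directories for a
 * job. The method name and the printing are illustrative only.
 */
private static void printJobTmpDirs(Configuration conf) {
  String hdfsTmpDir = getJobTmpDir(conf, false); // filesystem URI + HDFS scratch dir
  String localTmpDir = getJobTmpDir(conf, true); // filesystem URI + local scratch dir
  System.out.println("hdfs tmp dir: " + hdfsTmpDir + ", local tmp dir: " + localTmpDir);
}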
public static void main(String[] args) throws Exception {
  HiveConf conf = new HiveConf();
  conf.addResource(new Path("file:///", System.getProperty("oozie.action.conf.xml")));
  conf.setVar(ConfVars.SEMANTIC_ANALYZER_HOOK, HCatSemanticAnalyzer.class.getName());
  conf.setBoolVar(ConfVars.METASTORE_USE_THRIFT_SASL, true);
  SessionState.start(new CliSessionState(conf));
  new CliDriver().processLine(args[0]);
}
/*
 * Helper to determine the size of the container requested
 * from yarn. Falls back to Map-reduce's map size if the tez
 * container size isn't set.
 */
private Resource getContainerResource(Configuration conf) {
  Resource containerResource;
  int memory =
      HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVETEZCONTAINERSIZE) > 0
          ? HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVETEZCONTAINERSIZE)
          : conf.getInt(MRJobConfig.MAP_MEMORY_MB, MRJobConfig.DEFAULT_MAP_MEMORY_MB);
  int cpus = conf.getInt(MRJobConfig.MAP_CPU_VCORES, MRJobConfig.DEFAULT_MAP_CPU_VCORES);
  return Resource.newInstance(memory, cpus);
}
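/*
 * Configuration sketch (not part of the original source): setting the Tez container size so the
 * helper above uses it instead of the map-reduce fallback. The 4096 MB value is illustrative.
 */
private static void requestLargerTezContainers(Configuration conf) {
  // hive.tez.container.size is in MB; a value <= 0 means "fall back to the MR map memory"
  HiveConf.setIntVar(conf, HiveConf.ConfVars.HIVETEZCONTAINERSIZE, 4096);
}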
// Perform kerberos login using the hadoop shim API if the configuration is available
public static void loginFromKeytab(HiveConf hiveConf) throws IOException {
  String principal = hiveConf.getVar(ConfVars.HIVE_SERVER2_KERBEROS_PRINCIPAL);
  String keyTabFile = hiveConf.getVar(ConfVars.HIVE_SERVER2_KERBEROS_KEYTAB);
  if (principal.isEmpty() || keyTabFile.isEmpty()) {
    throw new IOException("HiveServer2 Kerberos principal or keytab is not correctly configured");
  } else {
    ShimLoader.getHadoopShims().loginUserFromKeytab(principal, keyTabFile);
  }
}
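/*
 * Usage sketch (not part of the original source): the principal and keytab values below are
 * placeholders; in practice they would come from hive-site.xml rather than being set in code.
 */
private static void loginExample() throws IOException {
  HiveConf conf = new HiveConf();
  conf.setVar(HiveConf.ConfVars.HIVE_SERVER2_KERBEROS_PRINCIPAL, "hive/_HOST@EXAMPLE.COM");
  conf.setVar(HiveConf.ConfVars.HIVE_SERVER2_KERBEROS_KEYTAB, "/etc/security/keytabs/hive.keytab");
  loginFromKeytab(conf);
}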
/**
 * File Sink Operator encountered.
 *
 * @param nd the file sink operator encountered
 * @param opProcCtx context
 */
public Object process(
    Node nd, Stack<Node> stack, NodeProcessorCtx opProcCtx, Object... nodeOutputs)
    throws SemanticException {
  GenMRProcContext ctx = (GenMRProcContext) opProcCtx;
  ParseContext parseCtx = ctx.getParseCtx();
  boolean chDir = false;
  Task<? extends Serializable> currTask = ctx.getCurrTask();
  FileSinkOperator fsOp = (FileSinkOperator) nd;
  boolean isInsertTable = // is INSERT OVERWRITE TABLE
      fsOp.getConf().getTableInfo().getTableName() != null
          && parseCtx.getQB().getParseInfo().isInsertToTable();
  HiveConf hconf = parseCtx.getConf();

  // Has the user enabled merging of files for map-only jobs or for all jobs
  if ((ctx.getMvTask() != null) && (!ctx.getMvTask().isEmpty())) {
    List<Task<? extends Serializable>> mvTasks = ctx.getMvTask();

    // In case of unions or map-joins, it is possible that the file has
    // already been seen.
    // So, no need to attempt to merge the files again.
    if ((ctx.getSeenFileSinkOps() == null) || (!ctx.getSeenFileSinkOps().contains(nd))) {

      // no need of merging if the move is to a local file system
      MoveTask mvTask = (MoveTask) findMoveTask(mvTasks, fsOp);

      if (isInsertTable && hconf.getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
        addStatsTask(fsOp, mvTask, currTask, parseCtx.getConf());
      }

      if ((mvTask != null) && !mvTask.isLocal()) {
        // There are separate configuration parameters to control whether to
        // merge for a map-only job or for a map-reduce job
        MapredWork currWork = (MapredWork) currTask.getWork();
        boolean mergeMapOnly =
            hconf.getBoolVar(HiveConf.ConfVars.HIVEMERGEMAPFILES)
                && currWork.getReducer() == null;
        boolean mergeMapRed =
            hconf.getBoolVar(HiveConf.ConfVars.HIVEMERGEMAPREDFILES)
                && currWork.getReducer() != null;
        if (mergeMapOnly || mergeMapRed) {
          chDir = true;
        }
      }
    }
  }

  String finalName = processFS(nd, stack, opProcCtx, chDir);

  // need to merge the files in the destination table/partitions
  if (chDir && (finalName != null)) {
    createMergeJob((FileSinkOperator) nd, ctx, finalName);
  }

  return null;
}
@VisibleForTesting
protected static AccessURI parseURI(String uri, boolean isLocal) throws SemanticException {
  try {
    HiveConf conf = SessionState.get().getConf();
    String warehouseDir = conf.getVar(ConfVars.METASTOREWAREHOUSE);
    return new AccessURI(PathUtils.parseURI(warehouseDir, uri, isLocal));
  } catch (Exception e) {
    throw new SemanticException("Error parsing URI " + uri + ": " + e.getMessage(), e);
  }
}
// Perform SPNEGO login using the hadoop shim API if the configuration is available
public static UserGroupInformation loginFromSpnegoKeytabAndReturnUGI(HiveConf hiveConf)
    throws IOException {
  String principal = hiveConf.getVar(ConfVars.HIVE_SERVER2_SPNEGO_PRINCIPAL);
  String keyTabFile = hiveConf.getVar(ConfVars.HIVE_SERVER2_SPNEGO_KEYTAB);
  if (principal.isEmpty() || keyTabFile.isEmpty()) {
    throw new IOException("HiveServer2 SPNEGO principal or keytab is not correctly configured");
  } else {
    return ShimLoader.getHadoopShims().loginUserFromKeytabAndReturnUGI(principal, keyTabFile);
  }
}
public void init() throws Exception {
  testWarehouse = conf.getVar(HiveConf.ConfVars.METASTOREWAREHOUSE);
  String execEngine = conf.get("hive.execution.engine");
  conf.set("hive.execution.engine", "mr");
  SessionState.start(conf);
  conf.set("hive.execution.engine", execEngine);
  db = Hive.get(conf);
  pd = new ParseDriver();
  sem = new SemanticAnalyzer(conf);
}
private static void initializeSetup() throws Exception {
  hiveConf = new HiveConf(mrConf, TestHCatMultiOutputFormat.class);
  hiveConf.set("hive.metastore.local", "false");
  hiveConf.setVar(HiveConf.ConfVars.METASTOREURIS, "thrift://localhost:" + msPort);
  hiveConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTCONNECTIONRETRIES, 3);
  hiveConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTFAILURERETRIES, 3);
  hiveConf.set(
      HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK.varname, HCatSemanticAnalyzer.class.getName());
  hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, "");
  hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, "");
  hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false");
  System.setProperty(HiveConf.ConfVars.PREEXECHOOKS.varname, " ");
  System.setProperty(HiveConf.ConfVars.POSTEXECHOOKS.varname, " ");
  hiveConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, warehousedir.toString());
  try {
    hmsc = new HiveMetaStoreClient(hiveConf, null);
    initalizeTables();
  } catch (Throwable e) {
    LOG.error("Exception encountered while setting up testcase", e);
    throw new Exception(e);
  } finally {
    hmsc.close();
  }
}
@Before
public void setUp() throws Exception {
  HiveConf conf = new HiveConf();
  conf.setVar(
      HiveConf.ConfVars.METASTORE_EXPRESSION_PROXY_CLASS,
      MockPartitionExpressionProxy.class.getName());
  objectStore = new ObjectStore();
  objectStore.setConf(conf);
  dropAllStoreObjects(objectStore);
}
@Override
public String dump_config() throws TException {
  HiveConf c = new HiveConf();
  ByteArrayOutputStream b = new ByteArrayOutputStream();
  try {
    c.writeXml(b);
    return new String(b.toByteArray(), "UTF-8");
  } catch (IOException e) {
    throw new TException(e);
  }
}
@BeforeClass
public static void setUpBeforeClass() throws Exception {
  tableName = "TestOperationLoggingLayout_table";
  hiveConf = new HiveConf();
  hiveConf.set(HiveConf.ConfVars.HIVE_SERVER2_LOGGING_OPERATION_LEVEL.varname, "execution");
  // We need to set the below parameter to test performance level logging
  hiveConf.set("hive.ql.log.PerfLogger.level", "INFO,DRFA");
  miniHS2 = new MiniHS2(hiveConf);
  confOverlay = new HashMap<String, String>();
  confOverlay.put(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false");
  miniHS2.start(confOverlay);
}
/*
 * Helper to create a JobConf for a specific ReduceWork.
 */
private JobConf initializeVertexConf(JobConf baseConf, ReduceWork reduceWork) {
  JobConf conf = new JobConf(baseConf);

  conf.set("mapred.reducer.class", ExecReducer.class.getName());

  boolean useSpeculativeExecReducers =
      HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVESPECULATIVEEXECREDUCERS);
  HiveConf.setBoolVar(
      conf, HiveConf.ConfVars.HADOOPSPECULATIVEEXECREDUCERS, useSpeculativeExecReducers);

  return conf;
}
public MapJoinBytesTableContainer(
    Configuration hconf, MapJoinObjectSerDeContext valCtx, long keyCount, long memUsage)
    throws SerDeException {
  this(
      HiveConf.getFloatVar(hconf, HiveConf.ConfVars.HIVEHASHTABLEKEYCOUNTADJUSTMENT),
      HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHASHTABLETHRESHOLD),
      HiveConf.getFloatVar(hconf, HiveConf.ConfVars.HIVEHASHTABLELOADFACTOR),
      HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHASHTABLEWBSIZE),
      valCtx,
      keyCount,
      memUsage);
}
/**
 * Separate from the constructor, because initialize() may need to be called in a separate
 * thread.
 */
synchronized void initialize() {
  assertState(QueryState.CREATED);
  this.hiveConf = new HiveConf(Driver.class);

  // Update configuration with user/group info.
  if (query.hadoop_user == null) {
    throw new RuntimeException("User must be specified.");
  }

  // Update scratch dir (to have one per user)
  File scratchDir = new File("/tmp/hive-beeswax-" + query.hadoop_user);
  hiveConf.set(HiveConf.ConfVars.SCRATCHDIR.varname, scratchDir.getPath());
  // Create the temporary directory if necessary.
  // If mapred.job.tracker is set to local, this is used by MapRedTask.
  if (!scratchDir.isDirectory()) {
    if (scratchDir.exists() || !scratchDir.mkdirs()) {
      LOG.warn("Could not create tmp dir:" + scratchDir);
    }
  }

  driver = new Driver(hiveConf);
  ClassLoader loader = hiveConf.getClassLoader();
  String auxJars = HiveConf.getVar(hiveConf, HiveConf.ConfVars.HIVEAUXJARS);
  if (StringUtils.isNotBlank(auxJars)) {
    try {
      loader = Utilities.addToClassPath(loader, StringUtils.split(auxJars, ","));
    } catch (Exception e) {
      LOG.error("Failed to add jars to class loader: " + auxJars, e);
    }
  }
  hiveConf.setClassLoader(loader);
  Thread.currentThread().setContextClassLoader(loader);
  SessionState.start(hiveConf); // this is thread-local
  this.sessionState = SessionState.get();

  // If this work has a LogContext, associate the child output with the logContext
  OutputStream lcOutStream = null;
  if (this.logContext != null) {
    lcOutStream = this.logContext.getOutputStream();
  }

  // A copy of everything goes to the LogContext.
  // In addition, stderr goes to errStream for error reporting.
  // Note that child output is explicitly tee'd to System.{out,err},
  // otherwise it would be swallowed by outStream.
  this.sessionState.out = new PrintStream(new TeeOutputStream(lcOutStream, this.outStream));
  this.sessionState.err = new PrintStream(new TeeOutputStream(lcOutStream, this.errStream));
  this.sessionState.childOut =
      new PrintStream(new TeeOutputStream(System.out, sessionState.out));
  this.sessionState.childErr =
      new PrintStream(new TeeOutputStream(System.err, sessionState.err));

  this.state = QueryState.INITIALIZED;
}