public HiveAuthFactory(HiveConf conf) throws TTransportException {
  this.conf = conf;
  saslMessageLimit = conf.getIntVar(ConfVars.HIVE_THRIFT_SASL_MESSAGE_LIMIT);
  String transTypeStr = conf.getVar(HiveConf.ConfVars.HIVE_SERVER2_TRANSPORT_MODE);
  String authTypeStr = conf.getVar(ConfVars.HIVE_SERVER2_AUTHENTICATION);
  transportType = TransTypes.valueOf(transTypeStr.toUpperCase());
  authType =
      authTypeStr == null
          ? transportType.getDefaultAuthType()
          : AuthTypes.valueOf(authTypeStr.toUpperCase());
  // Null-safe comparison: authTypeStr may be null when no authentication type is configured.
  if (transportType == TransTypes.BINARY
      && AuthTypes.KERBEROS.name().equalsIgnoreCase(authTypeStr)
      && ShimLoader.getHadoopShims().isSecureShimImpl()) {
    saslServer =
        ShimLoader.getHadoopThriftAuthBridge()
            .createServer(
                conf.getVar(ConfVars.HIVE_SERVER2_KERBEROS_KEYTAB),
                conf.getVar(ConfVars.HIVE_SERVER2_KERBEROS_PRINCIPAL));
    // start delegation token manager
    try {
      saslServer.startDelegationTokenSecretManager(conf, null, ServerMode.HIVESERVER2);
    } catch (Exception e) {
      throw new TTransportException("Failed to start token manager", e);
    }
  } else {
    saslServer = null;
  }
}
public static void baseSetup() throws Exception {
  MiniDFSShim dfs = ShimLoader.getHadoopShims().getMiniDfs(conf, 4, true, null);
  fs = dfs.getFileSystem();
  baseDfsDir = new Path(new Path(fs.getUri()), "/base");
  fs.mkdirs(baseDfsDir);
  warehouseDir = new Path(baseDfsDir, "warehouse");
  fs.mkdirs(warehouseDir);
  conf.setVar(ConfVars.METASTOREWAREHOUSE, warehouseDir.toString());

  // Assuming the tests are run either in C or D drive in Windows OS!
  dataFileDir =
      conf.get("test.data.files")
          .replace('\\', '/')
          .replace("c:", "")
          .replace("C:", "")
          .replace("D:", "")
          .replace("d:", "");
  dataFilePath = new Path(dataFileDir, "kv1.txt");

  // Set up scratch directory
  Path scratchDir = new Path(baseDfsDir, "scratchdir");
  conf.setVar(HiveConf.ConfVars.SCRATCHDIR, scratchDir.toString());

  // set hive conf vars
  conf.setBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY, false);
  conf.setBoolVar(HiveConf.ConfVars.HIVE_WAREHOUSE_SUBDIR_INHERIT_PERMS, true);
  conf.setVar(HiveConf.ConfVars.DYNAMICPARTITIONINGMODE, "nonstrict");

  int port = MetaStoreUtils.findFreePort();
  MetaStoreUtils.startMetaStore(port, ShimLoader.getHadoopThriftAuthBridge());
  SessionState.start(new CliSessionState(conf));
  driver = new Driver(conf);
  setupDataTable();
}
@Override
public RecordReader<NullWritable, OrcStruct> createRecordReader(
    InputSplit inputSplit, TaskAttemptContext context) throws IOException, InterruptedException {
  FileSplit fileSplit = (FileSplit) inputSplit;
  Path path = fileSplit.getPath();
  Configuration conf = ShimLoader.getHadoopShims().getConfiguration(context);
  return new OrcRecordReader(
      OrcFile.createReader(path, OrcFile.readerOptions(conf)),
      conf,
      fileSplit.getStart(),
      fileSplit.getLength());
}
@Override
public List<InputSplit> getSplits(JobContext jobContext)
    throws IOException, InterruptedException {
  perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.ORC_GET_SPLITS);
  Configuration conf = ShimLoader.getHadoopShims().getConfiguration(jobContext);
  List<InputSplit> result = new ArrayList<InputSplit>();
  // Generate the ORC splits once and wrap each one for the mapreduce (new) API.
  for (OrcSplit split : OrcInputFormat.generateSplitsInfo(conf)) {
    result.add(new OrcNewSplit(split));
  }
  perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.ORC_GET_SPLITS);
  return result;
}
@Override
public Object evaluate(DeferredObject[] arguments) throws HiveException {
  Object o0, o1;
  o0 = arguments[0].get();
  if (o0 == null) {
    return null;
  }
  o1 = arguments[1].get();
  if (o1 == null) {
    return null;
  }

  switch (compareType) {
    case COMPARE_TEXT:
      Text t0, t1;
      t0 = soi0.getPrimitiveWritableObject(o0);
      t1 = soi1.getPrimitiveWritableObject(o1);
      result.set(ShimLoader.getHadoopShims().compareText(t0, t1) > 0);
      break;
    case COMPARE_INT:
      result.set(ioi0.get(o0) > ioi1.get(o1));
      break;
    case COMPARE_LONG:
      result.set(loi0.get(o0) > loi1.get(o1));
      break;
    case COMPARE_BYTE:
      result.set(byoi0.get(o0) > byoi1.get(o1));
      break;
    case COMPARE_BOOL:
      boolean b0 = boi0.get(o0);
      boolean b1 = boi1.get(o1);
      result.set(b0 && !b1);
      break;
    case COMPARE_STRING:
      String s0, s1;
      s0 = soi0.getPrimitiveJavaObject(o0);
      s1 = soi1.getPrimitiveJavaObject(o1);
      result.set(s0.compareTo(s1) > 0);
      break;
    case SAME_TYPE:
      result.set(ObjectInspectorUtils.compare(o0, argumentOIs[0], o1, argumentOIs[1]) > 0);
      break;
    default:
      Object converted_o0 = converter0.convert(o0);
      if (converted_o0 == null) {
        return null;
      }
      Object converted_o1 = converter1.convert(o1);
      if (converted_o1 == null) {
        return null;
      }
      result.set(
          ObjectInspectorUtils.compare(converted_o0, compareOI, converted_o1, compareOI) > 0);
  }
  return result;
}
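// Usage sketch (illustrative, not part of the class above): exercising the ">"
// comparison through the public GenericUDF contract with two int constants.
// GenericUDFOPGreaterThan, DeferredJavaObject and javaIntObjectInspector are the
// stock Hive classes; the helper method name below is hypothetical.
private static boolean evaluateGreaterThanExample() throws HiveException {
  GenericUDF udf = new GenericUDFOPGreaterThan();
  ObjectInspector intOI = PrimitiveObjectInspectorFactory.javaIntObjectInspector;
  udf.initialize(new ObjectInspector[] {intOI, intOI});
  Object writable =
      udf.evaluate(
          new GenericUDF.DeferredObject[] {
            new GenericUDF.DeferredJavaObject(3), new GenericUDF.DeferredJavaObject(2)
          });
  return ((BooleanWritable) writable).get(); // true, since 3 > 2
}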
@Override
public void abortJob(JobContext context, int status) throws IOException {
  JobConf conf = ShimLoader.getHadoopShims().getJobConf(context);
  Path tmpLocation = new Path(conf.get(TMP_LOCATION));
  FileSystem fs = tmpLocation.getFileSystem(conf);
  LOG.debug("Removing " + tmpLocation.toString());
  fs.delete(tmpLocation, true);
}
@Override
public void checkOutputSpecs(FileSystem ignored, JobConf jc) throws IOException {
  // delegate to the new api
  Job job = new Job(jc);
  JobContext jobContext = ShimLoader.getHadoopShims().newJobContext(job);
  checkOutputSpecs(jobContext);
}
private void handleSampling(DriverContext context, MapWork mWork, JobConf job, HiveConf conf)
    throws Exception {
  assert mWork.getAliasToWork().keySet().size() == 1;

  String alias = mWork.getAliases().get(0);
  Operator<?> topOp = mWork.getAliasToWork().get(alias);
  PartitionDesc partDesc = mWork.getAliasToPartnInfo().get(alias);

  ArrayList<String> paths = mWork.getPaths();
  ArrayList<PartitionDesc> parts = mWork.getPartitionDescs();

  List<Path> inputPaths = new ArrayList<Path>(paths.size());
  for (String path : paths) {
    inputPaths.add(new Path(path));
  }

  Path tmpPath = context.getCtx().getExternalTmpPath(inputPaths.get(0));
  Path partitionFile = new Path(tmpPath, ".partitions");
  ShimLoader.getHadoopShims().setTotalOrderPartitionFile(job, partitionFile);
  PartitionKeySampler sampler = new PartitionKeySampler();

  if (mWork.getSamplingType() == MapWork.SAMPLING_ON_PREV_MR) {
    console.printInfo("Use sampling data created in previous MR");
    // merges sampling data from previous MR and makes partition keys for total sort
    for (Path path : inputPaths) {
      FileSystem fs = path.getFileSystem(job);
      for (FileStatus status : fs.globStatus(new Path(path, ".sampling*"))) {
        sampler.addSampleFile(status.getPath(), job);
      }
    }
  } else if (mWork.getSamplingType() == MapWork.SAMPLING_ON_START) {
    console.printInfo("Creating sampling data..");
    assert topOp instanceof TableScanOperator;
    TableScanOperator ts = (TableScanOperator) topOp;

    FetchWork fetchWork;
    if (!partDesc.isPartitioned()) {
      assert paths.size() == 1;
      fetchWork = new FetchWork(inputPaths.get(0), partDesc.getTableDesc());
    } else {
      fetchWork = new FetchWork(inputPaths, parts, partDesc.getTableDesc());
    }
    fetchWork.setSource(ts);

    // random sampling
    FetchOperator fetcher = PartitionKeySampler.createSampler(fetchWork, conf, job, ts);
    try {
      ts.initialize(conf, new ObjectInspector[] {fetcher.getOutputObjectInspector()});
      OperatorUtils.setChildrenCollector(ts.getChildOperators(), sampler);
      while (fetcher.pushRow()) {
      }
    } finally {
      fetcher.clearFetchContext();
    }
  } else {
    throw new IllegalArgumentException("Invalid sampling type " + mWork.getSamplingType());
  }
  sampler.writePartitionKeys(partitionFile, conf, job);
}
// Perform kerberos login using the hadoop shim API if the configuration is available
public static void loginFromKeytab(HiveConf hiveConf) throws IOException {
  String principal = hiveConf.getVar(ConfVars.HIVE_SERVER2_KERBEROS_PRINCIPAL);
  String keyTabFile = hiveConf.getVar(ConfVars.HIVE_SERVER2_KERBEROS_KEYTAB);
  if (principal.isEmpty() || keyTabFile.isEmpty()) {
    throw new IOException("HiveServer2 Kerberos principal or keytab is not correctly configured");
  } else {
    ShimLoader.getHadoopShims().loginUserFromKeytab(principal, keyTabFile);
  }
}
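// Usage sketch (an assumption about the calling side, not code from the source):
// a HiveServer2 startup path would typically perform the keytab login only when
// the underlying Hadoop cluster actually has security enabled. The wrapper method
// name is hypothetical.
public static void maybeLoginFromKeytab(HiveConf hiveConf) throws IOException {
  if (ShimLoader.getHadoopShims().isSecurityEnabled()) {
    // Throws IOException when the principal/keytab pair is missing or the login fails.
    loginFromKeytab(hiveConf);
  }
}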
// Perform SPNEGO login using the hadoop shim API if the configuration is available
public static UserGroupInformation loginFromSpnegoKeytabAndReturnUGI(HiveConf hiveConf)
    throws IOException {
  String principal = hiveConf.getVar(ConfVars.HIVE_SERVER2_SPNEGO_PRINCIPAL);
  String keyTabFile = hiveConf.getVar(ConfVars.HIVE_SERVER2_SPNEGO_KEYTAB);
  if (principal.isEmpty() || keyTabFile.isEmpty()) {
    throw new IOException("HiveServer2 SPNEGO principal or keytab is not correctly configured");
  } else {
    return ShimLoader.getHadoopShims().loginUserFromKeytabAndReturnUGI(principal, keyTabFile);
  }
}
private String getPrincipalWithoutRealmAndHost(String fullPrincipal)
    throws HttpAuthenticationException {
  KerberosNameShim fullKerberosName;
  try {
    fullKerberosName = ShimLoader.getHadoopShims().getKerberosNameShim(fullPrincipal);
    return fullKerberosName.getShortName();
  } catch (IOException e) {
    throw new HttpAuthenticationException(e);
  }
}
// Store the bucket path to bucket number mapping in the table scan operator.
// Although one mapper per file is used (BucketizedInputHiveInput), it is possible that
// any mapper can pick up any file (depending on the size of the files). The bucket number
// corresponding to the input file is stored to name the output bucket file appropriately.
private void storeBucketPathMapping(TableScanOperator tsOp, FileStatus[] srcs) {
  Map<String, Integer> bucketFileNameMapping = new HashMap<String, Integer>();
  for (int pos = 0; pos < srcs.length; pos++) {
    if (ShimLoader.getHadoopShims().isDirectory(srcs[pos])) {
      throw new RuntimeException(
          "Was expecting '" + srcs[pos].getPath() + "' to be bucket file.");
    }
    bucketFileNameMapping.put(srcs[pos].getPath().getName(), pos);
  }
  tsOp.getConf().setBucketFileNameMapping(bucketFileNameMapping);
}
/**
 * createTezDir creates a temporary directory in the scratchDir folder to be used with Tez.
 * Assumes scratchDir exists.
 */
public Path createTezDir(Path scratchDir, Configuration conf) throws IOException {
  UserGroupInformation ugi;
  String userName = System.getProperty("user.name");
  try {
    ugi = ShimLoader.getHadoopShims().getUGIForConf(conf);
    userName = ShimLoader.getHadoopShims().getShortUserName(ugi);
  } catch (LoginException e) {
    throw new IOException(e);
  }
  scratchDir = new Path(scratchDir, userName);
  Path tezDir = getTezDir(scratchDir);
  FileSystem fs = tezDir.getFileSystem(conf);
  LOG.debug("TezDir path set " + tezDir + " for user: " + userName);
  // since we are adding the user name to the scratch dir, we do not
  // need to give more permissions here
  fs.mkdirs(tezDir);
  return tezDir;
}
public static void verifyProxyAccess(
    String realUser, String proxyUser, String ipAddress, HiveConf hiveConf)
    throws HiveSQLException {
  try {
    UserGroupInformation sessionUgi;
    if (ShimLoader.getHadoopShims().isSecurityEnabled()) {
      KerberosNameShim kerbName = ShimLoader.getHadoopShims().getKerberosNameShim(realUser);
      String shortPrincipalName = kerbName.getServiceName();
      sessionUgi = ShimLoader.getHadoopShims().createProxyUser(shortPrincipalName);
    } else {
      sessionUgi = ShimLoader.getHadoopShims().createRemoteUser(realUser, null);
    }
    if (!proxyUser.equalsIgnoreCase(realUser)) {
      ShimLoader.getHadoopShims()
          .authorizeProxyAccess(proxyUser, sessionUgi, ipAddress, hiveConf);
    }
  } catch (IOException e) {
    throw new HiveSQLException(
        "Failed to validate proxy privilege of " + realUser + " for " + proxyUser, "08S01", e);
  }
}
@Override
public void commitJob(JobContext context) throws IOException {
  JobConf conf = ShimLoader.getHadoopShims().getJobConf(context);
  Path tmpLocation = new Path(conf.get(TMP_LOCATION));
  Path finalLocation = new Path(conf.get(FINAL_LOCATION));
  FileSystem fs = tmpLocation.getFileSystem(conf);
  LOG.debug("Moving contents of " + tmpLocation.toString() + " to " + finalLocation.toString());
  FileStatus[] contents = fs.listStatus(tmpLocation);
  for (int i = 0; i < contents.length; i++) {
    Path newPath = new Path(finalLocation, contents[i].getPath().getName());
    fs.rename(contents[i].getPath(), newPath);
  }
  fs.delete(tmpLocation, true);
}
/**
 * @param conf configuration used to resolve the current user and the target file system
 * @return path to destination directory on hdfs
 * @throws LoginException if we are unable to figure user information
 * @throws IOException when any dfs operation fails.
 */
public Path getDefaultDestDir(Configuration conf) throws LoginException, IOException {
  UserGroupInformation ugi = ShimLoader.getHadoopShims().getUGIForConf(conf);
  String userName = ShimLoader.getHadoopShims().getShortUserName(ugi);
  String userPathStr = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_USER_INSTALL_DIR);
  Path userPath = new Path(userPathStr);
  FileSystem fs = userPath.getFileSystem(conf);
  if (!(fs instanceof DistributedFileSystem)) {
    throw new IOException(ErrorMsg.INVALID_HDFS_URI.format(userPathStr));
  }

  String jarPathStr = userPathStr + "/" + userName;
  String hdfsDirPathStr = jarPathStr;
  Path hdfsDirPath = new Path(hdfsDirPathStr);

  FileStatus fstatus = fs.getFileStatus(hdfsDirPath);
  if (!fstatus.isDir()) {
    throw new IOException(ErrorMsg.INVALID_DIR.format(hdfsDirPath.toString()));
  }

  Path retPath = new Path(hdfsDirPath.toString() + "/.hiveJars");
  fs.mkdirs(retPath);
  return retPath;
}
public HiveSchemaTool(String hiveHome, HiveConf hiveConf, String dbType)
    throws HiveMetaException {
  if (hiveHome == null || hiveHome.isEmpty()) {
    throw new HiveMetaException("No Hive home directory provided");
  }
  this.hiveConf = hiveConf;
  this.dbType = dbType;
  this.metaStoreSchemaInfo = new MetaStoreSchemaInfo(hiveHome, hiveConf, dbType);
  userName = hiveConf.get(ConfVars.METASTORE_CONNECTION_USER_NAME.varname);
  try {
    passWord =
        ShimLoader.getHadoopShims().getPassword(hiveConf, HiveConf.ConfVars.METASTOREPWD.varname);
  } catch (IOException err) {
    throw new HiveMetaException("Error getting metastore password", err);
  }
}
private String getPrincipalWithoutRealm(String fullPrincipal)
    throws HttpAuthenticationException {
  KerberosNameShim fullKerberosName;
  try {
    fullKerberosName = ShimLoader.getHadoopShims().getKerberosNameShim(fullPrincipal);
  } catch (IOException e) {
    throw new HttpAuthenticationException(e);
  }
  String serviceName = fullKerberosName.getServiceName();
  String hostName = fullKerberosName.getHostName();
  String principalWithoutRealm = serviceName;
  if (hostName != null) {
    principalWithoutRealm = serviceName + "/" + hostName;
  }
  return principalWithoutRealm;
}
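// Illustrative expectation (an assumption for documentation, not asserted anywhere in
// the source): for a full principal such as "hive/node1.example.com@EXAMPLE.COM",
// getPrincipalWithoutRealm() would return "hive/node1.example.com", while
// getPrincipalWithoutRealmAndHost() above strips both the realm and the host and
// returns just "hive".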
public HadoopShims.HdfsEncryptionShim getHdfsEncryptionShim() throws HiveException {
  if (hdfsEncryptionShim == null) {
    try {
      FileSystem fs = FileSystem.get(conf);
      if ("hdfs".equals(fs.getUri().getScheme())) {
        hdfsEncryptionShim = ShimLoader.getHadoopShims().createHdfsEncryptionShim(fs, conf);
      } else {
        LOG.info("Could not get hdfsEncryptionShim, it is only applicable to hdfs filesystem.");
      }
    } catch (Exception e) {
      throw new HiveException(e);
    }
  }
  return hdfsEncryptionShim;
}
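// Usage sketch (an assumption about a typical caller, not code from the source;
// isPathEncrypted() is the HdfsEncryptionShim method this sketch presumes to be
// available, and the helper name is hypothetical):
private boolean isPathInEncryptionZone(Path path) throws HiveException, IOException {
  HadoopShims.HdfsEncryptionShim encryptionShim = getHdfsEncryptionShim();
  // The shim stays null on non-HDFS filesystems (see above), so guard before asking.
  return encryptionShim != null && encryptionShim.isPathEncrypted(path);
}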
/*
 * Creates the configuration object necessary to run a specific vertex from
 * map work. This includes input formats, input processor, etc.
 */
private JobConf initializeVertexConf(JobConf baseConf, MapWork mapWork) {
  JobConf conf = new JobConf(baseConf);

  if (mapWork.getNumMapTasks() != null) {
    conf.setInt(MRJobConfig.NUM_MAPS, mapWork.getNumMapTasks().intValue());
  }

  if (mapWork.getMaxSplitSize() != null) {
    HiveConf.setLongVar(
        conf, HiveConf.ConfVars.MAPREDMAXSPLITSIZE, mapWork.getMaxSplitSize().longValue());
  }

  if (mapWork.getMinSplitSize() != null) {
    HiveConf.setLongVar(
        conf, HiveConf.ConfVars.MAPREDMINSPLITSIZE, mapWork.getMinSplitSize().longValue());
  }

  if (mapWork.getMinSplitSizePerNode() != null) {
    HiveConf.setLongVar(
        conf,
        HiveConf.ConfVars.MAPREDMINSPLITSIZEPERNODE,
        mapWork.getMinSplitSizePerNode().longValue());
  }

  if (mapWork.getMinSplitSizePerRack() != null) {
    HiveConf.setLongVar(
        conf,
        HiveConf.ConfVars.MAPREDMINSPLITSIZEPERRACK,
        mapWork.getMinSplitSizePerRack().longValue());
  }

  Utilities.setInputAttributes(conf, mapWork);

  String inpFormat = HiveConf.getVar(conf, HiveConf.ConfVars.HIVETEZINPUTFORMAT);
  if (StringUtils.isBlank(inpFormat)) {
    inpFormat = ShimLoader.getHadoopShims().getInputFormatClassName();
  }

  if (mapWork.isUseBucketizedHiveInputFormat()) {
    inpFormat = BucketizedHiveInputFormat.class.getName();
  }

  conf.set("mapred.mapper.class", ExecMapper.class.getName());
  conf.set("mapred.input.format.class", inpFormat);

  return conf;
}
@Override
protected void setUp() throws Exception {
  super.setUp();
  System.setProperty("hive.metastore.init.hooks", DummyMetaStoreInitListener.class.getName());
  int port = MetaStoreUtils.findFreePort();
  MetaStoreUtils.startMetaStore(port, ShimLoader.getHadoopThriftAuthBridge());
  hiveConf = new HiveConf(this.getClass());
  hiveConf.setVar(HiveConf.ConfVars.METASTOREURIS, "thrift://localhost:" + port);
  hiveConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTCONNECTIONRETRIES, 3);
  hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, "");
  hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, "");
  hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false");
  SessionState.start(new CliSessionState(hiveConf));
  msc = new HiveMetaStoreClient(hiveConf);
  driver = new Driver(hiveConf);
}
@Override
public Iterator<HCatRecord> read() throws HCatException {
  HCatInputFormat inpFmt = new HCatInputFormat();
  RecordReader<WritableComparable, HCatRecord> rr;
  try {
    TaskAttemptContext cntxt =
        ShimLoader.getHadoopShims()
            .getHCatShim()
            .createTaskAttemptContext(conf, new TaskAttemptID());
    rr = inpFmt.createRecordReader(split, cntxt);
    rr.initialize(split, cntxt);
  } catch (IOException e) {
    throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e);
  } catch (InterruptedException e) {
    throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e);
  }
  return new HCatRecordItr(rr);
}
@Override
public ReaderContext prepareRead() throws HCatException {
  try {
    Job job = new Job(conf);
    HCatInputFormat hcif =
        HCatInputFormat.setInput(job, re.getDbName(), re.getTableName(), re.getFilterString());
    ReaderContextImpl cntxt = new ReaderContextImpl();
    cntxt.setInputSplits(
        hcif.getSplits(
            ShimLoader.getHadoopShims()
                .getHCatShim()
                .createJobContext(job.getConfiguration(), null)));
    cntxt.setConf(job.getConfiguration());
    return cntxt;
  } catch (IOException e) {
    throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e);
  } catch (InterruptedException e) {
    throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e);
  }
}
@BeforeClass
public static void before() throws Exception {
  int port = MetaStoreUtils.findFreePort();

  hiveConf = new HiveConf(TestMetaStoreMetrics.class);
  hiveConf.setVar(HiveConf.ConfVars.METASTOREURIS, "thrift://localhost:" + port);
  hiveConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTCONNECTIONRETRIES, 3);
  hiveConf.setBoolVar(HiveConf.ConfVars.METASTORE_METRICS, true);
  hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY, false);

  MetricsFactory.close();
  MetricsFactory.init(hiveConf);
  metrics = (CodahaleMetrics) MetricsFactory.getInstance();

  // Increments one HMS connection
  MetaStoreUtils.startMetaStore(port, ShimLoader.getHadoopThriftAuthBridge(), hiveConf);

  // Increments one HMS connection (Hive.get())
  SessionState.start(new CliSessionState(hiveConf));
  driver = new Driver(hiveConf);
}
/**
 * Given a Hive Configuration object - generate a command line fragment for passing such
 * configuration information to ExecDriver.
 */
public static String generateCmdLine(HiveConf hconf, Context ctx) throws IOException {
  HiveConf tempConf = new HiveConf();
  Path hConfFilePath = new Path(ctx.getLocalTmpPath(), JOBCONF_FILENAME);
  OutputStream out = null;

  Properties deltaP = hconf.getChangedProperties();
  boolean hadoopLocalMode = ShimLoader.getHadoopShims().isLocalMode(hconf);
  String hadoopSysDir = "mapred.system.dir";
  String hadoopWorkDir = "mapred.local.dir";

  for (Object one : deltaP.keySet()) {
    String oneProp = (String) one;

    if (hadoopLocalMode && (oneProp.equals(hadoopSysDir) || oneProp.equals(hadoopWorkDir))) {
      continue;
    }
    tempConf.set(oneProp, hconf.get(oneProp));
  }

  // Multiple concurrent local mode job submissions can cause collisions in
  // working dirs and system dirs
  // Workaround is to rename map red working dir to a temp dir in such cases
  if (hadoopLocalMode) {
    tempConf.set(hadoopSysDir, hconf.get(hadoopSysDir) + "/" + Utilities.randGen.nextInt());
    tempConf.set(hadoopWorkDir, hconf.get(hadoopWorkDir) + "/" + Utilities.randGen.nextInt());
  }

  try {
    out = FileSystem.getLocal(hconf).create(hConfFilePath);
    tempConf.writeXml(out);
  } finally {
    if (out != null) {
      out.close();
    }
  }
  return " -jobconffile " + hConfFilePath.toString();
}
@Override
public int execute(DriverContext driverContext) {

  Context ctx = driverContext.getCtx();
  boolean ctxCreated = false;

  try {
    if (ctx == null) {
      ctx = new Context(conf);
      ctxCreated = true;
    }

    // estimate number of reducers
    setNumberOfReducers();

    // auto-determine local mode if allowed
    if (!ctx.isLocalOnlyExecutionMode() && conf.getBoolVar(HiveConf.ConfVars.LOCALMODEAUTO)) {

      if (inputSummary == null) {
        inputSummary = Utilities.getInputSummary(driverContext.getCtx(), work, null);
      }

      // set the values of totalInputFileSize and totalInputNumFiles, estimating them
      // if percentage block sampling is being used
      estimateInputSize();

      // at this point the number of reducers is precisely defined in the plan
      int numReducers = work.getNumReduceTasks();

      if (LOG.isDebugEnabled()) {
        LOG.debug(
            "Task: " + getId() + ", Summary: " + totalInputFileSize + "," + totalInputNumFiles
                + "," + numReducers);
      }

      String reason =
          MapRedTask.isEligibleForLocalMode(
              conf, numReducers, totalInputFileSize, totalInputNumFiles);
      if (reason == null) {
        // clone configuration before modifying it on per-task basis
        cloneConf();
        conf.setVar(HiveConf.ConfVars.HADOOPJT, "local");
        console.printInfo("Selecting local mode for task: " + getId());
        this.setLocalMode(true);
      } else {
        console.printInfo("Cannot run job locally: " + reason);
        this.setLocalMode(false);
      }
    }

    runningViaChild =
        "local".equals(conf.getVar(HiveConf.ConfVars.HADOOPJT))
            || conf.getBoolVar(HiveConf.ConfVars.SUBMITVIACHILD);

    if (!runningViaChild) {
      // we are not running this mapred task via child jvm
      // so directly invoke ExecDriver
      return super.execute(driverContext);
    }

    // we need to edit the configuration to setup cmdline. clone it first
    cloneConf();

    // propagate input format if necessary
    super.setInputAttributes(conf);

    // enable assertion
    String hadoopExec = conf.getVar(HiveConf.ConfVars.HADOOPBIN);
    String hiveJar = conf.getJar();

    String libJarsOption;
    String addedJars = Utilities.getResourceFiles(conf, SessionState.ResourceType.JAR);
    conf.setVar(ConfVars.HIVEADDEDJARS, addedJars);
    String auxJars = conf.getAuxJars();

    // Put auxjars and addedjars together into libjars
    if (StringUtils.isEmpty(addedJars)) {
      if (StringUtils.isEmpty(auxJars)) {
        libJarsOption = " ";
      } else {
        libJarsOption = " -libjars " + auxJars + " ";
      }
    } else {
      if (StringUtils.isEmpty(auxJars)) {
        libJarsOption = " -libjars " + addedJars + " ";
      } else {
        libJarsOption = " -libjars " + addedJars + "," + auxJars + " ";
      }
    }

    // Generate the hiveConfArgs after potentially adding the jars
    String hiveConfArgs = generateCmdLine(conf);

    // write out the plan to a local file
    Path planPath = new Path(ctx.getLocalTmpFileURI(), "plan.xml");
    OutputStream out = FileSystem.getLocal(conf).create(planPath);
    MapredWork plan = getWork();
    LOG.info("Generating plan file " + planPath.toString());
    Utilities.serializeMapRedWork(plan, out);

    String isSilent = "true".equalsIgnoreCase(System.getProperty("test.silent")) ? "-nolog" : "";

    String jarCmd;
    if (ShimLoader.getHadoopShims().usesJobShell()) {
      jarCmd = libJarsOption + hiveJar + " " + ExecDriver.class.getName();
    } else {
      jarCmd = hiveJar + " " + ExecDriver.class.getName() + libJarsOption;
    }

    String cmdLine =
        hadoopExec + " jar " + jarCmd + " -plan " + planPath.toString() + " " + isSilent + " "
            + hiveConfArgs;

    String workDir = (new File(".")).getCanonicalPath();
    String files = Utilities.getResourceFiles(conf, SessionState.ResourceType.FILE);
    if (!files.isEmpty()) {
      cmdLine = cmdLine + " -files " + files;

      workDir = (new Path(ctx.getLocalTmpFileURI())).toUri().getPath();

      if (!(new File(workDir)).mkdir()) {
        throw new IOException("Cannot create tmp working dir: " + workDir);
      }

      for (String f : StringUtils.split(files, ',')) {
        Path p = new Path(f);
        String target = p.toUri().getPath();
        String link = workDir + Path.SEPARATOR + p.getName();
        if (FileUtil.symLink(target, link) != 0) {
          throw new IOException("Cannot link to added file: " + target + " from: " + link);
        }
      }
    }

    LOG.info("Executing: " + cmdLine);
    Process executor = null;

    // Inherit Java system variables
    String hadoopOpts;
    StringBuilder sb = new StringBuilder();
    Properties p = System.getProperties();
    for (String element : HIVE_SYS_PROP) {
      if (p.containsKey(element)) {
        sb.append(" -D" + element + "=" + p.getProperty(element));
      }
    }
    hadoopOpts = sb.toString();

    // Inherit the environment variables
    String[] env;
    Map<String, String> variables = new HashMap<String, String>(System.getenv());
    // The user can specify the hadoop memory
    if ("local".equals(conf.getVar(HiveConf.ConfVars.HADOOPJT))) {
      // if we are running in local mode - then the amount of memory used
      // by the child jvm can no longer default to the memory used by the
      // parent jvm
      int hadoopMem = conf.getIntVar(HiveConf.ConfVars.HIVEHADOOPMAXMEM);
      if (hadoopMem == 0) {
        // remove env var that would default child jvm to use parent's memory
        // as default. child jvm would use default memory for a hadoop client
        variables.remove(HADOOP_MEM_KEY);
      } else {
        // user specified the memory for local mode hadoop run
        variables.put(HADOOP_MEM_KEY, String.valueOf(hadoopMem));
      }
    } else {
      // nothing to do - we are not running in local mode - only submitting
      // the job via a child process. In this case it's appropriate that the
      // child jvm use the same memory as the parent jvm
    }

    if (variables.containsKey(HADOOP_OPTS_KEY)) {
      variables.put(HADOOP_OPTS_KEY, variables.get(HADOOP_OPTS_KEY) + hadoopOpts);
    } else {
      variables.put(HADOOP_OPTS_KEY, hadoopOpts);
    }

    if (variables.containsKey(HIVE_DEBUG_RECURSIVE)) {
      configureDebugVariablesForChildJVM(variables);
    }

    env = new String[variables.size()];
    int pos = 0;
    for (Map.Entry<String, String> entry : variables.entrySet()) {
      String name = entry.getKey();
      String value = entry.getValue();
      env[pos++] = name + "=" + value;
    }

    // Run ExecDriver in another JVM
    executor = Runtime.getRuntime().exec(cmdLine, env, new File(workDir));

    StreamPrinter outPrinter =
        new StreamPrinter(
            executor.getInputStream(), null, SessionState.getConsole().getChildOutStream());
    StreamPrinter errPrinter =
        new StreamPrinter(
            executor.getErrorStream(), null, SessionState.getConsole().getChildErrStream());

    outPrinter.start();
    errPrinter.start();

    int exitVal = jobExecHelper.progressLocal(executor, getId());

    if (exitVal != 0) {
      LOG.error("Execution failed with exit status: " + exitVal);
    } else {
      LOG.info("Execution completed successfully");
    }

    return exitVal;
  } catch (Exception e) {
    e.printStackTrace();
    LOG.error("Exception: " + e.getMessage());
    return (1);
  } finally {
    try {
      // creating the context can create a bunch of files. So make
      // sure to clear it out
      if (ctxCreated) {
        ctx.clear();
      }
    } catch (Exception e) {
      LOG.error("Exception: " + e.getMessage());
    }
  }
}
public HiveTestUtil(
    String outDir, String logDir, MiniClusterType clusterType, String confDir, String hadoopVer)
    throws Exception {
  this.outDir = outDir;
  this.logDir = logDir;
  if (confDir != null && !confDir.isEmpty()) {
    HiveConf.setHiveSiteLocation(
        new URL("file://" + new File(confDir).toURI().getPath() + "/hive-site.xml"));
    LOG.info("Setting hive-site: " + HiveConf.getHiveSiteLocation());
  }
  conf = new HiveConf();
  String tmpBaseDir = System.getProperty("test.tmp.dir");
  // use equals-style emptiness check rather than reference comparison against ""
  if (tmpBaseDir == null || tmpBaseDir.isEmpty()) {
    tmpBaseDir = System.getProperty("java.io.tmpdir");
  }
  String metaStoreURL =
      "jdbc:derby:" + tmpBaseDir + File.separator + "metastore_dbtest;" + "create=true";
  conf.set(ConfVars.METASTORECONNECTURLKEY.varname, metaStoreURL);
  System.setProperty(HiveConf.ConfVars.METASTORECONNECTURLKEY.varname, metaStoreURL);

  // set where derby logs
  File derbyLogFile = new File(tmpBaseDir + "/derby.log");
  derbyLogFile.createNewFile();
  System.setProperty("derby.stream.error.file", derbyLogFile.getPath());

  this.hadoopVer = getHadoopMainVersion(hadoopVer);
  qMap = new TreeMap<String, String>();
  qSkipSet = new HashSet<String>();
  qSortSet = new HashSet<String>();
  qSortQuerySet = new HashSet<String>();
  qHashQuerySet = new HashSet<String>();
  qSortNHashQuerySet = new HashSet<String>();
  qJavaVersionSpecificOutput = new HashSet<String>();
  this.clusterType = clusterType;

  // Using randomUUID for dfs cluster
  System.setProperty("test.build.data", "target/test-data/hive-" + UUID.randomUUID().toString());

  HadoopShims shims = ShimLoader.getHadoopShims();
  int numberOfDataNodes = 4;

  if (clusterType != MiniClusterType.none) {
    dfs = shims.getMiniDfs(conf, numberOfDataNodes, true, null);
    FileSystem fs = dfs.getFileSystem();
    String uriString = WindowsPathUtil.getHdfsUriString(fs.getUri().toString());
    if (clusterType == MiniClusterType.tez) {
      mr = shims.getMiniTezCluster(conf, 4, uriString, 1);
    } else {
      mr = shims.getMiniMrCluster(conf, 4, uriString, 1);
    }
  }

  initConf();

  // Use the current directory if it is not specified
  String dataDir = conf.get("test.data.files");
  if (dataDir == null) {
    dataDir = new File(".").getAbsolutePath() + "/data/files";
  }
  testFiles = dataDir;

  // Use the current directory if it is not specified
  String scriptsDir = conf.get("test.data.scripts");
  if (scriptsDir == null) {
    scriptsDir = new File(".").getAbsolutePath() + "/data/scripts";
  }
  if (!initScript.isEmpty()) {
    this.initScript = scriptsDir + "/" + initScript;
  }
  if (!cleanupScript.isEmpty()) {
    this.cleanupScript = scriptsDir + "/" + cleanupScript;
  }

  overWrite = "true".equalsIgnoreCase(System.getProperty("test.output.overwrite"));

  setup = new HiveTestSetup();
  setup.preTest(conf);
  init();
}
private void getTaskInfos() throws IOException, MalformedURLException {
  int startIndex = 0;
  while (true) {
    TaskCompletionEvent[] taskCompletions = rj.getTaskCompletionEvents(startIndex);

    if (taskCompletions == null || taskCompletions.length == 0) {
      break;
    }

    boolean more = true;
    boolean firstError = true;
    for (TaskCompletionEvent t : taskCompletions) {
      // For each task completion event, get the associated task id, job id
      // and the logs
      String taskId = t.getTaskAttemptId().getTaskID().toString();
      String jobId = t.getTaskAttemptId().getJobID().toString();
      if (firstError) {
        console.printError("Examining task ID: " + taskId + " (and more) from job " + jobId);
        firstError = false;
      }

      TaskInfo ti = taskIdToInfo.get(taskId);
      if (ti == null) {
        ti = new TaskInfo(jobId);
        taskIdToInfo.put(taskId, ti);
      }
      // These tasks should have come from the same job.
      assert (ti.getJobId() != null && ti.getJobId().equals(jobId));

      String taskAttemptLogUrl =
          ShimLoader.getHadoopShims()
              .getTaskAttemptLogUrl(conf, t.getTaskTrackerHttp(), t.getTaskId());
      if (taskAttemptLogUrl != null) {
        ti.getLogUrls().add(taskAttemptLogUrl);
      }

      // If a task failed, fetch its error code (if available).
      // Also keep track of the total number of failures for that
      // task (typically, a task gets re-run up to 4 times if it fails).
      if (t.getTaskStatus() != TaskCompletionEvent.Status.SUCCEEDED) {
        String[] diags = rj.getTaskDiagnostics(t.getTaskAttemptId());
        ti.setDiagnosticMesgs(diags);
        if (ti.getErrorCode() == 0) {
          ti.setErrorCode(extractErrorCode(diags));
        }

        Integer failAttempts = failures.get(taskId);
        if (failAttempts == null) {
          failAttempts = Integer.valueOf(0);
        }
        failAttempts = Integer.valueOf(failAttempts.intValue() + 1);
        failures.put(taskId, failAttempts);
      } else {
        successes.add(taskId);
      }
    }
    if (!more) {
      break;
    }
    startIndex += taskCompletions.length;
  }
}
/** Execute a query plan using Hadoop. */
@SuppressWarnings({"deprecation", "unchecked"})
@Override
public int execute(DriverContext driverContext) {

  IOPrepareCache ioPrepareCache = IOPrepareCache.get();
  ioPrepareCache.clear();

  boolean success = true;

  Context ctx = driverContext.getCtx();
  boolean ctxCreated = false;
  Path emptyScratchDir;

  MapWork mWork = work.getMapWork();
  ReduceWork rWork = work.getReduceWork();

  try {
    if (ctx == null) {
      ctx = new Context(job);
      ctxCreated = true;
    }

    emptyScratchDir = ctx.getMRTmpPath();
    FileSystem fs = emptyScratchDir.getFileSystem(job);
    fs.mkdirs(emptyScratchDir);
  } catch (IOException e) {
    e.printStackTrace();
    console.printError(
        "Error launching map-reduce job",
        "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
    return 5;
  }

  HiveFileFormatUtils.prepareJobOutput(job);
  // See the javadoc on HiveOutputFormatImpl and HadoopShims.prepareJobOutput()
  job.setOutputFormat(HiveOutputFormatImpl.class);

  job.setMapperClass(ExecMapper.class);

  job.setMapOutputKeyClass(HiveKey.class);
  job.setMapOutputValueClass(BytesWritable.class);

  try {
    String partitioner = HiveConf.getVar(job, ConfVars.HIVEPARTITIONER);
    job.setPartitionerClass(JavaUtils.loadClass(partitioner));
  } catch (ClassNotFoundException e) {
    throw new RuntimeException(e.getMessage(), e);
  }

  if (mWork.getNumMapTasks() != null) {
    job.setNumMapTasks(mWork.getNumMapTasks().intValue());
  }

  if (mWork.getMaxSplitSize() != null) {
    HiveConf.setLongVar(
        job, HiveConf.ConfVars.MAPREDMAXSPLITSIZE, mWork.getMaxSplitSize().longValue());
  }

  if (mWork.getMinSplitSize() != null) {
    HiveConf.setLongVar(
        job, HiveConf.ConfVars.MAPREDMINSPLITSIZE, mWork.getMinSplitSize().longValue());
  }

  if (mWork.getMinSplitSizePerNode() != null) {
    HiveConf.setLongVar(
        job,
        HiveConf.ConfVars.MAPREDMINSPLITSIZEPERNODE,
        mWork.getMinSplitSizePerNode().longValue());
  }

  if (mWork.getMinSplitSizePerRack() != null) {
    HiveConf.setLongVar(
        job,
        HiveConf.ConfVars.MAPREDMINSPLITSIZEPERRACK,
        mWork.getMinSplitSizePerRack().longValue());
  }

  job.setNumReduceTasks(rWork != null ? rWork.getNumReduceTasks().intValue() : 0);
  job.setReducerClass(ExecReducer.class);

  // set input format information if necessary
  setInputAttributes(job);

  // Turn on speculative execution for reducers
  boolean useSpeculativeExecReducers =
      HiveConf.getBoolVar(job, HiveConf.ConfVars.HIVESPECULATIVEEXECREDUCERS);
  HiveConf.setBoolVar(
      job, HiveConf.ConfVars.HADOOPSPECULATIVEEXECREDUCERS, useSpeculativeExecReducers);

  String inpFormat = HiveConf.getVar(job, HiveConf.ConfVars.HIVEINPUTFORMAT);

  if (mWork.isUseBucketizedHiveInputFormat()) {
    inpFormat = BucketizedHiveInputFormat.class.getName();
  }

  LOG.info("Using " + inpFormat);

  try {
    job.setInputFormat(JavaUtils.loadClass(inpFormat));
  } catch (ClassNotFoundException e) {
    throw new RuntimeException(e.getMessage(), e);
  }

  // No-Op - we don't really write anything here ..
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);

  // Transfer HIVEAUXJARS and HIVEADDEDJARS to "tmpjars" so hadoop understands it
  String auxJars = HiveConf.getVar(job, HiveConf.ConfVars.HIVEAUXJARS);
  String addedJars = HiveConf.getVar(job, HiveConf.ConfVars.HIVEADDEDJARS);
  if (StringUtils.isNotBlank(auxJars) || StringUtils.isNotBlank(addedJars)) {
    String allJars =
        StringUtils.isNotBlank(auxJars)
            ? (StringUtils.isNotBlank(addedJars) ? addedJars + "," + auxJars : auxJars)
            : addedJars;
    LOG.info("adding libjars: " + allJars);
    initializeFiles("tmpjars", allJars);
  }

  // Transfer HIVEADDEDFILES to "tmpfiles" so hadoop understands it
  String addedFiles = HiveConf.getVar(job, HiveConf.ConfVars.HIVEADDEDFILES);
  if (StringUtils.isNotBlank(addedFiles)) {
    initializeFiles("tmpfiles", addedFiles);
  }

  int returnVal = 0;
  boolean noName = StringUtils.isEmpty(HiveConf.getVar(job, HiveConf.ConfVars.HADOOPJOBNAME));

  if (noName) {
    // This is for a special case to ensure unit tests pass
    HiveConf.setVar(job, HiveConf.ConfVars.HADOOPJOBNAME, "JOB" + Utilities.randGen.nextInt());
  }

  String addedArchives = HiveConf.getVar(job, HiveConf.ConfVars.HIVEADDEDARCHIVES);
  // Transfer HIVEADDEDARCHIVES to "tmparchives" so hadoop understands it
  if (StringUtils.isNotBlank(addedArchives)) {
    initializeFiles("tmparchives", addedArchives);
  }

  try {
    MapredLocalWork localwork = mWork.getMapRedLocalWork();
    if (localwork != null && localwork.hasStagedAlias()) {
      if (!ShimLoader.getHadoopShims().isLocalMode(job)) {
        Path localPath = localwork.getTmpPath();
        Path hdfsPath = mWork.getTmpHDFSPath();

        FileSystem hdfs = hdfsPath.getFileSystem(job);
        FileSystem localFS = localPath.getFileSystem(job);
        FileStatus[] hashtableFiles = localFS.listStatus(localPath);
        int fileNumber = hashtableFiles.length;
        String[] fileNames = new String[fileNumber];

        for (int i = 0; i < fileNumber; i++) {
          fileNames[i] = hashtableFiles[i].getPath().getName();
        }

        // package and compress all the hashtable files to an archive file
        String stageId = this.getId();
        String archiveFileName = Utilities.generateTarFileName(stageId);
        localwork.setStageID(stageId);

        CompressionUtils.tar(localPath.toUri().getPath(), fileNames, archiveFileName);
        Path archivePath = Utilities.generateTarPath(localPath, stageId);
        LOG.info("Archive " + hashtableFiles.length + " hash table files to " + archivePath);

        // upload archive file to hdfs
        Path hdfsFilePath = Utilities.generateTarPath(hdfsPath, stageId);
        short replication = (short) job.getInt("mapred.submit.replication", 10);
        hdfs.copyFromLocalFile(archivePath, hdfsFilePath);
        hdfs.setReplication(hdfsFilePath, replication);
        LOG.info("Upload 1 archive file from" + archivePath + " to: " + hdfsFilePath);

        // add the archive file to distributed cache
        DistributedCache.createSymlink(job);
        DistributedCache.addCacheArchive(hdfsFilePath.toUri(), job);
        LOG.info(
            "Add 1 archive file to distributed cache. Archive file: " + hdfsFilePath.toUri());
      }
    }
    work.configureJobConf(job);
    List<Path> inputPaths = Utilities.getInputPaths(job, mWork, emptyScratchDir, ctx, false);
    Utilities.setInputPaths(job, inputPaths);

    Utilities.setMapRedWork(job, work, ctx.getMRTmpPath());

    if (mWork.getSamplingType() > 0 && rWork != null && job.getNumReduceTasks() > 1) {
      try {
        handleSampling(ctx, mWork, job);
        job.setPartitionerClass(HiveTotalOrderPartitioner.class);
      } catch (IllegalStateException e) {
        console.printInfo("Not enough sampling data.. Rolling back to single reducer task");
        rWork.setNumReduceTasks(1);
        job.setNumReduceTasks(1);
      } catch (Exception e) {
        LOG.error("Sampling error", e);
        console.printError(
            e.toString(), "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
        rWork.setNumReduceTasks(1);
        job.setNumReduceTasks(1);
      }
    }

    // remove the pwd from conf file so that job tracker doesn't show this logs
    String pwd = HiveConf.getVar(job, HiveConf.ConfVars.METASTOREPWD);
    if (pwd != null) {
      HiveConf.setVar(job, HiveConf.ConfVars.METASTOREPWD, "HIVE");
    }
    JobClient jc = new JobClient(job);

    // make this client wait if job tracker is not behaving well.
    Throttle.checkJobTracker(job, LOG);

    if (mWork.isGatheringStats() || (rWork != null && rWork.isGatheringStats())) {
      // initialize stats publishing table
      StatsPublisher statsPublisher;
      StatsFactory factory = StatsFactory.newFactory(job);
      if (factory != null) {
        statsPublisher = factory.getStatsPublisher();
        List<String> statsTmpDir = Utilities.getStatsTmpDirs(mWork, job);
        if (rWork != null) {
          statsTmpDir.addAll(Utilities.getStatsTmpDirs(rWork, job));
        }
        StatsCollectionContext sc = new StatsCollectionContext(job);
        sc.setStatsTmpDirs(statsTmpDir);
        if (!statsPublisher.init(sc)) { // creating stats table if not exists
          if (HiveConf.getBoolVar(job, HiveConf.ConfVars.HIVE_STATS_RELIABLE)) {
            throw new HiveException(
                ErrorMsg.STATSPUBLISHER_INITIALIZATION_ERROR.getErrorCodedMsg());
          }
        }
      }
    }

    Utilities.createTmpDirs(job, mWork);
    Utilities.createTmpDirs(job, rWork);

    SessionState ss = SessionState.get();
    if (HiveConf.getVar(job, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")
        && ss != null) {
      TezSessionState session = ss.getTezSession();
      TezSessionPoolManager.getInstance().close(session, true);
    }

    // Finally SUBMIT the JOB!
    rj = jc.submitJob(job);
    // replace it back
    if (pwd != null) {
      HiveConf.setVar(job, HiveConf.ConfVars.METASTOREPWD, pwd);
    }

    returnVal = jobExecHelper.progress(rj, jc, ctx.getHiveTxnManager());
    success = (returnVal == 0);
  } catch (Exception e) {
    e.printStackTrace();
    String mesg = " with exception '" + Utilities.getNameMessage(e) + "'";
    if (rj != null) {
      mesg = "Ended Job = " + rj.getJobID() + mesg;
    } else {
      mesg = "Job Submission failed" + mesg;
    }

    // Has to use full name to make sure it does not conflict with
    // org.apache.commons.lang.StringUtils
    console.printError(mesg, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));

    success = false;
    returnVal = 1;
  } finally {
    Utilities.clearWork(job);
    try {
      if (ctxCreated) {
        ctx.clear();
      }

      if (rj != null) {
        if (returnVal != 0) {
          rj.killJob();
        }
        jobID = rj.getID().toString();
      }
    } catch (Exception e) {
      LOG.warn("Failed while cleaning up ", e);
    } finally {
      HadoopJobExecHelper.runningJobs.remove(rj);
    }
  }

  // get the list of Dynamic partition paths
  try {
    if (rj != null) {
      if (mWork.getAliasToWork() != null) {
        for (Operator<? extends OperatorDesc> op : mWork.getAliasToWork().values()) {
          op.jobClose(job, success);
        }
      }
      if (rWork != null) {
        rWork.getReducer().jobClose(job, success);
      }
    }
  } catch (Exception e) {
    // jobClose needs to execute successfully otherwise fail task
    if (success) {
      success = false;
      returnVal = 3;
      String mesg = "Job Commit failed with exception '" + Utilities.getNameMessage(e) + "'";
      console.printError(mesg, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
    }
  }

  return (returnVal);
}
public void restoreOriginalTracker() {
  if (originalTracker != null) {
    ShimLoader.getHadoopShims().setJobLauncherRpcAddress(conf, originalTracker);
    originalTracker = null;
  }
}
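// Usage sketch (an assumption about the surrounding class, which is not shown here:
// it is presumed to keep the "originalTracker" field used above, and the shim accessor
// getJobLauncherRpcAddress() is assumed to be the counterpart of setJobLauncherRpcAddress();
// the method name below is hypothetical):
public void switchToLocalTracker() {
  // Remember the current job launcher address, then point the configuration at the
  // in-process "local" launcher; restoreOriginalTracker() above undoes this switch.
  originalTracker = ShimLoader.getHadoopShims().getJobLauncherRpcAddress(conf);
  ShimLoader.getHadoopShims().setJobLauncherRpcAddress(conf, "local");
}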