public FosterStorageHandler(String ifName, String ofName, String serdeName)
    throws ClassNotFoundException {
  this((Class<? extends InputFormat>) JavaUtils.loadClass(ifName),
      (Class<? extends OutputFormat>) JavaUtils.loadClass(ofName),
      (Class<? extends SerDe>) JavaUtils.loadClass(serdeName));
}
/**
 * Create an instance of a storage handler. If storageHandler == null, then a surrogate
 * StorageHandler is used to encapsulate the InputFormat, OutputFormat and SerDe. This
 * StorageHandler assumes the other supplied storage artifacts are for a file-based storage
 * system.
 *
 * @param conf the job configuration; used to configure a Configurable StorageHandler
 * @param storageHandler fully qualified class name of the desired StorageHandler instance
 * @param serDe fully qualified class name of the desired SerDe instance
 * @param inputFormat fully qualified class name of the desired InputFormat instance
 * @param outputFormat fully qualified class name of the desired OutputFormat instance
 * @return storageHandler instance
 * @throws IOException
 */
public static HCatStorageHandler getStorageHandler(Configuration conf,
                                                   String storageHandler,
                                                   String serDe,
                                                   String inputFormat,
                                                   String outputFormat) throws IOException {
  if ((storageHandler == null)
      || (storageHandler.equals(FosterStorageHandler.class.getName()))) {
    try {
      FosterStorageHandler fosterStorageHandler =
          new FosterStorageHandler(inputFormat, outputFormat, serDe);
      fosterStorageHandler.setConf(conf);
      return fosterStorageHandler;
    } catch (ClassNotFoundException e) {
      throw new IOException("Failed to load foster storage handler", e);
    }
  }

  try {
    Class<? extends HCatStorageHandler> handlerClass =
        (Class<? extends HCatStorageHandler>) Class.forName(storageHandler, true,
            JavaUtils.getClassLoader());
    return (HCatStorageHandler) ReflectionUtils.newInstance(handlerClass, conf);
  } catch (ClassNotFoundException e) {
    throw new IOException("Error in loading storage handler." + e.getMessage(), e);
  }
}
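A minimal sketch of calling this factory from within the enclosing utility class when a table has no explicit storage handler, so the surrogate FosterStorageHandler is returned. The SerDe and format class names below are ordinary Hive/Hadoop classes chosen purely for illustration.

  // No explicit storage handler: the surrogate FosterStorageHandler wraps the
  // file-based InputFormat/OutputFormat/SerDe named in the table metadata.
  Configuration conf = new Configuration();
  HCatStorageHandler handler = getStorageHandler(
      conf,
      null,                                                         // storageHandler
      "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe",         // serDe
      "org.apache.hadoop.mapred.TextInputFormat",                   // inputFormat
      "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"  // outputFormat
  );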
public SessionState(HiveConf conf, String userName) {
  this.conf = conf;
  this.userName = userName;
  isSilent = conf.getBoolVar(HiveConf.ConfVars.HIVESESSIONSILENT);
  ls = new LineageState();
  // Must be deterministic order map for consistent q-test output across Java versions
  overriddenConfigurations = new LinkedHashMap<String, String>();
  overriddenConfigurations.putAll(HiveConf.getConfSystemProperties());
  // if there isn't already a session name, go ahead and create it.
  if (StringUtils.isEmpty(conf.getVar(HiveConf.ConfVars.HIVESESSIONID))) {
    conf.setVar(HiveConf.ConfVars.HIVESESSIONID, makeSessionId());
  }
  parentLoader = JavaUtils.getClassLoader();
}
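A short sketch of constructing and activating a session with this constructor; the user name is illustrative, and it assumes the usual Hive pattern of SessionState.start attaching the state to the current thread so SessionState.get() returns it later.

  HiveConf conf = new HiveConf();
  SessionState ss = new SessionState(conf, "hive_user");  // "hive_user" is illustrative
  SessionState.start(ss);                                 // attach to the current thread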
public void closeCUDFLoaders() {
  lock.lock();
  try {
    try {
      for (ClassLoader loader : mSessionUDFLoaders) {
        JavaUtils.closeClassLoader(loader);
      }
    } catch (IOException ie) {
      LOG.error("Error in close loader: " + ie);
    }
    mSessionUDFLoaders.clear();
  } finally {
    lock.unlock();
  }
}
@SuppressWarnings("deprecation") private static Class<? extends Deserializer> getDeserializerClass(String name) { // CDH uses different names for Parquet if ("parquet.hive.serde.ParquetHiveSerDe".equals(name)) { return ParquetHiveSerDe.class; } try { return Class.forName(name, true, JavaUtils.getClassLoader()).asSubclass(Deserializer.class); } catch (ClassNotFoundException e) { throw new PrestoException(HIVE_SERDE_NOT_FOUND, "deserializer does not exist: " + name); } catch (ClassCastException e) { throw new RuntimeException("invalid deserializer class: " + name); } }
/**
 * Returns the hooks named in a comma separated list, typically the value of a hook
 * configuration variable. The hooks are returned in the order in which they are listed.
 *
 * @param csHooks the value of the hook configuration variable: a comma separated list of
 *          fully qualified hook class names
 * @return A list of the hooks, in the order they are listed in csHooks
 * @throws Exception
 */
private static <T extends Hook> List<T> getHooks(String csHooks) throws Exception {
  List<T> hooks = new ArrayList<T>();
  if (csHooks.isEmpty()) {
    return hooks;
  }
  for (String hookClass : Splitter.on(",").omitEmptyStrings().trimResults().split(csHooks)) {
    try {
      @SuppressWarnings("unchecked")
      T hook = (T) Class.forName(hookClass, true, JavaUtils.getClassLoader()).newInstance();
      hooks.add(hook);
    } catch (ClassNotFoundException e) {
      LOG.error(hookClass + " Class not found: " + e.getMessage());
      throw e;
    }
  }
  return hooks;
}
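A sketch of how this helper might be driven from within the same class: read a hook configuration variable and instantiate the listed hook classes. The wiring here is illustrative; HiveConf.ConfVars.PREEXECHOOKS is the standard pre-execution hook key.

  // Instantiate the pre-execution hooks configured for this session.
  HiveConf conf = new HiveConf();
  String csHooks = conf.getVar(HiveConf.ConfVars.PREEXECHOOKS);
  List<ExecuteWithHookContext> preHooks = getHooks(csHooks);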
public void close() throws IOException {
  if (txnMgr != null) txnMgr.closeTxnManager();
  JavaUtils.closeClassLoadersTo(conf.getClassLoader(), parentLoader);
  File resourceDir =
      new File(getConf().getVar(HiveConf.ConfVars.DOWNLOADED_RESOURCES_DIR));
  LOG.debug("Removing resource dir " + resourceDir);
  try {
    if (resourceDir.exists()) {
      FileUtils.deleteDirectory(resourceDir);
    }
  } catch (IOException e) {
    LOG.info("Error removing session resource dir " + resourceDir, e);
  } finally {
    detachSession();
  }

  try {
    if (tezSessionState != null) {
      TezSessionPoolManager.getInstance().close(tezSessionState, false);
    }
  } catch (Exception e) {
    LOG.info("Error closing tez session", e);
  } finally {
    tezSessionState = null;
  }

  if (sparkSession != null) {
    try {
      SparkSessionManagerImpl.getInstance().closeSession(sparkSession);
    } catch (Exception ex) {
      LOG.error("Error closing spark session.", ex);
    } finally {
      sparkSession = null;
    }
  }

  dropSessionPaths(conf);
}
private static <T> T instantiate(Class<T> classType, String classname) throws IOException {
  T t = null;
  try {
    Class c = JavaUtils.loadClass(classname);
    Object o = c.newInstance();
    if (classType.isAssignableFrom(o.getClass())) {
      t = (T) o;
    } else {
      String s = classname + " is not an instance of " + classType.getName();
      LOG.error(s);
      throw new IOException(s);
    }
  } catch (ClassNotFoundException e) {
    LOG.error("Unable to instantiate class, " + StringUtils.stringifyException(e));
    throw new IOException(e);
  } catch (InstantiationException e) {
    LOG.error("Unable to instantiate class, " + StringUtils.stringifyException(e));
    throw new IOException(e);
  } catch (IllegalAccessException e) {
    LOG.error("Unable to instantiate class, " + StringUtils.stringifyException(e));
    throw new IOException(e);
  }
  return t;
}
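An illustrative call from within the same class showing what the generic helper buys you: the returned object is already typed, and an IOException is thrown if the class is missing or not assignable to the requested type. Thread is used only because it has a public no-arg constructor and implements Runnable.

  // Returns a Thread instance typed as Runnable; a non-Runnable class name
  // would instead fail with an IOException.
  Runnable r = instantiate(Runnable.class, "java.lang.Thread");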
/** Execute a query plan using Hadoop. */
@SuppressWarnings({"deprecation", "unchecked"})
@Override
public int execute(DriverContext driverContext) {

  IOPrepareCache ioPrepareCache = IOPrepareCache.get();
  ioPrepareCache.clear();

  boolean success = true;

  Context ctx = driverContext.getCtx();
  boolean ctxCreated = false;
  Path emptyScratchDir;

  MapWork mWork = work.getMapWork();
  ReduceWork rWork = work.getReduceWork();

  try {
    if (ctx == null) {
      ctx = new Context(job);
      ctxCreated = true;
    }

    emptyScratchDir = ctx.getMRTmpPath();
    FileSystem fs = emptyScratchDir.getFileSystem(job);
    fs.mkdirs(emptyScratchDir);
  } catch (IOException e) {
    e.printStackTrace();
    console.printError("Error launching map-reduce job", "\n"
        + org.apache.hadoop.util.StringUtils.stringifyException(e));
    return 5;
  }

  HiveFileFormatUtils.prepareJobOutput(job);
  // See the javadoc on HiveOutputFormatImpl and HadoopShims.prepareJobOutput()
  job.setOutputFormat(HiveOutputFormatImpl.class);

  job.setMapperClass(ExecMapper.class);

  job.setMapOutputKeyClass(HiveKey.class);
  job.setMapOutputValueClass(BytesWritable.class);

  try {
    String partitioner = HiveConf.getVar(job, ConfVars.HIVEPARTITIONER);
    job.setPartitionerClass(JavaUtils.loadClass(partitioner));
  } catch (ClassNotFoundException e) {
    throw new RuntimeException(e.getMessage(), e);
  }

  if (mWork.getNumMapTasks() != null) {
    job.setNumMapTasks(mWork.getNumMapTasks().intValue());
  }

  if (mWork.getMaxSplitSize() != null) {
    HiveConf.setLongVar(job, HiveConf.ConfVars.MAPREDMAXSPLITSIZE,
        mWork.getMaxSplitSize().longValue());
  }

  if (mWork.getMinSplitSize() != null) {
    HiveConf.setLongVar(job, HiveConf.ConfVars.MAPREDMINSPLITSIZE,
        mWork.getMinSplitSize().longValue());
  }

  if (mWork.getMinSplitSizePerNode() != null) {
    HiveConf.setLongVar(job, HiveConf.ConfVars.MAPREDMINSPLITSIZEPERNODE,
        mWork.getMinSplitSizePerNode().longValue());
  }

  if (mWork.getMinSplitSizePerRack() != null) {
    HiveConf.setLongVar(job, HiveConf.ConfVars.MAPREDMINSPLITSIZEPERRACK,
        mWork.getMinSplitSizePerRack().longValue());
  }

  job.setNumReduceTasks(rWork != null ? rWork.getNumReduceTasks().intValue() : 0);
  job.setReducerClass(ExecReducer.class);

  // set input format information if necessary
  setInputAttributes(job);

  // Turn on speculative execution for reducers
  boolean useSpeculativeExecReducers = HiveConf.getBoolVar(job,
      HiveConf.ConfVars.HIVESPECULATIVEEXECREDUCERS);
  HiveConf.setBoolVar(job, HiveConf.ConfVars.HADOOPSPECULATIVEEXECREDUCERS,
      useSpeculativeExecReducers);

  String inpFormat = HiveConf.getVar(job, HiveConf.ConfVars.HIVEINPUTFORMAT);

  if (mWork.isUseBucketizedHiveInputFormat()) {
    inpFormat = BucketizedHiveInputFormat.class.getName();
  }

  LOG.info("Using " + inpFormat);

  try {
    job.setInputFormat(JavaUtils.loadClass(inpFormat));
  } catch (ClassNotFoundException e) {
    throw new RuntimeException(e.getMessage(), e);
  }

  // No-Op - we don't really write anything here ..
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);

  // Transfer HIVEAUXJARS and HIVEADDEDJARS to "tmpjars" so hadoop understands it
  String auxJars = HiveConf.getVar(job, HiveConf.ConfVars.HIVEAUXJARS);
  String addedJars = HiveConf.getVar(job, HiveConf.ConfVars.HIVEADDEDJARS);
  if (StringUtils.isNotBlank(auxJars) || StringUtils.isNotBlank(addedJars)) {
    String allJars = StringUtils.isNotBlank(auxJars)
        ? (StringUtils.isNotBlank(addedJars) ? addedJars + "," + auxJars : auxJars)
        : addedJars;
    LOG.info("adding libjars: " + allJars);
    initializeFiles("tmpjars", allJars);
  }

  // Transfer HIVEADDEDFILES to "tmpfiles" so hadoop understands it
  String addedFiles = HiveConf.getVar(job, HiveConf.ConfVars.HIVEADDEDFILES);
  if (StringUtils.isNotBlank(addedFiles)) {
    initializeFiles("tmpfiles", addedFiles);
  }

  int returnVal = 0;
  boolean noName = StringUtils.isEmpty(HiveConf.getVar(job, HiveConf.ConfVars.HADOOPJOBNAME));

  if (noName) {
    // This is for a special case to ensure unit tests pass
    HiveConf.setVar(job, HiveConf.ConfVars.HADOOPJOBNAME, "JOB" + Utilities.randGen.nextInt());
  }

  String addedArchives = HiveConf.getVar(job, HiveConf.ConfVars.HIVEADDEDARCHIVES);
  // Transfer HIVEADDEDARCHIVES to "tmparchives" so hadoop understands it
  if (StringUtils.isNotBlank(addedArchives)) {
    initializeFiles("tmparchives", addedArchives);
  }

  try {
    MapredLocalWork localwork = mWork.getMapRedLocalWork();
    if (localwork != null && localwork.hasStagedAlias()) {
      if (!ShimLoader.getHadoopShims().isLocalMode(job)) {
        Path localPath = localwork.getTmpPath();
        Path hdfsPath = mWork.getTmpHDFSPath();

        FileSystem hdfs = hdfsPath.getFileSystem(job);
        FileSystem localFS = localPath.getFileSystem(job);
        FileStatus[] hashtableFiles = localFS.listStatus(localPath);
        int fileNumber = hashtableFiles.length;
        String[] fileNames = new String[fileNumber];

        for (int i = 0; i < fileNumber; i++) {
          fileNames[i] = hashtableFiles[i].getPath().getName();
        }

        // package and compress all the hashtable files to an archive file
        String stageId = this.getId();
        String archiveFileName = Utilities.generateTarFileName(stageId);
        localwork.setStageID(stageId);

        CompressionUtils.tar(localPath.toUri().getPath(), fileNames, archiveFileName);
        Path archivePath = Utilities.generateTarPath(localPath, stageId);
        LOG.info("Archive " + hashtableFiles.length + " hash table files to " + archivePath);

        // upload archive file to hdfs
        Path hdfsFilePath = Utilities.generateTarPath(hdfsPath, stageId);
        short replication = (short) job.getInt("mapred.submit.replication", 10);
        hdfs.copyFromLocalFile(archivePath, hdfsFilePath);
        hdfs.setReplication(hdfsFilePath, replication);
        LOG.info("Upload 1 archive file from" + archivePath + " to: " + hdfsFilePath);

        // add the archive file to distributed cache
        DistributedCache.createSymlink(job);
        DistributedCache.addCacheArchive(hdfsFilePath.toUri(), job);
        LOG.info("Add 1 archive file to distributed cache. Archive file: "
            + hdfsFilePath.toUri());
      }
    }
    work.configureJobConf(job);
    List<Path> inputPaths = Utilities.getInputPaths(job, mWork, emptyScratchDir, ctx, false);
    Utilities.setInputPaths(job, inputPaths);

    Utilities.setMapRedWork(job, work, ctx.getMRTmpPath());

    if (mWork.getSamplingType() > 0 && rWork != null && job.getNumReduceTasks() > 1) {
      try {
        handleSampling(ctx, mWork, job);
        job.setPartitionerClass(HiveTotalOrderPartitioner.class);
      } catch (IllegalStateException e) {
        console.printInfo("Not enough sampling data.. Rolling back to single reducer task");
        rWork.setNumReduceTasks(1);
        job.setNumReduceTasks(1);
      } catch (Exception e) {
        LOG.error("Sampling error", e);
        console.printError(e.toString(), "\n"
            + org.apache.hadoop.util.StringUtils.stringifyException(e));
        rWork.setNumReduceTasks(1);
        job.setNumReduceTasks(1);
      }
    }

    // remove the pwd from conf file so that job tracker doesn't show this logs
    String pwd = HiveConf.getVar(job, HiveConf.ConfVars.METASTOREPWD);
    if (pwd != null) {
      HiveConf.setVar(job, HiveConf.ConfVars.METASTOREPWD, "HIVE");
    }
    JobClient jc = new JobClient(job);

    // make this client wait if job tracker is not behaving well.
    Throttle.checkJobTracker(job, LOG);

    if (mWork.isGatheringStats() || (rWork != null && rWork.isGatheringStats())) {
      // initialize stats publishing table
      StatsPublisher statsPublisher;
      StatsFactory factory = StatsFactory.newFactory(job);
      if (factory != null) {
        statsPublisher = factory.getStatsPublisher();
        List<String> statsTmpDir = Utilities.getStatsTmpDirs(mWork, job);
        if (rWork != null) {
          statsTmpDir.addAll(Utilities.getStatsTmpDirs(rWork, job));
        }
        StatsCollectionContext sc = new StatsCollectionContext(job);
        sc.setStatsTmpDirs(statsTmpDir);
        if (!statsPublisher.init(sc)) { // creating stats table if not exists
          if (HiveConf.getBoolVar(job, HiveConf.ConfVars.HIVE_STATS_RELIABLE)) {
            throw new HiveException(
                ErrorMsg.STATSPUBLISHER_INITIALIZATION_ERROR.getErrorCodedMsg());
          }
        }
      }
    }

    Utilities.createTmpDirs(job, mWork);
    Utilities.createTmpDirs(job, rWork);

    SessionState ss = SessionState.get();
    if (HiveConf.getVar(job, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")
        && ss != null) {
      TezSessionState session = ss.getTezSession();
      TezSessionPoolManager.getInstance().close(session, true);
    }

    // Finally SUBMIT the JOB!
    rj = jc.submitJob(job);
    // replace it back
    if (pwd != null) {
      HiveConf.setVar(job, HiveConf.ConfVars.METASTOREPWD, pwd);
    }

    returnVal = jobExecHelper.progress(rj, jc, ctx.getHiveTxnManager());
    success = (returnVal == 0);
  } catch (Exception e) {
    e.printStackTrace();
    String mesg = " with exception '" + Utilities.getNameMessage(e) + "'";
    if (rj != null) {
      mesg = "Ended Job = " + rj.getJobID() + mesg;
    } else {
      mesg = "Job Submission failed" + mesg;
    }

    // Has to use full name to make sure it does not conflict with
    // org.apache.commons.lang.StringUtils
    console.printError(mesg, "\n"
        + org.apache.hadoop.util.StringUtils.stringifyException(e));

    success = false;
    returnVal = 1;
  } finally {
    Utilities.clearWork(job);
    try {
      if (ctxCreated) {
        ctx.clear();
      }

      if (rj != null) {
        if (returnVal != 0) {
          rj.killJob();
        }
        jobID = rj.getID().toString();
      }
    } catch (Exception e) {
      LOG.warn("Failed while cleaning up ", e);
    } finally {
      HadoopJobExecHelper.runningJobs.remove(rj);
    }
  }

  // get the list of Dynamic partition paths
  try {
    if (rj != null) {
      if (mWork.getAliasToWork() != null) {
        for (Operator<? extends OperatorDesc> op : mWork.getAliasToWork().values()) {
          op.jobClose(job, success);
        }
      }
      if (rWork != null) {
        rWork.getReducer().jobClose(job, success);
      }
    }
  } catch (Exception e) {
    // jobClose needs to execute successfully otherwise fail task
    if (success) {
      success = false;
      returnVal = 3;
      String mesg = "Job Commit failed with exception '" + Utilities.getNameMessage(e) + "'";
      console.printError(mesg, "\n"
          + org.apache.hadoop.util.StringUtils.stringifyException(e));
    }
  }

  return (returnVal);
}
public void validate() throws SemanticException {
  if ((this.getCols() == null) || (this.getCols().size() == 0)) {
    // for now make sure that serde exists
    if (StringUtils.isEmpty(this.getSerName())
        || !SerDeUtils.shouldGetColsFromSerDe(this.getSerName())) {
      throw new SemanticException(ErrorMsg.INVALID_TBL_DDL_SERDE.getMsg());
    }
    return;
  }

  if (this.getStorageHandler() == null) {
    try {
      Class<?> origin =
          Class.forName(this.getOutputFormat(), true, JavaUtils.getClassLoader());
      Class<? extends HiveOutputFormat> replaced =
          HiveFileFormatUtils.getOutputFormatSubstitute(origin);
      if (replaced == null) {
        throw new SemanticException(ErrorMsg.INVALID_OUTPUT_FORMAT_TYPE.getMsg());
      }
    } catch (ClassNotFoundException e) {
      throw new SemanticException(ErrorMsg.INVALID_OUTPUT_FORMAT_TYPE.getMsg());
    }
  }

  List<String> colNames = ParseUtils.validateColumnNameUniqueness(this.getCols());

  if (this.getBucketCols() != null) {
    // all columns in cluster and sort are valid columns
    Iterator<String> bucketCols = this.getBucketCols().iterator();
    while (bucketCols.hasNext()) {
      String bucketCol = bucketCols.next();
      boolean found = false;
      Iterator<String> colNamesIter = colNames.iterator();
      while (colNamesIter.hasNext()) {
        String colName = colNamesIter.next();
        if (bucketCol.equalsIgnoreCase(colName)) {
          found = true;
          break;
        }
      }
      if (!found) {
        throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg());
      }
    }
  }

  if (this.getSortCols() != null) {
    // all columns in cluster and sort are valid columns
    Iterator<Order> sortCols = this.getSortCols().iterator();
    while (sortCols.hasNext()) {
      String sortCol = sortCols.next().getCol();
      boolean found = false;
      Iterator<String> colNamesIter = colNames.iterator();
      while (colNamesIter.hasNext()) {
        String colName = colNamesIter.next();
        if (sortCol.equalsIgnoreCase(colName)) {
          found = true;
          break;
        }
      }
      if (!found) {
        throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg());
      }
    }
  }

  if (this.getPartCols() != null) {
    // there is no overlap between columns and partitioning columns
    Iterator<FieldSchema> partColsIter = this.getPartCols().iterator();
    while (partColsIter.hasNext()) {
      FieldSchema fs = partColsIter.next();
      String partCol = fs.getName();
      PrimitiveObjectInspectorUtils.PrimitiveTypeEntry pte =
          PrimitiveObjectInspectorUtils.getTypeEntryFromTypeName(fs.getType());
      if (null == pte) {
        throw new SemanticException(ErrorMsg.PARTITION_COLUMN_NON_PRIMITIVE.getMsg()
            + " Found " + partCol + " of type: " + fs.getType());
      }
      Iterator<String> colNamesIter = colNames.iterator();
      while (colNamesIter.hasNext()) {
        String colName = BaseSemanticAnalyzer.unescapeIdentifier(colNamesIter.next());
        if (partCol.equalsIgnoreCase(colName)) {
          throw new SemanticException(ErrorMsg.COLUMN_REPEATED_IN_PARTITIONING_COLS.getMsg());
        }
      }
    }
  }

  /* Validate skewed information. */
  ValidationUtility.validateSkewedInformation(
      colNames, this.getSkewedColNames(), this.getSkewedColValues());
}