/** Initialization when invoked from QL. */
@Override
public void initialize(HiveConf conf, QueryPlan queryPlan, DriverContext driverContext) {
  super.initialize(conf, queryPlan, driverContext);
  job = new JobConf(conf, ExecDriver.class);

  // NOTE: initialize is only called if it is in non-local mode.
  // In case it's in non-local mode, we need to move the SessionState files
  // and jars to jobConf.
  // In case it's in local mode, MapRedTask will set the jobConf.
  //
  // "tmpfiles" and "tmpjars" are set by the method ExecDriver.execute(),
  // which will be called by both local and NON-local mode.
  String addedFiles = Utilities.getResourceFiles(job, SessionState.ResourceType.FILE);
  if (StringUtils.isNotBlank(addedFiles)) {
    HiveConf.setVar(job, ConfVars.HIVEADDEDFILES, addedFiles);
  }
  String addedJars = Utilities.getResourceFiles(job, SessionState.ResourceType.JAR);
  if (StringUtils.isNotBlank(addedJars)) {
    HiveConf.setVar(job, ConfVars.HIVEADDEDJARS, addedJars);
  }
  String addedArchives = Utilities.getResourceFiles(job, SessionState.ResourceType.ARCHIVE);
  if (StringUtils.isNotBlank(addedArchives)) {
    HiveConf.setVar(job, ConfVars.HIVEADDEDARCHIVES, addedArchives);
  }
  conf.stripHiddenConfigurations(job);
  this.jobExecHelper = new HadoopJobExecHelper(job, console, this, this);
}
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs)
    throws SemanticException {
  GroupByOperator op = (GroupByOperator) nd;
  ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx;
  List<String> colLists = new ArrayList<String>();
  GroupByDesc conf = op.getConf();
  ArrayList<ExprNodeDesc> keys = conf.getKeys();
  for (ExprNodeDesc key : keys) {
    colLists = Utilities.mergeUniqElems(colLists, key.getCols());
  }
  ArrayList<AggregationDesc> aggrs = conf.getAggregators();
  for (AggregationDesc aggr : aggrs) {
    ArrayList<ExprNodeDesc> params = aggr.getParameters();
    for (ExprNodeDesc param : params) {
      colLists = Utilities.mergeUniqElems(colLists, param.getCols());
    }
  }
  int groupingSetPosition = conf.getGroupingSetPosition();
  if (groupingSetPosition >= 0) {
    List<String> cols = cppCtx.genColLists(op);
    String groupingColumn = conf.getOutputColumnNames().get(groupingSetPosition);
    if (!cols.contains(groupingColumn)) {
      conf.getOutputColumnNames().remove(groupingSetPosition);
      if (op.getSchema() != null) {
        op.getSchema().getSignature().remove(groupingSetPosition);
      }
    }
  }
  cppCtx.getPrunedColLists().put(op, colLists);
  return null;
}
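// Several of the column pruners in this section (group-by, PTF, reduce-sink, lateral-view,
// filter) accumulate referenced column names through Utilities.mergeUniqElems. A minimal
// standalone sketch of the semantics assumed here -- merge the source list into the
// destination while skipping duplicates -- not the actual Hive implementation:
public static List<String> mergeUniqElemsSketch(List<String> dest, List<String> src) {
  if (src == null) {
    return dest;
  }
  for (String col : src) {
    if (!dest.contains(col)) {
      dest.add(col);  // keep first-seen order, drop repeats
    }
  }
  return dest;
}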
/*
 * Add any input columns referenced in WindowFn args or expressions.
 */
private ArrayList<String> prunedColumnsList(
    List<String> prunedCols, WindowTableFunctionDef tDef) {
  // we create a copy of prunedCols to create a list of pruned columns for PTFOperator
  ArrayList<String> mergedColList = new ArrayList<String>(prunedCols);
  if (tDef.getWindowFunctions() != null) {
    for (WindowFunctionDef wDef : tDef.getWindowFunctions()) {
      if (wDef.getArgs() == null) {
        continue;
      }
      for (PTFExpressionDef arg : wDef.getArgs()) {
        ExprNodeDesc exprNode = arg.getExprNode();
        Utilities.mergeUniqElems(mergedColList, exprNode.getCols());
      }
    }
  }
  if (tDef.getPartition() != null) {
    for (PTFExpressionDef col : tDef.getPartition().getExpressions()) {
      ExprNodeDesc exprNode = col.getExprNode();
      Utilities.mergeUniqElems(mergedColList, exprNode.getCols());
    }
  }
  if (tDef.getOrder() != null) {
    for (PTFExpressionDef col : tDef.getOrder().getExpressions()) {
      ExprNodeDesc exprNode = col.getExprNode();
      Utilities.mergeUniqElems(mergedColList, exprNode.getCols());
    }
  }
  return mergedColList;
}
/**
 * Method to fetch table data.
 *
 * @param table table name
 * @param database database name
 * @return list of rows with columns in comma-separated form
 * @throws Exception if any error occurs
 */
private List<String> getTableData(String table, String database) throws Exception {
  HiveConf conf = new HiveConf();
  conf.addResource("hive-site.xml");
  ArrayList<String> results = new ArrayList<String>();
  ArrayList<String> temp = new ArrayList<String>();
  Hive hive = Hive.get(conf);
  org.apache.hadoop.hive.ql.metadata.Table tbl = hive.getTable(database, table);
  FetchWork work;
  if (!tbl.getPartCols().isEmpty()) {
    List<Partition> partitions = hive.getPartitions(tbl);
    List<PartitionDesc> partDesc = new ArrayList<PartitionDesc>();
    List<String> partLocs = new ArrayList<String>();
    for (Partition part : partitions) {
      partLocs.add(part.getLocation());
      partDesc.add(Utilities.getPartitionDesc(part));
    }
    work = new FetchWork(partLocs, partDesc, Utilities.getTableDesc(tbl));
    work.setLimit(100);
  } else {
    work = new FetchWork(tbl.getDataLocation().toString(), Utilities.getTableDesc(tbl));
  }
  FetchTask task = new FetchTask();
  task.setWork(work);
  task.initialize(conf, null, null);
  task.fetch(temp);
  for (String str : temp) {
    results.add(str.replace("\t", ","));
  }
  return results;
}
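// A hedged usage sketch for the fetch helper above: the table and database names are
// hypothetical, and it assumes hive-site.xml is on the classpath as the method requires.
public void printSampleRows() throws Exception {
  List<String> rows = getTableData("src", "default");  // hypothetical table/database
  for (String row : rows) {
    System.out.println(row);                           // columns already comma-separated
  }
}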
/**
 * Localizes files, archives and jars the user has instructed us to provide on the cluster as
 * resources for execution.
 *
 * @param hdfsDirPathStr HDFS directory to which the resources are copied
 * @param conf configuration holding the session resources and aux jars
 * @return List<LocalResource> local resources to add to execution
 * @throws IOException when hdfs operation fails
 * @throws LoginException when getDefaultDestDir fails with the same exception
 */
public List<LocalResource> localizeTempFilesFromConf(String hdfsDirPathStr, Configuration conf)
    throws IOException, LoginException {
  List<LocalResource> tmpResources = new ArrayList<LocalResource>();

  String addedFiles = Utilities.getResourceFiles(conf, SessionState.ResourceType.FILE);
  if (StringUtils.isNotBlank(addedFiles)) {
    HiveConf.setVar(conf, ConfVars.HIVEADDEDFILES, addedFiles);
  }
  String addedJars = Utilities.getResourceFiles(conf, SessionState.ResourceType.JAR);
  if (StringUtils.isNotBlank(addedJars)) {
    HiveConf.setVar(conf, ConfVars.HIVEADDEDJARS, addedJars);
  }
  String addedArchives = Utilities.getResourceFiles(conf, SessionState.ResourceType.ARCHIVE);
  if (StringUtils.isNotBlank(addedArchives)) {
    HiveConf.setVar(conf, ConfVars.HIVEADDEDARCHIVES, addedArchives);
  }
  String auxJars = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEAUXJARS);

  // need to localize the additional jars and files
  // we need the directory on hdfs to which we shall put all these files
  String allFiles = auxJars + "," + addedJars + "," + addedFiles + "," + addedArchives;
  addTempFiles(conf, tmpResources, hdfsDirPathStr, allFiles.split(","));
  return tmpResources;
}
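// Note that the comma-concatenation above yields empty segments when any of the four strings
// is blank (e.g. ",,foo.jar,"), which the downstream code presumably tolerates. A hedged
// alternative sketch that joins only the non-blank parts; joinNonBlank is a hypothetical
// helper, not part of the Hive API:
private static String joinNonBlank(String... parts) {
  StringBuilder sb = new StringBuilder();
  for (String part : parts) {
    if (StringUtils.isNotBlank(part)) {
      if (sb.length() > 0) {
        sb.append(",");
      }
      sb.append(part);
    }
  }
  return sb.toString();  // e.g. joinNonBlank(auxJars, addedJars, addedFiles, addedArchives)
}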
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs)
    throws SemanticException {
  ReduceSinkOperator op = (ReduceSinkOperator) nd;
  ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx;
  RowResolver resolver = cppCtx.getOpToParseCtxMap().get(op).getRowResolver();
  ReduceSinkDesc conf = op.getConf();

  List<String> colLists = new ArrayList<String>();
  ArrayList<ExprNodeDesc> keys = conf.getKeyCols();
  LOG.debug("Reduce Sink Operator " + op.getIdentifier() + " key:" + keys);
  for (ExprNodeDesc key : keys) {
    colLists = Utilities.mergeUniqElems(colLists, key.getCols());
  }

  assert op.getNumChild() == 1;

  Operator<? extends OperatorDesc> child = op.getChildOperators().get(0);

  List<String> childCols;
  if (child instanceof CommonJoinOperator) {
    childCols = cppCtx.getJoinPrunedColLists().get(child).get((byte) conf.getTag());
  } else {
    childCols = cppCtx.getPrunedColList(child);
  }
  List<ExprNodeDesc> valCols = conf.getValueCols();
  List<String> valColNames = conf.getOutputValueColumnNames();

  if (childCols != null) {
    boolean[] flags = new boolean[valCols.size()];

    for (String childCol : childCols) {
      int index = valColNames.indexOf(Utilities.removeValueTag(childCol));
      if (index < 0) {
        continue;
      }
      flags[index] = true;
      colLists = Utilities.mergeUniqElems(colLists, valCols.get(index).getCols());
    }

    Collections.sort(colLists);
    pruneReduceSinkOperator(flags, op, cppCtx);
    cppCtx.getPrunedColLists().put(op, colLists);
    return null;
  }

  // Reduce Sink contains the columns needed - no need to aggregate from
  // children
  for (ExprNodeDesc val : valCols) {
    colLists = Utilities.mergeUniqElems(colLists, val.getCols());
  }

  cppCtx.getPrunedColLists().put(op, colLists);
  return null;
}
public List<String> getReferencedColumns() throws SemanticException {
  MatchPath matchPath = (MatchPath) evaluator;
  List<String> columns = new ArrayList<String>();
  for (ExprNodeDesc exprNode : matchPath.resultExprInfo.resultExprNodes) {
    Utilities.mergeUniqElems(columns, exprNode.getCols());
  }
  for (ExprNodeDesc exprNode : matchPath.symInfo.symbolExprsDecs) {
    Utilities.mergeUniqElems(columns, exprNode.getCols());
  }
  return columns;
}
public void preTest(HiveConf conf) throws Exception {
  if (zooKeeperCluster == null) {
    // create temp dir
    String tmpBaseDir = System.getProperty("test.tmp.dir");
    File tmpDir = Utilities.createTempDir(tmpBaseDir);

    zooKeeperCluster = new MiniZooKeeperCluster();
    zkPort = zooKeeperCluster.startup(tmpDir);
  }

  if (zooKeeper != null) {
    zooKeeper.close();
  }

  int sessionTimeout = (int) conf.getTimeVar(
      HiveConf.ConfVars.HIVE_ZOOKEEPER_SESSION_TIMEOUT, TimeUnit.MILLISECONDS);
  zooKeeper = new ZooKeeper(
      "localhost:" + zkPort, sessionTimeout, new Watcher() {
        @Override
        public void process(WatchedEvent arg0) {}
      });

  String zkServer = "localhost";
  conf.set("hive.zookeeper.quorum", zkServer);
  conf.set("hive.zookeeper.client.port", "" + zkPort);
}
// TODO#: assumes throw
private void localizeJarForClass(FileSystem lfs, Path libDir, String className, boolean doThrow)
    throws IOException {
  String jarPath = null;
  boolean hasException = false;
  try {
    Class<?> auxClass = Class.forName(className);
    jarPath = Utilities.jarFinderGetJar(auxClass);
  } catch (Throwable t) {
    if (doThrow) {
      throw (t instanceof IOException) ? (IOException) t : new IOException(t);
    }
    hasException = true;
    String err = "Cannot find a jar for [" + className + "] due to an exception ("
        + t.getMessage() + "); not packaging the jar";
    LOG.error(err, t);
    System.err.println(err);
  }
  if (jarPath != null) {
    lfs.copyFromLocalFile(new Path(jarPath), libDir);
  } else if (!hasException) {
    String err = "Cannot find a jar for [" + className + "]; not packaging the jar";
    if (doThrow) {
      throw new IOException(err);
    }
    LOG.error(err);
    System.err.println(err);
  }
}
// reloading the jars under the path specified in hive.reloadable.aux.jars.path property
public void reloadAuxJars() throws IOException {
  final Set<String> reloadedAuxJars = new HashSet<String>();

  final String renewableJarPath = conf.getVar(ConfVars.HIVERELOADABLEJARS);
  // do nothing if this property is not specified or empty
  if (renewableJarPath == null || renewableJarPath.isEmpty()) {
    return;
  }

  Set<String> jarPaths = Utilities.getJarFilesByPath(renewableJarPath);

  // load jars under the hive.reloadable.aux.jars.path
  if (!jarPaths.isEmpty()) {
    reloadedAuxJars.addAll(jarPaths);
  }

  // remove the previous renewable jars
  try {
    if (preReloadableAuxJars != null && !preReloadableAuxJars.isEmpty()) {
      Utilities.removeFromClassPath(preReloadableAuxJars.toArray(new String[0]));
    }
  } catch (Exception e) {
    String msg = "Fail to remove the reloaded jars loaded last time: " + e;
    throw new IOException(msg, e);
  }

  try {
    if (reloadedAuxJars != null && !reloadedAuxJars.isEmpty()) {
      URLClassLoader currentCLoader =
          (URLClassLoader) SessionState.get().getConf().getClassLoader();
      currentCLoader =
          (URLClassLoader) Utilities.addToClassPath(currentCLoader,
              reloadedAuxJars.toArray(new String[0]));
      conf.setClassLoader(currentCLoader);
      Thread.currentThread().setContextClassLoader(currentCLoader);
    }
    preReloadableAuxJars.clear();
    preReloadableAuxJars.addAll(reloadedAuxJars);
  } catch (Exception e) {
    String msg =
        "Fail to add jars from the path specified in hive.reloadable.aux.jars.path property: " + e;
    throw new IOException(msg, e);
  }
}
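// A hedged usage sketch for the reload path above: point the reloadable-jars property at a
// directory, then trigger the reload. The directory path and refreshAuxJars name are
// hypothetical, and the sketch assumes it runs against an active SessionState as
// reloadAuxJars() requires.
public void refreshAuxJars(SessionState ss) throws IOException {
  ss.getConf().setVar(ConfVars.HIVERELOADABLEJARS, "/opt/hive/reloadable-jars");  // hypothetical path
  ss.reloadAuxJars();  // picks up new jars and drops the ones loaded last time
}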
/*
 * Helper function to create Vertex for given ReduceWork.
 */
private Vertex createVertex(
    JobConf conf, ReduceWork reduceWork, LocalResource appJarLr,
    List<LocalResource> additionalLr, FileSystem fs, Path mrScratchDir, Context ctx)
    throws Exception {

  // set up operator plan
  Utilities.setReduceWork(conf, reduceWork, mrScratchDir, false);

  // create the directories FileSinkOperators need
  Utilities.createTmpDirs(conf, reduceWork);

  // Call once here, will be updated when we find edges
  MultiStageMRConfToTezTranslator.translateVertexConfToTez(conf, null);

  // create the vertex
  Vertex reducer = new Vertex(
      reduceWork.getName(),
      new ProcessorDescriptor(ReduceTezProcessor.class.getName())
          .setUserPayload(MRHelpers.createUserPayloadFromConf(conf)),
      reduceWork.getNumReduceTasks(), getContainerResource(conf));

  Map<String, String> environment = new HashMap<String, String>();
  MRHelpers.updateEnvironmentForMRTasks(conf, environment, false);
  reducer.setTaskEnvironment(environment);

  reducer.setJavaOpts(getContainerJavaOpts(conf));

  Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
  localResources.put(getBaseName(appJarLr), appJarLr);
  for (LocalResource lr : additionalLr) {
    localResources.put(getBaseName(lr), lr);
  }
  reducer.setTaskLocalResources(localResources);

  return reducer;
}
public String cliInit(String tname, boolean recreate) throws Exception {
  if (recreate) {
    cleanUp();
    createSources();
  }

  HiveConf.setVar(
      conf, HiveConf.ConfVars.HIVE_AUTHENTICATOR_MANAGER,
      "org.apache.hadoop.hive.ql.security.HadoopDefaultAuthenticator");
  Utilities.clearWorkMap();
  CliSessionState ss = new CliSessionState(conf);
  assert ss != null;
  ss.in = System.in;

  String outFileExtension = getOutFileExtension(tname);
  String stdoutName = null;
  if (outDir != null) {
    File qf = new File(outDir, tname);
    stdoutName = qf.getName().concat(outFileExtension);
  } else {
    stdoutName = tname + outFileExtension;
  }

  File outf = new File(logDir, stdoutName);
  OutputStream fo = new BufferedOutputStream(new FileOutputStream(outf));
  if (qSortQuerySet.contains(tname)) {
    ss.out = new SortPrintStream(fo, "UTF-8");
  } else if (qHashQuerySet.contains(tname)) {
    ss.out = new DigestPrintStream(fo, "UTF-8");
  } else if (qSortNHashQuerySet.contains(tname)) {
    ss.out = new SortAndDigestPrintStream(fo, "UTF-8");
  } else {
    ss.out = new PrintStream(fo, true, "UTF-8");
  }
  ss.err = new CachingPrintStream(fo, true, "UTF-8");
  ss.setIsSilent(true);
  SessionState oldSs = SessionState.get();
  if (oldSs != null && clusterType == MiniClusterType.tez) {
    oldSs.close();
  }
  if (oldSs != null && oldSs.out != null && oldSs.out != System.out) {
    oldSs.out.close();
  }
  SessionState.start(ss);

  cliDriver = new CliDriver();
  cliDriver.processInitFiles(ss);

  return outf.getAbsolutePath();
}
public int executeInProcess(DriverContext driverContext) {
  // check the local work
  if (work == null) {
    return -1;
  }

  if (execContext == null) {
    execContext = new ExecMapperContext(job);
  }

  memoryMXBean = ManagementFactory.getMemoryMXBean();
  long startTime = System.currentTimeMillis();
  console.printInfo(
      Utilities.now()
          + "\tStarting to launch local task to process map join;\tmaximum memory = "
          + memoryMXBean.getHeapMemoryUsage().getMax());
  execContext.setJc(job);
  // set the local work, so all the operators can get this context
  execContext.setLocalWork(work);
  try {
    startForward(null);
    long currentTime = System.currentTimeMillis();
    long elapsed = currentTime - startTime;
    console.printInfo(
        Utilities.now() + "\tEnd of local task; Time Taken: "
            + Utilities.showTime(elapsed) + " sec.");
  } catch (Throwable throwable) {
    if (throwable instanceof OutOfMemoryError
        || (throwable instanceof MapJoinMemoryExhaustionException)) {
      l4j.error("Hive Runtime Error: Map local work exhausted memory", throwable);
      return 3;
    } else {
      l4j.error("Hive Runtime Error: Map local work failed", throwable);
      return 2;
    }
  }
  return 0;
}
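// A hedged sketch of how a caller might interpret the return codes above (0 = success,
// -1 = no local work, 3 = memory exhausted during the map join, any other non-zero =
// generic failure). The handleLocalTaskResult name is hypothetical.
private boolean handleLocalTaskResult(int rc) {
  switch (rc) {
    case 0:
      return true;                                   // local map-join task succeeded
    case -1:
      l4j.warn("No local work to execute");          // nothing was run
      return false;
    case 3:
      l4j.error("Local task ran out of memory while processing the map join");
      return false;
    default:
      l4j.error("Local task failed with return code " + rc);
      return false;
  }
}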
/**
 * Separate from constructor, because initialize() may need to be called in a separate thread.
 */
synchronized void initialize() {
  assertState(QueryState.CREATED);
  this.hiveConf = new HiveConf(Driver.class);

  // Update configuration with user/group info.
  if (query.hadoop_user == null) {
    throw new RuntimeException("User must be specified.");
  }

  // Update scratch dir (to have one per user)
  File scratchDir = new File("/tmp/hive-beeswax-" + query.hadoop_user);
  hiveConf.set(HiveConf.ConfVars.SCRATCHDIR.varname, scratchDir.getPath());
  // Create the temporary directory if necessary.
  // If mapred.job.tracker is set to local, this is used by MapRedTask.
  if (!scratchDir.isDirectory()) {
    if (scratchDir.exists() || !scratchDir.mkdirs()) {
      LOG.warn("Could not create tmp dir:" + scratchDir);
    }
  }

  driver = new Driver(hiveConf);
  ClassLoader loader = hiveConf.getClassLoader();
  String auxJars = HiveConf.getVar(hiveConf, HiveConf.ConfVars.HIVEAUXJARS);
  if (StringUtils.isNotBlank(auxJars)) {
    try {
      loader = Utilities.addToClassPath(loader, StringUtils.split(auxJars, ","));
    } catch (Exception e) {
      LOG.error("Failed to add jars to class loader: " + auxJars, e);
    }
  }
  hiveConf.setClassLoader(loader);
  Thread.currentThread().setContextClassLoader(loader);
  SessionState.start(hiveConf); // this is thread-local
  this.sessionState = SessionState.get();

  // If this work has a LogContext, associate the child output with the logContext
  OutputStream lcOutStream = null;
  if (this.logContext != null) {
    lcOutStream = this.logContext.getOutputStream();
  }

  // A copy of everything goes to the LogContext.
  // In addition, stderr goes to errStream for error reporting.
  // Note that child output is explicitly teed to System.{out,err},
  // otherwise it'll be swallowed by outStream.
  this.sessionState.out = new PrintStream(new TeeOutputStream(lcOutStream, this.outStream));
  this.sessionState.err = new PrintStream(new TeeOutputStream(lcOutStream, this.errStream));
  this.sessionState.childOut =
      new PrintStream(new TeeOutputStream(System.out, sessionState.out));
  this.sessionState.childErr =
      new PrintStream(new TeeOutputStream(System.err, sessionState.err));

  this.state = QueryState.INITIALIZED;
}
public static boolean unregisterJar(String jarsToUnregister) {
  LogHelper console = getConsole();
  try {
    Utilities.removeFromClassPath(StringUtils.split(jarsToUnregister, ","));
    console.printInfo("Deleted " + jarsToUnregister + " from class path");
    return true;
  } catch (Exception e) {
    console.printError(
        "Unable to unregister " + jarsToUnregister + "\nException: " + e.getMessage(),
        "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
    return false;
  }
}
/**
 * Get the final output path of a given FileOutputFormat.
 *
 * @param parent parent dir of the expected final output path
 * @param taskId task identifier used to build the file name
 * @param jc job configuration
 * @param hiveOutputFormat output format in use
 * @param isCompressed whether the output is compressed (affects the file extension)
 * @param defaultFinalPath path returned for output formats without special handling
 */
public static Path getOutputFormatFinalPath(
    Path parent, String taskId, JobConf jc, HiveOutputFormat<?, ?> hiveOutputFormat,
    boolean isCompressed, Path defaultFinalPath) throws IOException {
  if (hiveOutputFormat instanceof HiveIgnoreKeyTextOutputFormat) {
    return new Path(parent, taskId + Utilities.getFileExtension(jc, isCompressed));
  }
  return defaultFinalPath;
}
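// A hedged call sketch for the helper above: for HiveIgnoreKeyTextOutputFormat the result is
// parent/taskId plus the extension Utilities.getFileExtension picks (e.g. a codec suffix when
// compressed); for any other output format the default path comes back unchanged. The paths,
// task id and method name are hypothetical.
public static Path resolveFinalPathExample(JobConf jc) throws IOException {
  Path parent = new Path("/tmp/hive-staging");   // hypothetical staging dir
  Path fallback = new Path(parent, "000000_0");  // hypothetical default
  return getOutputFormatFinalPath(
      parent, "000000_0", jc, new HiveIgnoreKeyTextOutputFormat(), true, fallback);
}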
static void registerJars(List<String> newJars) throws IllegalArgumentException {
  LogHelper console = getConsole();
  try {
    ClassLoader loader = Thread.currentThread().getContextClassLoader();
    ClassLoader newLoader = Utilities.addToClassPath(loader, newJars.toArray(new String[0]));
    Thread.currentThread().setContextClassLoader(newLoader);
    SessionState.get().getConf().setClassLoader(newLoader);
    console.printInfo("Added " + newJars + " to class path");
  } catch (Exception e) {
    String message = "Unable to register " + newJars;
    throw new IllegalArgumentException(message, e);
  }
}
protected ExecutionMode getExecutionMode(QueryPlan plan) {
  int numMRJobs = Utilities.getMRTasks(plan.getRootTasks()).size();
  int numSparkJobs = Utilities.getSparkTasks(plan.getRootTasks()).size();
  int numTezJobs = Utilities.getTezTasks(plan.getRootTasks()).size();

  ExecutionMode mode = ExecutionMode.MR;
  if (0 == (numMRJobs + numSparkJobs + numTezJobs)) {
    mode = ExecutionMode.NONE;
  } else if (numSparkJobs > 0) {
    return ExecutionMode.SPARK;
  } else if (numTezJobs > 0) {
    mode = ExecutionMode.TEZ;
    // Need to go in and check if any of the tasks is running in LLAP mode.
    for (TezTask tezTask : Utilities.getTezTasks(plan.getRootTasks())) {
      if (tezTask.getWork().getLlapMode()) {
        mode = ExecutionMode.LLAP;
        break;
      }
    }
  }
  return mode;
}
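// A hedged sketch of a caller branching on the mode computed above; logExecutionEngine is a
// hypothetical helper, the LOG field is assumed available, and the log text is illustrative.
private void logExecutionEngine(QueryPlan plan) {
  ExecutionMode mode = getExecutionMode(plan);
  switch (mode) {
    case NONE:
      LOG.info("Query has no MR, Tez or Spark tasks");
      break;
    case LLAP:
      LOG.info("Query runs on Tez with at least one LLAP-mode task");
      break;
    default:
      LOG.info("Query runs with execution mode " + mode);
      break;
  }
}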
private Object deserializeValue(BytesWritable valueWritable, byte tag) throws HiveException {
  try {
    return inputValueDeserializer[tag].deserialize(valueWritable);
  } catch (SerDeException e) {
    throw new HiveException(
        "Error: Unable to deserialize reduce input value (tag=" + tag + ") from "
            + Utilities.formatBinaryString(valueWritable.getBytes(), 0, valueWritable.getLength())
            + " with properties " + valueTableDesc[tag].getProperties(),
        e);
  }
}
public static boolean registerJar(String newJar) {
  LogHelper console = getConsole();
  try {
    ClassLoader loader = Thread.currentThread().getContextClassLoader();
    Thread.currentThread()
        .setContextClassLoader(Utilities.addToClassPath(loader, StringUtils.split(newJar, ",")));
    console.printInfo("Added " + newJar + " to class path");
    return true;
  } catch (Exception e) {
    console.printError(
        "Unable to register " + newJar + "\nException: " + e.getMessage(),
        "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
    return false;
  }
}
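// A hedged round-trip sketch using registerJar together with the unregisterJar helper shown
// earlier: both accept a comma-separated list of jar paths. The jar path and method name are
// hypothetical.
public static void swapJarExample() {
  String jars = "/tmp/udfs/my-udf.jar";  // hypothetical jar path
  if (registerJar(jars)) {
    // ... run statements that need the jar ...
    unregisterJar(jars);                 // remove it from the class path again
  }
}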
static void validateFiles(List<String> newFiles) throws IllegalArgumentException {
  SessionState ss = SessionState.get();
  Configuration conf = (ss == null) ? new Configuration() : ss.getConf();

  for (String newFile : newFiles) {
    try {
      if (Utilities.realFile(newFile, conf) == null) {
        String message = newFile + " does not exist";
        throw new IllegalArgumentException(message);
      }
    } catch (IOException e) {
      String message = "Unable to validate " + newFile;
      throw new IllegalArgumentException(message, e);
    }
  }
}
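// A hedged usage sketch: validateFiles throws IllegalArgumentException on the first file that
// cannot be resolved, so callers typically wrap it. The allFilesExist name is hypothetical.
static boolean allFilesExist(List<String> files) {
  try {
    validateFiles(files);  // e.g. Arrays.asList("/tmp/a.txt", "/tmp/b.txt")
    return true;
  } catch (IllegalArgumentException e) {
    return false;          // at least one file is missing or could not be checked
  }
}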
/*
 * Creates the configuration object necessary to run a specific vertex from
 * map work. This includes input formats, input processor, etc.
 */
private JobConf initializeVertexConf(JobConf baseConf, MapWork mapWork) {
  JobConf conf = new JobConf(baseConf);

  if (mapWork.getNumMapTasks() != null) {
    conf.setInt(MRJobConfig.NUM_MAPS, mapWork.getNumMapTasks().intValue());
  }

  if (mapWork.getMaxSplitSize() != null) {
    HiveConf.setLongVar(
        conf, HiveConf.ConfVars.MAPREDMAXSPLITSIZE, mapWork.getMaxSplitSize().longValue());
  }

  if (mapWork.getMinSplitSize() != null) {
    HiveConf.setLongVar(
        conf, HiveConf.ConfVars.MAPREDMINSPLITSIZE, mapWork.getMinSplitSize().longValue());
  }

  if (mapWork.getMinSplitSizePerNode() != null) {
    HiveConf.setLongVar(
        conf, HiveConf.ConfVars.MAPREDMINSPLITSIZEPERNODE,
        mapWork.getMinSplitSizePerNode().longValue());
  }

  if (mapWork.getMinSplitSizePerRack() != null) {
    HiveConf.setLongVar(
        conf, HiveConf.ConfVars.MAPREDMINSPLITSIZEPERRACK,
        mapWork.getMinSplitSizePerRack().longValue());
  }

  Utilities.setInputAttributes(conf, mapWork);

  String inpFormat = HiveConf.getVar(conf, HiveConf.ConfVars.HIVETEZINPUTFORMAT);
  if ((inpFormat == null) || (!StringUtils.isNotBlank(inpFormat))) {
    inpFormat = ShimLoader.getHadoopShims().getInputFormatClassName();
  }

  if (mapWork.isUseBucketizedHiveInputFormat()) {
    inpFormat = BucketizedHiveInputFormat.class.getName();
  }

  conf.set("mapred.mapper.class", ExecMapper.class.getName());
  conf.set("mapred.input.format.class", inpFormat);

  return conf;
}
/**
 * Create a MapReduce job for a particular partition if Hadoop version is pre 0.20, otherwise
 * create a Map-only job using CombineHiveInputFormat for all partitions.
 *
 * @param fsOp The FileSink operator.
 * @param ctx The MR processing context.
 * @param finalName the final destination path the merge job should write to.
 * @throws SemanticException
 */
private void createMergeJob(FileSinkOperator fsOp, GenMRProcContext ctx, String finalName)
    throws SemanticException {

  // if the hadoop version supports CombineFileInputFormat (version >= 0.20),
  // create a Map-only job for merge, otherwise create a MapReduce merge job.
  ParseContext parseCtx = ctx.getParseCtx();
  HiveConf conf = parseCtx.getConf();
  if (conf.getBoolVar(HiveConf.ConfVars.HIVEMERGEMAPONLY)
      && Utilities.supportCombineFileInputFormat()) {
    // create Map-only merge job
    createMap4Merge(fsOp, ctx, finalName);
    LOG.info("use CombineHiveInputformat for the merge job");
  } else {
    createMapReduce4Merge(fsOp, ctx, finalName);
    LOG.info("use HiveInputFormat for the merge job");
  }
}
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs)
    throws SemanticException {
  LateralViewJoinOperator op = (LateralViewJoinOperator) nd;
  ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx;
  List<String> cols = cppCtx.genColLists(op);
  if (cols == null) {
    return null;
  }

  Map<String, ExprNodeDesc> colExprMap = op.getColumnExprMap();
  // As columns go down the DAG, the LVJ will transform internal column
  // names from something like 'key' to '_col0'. Because of this, we need
  // to undo this transformation using the column expression map as the
  // column names propagate up the DAG.

  // this is SEL(*) cols + UDTF cols
  List<String> outputCols = op.getConf().getOutputInternalColNames();

  // because we cannot prune columns from the UDTF branch currently, extract
  // columns from the SEL(*) branch only and append all columns from the UDTF branch to it
  int numSelColumns = op.getConf().getNumSelColumns();

  List<String> colsAfterReplacement = new ArrayList<String>();
  ArrayList<String> newColNames = new ArrayList<String>();
  for (String col : cols) {
    int index = outputCols.indexOf(col);
    // colExprMap.size() == size of cols from SEL(*) branch
    if (index >= 0 && index < numSelColumns) {
      ExprNodeDesc transformed = colExprMap.get(col);
      Utilities.mergeUniqElems(colsAfterReplacement, transformed.getCols());
      newColNames.add(col);
    }
  }

  // update number of columns from sel(*)
  op.getConf().setNumSelColumns(newColNames.size());

  // add all UDTF columns
  // following SEL will do CP for columns from UDTF, not adding SEL in here
  newColNames.addAll(outputCols.subList(numSelColumns, outputCols.size()));
  op.getConf().setOutputInternalColNames(newColNames);
  pruneOperator(ctx, op, newColNames);
  cppCtx.getPrunedColLists().put(op, colsAfterReplacement);
  return null;
}
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs)
    throws SemanticException {
  FilterOperator op = (FilterOperator) nd;
  ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx;
  ExprNodeDesc condn = op.getConf().getPredicate();
  // get list of columns used in the filter
  List<String> cl = condn.getCols();
  // merge it with the downstream col list
  List<String> filterOpPrunedColLists = Utilities.mergeUniqElems(cppCtx.genColLists(op), cl);
  List<String> filterOpPrunedColListsOrderPreserved =
      preserveColumnOrder(op, filterOpPrunedColLists);
  cppCtx.getPrunedColLists().put(op, filterOpPrunedColListsOrderPreserved);

  pruneOperator(cppCtx, op, cppCtx.getPrunedColLists().get(op));
  return null;
}
private Schema getSchema(InputSplit split, JobConf job) {
  // Inside of a MR job, we can pull out the actual properties
  if (AvroSerdeUtils.insideMRJob(job)) {
    MapWork mapWork = Utilities.getMapWork(job);

    // Iterate over the Path -> Partition descriptions to find the partition
    // that matches our input split.
    for (Map.Entry<String, PartitionDesc> pathsAndParts :
        mapWork.getPathToPartitionInfo().entrySet()) {
      String partitionPath = pathsAndParts.getKey();
      if (pathIsInPartition(((FileSplit) split).getPath(), partitionPath)) {
        if (LOG.isInfoEnabled()) {
          LOG.info("Matching partition " + partitionPath + " with input split " + split);
        }

        Properties props = pathsAndParts.getValue().getProperties();
        if (props.containsKey(AvroSerdeUtils.SCHEMA_LITERAL)
            || props.containsKey(AvroSerdeUtils.SCHEMA_URL)) {
          try {
            return AvroSerdeUtils.determineSchemaOrThrowException(props);
          } catch (Exception e) {
            throw new RuntimeException("Avro serde exception", e);
          }
        } else {
          return null; // If it's not in this property, it won't be in any others
        }
      }
    }
    if (LOG.isInfoEnabled()) {
      LOG.info("Unable to match filesplit " + split + " with a partition.");
    }
  }

  // In "select * from table" situations (non-MR), we can add things to the job
  // It's safe to add this to the job since it's not *actually* a mapred job.
  // Here the global state is confined to just this process.
  String s = job.get(AvroSerdeUtils.AVRO_SERDE_SCHEMA);
  if (s != null) {
    LOG.info("Found the avro schema in the job: " + s);
    return Schema.parse(s);
  }
  // No more places to get the schema from. Give up. May have to re-encode later.
  return null;
}
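// A hedged sketch of what the non-MR fallback above relies on: somewhere earlier, the Avro
// schema is stashed in the job conf under AvroSerdeUtils.AVRO_SERDE_SCHEMA so getSchema can
// read it back. The stashSchemaForFetch name is hypothetical.
private static void stashSchemaForFetch(JobConf job, Schema schema) {
  // getSchema's "select *" branch reads this exact key back with job.get(...)
  job.set(AvroSerdeUtils.AVRO_SERDE_SCHEMA, schema.toString());
}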
@Override
public RecordReader<LongWritable, MapWritable> getRecordReader(
    InputSplit split, JobConf conf, Reporter reporter) throws IOException {
  List<Integer> readColIDs = getReadColumnIDs(conf);
  boolean addAll = (readColIDs.size() == 0);
  String columnString = conf.get(ConfigurationUtil.COLUMN_MAPPING);
  if (StringUtils.isBlank(columnString)) {
    throw new IOException("no column mapping found!");
  }

  String[] columns = ConfigurationUtil.getAllColumns(columnString);
  if (readColIDs.size() > columns.length) {
    throw new IOException("read column count larger than that in column mapping string!");
  }

  String[] cols;
  if (addAll) {
    cols = columns;
  } else {
    cols = new String[readColIDs.size()];
    for (int i = 0; i < cols.length; i++) {
      cols[i] = columns[readColIDs.get(i)];
    }
  }

  String filterExprSerialized = conf.get(TableScanDesc.FILTER_EXPR_CONF_STR);
  if (filterExprSerialized != null) {
    ExprNodeDesc filterExpr = Utilities.deserializeExpression(filterExprSerialized, conf);
    /*String columnNameProperty = conf.get(
        org.apache.hadoop.hive.serde.Constants.LIST_COLUMNS);
    System.err.println("======list columns:" + columnNameProperty);*/
    dumpFilterExpr(filterExpr);
    // TODO:
  }

  return new SolrReader(
      ConfigurationUtil.getUrl(conf), (SolrSplit) split, cols,
      ConfigurationUtil.getNumInputBufferRows(conf));
}
public static String validateFile(Set<String> curFiles, String newFile) {
  SessionState ss = SessionState.get();
  LogHelper console = getConsole();
  Configuration conf = (ss == null) ? new Configuration() : ss.getConf();

  try {
    if (Utilities.realFile(newFile, conf) != null) {
      return newFile;
    } else {
      console.printError(newFile + " does not exist");
      return null;
    }
  } catch (IOException e) {
    console.printError(
        "Unable to validate " + newFile + "\nException: " + e.getMessage(),
        "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
    return null;
  }
}
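// A hedged usage sketch: unlike validateFiles above, validateFile reports problems on the
// console and returns null instead of throwing, so a caller can filter a candidate list.
// The filterExistingFiles name is hypothetical.
public static List<String> filterExistingFiles(Set<String> curFiles, List<String> candidates) {
  List<String> valid = new ArrayList<String>();
  for (String candidate : candidates) {
    String resolved = validateFile(curFiles, candidate);
    if (resolved != null) {
      valid.add(resolved);  // keep only files that could be resolved
    }
  }
  return valid;
}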
public GenericUDAFEvaluator getGenericUDAFEvaluator() {
  if (genericUDAFEvaluator != null) {
    return genericUDAFEvaluator;
  }
  if (genericUDAFWritableEvaluator != null) {
    return genericUDAFEvaluator = genericUDAFWritableEvaluator;
  }
  try {
    return genericUDAFEvaluator =
        ReflectionUtils.newInstance(
            Class.forName(
                genericUDAFEvaluatorClassName, true, Utilities.getSessionSpecifiedClassLoader())
                .asSubclass(GenericUDAFEvaluator.class),
            null);
  } catch (ClassNotFoundException e) {
    throw new RuntimeException(e);
  }
}
@Override
public void close() {

  // No row was processed
  if (oc == null) {
    LOG.trace("Close called without any rows processed");
  }

  try {
    if (groupKey != null) {
      // If an operator wants to do some work at the end of a group
      LOG.trace("End Group");
      reducer.endGroup();
    }
    if (isLogInfoEnabled) {
      logCloseInfo();
    }

    reducer.close(abort);

    if (localWork != null) {
      for (Operator<? extends OperatorDesc> dummyOp : localWork.getDummyParentOp()) {
        dummyOp.close(abort);
      }
    }

    ReportStats rps = new ReportStats(rp, jc);
    reducer.preorderMap(rps);

  } catch (Exception e) {
    if (!abort) {
      // signal new failure to map-reduce
      LOG.error("Hit error while closing operators - failing tree");
      throw new RuntimeException(
          "Hive Runtime Error while closing operators: " + e.getMessage(), e);
    }
  } finally {
    MapredContext.close();
    Utilities.clearWorkMap();
  }
}