public static void close() {
  MapredContext context = contexts.get();
  if (context != null) {
    context.closeAll();
  }
  contexts.remove();
}
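// A minimal sketch of the per-thread state close() operates on, assuming the
// usual Hive MapredContext layout; the Closeable bookkeeping in closeAll() is
// an assumption here, not taken from the method above.
private static final ThreadLocal<MapredContext> contexts = new ThreadLocal<MapredContext>();

private final List<Closeable> udfs = new ArrayList<Closeable>();

private void closeAll() {
  // Best-effort cleanup: task teardown should not fail because one UDF did.
  for (Closeable udf : udfs) {
    try {
      udf.close();
    } catch (IOException e) {
      // swallow and continue closing the remaining UDFs
    }
  }
  udfs.clear();
}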
@SuppressWarnings("unchecked")
@Override
protected final void completeInitializationOp(Object[] os) throws HiveException {
  if (os.length != 0) {
    Pair<MapJoinTableContainer[], MapJoinTableContainerSerDe[]> pair =
        (Pair<MapJoinTableContainer[], MapJoinTableContainerSerDe[]>) os[0];
    boolean spilled = false;
    for (MapJoinTableContainer container : pair.getLeft()) {
      if (container != null) {
        spilled = spilled || container.hasSpill();
      }
    }
    if (!loadCalled && spilled) {
      // we can't use the cached table because it has spilled.
      loadHashTable(getExecContext(), MapredContext.get());
    } else {
      // let's use the table from the cache.
      mapJoinTables = pair.getLeft();
      mapJoinTableSerdes = pair.getRight();
    }
    hashTblInitedOnce = true;
  }
  if (this.getExecContext() != null) {
    // reset exec context so that initialization of the map operator happens
    // properly
    this.getExecContext().setLastInputPath(null);
    this.getExecContext().setCurrentInputPath(null);
  }
}
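// A sketch, under assumptions, of how the framework turns the Futures returned
// from initializeOp() (below) into the os[] consumed above; this is a
// hypothetical, simplified helper, not Hive's exact Operator code.
private void completeInitialization(Collection<Future<?>> futures) throws HiveException {
  Object[] os = new Object[futures.size()];
  int i = 0;
  for (Future<?> future : futures) {
    try {
      os[i++] = future.get(); // each entry is whatever the registered Callable returned
    } catch (Exception e) {
      throw new HiveException(e);
    }
  }
  completeInitializationOp(os);
}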
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
  String database;
  if (context != null) {
    database = context.getJobConf().get("hive.current.database");
  } else {
    database = SessionState.get().getCurrentDatabase();
  }
  return PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector(
      TypeInfoFactory.stringTypeInfo, new Text(database));
}
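// A hedged sketch of the companion evaluate(); the currentDatabase field is an
// assumed way of retaining the value built in initialize(), not shown in the
// snippet above. Since initialize() returns a *constant* object inspector, the
// planner can fold the value at compile time, leaving this method as a
// runtime fallback.
@Override
public Object evaluate(DeferredObject[] arguments) throws HiveException {
  return currentDatabase; // hypothetical field holding the Text from initialize()
}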
@Override
public void close() {
  // No row was processed
  if (oc == null) {
    LOG.trace("Close called without any rows processed");
  }

  try {
    if (groupKey != null) {
      // If an operator wants to do some work at the end of a group
      LOG.trace("End Group");
      reducer.endGroup();
    }
    if (isLogInfoEnabled) {
      logCloseInfo();
    }

    reducer.close(abort);

    if (localWork != null) {
      for (Operator<? extends OperatorDesc> dummyOp : localWork.getDummyParentOp()) {
        dummyOp.close(abort);
      }
    }

    ReportStats rps = new ReportStats(rp, jc);
    reducer.preorderMap(rps);
  } catch (Exception e) {
    if (!abort) {
      // signal new failure to map-reduce
      LOG.error("Hit error while closing operators - failing tree");
      throw new RuntimeException(
          "Hive Runtime Error while closing operators: " + e.getMessage(), e);
    }
  } finally {
    MapredContext.close();
    Utilities.clearWorkMap();
  }
}
// Load the hash table
@Override
public void cleanUpInputFileChangedOp() throws HiveException {
  loadHashTable(getExecContext(), MapredContext.get());
}
@Override
protected Collection<Future<?>> initializeOp(Configuration hconf) throws HiveException {
  this.hconf = hconf;
  unwrapContainer = new UnwrapRowContainer[conf.getTagLength()];
  Collection<Future<?>> result = super.initializeOp(hconf);
  if (result == null) {
    result = new HashSet<Future<?>>();
  }

  int tagLen = conf.getTagLength();
  // On Tez only: The hash map might already be cached in the container we run
  // the task in. On MR: The cache is a no-op.
  cacheKey = HiveConf.getVar(hconf, HiveConf.ConfVars.HIVEQUERYID)
      + "__HASH_MAP_" + this.getOperatorId() + "_container";

  cache = ObjectCacheFactory.getCache(hconf);
  loader = getHashTableLoader(hconf);

  hashMapRowGetters = null;

  mapJoinTables = new MapJoinTableContainer[tagLen];
  mapJoinTableSerdes = new MapJoinTableContainerSerDe[tagLen];
  hashTblInitedOnce = false;

  generateMapMetaData();

  final ExecMapperContext mapContext = getExecContext();
  final MapredContext mrContext = MapredContext.get();

  if (!conf.isBucketMapJoin() && !conf.isDynamicPartitionHashJoin()) {
    /*
     * The issue with caching in case of bucket map join is that different tasks
     * process different buckets and if the container is reused to join a different bucket,
     * join results can be incorrect. The cache is keyed on operator id and for bucket map join
     * the operator does not change but data needed is different. For a proper fix, this
     * requires changes in the Tez API with regard to finding bucket id and
     * also ability to schedule tasks to re-use containers that have cached the specific bucket.
     */
    if (isLogInfoEnabled) {
      LOG.info("This is not bucket map join, so cache");
    }

    Future<Pair<MapJoinTableContainer[], MapJoinTableContainerSerDe[]>> future =
        cache.retrieveAsync(
            cacheKey,
            new Callable<Pair<MapJoinTableContainer[], MapJoinTableContainerSerDe[]>>() {
              @Override
              public Pair<MapJoinTableContainer[], MapJoinTableContainerSerDe[]> call()
                  throws HiveException {
                return loadHashTable(mapContext, mrContext);
              }
            });
    result.add(future);
  } else if (!isInputFileChangeSensitive(mapContext)) {
    loadHashTable(mapContext, mrContext);
    hashTblInitedOnce = true;
  }
  return result;
}
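// The ObjectCache pattern above, reduced to a minimal sketch with hypothetical
// key and loader names: on Tez the Callable runs at most once per container and
// later tasks receive the cached value; on MR the cache is effectively a
// pass-through that just invokes the Callable.
ObjectCache cache = ObjectCacheFactory.getCache(hconf);
Future<MapJoinTableContainer[]> future = cache.retrieveAsync(
    "queryId__HASH_MAP_MAPJOIN_7_container", // hypothetical cache key
    new Callable<MapJoinTableContainer[]>() {
      @Override
      public MapJoinTableContainer[] call() throws HiveException {
        return loadTables(); // hypothetical expensive load shared via the cache
      }
    });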
@Nonnull
public static MapredContext create(boolean isMap, @Nullable JobConf jobConf) {
  return MapredContext.init(isMap, jobConf);
}
@Nullable
public static MapredContext get() {
  return MapredContext.get();
}
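// Hedged usage sketch, assuming the two wrappers above sit in an accessor class
// (named MapredContextAccessor here for illustration) so that code outside the
// org.apache.hadoop.hive.ql.exec package can reach the context. The null check
// matters: no context exists on the current thread when a UDF is evaluated at
// query-compile time rather than inside a task.
MapredContext context = MapredContextAccessor.get();
if (context != null) {
  JobConf jobConf = context.getJobConf();
  String queryId = jobConf.get("hive.query.id"); // illustrative lookup
}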