public static void close() {
   MapredContext context = contexts.get();
   if (context != null) {
     context.closeAll();
   }
   contexts.remove();
 }
  @SuppressWarnings("unchecked")
  @Override
  protected final void completeInitializationOp(Object[] os) throws HiveException {
    if (os.length != 0) {
      Pair<MapJoinTableContainer[], MapJoinTableContainerSerDe[]> pair =
          (Pair<MapJoinTableContainer[], MapJoinTableContainerSerDe[]>) os[0];

      boolean spilled = false;
      for (MapJoinTableContainer container : pair.getLeft()) {
        if (container != null) {
          spilled = spilled || container.hasSpill();
        }
      }

      if (!loadCalled && spilled) {
        // we can't use the cached table because it has spilled.
        loadHashTable(getExecContext(), MapredContext.get());
      } else {
        // let's use the table from the cache.
        mapJoinTables = pair.getLeft();
        mapJoinTableSerdes = pair.getRight();
      }
      hashTblInitedOnce = true;
    }

    if (this.getExecContext() != null) {
      // reset exec context so that initialization of the map operator happens
      // properly
      this.getExecContext().setLastInputPath(null);
      this.getExecContext().setCurrentInputPath(null);
    }
  }
Exemple #3
0
 @Override
 public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
   String database;
   if (context != null) {
     database = context.getJobConf().get("hive.current.database");
   } else {
     database = SessionState.get().getCurrentDatabase();
   }
   return PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector(
       TypeInfoFactory.stringTypeInfo, new Text(database));
 }
  @Override
  public void close() {

    // No row was processed
    if (oc == null) {
      LOG.trace("Close called without any rows processed");
    }

    try {
      if (groupKey != null) {
        // If a operator wants to do some work at the end of a group
        LOG.trace("End Group");
        reducer.endGroup();
      }
      if (isLogInfoEnabled) {
        logCloseInfo();
      }

      reducer.close(abort);

      if (localWork != null) {
        for (Operator<? extends OperatorDesc> dummyOp : localWork.getDummyParentOp()) {
          dummyOp.close(abort);
        }
      }

      ReportStats rps = new ReportStats(rp, jc);
      reducer.preorderMap(rps);

    } catch (Exception e) {
      if (!abort) {
        // signal new failure to map-reduce
        LOG.error("Hit error while closing operators - failing tree");
        throw new RuntimeException(
            "Hive Runtime Error while closing operators: " + e.getMessage(), e);
      }
    } finally {
      MapredContext.close();
      Utilities.clearWorkMap();
    }
  }
 // Load the hash table
 @Override
 public void cleanUpInputFileChangedOp() throws HiveException {
   loadHashTable(getExecContext(), MapredContext.get());
 }
  @Override
  protected Collection<Future<?>> initializeOp(Configuration hconf) throws HiveException {
    this.hconf = hconf;
    unwrapContainer = new UnwrapRowContainer[conf.getTagLength()];

    Collection<Future<?>> result = super.initializeOp(hconf);
    if (result == null) {
      result = new HashSet<Future<?>>();
    }

    int tagLen = conf.getTagLength();

    // On Tez only: The hash map might already be cached in the container we run
    // the task in. On MR: The cache is a no-op.
    cacheKey =
        HiveConf.getVar(hconf, HiveConf.ConfVars.HIVEQUERYID)
            + "__HASH_MAP_"
            + this.getOperatorId()
            + "_container";

    cache = ObjectCacheFactory.getCache(hconf);
    loader = getHashTableLoader(hconf);

    hashMapRowGetters = null;

    mapJoinTables = new MapJoinTableContainer[tagLen];
    mapJoinTableSerdes = new MapJoinTableContainerSerDe[tagLen];
    hashTblInitedOnce = false;

    generateMapMetaData();

    final ExecMapperContext mapContext = getExecContext();
    final MapredContext mrContext = MapredContext.get();

    if (!conf.isBucketMapJoin() && !conf.isDynamicPartitionHashJoin()) {
      /*
       * The issue with caching in case of bucket map join is that different tasks
       * process different buckets and if the container is reused to join a different bucket,
       * join results can be incorrect. The cache is keyed on operator id and for bucket map join
       * the operator does not change but data needed is different. For a proper fix, this
       * requires changes in the Tez API with regard to finding bucket id and
       * also ability to schedule tasks to re-use containers that have cached the specific bucket.
       */
      if (isLogInfoEnabled) {
        LOG.info("This is not bucket map join, so cache");
      }

      Future<Pair<MapJoinTableContainer[], MapJoinTableContainerSerDe[]>> future =
          cache.retrieveAsync(
              cacheKey,
              new Callable<Pair<MapJoinTableContainer[], MapJoinTableContainerSerDe[]>>() {
                @Override
                public Pair<MapJoinTableContainer[], MapJoinTableContainerSerDe[]> call()
                    throws HiveException {
                  return loadHashTable(mapContext, mrContext);
                }
              });
      result.add(future);
    } else if (!isInputFileChangeSensitive(mapContext)) {
      loadHashTable(mapContext, mrContext);
      hashTblInitedOnce = true;
    }
    return result;
  }
 @Nonnull
 public static MapredContext create(boolean isMap, @Nullable JobConf jobConf) {
   return MapredContext.init(isMap, jobConf);
 }
 @Nullable
 public static MapredContext get() {
   return MapredContext.get();
 }