/**
 * Closes this operator: releases every hash table held in {@code mapJoinTables},
 * drops the reference to the map, and then delegates to the parent
 * {@code closeOp}.
 *
 * <p>Heap usage is logged once before and once after the tables are released
 * so the two samples can be compared.
 *
 * @param abort forwarded unchanged to {@code super.closeOp}
 * @throws HiveException propagated from closing a hash table or from the parent close
 */
@Override
  public void closeOp(boolean abort) throws HiveException {

    // ricardoj checking mapper hashmap usage
    memoryMXBean = ManagementFactory.getMemoryMXBean();
    // System.gc() is only a request to the JVM; issuing it repeatedly back-to-back
    // adds pause time to every operator close, so it is requested once per sample.
    System.gc();
    long usedMemory = memoryMXBean.getHeapMemoryUsage().getUsed();
    // This sample is taken BEFORE the tables are closed; label it accordingly so
    // it can be told apart from the second sample below.
    LOG.info("ricardoj memory usage before deleting tables: " + usedMemory / (1024 * 1024) + "MB");

    if (mapJoinTables != null) {
      for (HashMapWrapper<?, ?> hashTable : mapJoinTables.values()) {
        hashTable.close();
      }
    }

    // Drop the reference so the closed tables become unreachable from this operator.
    mapJoinTables = null;

    // ricardoj
    System.gc();
    usedMemory = memoryMXBean.getHeapMemoryUsage().getUsed();
    LOG.info("ricardoj memory usage after deleting tables: " + usedMemory / (1024 * 1024) + "MB");

    super.closeOp(abort);
  }
  /**
   * Initializes the operator after delegating to the parent initializer.
   *
   * <p>Resets every entry of {@code metadataValueTag} and {@code metadataKeyTag}
   * to {@code -1}, records {@code order[posBigTable]} as the big-table alias, and
   * registers one empty hash table wrapper plus one empty row container for
   * every alias position other than {@code posBigTable}.
   *
   * @param hconf configuration forwarded to {@code super.initializeOp}
   * @throws HiveException propagated from the parent initializer
   */
  @Override
  protected void initializeOp(Configuration hconf) throws HiveException {
    super.initializeOp(hconf);

    // -1 marks "no tag assigned yet" for every alias and for the key.
    metadataValueTag = new int[numAliases];
    for (int slot = 0; slot < metadataValueTag.length; slot++) {
      metadataValueTag[slot] = -1;
    }
    metadataKeyTag = -1;

    bigTableAlias = order[posBigTable];

    mapJoinTables = new HashMap<Byte, HashMapWrapper<AbstractMapJoinKey, MapJoinObjectValue>>();
    rowContainerMap = new HashMap<Byte, MapJoinRowContainer<ArrayList<Object>>>();

    // Every position except the big table's gets its own hash table and row container.
    for (int aliasPos = 0; aliasPos < numAliases; aliasPos++) {
      if (aliasPos != posBigTable) {
        Byte aliasKey = Byte.valueOf((byte) aliasPos);
        mapJoinTables.put(aliasKey,
            new HashMapWrapper<AbstractMapJoinKey, MapJoinObjectValue>());
        rowContainerMap.put(aliasKey,
            new MapJoinRowContainer<ArrayList<Object>>());
      }
    }

    hashTblInitedOnce = false;
  }
Example #3
0
  /**
   * Closes this operator, reprocessing any spilled hash-join partitions first.
   *
   * <p>Steps, in order:
   * <ol>
   *   <li>Dump metrics for every non-null entry of {@code mapJoinTables} and record
   *       whether any of them reports a spill.</li>
   *   <li>If something spilled and {@code abort} is false: clear the in-memory
   *       partitions of every hybrid container, then call {@code continueProcess(i)}
   *       for each partition of the first small table that is on disk.</li>
   *   <li>If something spilled (whether or not aborting): clear all table containers
   *       and remove {@code cacheKey} from {@code cache}.</li>
   *   <li>If the execution context's local work is input-file-change sensitive and
   *       the engine is not Spark on a dedicated cluster: clear all table containers.</li>
   *   <li>Null out {@code loader} and delegate to {@code super.closeOp(abort)}.</li>
   * </ol>
   *
   * @param abort when true, spilled data is not reprocessed; also forwarded to the parent close
   * @throws HiveException wrapping any exception thrown by {@code continueProcess},
   *         or propagated from the parent close
   */
  @Override
  public void closeOp(boolean abort) throws HiveException {
    boolean spilled = false;
    for (MapJoinTableContainer container : mapJoinTables) {
      if (container != null) {
        // Short-circuit: once a spill has been seen, hasSpill() is not called again.
        spilled = spilled || container.hasSpill();
        container.dumpMetrics();
      }
    }

    // For Hybrid Grace Hash Join, we need to see if there is any spilled data to be processed next
    if (spilled) {
      if (!abort) {
        // Allocate the getter array only if it has not been created already.
        if (hashMapRowGetters == null) {
          hashMapRowGetters = new ReusableGetAdaptor[mapJoinTables.length];
        }
        int numPartitions = 0;
        // Find out number of partitions for each small table (should be same across tables)
        for (byte pos = 0; pos < mapJoinTables.length; pos++) {
          if (pos != conf.getPosBigTable()) {
            // NOTE(review): unchecked cast — the loop further down guards with instanceof,
            // this one does not; confirm the first small table is always a hybrid container here.
            firstSmallTable = (HybridHashTableContainer) mapJoinTables[pos];
            numPartitions = firstSmallTable.getHashPartitions().length;
            break;
          }
        }
        // Only checked when the JVM runs with assertions enabled.
        assert numPartitions != 0 : "Number of partitions must be greater than 0!";

        if (firstSmallTable.hasSpill()) {
          spilledMapJoinTables = new MapJoinBytesTableContainer[mapJoinTables.length];
          hybridMapJoinLeftover = true;

          // Clear all in-memory partitions first
          for (byte pos = 0; pos < mapJoinTables.length; pos++) {
            MapJoinTableContainer tableContainer = mapJoinTables[pos];
            if (tableContainer != null && tableContainer instanceof HybridHashTableContainer) {
              HybridHashTableContainer hybridHtContainer =
                  (HybridHashTableContainer) tableContainer;
              hybridHtContainer.dumpStats();

              HashPartition[] hashPartitions = hybridHtContainer.getHashPartitions();
              // Clear all in memory partitions first
              for (int i = 0; i < hashPartitions.length; i++) {
                if (!hashPartitions[i].isHashMapOnDisk()) {
                  // Subtract this partition's value count from the container's
                  // in-memory row total before emptying the partition's hash map.
                  hybridHtContainer.setTotalInMemRowCount(
                      hybridHtContainer.getTotalInMemRowCount()
                          - hashPartitions[i].getHashMapFromMemory().getNumValues());
                  hashPartitions[i].getHashMapFromMemory().clear();
                }
              }
              // After all in-memory partitions are cleared the running total should be zero
              // (checked only with assertions enabled).
              assert hybridHtContainer.getTotalInMemRowCount() == 0;
            }
          }

          // Reprocess the spilled data
          for (int i = 0; i < numPartitions; i++) {
            HashPartition[] hashPartitions = firstSmallTable.getHashPartitions();
            if (hashPartitions[i].isHashMapOnDisk()) {
              try {
                continueProcess(i); // Re-process spilled data
              } catch (Exception e) {
                // Any failure is rethrown as a HiveException carrying the original as its cause.
                throw new HiveException(e);
              }
              // Reset the small-table slots after each reprocessed partition; the
              // big-table slot is left as is.
              for (byte pos = 0; pos < order.length; pos++) {
                if (pos != conf.getPosBigTable()) spilledMapJoinTables[pos] = null;
              }
            }
          }
        }
      }

      // Reached whenever a spill was detected, including when abort is true.
      if (isLogInfoEnabled) {
        LOG.info("spilled: " + spilled + " abort: " + abort + ". Clearing spilled partitions.");
      }

      // spilled tables are loaded always (no sharing), so clear it
      clearAllTableContainers();
      cache.remove(cacheKey);
    }

    // in mapreduce case, we need to always clear up as mapreduce doesn't have object registry.
    // The condition requires a non-null exec context with non-null local work that is
    // input-file-change sensitive, and excludes the case where the execution engine is
    // "spark" AND SparkUtilities reports a dedicated cluster.
    if ((this.getExecContext() != null)
        && (this.getExecContext().getLocalWork() != null)
        && (this.getExecContext().getLocalWork().getInputFileChangeSensitive())
        && !(HiveConf.getVar(hconf, ConfVars.HIVE_EXECUTION_ENGINE).equals("spark")
            && SparkUtilities.isDedicatedCluster(hconf))) {
      if (isLogInfoEnabled) {
        LOG.info("MR: Clearing all map join table containers.");
      }
      clearAllTableContainers();
    }

    // Drop the loader reference before handing off to the parent close.
    this.loader = null;
    super.closeOp(abort);
  }