Example #1
0
  /**
   * Reloads the hash table of one spilled hash partition. This takes two steps: 1) deserialize the
   * hash table that was serialized to disk, and 2) merge every key/value pair held in the
   * partition's sidefile container into that hash table.
   *
   * <p>The reloaded table is published through {@code spilledMapJoinTables[pos]}, and the
   * container's in-memory row count is increased by the number of values in the reloaded table.
   *
   * @param pos position of small table
   * @param partitionId the partition of the small table to be reloaded from
   * @throws IOException
   * @throws HiveException
   * @throws SerDeException
   * @throws ClassNotFoundException
   */
  protected void reloadHashTable(byte pos, int partitionId)
      throws IOException, HiveException, SerDeException, ClassNotFoundException {
    HybridHashTableContainer container = (HybridHashTableContainer) mapJoinTables[pos];
    HashPartition partition = container.getHashPartitions()[partitionId];

    // Rows that were written to the sidefile; they are merged into the hash table further down.
    KeyValueContainer kvContainer = partition.getSidefileKVContainer();
    int sidefileRowCount = kvContainer.size();
    LOG.info(
        "Hybrid Grace Hash Join: Number of rows restored from KeyValueContainer: "
            + sidefileRowCount);

    // Deserialize the on-disk hash table.
    // The sidefile row count doubles as the initial capacity of the BytesBytesMultiHashMap, which
    // cannot be 0, so fall back to a reasonable positive number when the sidefile is empty.
    // The fallback is a sizing hint only and must not be counted as rows.
    int initialCapacity = sidefileRowCount > 0 ? sidefileRowCount : 1024 * 1024;
    BytesBytesMultiHashMap restoredHashMap = partition.getHashMapFromDisk(initialCapacity);

    // Expected row count after the merge: sidefile rows plus rows already in the restored map.
    // Kept as a long so that the size estimate below cannot wrap around in int arithmetic.
    long rowCount = (long) Math.max(sidefileRowCount, 0) + restoredHashMap.getNumValues();
    LOG.info("Hybrid Grace Hash Join: Deserializing spilled hash partition...");
    LOG.info("Hybrid Grace Hash Join: Number of rows in hashmap: " + rowCount);

    // The size of the deserialized partition shouldn't exceed half of the memory limit.
    // Exceeding it is only reported here; the reload still goes ahead.
    if (rowCount * container.getTableRowSize() >= container.getMemoryThreshold() / 2) {
      LOG.warn(
          "Hybrid Grace Hash Join: Hash table cannot be reloaded since it"
              + " will be greater than memory limit. Recursive spilling is currently not supported");
    }

    // Merge the sidefile into the restored hash table.
    KeyValueHelper writeHelper = container.getWriteHelper();
    while (kvContainer.hasNext()) {
      ObjectPair<HiveKey, BytesWritable> pair = kvContainer.next();
      Writable key = pair.getFirst();
      Writable val = pair.getSecond();
      writeHelper.setKeyValue(key, val);
      restoredHashMap.put(writeHelper, -1);
    }

    container.setTotalInMemRowCount(
        container.getTotalInMemRowCount() + restoredHashMap.getNumValues());
    kvContainer.clear();

    // Expose the reloaded table, carrying over the value inspector and sort orders of the
    // original container.
    spilledMapJoinTables[pos] = new MapJoinBytesTableContainer(restoredHashMap);
    spilledMapJoinTables[pos].setInternalValueOi(container.getInternalValueOi());
    spilledMapJoinTables[pos].setSortableSortOrders(container.getSortableSortOrders());
  }
 /**
  * Writes one key/value row into the backing hash map.
  *
  * <p>If no write helper has been set yet, one is chosen from the SerDe pair: a
  * {@code LazyBinaryKvWriter} when the key SerDe is a {@code BinarySortableSerDe} and the value
  * SerDe is a {@code LazyBinarySerDe}, otherwise a generic {@code KeyValueWriter}. The same step
  * also sets {@code internalValueOi} and {@code sortableSortOrders}.
  *
  * @param keyContext context supplying the key SerDe
  * @param currentKey key of the row to store
  * @param valueContext context supplying the value SerDe and the filter-tag flag
  * @param currentValue value of the row to store
  * @return always {@code null}; this container has no key object to hand back
  * @throws SerDeException
  */
 @SuppressWarnings("deprecation")
 @Override
 public MapJoinKey putRow(
     MapJoinObjectSerDeContext keyContext,
     Writable currentKey,
     MapJoinObjectSerDeContext valueContext,
     Writable currentValue)
     throws SerDeException {
   SerDe keySerDe = keyContext.getSerDe();
   SerDe valueSerDe = valueContext.getSerDe();

   if (writeHelper == null) {
     String initMessage =
         "Initializing container with "
             + keySerDe.getClass().getName()
             + " and "
             + valueSerDe.getClass().getName();
     LOG.info(initMessage);

     boolean sortableKeyWithLazyBinaryValue =
         keySerDe instanceof BinarySortableSerDe && valueSerDe instanceof LazyBinarySerDe;
     if (!sortableKeyWithLazyBinaryValue) {
       // Generic path: any other SerDe combination.
       writeHelper = new KeyValueWriter(keySerDe, valueSerDe, valueContext.hasFilterTag());
       internalValueOi = createInternalOi(valueContext);
       sortableSortOrders = null;
     } else {
       // Specialized path: reuse the value SerDe's own struct inspector and the key's sort orders.
       LazyBinaryStructObjectInspector structOi =
           (LazyBinaryStructObjectInspector) valueSerDe.getObjectInspector();
       writeHelper = new LazyBinaryKvWriter(keySerDe, structOi, valueContext.hasFilterTag());
       internalValueOi = structOi;
       sortableSortOrders = ((BinarySortableSerDe) keySerDe).getSortOrders();
     }
   }

   writeHelper.setKeyValue(currentKey, currentValue);
   hashMap.put(writeHelper, -1);
   // there's no key to return
   return null;
 }
 /** Dumps metrics of the backing hash map by delegating to {@code hashMap.debugDumpMetrics()}. */
 @Override
 public void dumpMetrics() {
   hashMap.debugDumpMetrics();
 }
 /**
  * Stores one key/value pair in the backing hash map, using {@code directWriteHelper}.
  *
  * @param currentKey key of the pair to store
  * @param currentValue value of the pair to store
  * @throws SerDeException
  */
 @Override
 public void put(Writable currentKey, Writable currentValue) throws SerDeException {
   // Load the pair into the shared helper, then hand the helper to the map.
   directWriteHelper.setKeyValue(currentKey, currentValue);
   // NOTE(review): -1 presumably means "no precomputed hash code" — confirm against hashMap.put.
   hashMap.put(directWriteHelper, -1);
 }
 /**
  * Seals the backing hash map by delegating to {@code hashMap.seal()}.
  * NOTE(review): presumably called once loading is complete — confirm against callers.
  */
 @Override
 public void seal() {
   hashMap.seal();
 }
 /** Clears the backing hash map by delegating to {@code hashMap.clear()}. */
 @Override
 public void clear() {
   hashMap.clear();
 }