コード例 #1
0
 /**
  * Sets a big table row aside for later processing by appending it to the row container of the
  * hash partition that the hybrid hash table container currently reports as its spill target.
  *
  * @param hybridHtContainer container to spill into; it is cast to
  *     {@link HybridHashTableContainer}, so any other implementation fails with a
  *     {@link ClassCastException}
  * @param row big table row to postpone
  * @throws HiveException if any of the underlying container calls fails with it
  */
 protected void spillBigTableRow(MapJoinTableContainer hybridHtContainer, Object row)
     throws HiveException {
   final HybridHashTableContainer hybrid = (HybridHashTableContainer) hybridHtContainer;
   // Ask for the spill target first, then resolve the partition it designates.
   final int spillTarget = hybrid.getToSpillPartitionId();
   hybrid.getHashPartitions()[spillTarget].getMatchfileObjContainer().add(row);
 }
コード例 #2
0
  /**
   * Reload hashtable from the hash partition. It takes two steps: 1) deserialize the hash table
   * that was serialized to disk, and 2) merge every key/value pair from the partition's sidefile
   * container into that hash table. The merged table is then stored in
   * {@code spilledMapJoinTables[pos]} and the container's in-memory row count is increased by
   * the number of values in the merged table.
   *
   * <p>The initial capacity passed to the deserializer is tracked separately from the row count,
   * so an empty sidefile does not inflate the logged row count or the memory estimate.
   *
   * @param pos position of small table
   * @param partitionId the partition of the small table to be reloaded from
   * @throws IOException if an underlying call fails with it
   * @throws HiveException if an underlying call fails with it
   * @throws SerDeException if an underlying call fails with it
   * @throws ClassNotFoundException if an underlying call fails with it
   */
  protected void reloadHashTable(byte pos, int partitionId)
      throws IOException, HiveException, SerDeException, ClassNotFoundException {
    HybridHashTableContainer container = (HybridHashTableContainer) mapJoinTables[pos];
    HashPartition partition = container.getHashPartitions()[partitionId];

    // Key/value pairs held in the partition's sidefile; they are merged into the restored
    // hash map further below. This is where the spilling may happen again.
    KeyValueContainer kvContainer = partition.getSidefileKVContainer();
    int sidefileRowCount = kvContainer.size();
    LOG.info(
        "Hybrid Grace Hash Join: Number of rows restored from KeyValueContainer: "
            + sidefileRowCount);

    // The sidefile size doubles as the initialCapacity of the BytesBytesMultiHashMap, which
    // cannot be 0, so fall back to a reasonable positive number when the sidefile is empty.
    // This fallback is a capacity hint only and must not be counted as rows.
    int initialCapacity = sidefileRowCount > 0 ? sidefileRowCount : 1024 * 1024;

    // Deserialize the on-disk hash table
    // We're sure this part is smaller than memory limit
    LOG.info("Hybrid Grace Hash Join: Deserializing spilled hash partition...");
    BytesBytesMultiHashMap restoredHashMap = partition.getHashMapFromDisk(initialCapacity);

    // Rows after the merge = rows already in the restored map + rows waiting in the sidefile.
    // Kept as a long so the size estimate below cannot overflow int arithmetic.
    long rowCount = (long) Math.max(sidefileRowCount, 0) + restoredHashMap.getNumValues();
    LOG.info("Hybrid Grace Hash Join: Number of rows in hashmap: " + rowCount);

    // The size of the reloaded partition shouldn't exceed half of the memory limit. Recursive
    // spilling is not supported, so the reload proceeds anyway and we only warn.
    if (rowCount * container.getTableRowSize() >= container.getMemoryThreshold() / 2) {
      LOG.warn(
          "Hybrid Grace Hash Join: Hash table cannot be reloaded since it"
              + " will be greater than memory limit. Recursive spilling is currently not supported");
    }

    // Merge every sidefile pair into the restored hash map.
    KeyValueHelper writeHelper = container.getWriteHelper();
    while (kvContainer.hasNext()) {
      ObjectPair<HiveKey, BytesWritable> pair = kvContainer.next();
      Writable key = pair.getFirst();
      Writable val = pair.getSecond();
      writeHelper.setKeyValue(key, val);
      restoredHashMap.put(writeHelper, -1);
    }

    container.setTotalInMemRowCount(
        container.getTotalInMemRowCount() + restoredHashMap.getNumValues());
    kvContainer.clear();

    // Publish the merged table, carrying over the value inspector and sort orders of the
    // original container.
    spilledMapJoinTables[pos] = new MapJoinBytesTableContainer(restoredHashMap);
    spilledMapJoinTables[pos].setInternalValueOi(container.getInternalValueOi());
    spilledMapJoinTables[pos].setSortableSortOrders(container.getSortableSortOrders());
  }
コード例 #3
0
 /**
  * Replays the big table rows that were spilled for one partition: every row still held in the
  * partition's row container is passed to process() at the big table position, after which the
  * container is cleared.
  *
  * @param partitionId the partition from which to take out spilled big table rows
  * @throws HiveException if reading a row or processing it fails with it
  */
 protected void reProcessBigTable(int partitionId) throws HiveException {
   // Only firstSmallTable references the spilled big table rows: in a binary join it is the
   // sole small table, and in an n-way join spilling happens once, while the first small table
   // is being processed.
   final ObjectContainer spilledRows =
       firstSmallTable.getHashPartitions()[partitionId].getMatchfileObjContainer();

   while (spilledRows.hasNext()) {
     process(spilledRows.next(), conf.getPosBigTable());
   }

   spilledRows.clear();
 }