@Override public void process(Object row, int tag) throws HiveException { try { alias = (byte) tag; if (hashMapRowGetters == null) { hashMapRowGetters = new ReusableGetAdaptor[mapJoinTables.length]; MapJoinKey refKey = getRefKey(alias); for (byte pos = 0; pos < order.length; pos++) { if (pos != alias) { hashMapRowGetters[pos] = mapJoinTables[pos].createGetter(refKey); } } } // As we're calling processOp again to process the leftover "tuples", we know the "row" is // coming from the spilled matchfile. We need to recreate hashMapRowGetter against new // hashtables if (hybridMapJoinLeftover) { MapJoinKey refKey = getRefKey(alias); for (byte pos = 0; pos < order.length; pos++) { if (pos != alias && spilledMapJoinTables[pos] != null) { hashMapRowGetters[pos] = spilledMapJoinTables[pos].createGetter(refKey); } } } // compute keys and values as StandardObjects ReusableGetAdaptor firstSetKey = null; int fieldCount = joinKeys[alias].size(); boolean joinNeeded = false; boolean bigTableRowSpilled = false; for (byte pos = 0; pos < order.length; pos++) { if (pos != alias) { JoinUtil.JoinResult joinResult; ReusableGetAdaptor adaptor; if (firstSetKey == null) { adaptor = firstSetKey = hashMapRowGetters[pos]; joinResult = setMapJoinKey(firstSetKey, row, alias); } else { // Keys for all tables are the same, so only the first has to deserialize them. 
adaptor = hashMapRowGetters[pos]; joinResult = adaptor.setFromOther(firstSetKey); } MapJoinRowContainer rowContainer = adaptor.getCurrentRows(); if (rowContainer != null && unwrapContainer[pos] != null) { Object[] currentKey = firstSetKey.getCurrentKey(); rowContainer = unwrapContainer[pos].setInternal(rowContainer, currentKey); } // there is no join-value or join-key has all null elements if (rowContainer == null || firstSetKey.hasAnyNulls(fieldCount, nullsafes)) { if (!noOuterJoin) { // For Hybrid Grace Hash Join, during the 1st round processing, // we only keep the LEFT side if the row is not spilled if (!conf.isHybridHashJoin() || hybridMapJoinLeftover || (!hybridMapJoinLeftover && joinResult != JoinUtil.JoinResult.SPILL)) { joinNeeded = true; storage[pos] = dummyObjVectors[pos]; } } else { storage[pos] = emptyList; } } else { joinNeeded = true; storage[pos] = rowContainer.copy(); aliasFilterTags[pos] = rowContainer.getAliasFilter(); } // Spill the big table rows into appropriate partition: // When the JoinResult is SPILL, it means the corresponding small table row may have been // spilled to disk (at least the partition that holds this row is on disk). So we need to // postpone the join processing for this pair by also spilling this big table row. if (joinResult == JoinUtil.JoinResult.SPILL && !bigTableRowSpilled) { // For n-way join, only spill big table rows once spillBigTableRow(mapJoinTables[pos], row); bigTableRowSpilled = true; } } } if (joinNeeded) { List<Object> value = getFilteredValue(alias, row); // Add the value to the ArrayList storage[alias].addRow(value); // generate the output records checkAndGenObject(); } // done with the row storage[tag].clearRows(); for (byte pos = 0; pos < order.length; pos++) { if (pos != tag) { storage[pos] = null; } } } catch (Exception e) { String msg = "Unexpected exception: " + e.getMessage(); LOG.error(msg, e); throw new HiveException(msg, e); } }
/**
 * Deserializes the join-key columns of {@code row} (for the given table
 * {@code alias}) into {@code dest}, positioning the getter for a hash-map probe.
 *
 * @param dest  reusable getter that will hold the deserialized key
 * @param row   the big-table row whose key columns are extracted
 * @param alias index into {@code joinKeys}/{@code joinKeysObjectInspectors}
 * @return the probe result reported by {@code setFromRow}
 * @throws HiveException if key expression evaluation fails
 */
protected JoinUtil.JoinResult setMapJoinKey(
    ReusableGetAdaptor dest, Object row, byte alias) throws HiveException {
  return dest.setFromRow(row, joinKeys[alias], joinKeysObjectInspectors[alias]);
}