@Override public void processOp(Object row, int tag) throws HiveException { try { if (firstRow) { // generate the map metadata generateMapMetaData(); firstRow = false; } // get alias alias = order[tag]; // alias = (byte)tag; if ((lastAlias == null) || (!lastAlias.equals(alias))) { nextSz = joinEmitInterval; } // compute keys and values as StandardObjects AbstractMapJoinKey key = JoinUtil.computeMapJoinKeys( row, joinKeys.get(alias), joinKeysObjectInspectors.get(alias)); ArrayList<Object> value = JoinUtil.computeValues( row, joinValues.get(alias), joinValuesObjectInspectors.get(alias), joinFilters.get(alias), joinFilterObjectInspectors.get(alias), noOuterJoin); // Add the value to the ArrayList storage.get((byte) tag).add(value); for (Byte pos : order) { if (pos.intValue() != tag) { MapJoinObjectValue o = mapJoinTables.get(pos).get(key); MapJoinRowContainer<ArrayList<Object>> rowContainer = rowContainerMap.get(pos); // there is no join-value or join-key has all null elements if (o == null || key.hasAnyNulls()) { if (noOuterJoin) { storage.put(pos, emptyList); } else { storage.put(pos, dummyObjVectors[pos.intValue()]); } } else { rowContainer.reset(o.getObj()); storage.put(pos, rowContainer); } } } // generate the output records checkAndGenObject(); // done with the row storage.get((byte) tag).clear(); for (Byte pos : order) { if (pos.intValue() != tag) { storage.put(pos, null); } } } catch (SerDeException e) { e.printStackTrace(); throw new HiveException(e); } }
@Override public void processOp(Object row, int tag) throws HiveException { try { reportProgress(); // get alias alias = (byte) tag; if ((lastAlias == null) || (!lastAlias.equals(alias))) { nextSz = joinEmitInterval; } ArrayList<Object> nr = JoinUtil.computeValues( row, joinValues.get(alias), joinValuesObjectInspectors.get(alias), joinFilters.get(alias), joinFilterObjectInspectors.get(alias), noOuterJoin); if (handleSkewJoin) { skewJoinKeyContext.handleSkew(tag); } // number of rows for the key in the given table int sz = storage.get(alias).size(); StructObjectInspector soi = (StructObjectInspector) inputObjInspectors[tag]; StructField sf = soi.getStructFieldRef(Utilities.ReduceField.KEY.toString()); Object keyObject = soi.getStructFieldData(row, sf); // Are we consuming too much memory if (alias == numAliases - 1 && !(handleSkewJoin && skewJoinKeyContext.currBigKeyTag >= 0)) { if (sz == joinEmitInterval) { // The input is sorted by alias, so if we are already in the last join // operand, // we can emit some results now. // Note this has to be done before adding the current row to the // storage, // to preserve the correctness for outer joins. checkAndGenObject(); storage.get(alias).clear(); } } else { if (sz == nextSz) { // Output a warning if we reached at least 1000 rows for a join // operand // We won't output a warning for the last join operand since the size // will never goes to joinEmitInterval. LOG.warn("table " + alias + " has " + sz + " rows for join key " + keyObject); nextSz = getNextSize(nextSz); } } // Add the value to the vector storage.get(alias).add(nr); // if join-key is null, process each row in different group. if (SerDeUtils.hasAnyNullObject(keyObject, sf.getFieldObjectInspector())) { endGroup(); startGroup(); } } catch (Exception e) { e.printStackTrace(); throw new HiveException(e); } }