/**
 * Processes one input row: computes its join key and values, looks up the key in the table of
 * every other join position, and generates the output records.
 *
 * <p>On the first row the map metadata is generated via {@code generateMapMetaData()}. After the
 * output has been generated, the per-row state held in {@code storage} is cleared again.
 *
 * @param row the incoming row
 * @param tag index of the input the row belongs to; used to index {@code order} and compared
 *     against each position in {@code order}
 * @throws HiveException wrapping any {@code SerDeException} raised while processing the row
 */
@Override
  public void processOp(Object row, int tag) throws HiveException {

    try {
      if (firstRow) {
        // generate the map metadata
        generateMapMetaData();
        firstRow = false;
      }

      // get alias
      alias = order[tag];

      // A change of alias resets the size threshold to the emit interval.
      if ((lastAlias == null) || (!lastAlias.equals(alias))) {
        nextSz = joinEmitInterval;
      }

      // compute keys and values as StandardObjects
      AbstractMapJoinKey key =
          JoinUtil.computeMapJoinKeys(
              row, joinKeys.get(alias), joinKeysObjectInspectors.get(alias));
      ArrayList<Object> value =
          JoinUtil.computeValues(
              row,
              joinValues.get(alias),
              joinValuesObjectInspectors.get(alias),
              joinFilters.get(alias),
              joinFilterObjectInspectors.get(alias),
              noOuterJoin);

      // Add the value to the ArrayList
      // NOTE(review): storage is indexed by (byte) tag here while the lookups above use
      // alias = order[tag] -- confirm the two always coincide.
      storage.get((byte) tag).add(value);

      for (Byte pos : order) {
        if (pos.intValue() != tag) {

          MapJoinObjectValue o = mapJoinTables.get(pos).get(key);

          // there is no join-value or join-key has all null elements
          if (o == null || key.hasAnyNulls()) {
            if (noOuterJoin) {
              storage.put(pos, emptyList);
            } else {
              storage.put(pos, dummyObjVectors[pos.intValue()]);
            }
          } else {
            // The row container is only needed when there is a match, so look it up here.
            MapJoinRowContainer<ArrayList<Object>> rowContainer = rowContainerMap.get(pos);
            rowContainer.reset(o.getObj());
            storage.put(pos, rowContainer);
          }
        }
      }

      // generate the output records
      checkAndGenObject();

      // done with the row
      storage.get((byte) tag).clear();

      for (Byte pos : order) {
        if (pos.intValue() != tag) {
          storage.put(pos, null);
        }
      }

    } catch (SerDeException e) {
      // The cause is preserved in the wrapper, so the stack trace is not printed separately.
      throw new HiveException(e);
    }
  }
Example #2
0
  /**
   * Processes one row of a join input: computes its join values, optionally emits the results
   * buffered so far, and appends the row to the storage of its alias.
   *
   * <p>If the row's join key contains a null (as reported by
   * {@code SerDeUtils.hasAnyNullObject}), the current group is ended and a new one started right
   * after the row is added, so that such a row is processed in a group of its own.
   *
   * @param row the incoming row; its key is read from the struct field named
   *     {@code Utilities.ReduceField.KEY}
   * @param tag index of the input the row belongs to; used directly as the alias
   * @throws HiveException if processing fails; a {@code HiveException} raised while processing
   *     is rethrown unchanged, any other exception is wrapped as its cause
   */
  @Override
  public void processOp(Object row, int tag) throws HiveException {
    try {
      reportProgress();

      // get alias
      alias = (byte) tag;

      // A change of alias resets the warning threshold to the emit interval.
      if ((lastAlias == null) || (!lastAlias.equals(alias))) {
        nextSz = joinEmitInterval;
      }

      ArrayList<Object> nr =
          JoinUtil.computeValues(
              row,
              joinValues.get(alias),
              joinValuesObjectInspectors.get(alias),
              joinFilters.get(alias),
              joinFilterObjectInspectors.get(alias),
              noOuterJoin);

      if (handleSkewJoin) {
        skewJoinKeyContext.handleSkew(tag);
      }

      // number of rows for the key in the given table
      int sz = storage.get(alias).size();
      StructObjectInspector soi = (StructObjectInspector) inputObjInspectors[tag];
      StructField sf = soi.getStructFieldRef(Utilities.ReduceField.KEY.toString());
      Object keyObject = soi.getStructFieldData(row, sf);

      // Are we consuming too much memory
      if (alias == numAliases - 1 && !(handleSkewJoin && skewJoinKeyContext.currBigKeyTag >= 0)) {
        if (sz == joinEmitInterval) {
          // The input is sorted by alias, so if we are already in the last join
          // operand, we can emit some results now.
          // Note this has to be done before adding the current row to the
          // storage, to preserve the correctness for outer joins.
          checkAndGenObject();
          storage.get(alias).clear();
        }
      } else {
        if (sz == nextSz) {
          // Output a warning once the number of buffered rows for this join operand
          // reaches the current threshold, then move the threshold to the next size.
          // This branch is skipped for the last join operand in the non-skew case,
          // because its storage is flushed at joinEmitInterval above.
          LOG.warn("table " + alias + " has " + sz + " rows for join key " + keyObject);
          nextSz = getNextSize(nextSz);
        }
      }

      // Add the value to the vector
      storage.get(alias).add(nr);
      // if join-key is null, process each row in different group.
      if (SerDeUtils.hasAnyNullObject(keyObject, sf.getFieldObjectInspector())) {
        endGroup();
        startGroup();
      }
    } catch (Exception e) {
      // Do not wrap a HiveException in a second HiveException; everything else is wrapped
      // with the original exception preserved as the cause.
      if (e instanceof HiveException) {
        throw (HiveException) e;
      }
      throw new HiveException(e);
    }
  }