Example #1
  @Override
  public void processOp(Object row, int tag) throws HiveException {
    try {
      reportProgress();

      // get alias
      alias = (byte) tag;

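      // If the input has moved on to a new alias, reset the threshold at
      // which the "too many rows" warning below is emitted.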
      if ((lastAlias == null) || (!lastAlias.equals(alias))) {
        nextSz = joinEmitInterval;
      }

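      // Evaluate the value (and filter) expressions of this alias against
      // the current row.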
      ArrayList<Object> nr =
          JoinUtil.computeValues(
              row,
              joinValues.get(alias),
              joinValuesObjectInspectors.get(alias),
              joinFilters.get(alias),
              joinFilterObjectInspectors.get(alias),
              noOuterJoin);

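      // Skew-join handling: let the skew context inspect this key so that
      // rows for oversized keys can be handled separately.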
      if (handleSkewJoin) {
        skewJoinKeyContext.handleSkew(tag);
      }

      // number of rows for the key in the given table
      int sz = storage.get(alias).size();
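      // Extract the join key of this row from the KEY field of the reduce
      // input.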
      StructObjectInspector soi = (StructObjectInspector) inputObjInspectors[tag];
      StructField sf = soi.getStructFieldRef(Utilities.ReduceField.KEY.toString());
      Object keyObject = soi.getStructFieldData(row, sf);

      // Are we consuming too much memory?
      if (alias == numAliases - 1 && !(handleSkewJoin && skewJoinKeyContext.currBigKeyTag >= 0)) {
        if (sz == joinEmitInterval) {
          // The input is sorted by alias, so if we are already at the last
          // join operand, we can emit some results now. Note that this has
          // to be done before adding the current row to the storage, to
          // preserve correctness for outer joins.
          checkAndGenObject();
          storage.get(alias).clear();
        }
      } else {
        if (sz == nextSz) {
          // Output a warning if we have reached at least 1000 rows
          // (joinEmitInterval) for a join operand. No warning is printed
          // for the last join operand, since its size never reaches
          // joinEmitInterval: it is flushed above.
          LOG.warn("table " + alias + " has " + sz + " rows for join key " + keyObject);
          nextSz = getNextSize(nextSz);
        }
      }

      // Add the value to the vector
      storage.get(alias).add(nr);
      // If the join key is null, process each row in a separate group.
      if (SerDeUtils.hasAnyNullObject(keyObject, sf.getFieldObjectInspector())) {
        endGroup();
        startGroup();
      }
    } catch (Exception e) {
      throw new HiveException(e);
    }
  }
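
The warning threshold above grows through getNextSize, which is not part of this snippet. Below is a minimal sketch of such a growth policy, assuming a doubling scheme that switches to linear growth past a cut-over so the log is not flooded for heavily skewed keys. The helper name matches the call above, but the scheme and the constant are illustrative assumptions, not necessarily Hive's actual implementation.

  // Hypothetical growth policy for the row-count warning threshold.
  // The doubling-then-linear scheme and the 100000 cut-over are assumptions
  // made for illustration.
  protected long getNextSize(long sz) {
    final long cap = 100000L; // assumed cut-over from geometric to linear growth
    return (sz >= cap) ? sz + cap : 2 * sz;
  }

With this policy, a threshold starting at 1000 would fire at 1000, 2000, 4000, and so on, so the warning is logged with decreasing frequency as a key accumulates more rows.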