Exemple #1
0
  @Override
  public Object clone() {
    JoinDesc ret = new JoinDesc();
    Map<Byte, List<ExprNodeDesc>> cloneExprs = new HashMap<Byte, List<ExprNodeDesc>>();
    cloneExprs.putAll(getExprs());
    ret.setExprs(cloneExprs);
    Map<Byte, List<ExprNodeDesc>> cloneFilters = new HashMap<Byte, List<ExprNodeDesc>>();
    cloneFilters.putAll(getFilters());
    ret.setFilters(cloneFilters);
    ret.setConds(getConds().clone());
    ret.setNoOuterJoin(getNoOuterJoin());
    ret.setNullSafes(getNullSafes());
    ret.setHandleSkewJoin(handleSkewJoin);
    ret.setSkewKeyDefinition(getSkewKeyDefinition());
    ret.setTagOrder(getTagOrder().clone());
    if (getKeyTableDesc() != null) {
      ret.setKeyTableDesc((TableDesc) getKeyTableDesc().clone());
    }

    if (getBigKeysDirMap() != null) {
      Map<Byte, String> cloneBigKeysDirMap = new HashMap<Byte, String>();
      cloneBigKeysDirMap.putAll(getBigKeysDirMap());
      ret.setBigKeysDirMap(cloneBigKeysDirMap);
    }
    if (getSmallKeysDirMap() != null) {
      Map<Byte, Map<Byte, String>> cloneSmallKeysDirMap = new HashMap<Byte, Map<Byte, String>>();
      cloneSmallKeysDirMap.putAll(getSmallKeysDirMap());
      ret.setSmallKeysDirMap(cloneSmallKeysDirMap);
    }
    if (getSkewKeysValuesTables() != null) {
      Map<Byte, TableDesc> cloneSkewKeysValuesTables = new HashMap<Byte, TableDesc>();
      cloneSkewKeysValuesTables.putAll(getSkewKeysValuesTables());
      ret.setSkewKeysValuesTables(cloneSkewKeysValuesTables);
    }
    if (getOutputColumnNames() != null) {
      List<String> cloneOutputColumnNames = new ArrayList<String>();
      cloneOutputColumnNames.addAll(getOutputColumnNames());
      ret.setOutputColumnNames(cloneOutputColumnNames);
    }
    if (getReversedExprs() != null) {
      Map<String, Byte> cloneReversedExprs = new HashMap<String, Byte>();
      cloneReversedExprs.putAll(getReversedExprs());
      ret.setReversedExprs(cloneReversedExprs);
    }
    return ret;
  }
  private static void pruneJoinOperator(
      NodeProcessorCtx ctx,
      CommonJoinOperator op,
      JoinDesc conf,
      Map<String, ExprNodeDesc> columnExprMap,
      Map<Byte, List<Integer>> retainMap,
      boolean mapJoin)
      throws SemanticException {
    ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx;
    List<Operator<? extends OperatorDesc>> childOperators = op.getChildOperators();

    LOG.info("JOIN " + op.getIdentifier() + " oldExprs: " + conf.getExprs());
    List<String> childColLists = cppCtx.genColLists(op);
    if (childColLists == null) {
      return;
    }

    Map<Byte, List<String>> prunedColLists = new HashMap<Byte, List<String>>();
    for (byte tag : conf.getTagOrder()) {
      prunedColLists.put(tag, new ArrayList<String>());
    }

    // add the columns in join filters
    Set<Map.Entry<Byte, List<ExprNodeDesc>>> filters = conf.getFilters().entrySet();
    Iterator<Map.Entry<Byte, List<ExprNodeDesc>>> iter = filters.iterator();
    while (iter.hasNext()) {
      Map.Entry<Byte, List<ExprNodeDesc>> entry = iter.next();
      Byte tag = entry.getKey();
      for (ExprNodeDesc desc : entry.getValue()) {
        List<String> cols = prunedColLists.get(tag);
        cols = Utilities.mergeUniqElems(cols, desc.getCols());
        prunedColLists.put(tag, cols);
      }
    }

    RowResolver joinRR = cppCtx.getOpToParseCtxMap().get(op).getRowResolver();
    RowResolver newJoinRR = new RowResolver();
    ArrayList<String> outputCols = new ArrayList<String>();
    ArrayList<ColumnInfo> rs = new ArrayList<ColumnInfo>();
    Map<String, ExprNodeDesc> newColExprMap = new HashMap<String, ExprNodeDesc>();

    for (int i = 0; i < conf.getOutputColumnNames().size(); i++) {
      String internalName = conf.getOutputColumnNames().get(i);
      ExprNodeDesc desc = columnExprMap.get(internalName);
      Byte tag = conf.getReversedExprs().get(internalName);
      if (!childColLists.contains(internalName)) {
        int index = conf.getExprs().get(tag).indexOf(desc);
        if (index < 0) {
          continue;
        }
        conf.getExprs().get(tag).remove(desc);
        if (retainMap != null) {
          retainMap.get(tag).remove(index);
        }
      } else {
        List<String> prunedRSList = prunedColLists.get(tag);
        if (prunedRSList == null) {
          prunedRSList = new ArrayList<String>();
          prunedColLists.put(tag, prunedRSList);
        }
        prunedRSList = Utilities.mergeUniqElems(prunedRSList, desc.getCols());
        outputCols.add(internalName);
        newColExprMap.put(internalName, desc);
      }
    }

    if (mapJoin) {
      // regenerate the valueTableDesc
      List<TableDesc> valueTableDescs = new ArrayList<TableDesc>();
      for (int pos = 0; pos < op.getParentOperators().size(); pos++) {
        List<ExprNodeDesc> valueCols = conf.getExprs().get(Byte.valueOf((byte) pos));
        StringBuilder keyOrder = new StringBuilder();
        for (int i = 0; i < valueCols.size(); i++) {
          keyOrder.append("+");
        }

        TableDesc valueTableDesc =
            PlanUtils.getMapJoinValueTableDesc(
                PlanUtils.getFieldSchemasFromColumnList(valueCols, "mapjoinvalue"));

        valueTableDescs.add(valueTableDesc);
      }
      ((MapJoinDesc) conf).setValueTblDescs(valueTableDescs);

      Set<Map.Entry<Byte, List<ExprNodeDesc>>> exprs = ((MapJoinDesc) conf).getKeys().entrySet();
      Iterator<Map.Entry<Byte, List<ExprNodeDesc>>> iters = exprs.iterator();
      while (iters.hasNext()) {
        Map.Entry<Byte, List<ExprNodeDesc>> entry = iters.next();
        List<ExprNodeDesc> lists = entry.getValue();
        for (int j = 0; j < lists.size(); j++) {
          ExprNodeDesc desc = lists.get(j);
          Byte tag = entry.getKey();
          List<String> cols = prunedColLists.get(tag);
          cols = Utilities.mergeUniqElems(cols, desc.getCols());
          prunedColLists.put(tag, cols);
        }
      }
    }

    for (Operator<? extends OperatorDesc> child : childOperators) {
      if (child instanceof ReduceSinkOperator) {
        boolean[] flags =
            getPruneReduceSinkOpRetainFlags(childColLists, (ReduceSinkOperator) child);
        pruneReduceSinkOperator(flags, (ReduceSinkOperator) child, cppCtx);
      }
    }

    for (int i = 0; i < outputCols.size(); i++) {
      String internalName = outputCols.get(i);
      String[] nm = joinRR.reverseLookup(internalName);
      ColumnInfo col = joinRR.get(nm[0], nm[1]);
      newJoinRR.put(nm[0], nm[1], col);
      rs.add(col);
    }

    LOG.info("JOIN " + op.getIdentifier() + " newExprs: " + conf.getExprs());
    op.setColumnExprMap(newColExprMap);
    conf.setOutputColumnNames(outputCols);
    op.getSchema().setSignature(rs);
    cppCtx.getOpToParseCtxMap().get(op).setRowResolver(newJoinRR);
    cppCtx.getJoinPrunedColLists().put(op, prunedColLists);
  }