Ejemplos de PlanUtils en Java

Lenguaje de programación: Java

Namespace/Package Name: org.apache.hadoop.hive.ql.plan

Clase / Tipo: PlanUtils

Ejemplos en hotexamples.com: 6

Java PlanUtils - 6 ejemplos encontrados. Estos son los ejemplos en Java del mundo real mejor valorados de org.apache.hadoop.hive.ql.plan.PlanUtils extraídos de proyectos de código abierto. Puedes valorar ejemplos para ayudarnos a mejorar la calidad de los ejemplos.

Métodos usados con frecuencia

Mostrar Ocultar

getFieldSchemasFromColumnList(4)

getReduceKeyTableDesc(2)

getReduceValueTableDesc(2)

getFieldSchemasFromRowSchema(1)

getIntermediateFileTableDesc(1)

getMapJoinValueTableDesc(1)

getReduceSinkDesc(1)

Ejemplo n.º 1

Mostrar archivo

Archivo: ColumnPrunerProcFactory.java Proyecto: cschenyuan/hive-hack

  private static void pruneReduceSinkOperator(
      boolean[] retainFlags, ReduceSinkOperator reduce, ColumnPrunerProcCtx cppCtx)
      throws SemanticException {
    ReduceSinkDesc reduceConf = reduce.getConf();
    Map<String, ExprNodeDesc> oldMap = reduce.getColumnExprMap();
    LOG.info("RS " + reduce.getIdentifier() + " oldColExprMap: " + oldMap);
    RowResolver oldRR = cppCtx.getOpToParseCtxMap().get(reduce).getRowResolver();
    ArrayList<ColumnInfo> old_signature = oldRR.getRowSchema().getSignature();
    ArrayList<ColumnInfo> signature = new ArrayList<ColumnInfo>(old_signature);

    List<String> valueColNames = reduceConf.getOutputValueColumnNames();
    ArrayList<String> newValueColNames = new ArrayList<String>();

    List<ExprNodeDesc> keyExprs = reduceConf.getKeyCols();
    List<ExprNodeDesc> valueExprs = reduceConf.getValueCols();
    ArrayList<ExprNodeDesc> newValueExprs = new ArrayList<ExprNodeDesc>();

    for (int i = 0; i < retainFlags.length; i++) {
      String outputCol = valueColNames.get(i);
      ExprNodeDesc outputColExpr = valueExprs.get(i);
      if (!retainFlags[i]) {
        String[] nm = oldRR.reverseLookup(outputCol);
        if (nm == null) {
          outputCol = Utilities.ReduceField.VALUE.toString() + "." + outputCol;
          nm = oldRR.reverseLookup(outputCol);
        }

        // In case there are multiple columns referenced to the same column name, we won't
        // do row resolve once more because the ColumnInfo in row resolver is already removed
        if (nm == null) {
          continue;
        }

        // Only remove information of a column if it is not a key,
        // i.e. this column is not appearing in keyExprs of the RS
        if (ExprNodeDescUtils.indexOf(outputColExpr, keyExprs) == -1) {
          ColumnInfo colInfo = oldRR.getFieldMap(nm[0]).remove(nm[1]);
          oldRR.getInvRslvMap().remove(colInfo.getInternalName());
          oldMap.remove(outputCol);
          signature.remove(colInfo);
        }

      } else {
        newValueColNames.add(outputCol);
        newValueExprs.add(outputColExpr);
      }
    }

    oldRR.getRowSchema().setSignature(signature);
    reduce.getSchema().setSignature(signature);
    reduceConf.setOutputValueColumnNames(newValueColNames);
    reduceConf.setValueCols(newValueExprs);
    TableDesc newValueTable =
        PlanUtils.getReduceValueTableDesc(
            PlanUtils.getFieldSchemasFromColumnList(
                reduceConf.getValueCols(), newValueColNames, 0, ""));
    reduceConf.setValueSerializeInfo(newValueTable);
    LOG.info("RS " + reduce.getIdentifier() + " newColExprMap: " + oldMap);
  }

Ejemplo n.º 2

Mostrar archivo

Archivo: GenMRFileSink1.java Proyecto: Carlie20083/hive-0.7.0

  private void createMapReduce4Merge(FileSinkOperator fsOp, GenMRProcContext ctx, String finalName)
      throws SemanticException {
    Task<? extends Serializable> currTask = ctx.getCurrTask();
    RowSchema inputRS = fsOp.getSchema();

    // create a reduce Sink operator - key is the first column
    ArrayList<ExprNodeDesc> keyCols = new ArrayList<ExprNodeDesc>();
    keyCols.add(TypeCheckProcFactory.DefaultExprProcessor.getFuncExprNodeDesc("rand"));

    // value is all the columns in the FileSink operator input
    ArrayList<ExprNodeDesc> valueCols = new ArrayList<ExprNodeDesc>();
    for (ColumnInfo ci : inputRS.getSignature()) {
      valueCols.add(
          new ExprNodeColumnDesc(
              ci.getType(), ci.getInternalName(), ci.getTabAlias(), ci.getIsVirtualCol()));
    }

    // create a dummy tableScan operator
    Operator<? extends Serializable> tsMerge = OperatorFactory.get(TableScanDesc.class, inputRS);

    ArrayList<String> outputColumns = new ArrayList<String>();
    for (int i = 0; i < valueCols.size(); i++) {
      outputColumns.add(SemanticAnalyzer.getColumnInternalName(i));
    }

    ReduceSinkDesc rsDesc =
        PlanUtils.getReduceSinkDesc(
            new ArrayList<ExprNodeDesc>(), valueCols, outputColumns, false, -1, -1, -1);
    OperatorFactory.getAndMakeChild(rsDesc, inputRS, tsMerge);
    ParseContext parseCtx = ctx.getParseCtx();
    FileSinkDesc fsConf = fsOp.getConf();

    // Add the extract operator to get the value fields
    RowResolver out_rwsch = new RowResolver();
    RowResolver interim_rwsch = ctx.getParseCtx().getOpParseCtx().get(fsOp).getRowResolver();
    Integer pos = Integer.valueOf(0);
    for (ColumnInfo colInfo : interim_rwsch.getColumnInfos()) {
      String[] info = interim_rwsch.reverseLookup(colInfo.getInternalName());
      out_rwsch.put(
          info[0],
          info[1],
          new ColumnInfo(
              pos.toString(),
              colInfo.getType(),
              info[0],
              colInfo.getIsVirtualCol(),
              colInfo.isHiddenVirtualCol()));
      pos = Integer.valueOf(pos.intValue() + 1);
    }

    Operator<ExtractDesc> extract =
        OperatorFactory.getAndMakeChild(
            new ExtractDesc(
                new ExprNodeColumnDesc(
                    TypeInfoFactory.stringTypeInfo,
                    Utilities.ReduceField.VALUE.toString(),
                    "",
                    false)),
            new RowSchema(out_rwsch.getColumnInfos()));

    TableDesc ts = (TableDesc) fsConf.getTableInfo().clone();
    fsConf
        .getTableInfo()
        .getProperties()
        .remove(org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_PARTITION_COLUMNS);

    FileSinkDesc newFSD =
        new FileSinkDesc(
            finalName, ts, parseCtx.getConf().getBoolVar(HiveConf.ConfVars.COMPRESSRESULT));
    FileSinkOperator newOutput =
        (FileSinkOperator) OperatorFactory.getAndMakeChild(newFSD, inputRS, extract);

    HiveConf conf = parseCtx.getConf();
    MapredWork cplan = createMergeTask(conf, tsMerge, fsConf);
    cplan.setReducer(extract);

    // NOTE: we should gather stats in MR1 (rather than the merge MR job)
    // since it is unknown if the merge MR will be triggered at execution time.

    MoveWork dummyMv =
        new MoveWork(
            null,
            null,
            null,
            new LoadFileDesc(fsConf.getDirName(), finalName, true, null, null),
            false);

    ConditionalTask cndTsk = createCondTask(conf, currTask, dummyMv, cplan, fsConf.getDirName());

    LinkMoveTask(ctx, newOutput, cndTsk);
  }

Ejemplo n.º 3

Mostrar archivo

Archivo: GenMapRedUtils.java Proyecto: Carlie20083/hive-0.7.0

  @SuppressWarnings("nls")
  /**
   * Merge the tasks - by creating a temporary file between them.
   *
   * @param op reduce sink operator being processed
   * @param oldTask the parent task
   * @param task the child task
   * @param opProcCtx context
   * @param setReducer does the reducer needs to be set
   * @param pos position of the parent
   */
  public static void splitTasks(
      Operator<? extends Serializable> op,
      Task<? extends Serializable> parentTask,
      Task<? extends Serializable> childTask,
      GenMRProcContext opProcCtx,
      boolean setReducer,
      boolean local,
      int posn)
      throws SemanticException {
    childTask.getWork();
    Operator<? extends Serializable> currTopOp = opProcCtx.getCurrTopOp();

    ParseContext parseCtx = opProcCtx.getParseCtx();
    parentTask.addDependentTask(childTask);

    // Root Task cannot depend on any other task, therefore childTask cannot be
    // a root Task
    List<Task<? extends Serializable>> rootTasks = opProcCtx.getRootTasks();
    if (rootTasks.contains(childTask)) {
      rootTasks.remove(childTask);
    }

    // generate the temporary file
    Context baseCtx = parseCtx.getContext();
    String taskTmpDir = baseCtx.getMRTmpFileURI();

    Operator<? extends Serializable> parent = op.getParentOperators().get(posn);
    TableDesc tt_desc =
        PlanUtils.getIntermediateFileTableDesc(
            PlanUtils.getFieldSchemasFromRowSchema(parent.getSchema(), "temporarycol"));

    // Create a file sink operator for this file name
    boolean compressIntermediate =
        parseCtx.getConf().getBoolVar(HiveConf.ConfVars.COMPRESSINTERMEDIATE);
    FileSinkDesc desc = new FileSinkDesc(taskTmpDir, tt_desc, compressIntermediate);
    if (compressIntermediate) {
      desc.setCompressCodec(parseCtx.getConf().getVar(HiveConf.ConfVars.COMPRESSINTERMEDIATECODEC));
      desc.setCompressType(parseCtx.getConf().getVar(HiveConf.ConfVars.COMPRESSINTERMEDIATETYPE));
    }
    Operator<? extends Serializable> fs_op =
        putOpInsertMap(OperatorFactory.get(desc, parent.getSchema()), null, parseCtx);

    // replace the reduce child with this operator
    List<Operator<? extends Serializable>> childOpList = parent.getChildOperators();
    for (int pos = 0; pos < childOpList.size(); pos++) {
      if (childOpList.get(pos) == op) {
        childOpList.set(pos, fs_op);
        break;
      }
    }

    List<Operator<? extends Serializable>> parentOpList =
        new ArrayList<Operator<? extends Serializable>>();
    parentOpList.add(parent);
    fs_op.setParentOperators(parentOpList);

    // create a dummy tableScan operator on top of op
    // TableScanOperator is implicitly created here for each MapOperator
    RowResolver rowResolver = opProcCtx.getParseCtx().getOpParseCtx().get(parent).getRowResolver();
    Operator<? extends Serializable> ts_op =
        putOpInsertMap(
            OperatorFactory.get(TableScanDesc.class, parent.getSchema()), rowResolver, parseCtx);

    childOpList = new ArrayList<Operator<? extends Serializable>>();
    childOpList.add(op);
    ts_op.setChildOperators(childOpList);
    op.getParentOperators().set(posn, ts_op);

    Map<Operator<? extends Serializable>, GenMapRedCtx> mapCurrCtx = opProcCtx.getMapCurrCtx();
    mapCurrCtx.put(ts_op, new GenMapRedCtx(childTask, null, null));

    String streamDesc = taskTmpDir;
    MapredWork cplan = (MapredWork) childTask.getWork();

    if (setReducer) {
      Operator<? extends Serializable> reducer = op.getChildOperators().get(0);

      if (reducer.getClass() == JoinOperator.class) {
        String origStreamDesc;
        streamDesc = "$INTNAME";
        origStreamDesc = streamDesc;
        int pos = 0;
        while (cplan.getAliasToWork().get(streamDesc) != null) {
          streamDesc = origStreamDesc.concat(String.valueOf(++pos));
        }
      }

      // TODO: Allocate work to remove the temporary files and make that
      // dependent on the redTask
      if (reducer.getClass() == JoinOperator.class) {
        cplan.setNeedsTagging(true);
      }
    }

    // Add the path to alias mapping
    setTaskPlan(taskTmpDir, streamDesc, ts_op, cplan, local, tt_desc);

    // This can be cleaned up as a function table in future
    if (op instanceof AbstractMapJoinOperator<?>) {
      AbstractMapJoinOperator<? extends MapJoinDesc> mjOp =
          (AbstractMapJoinOperator<? extends MapJoinDesc>) op;
      opProcCtx.setCurrMapJoinOp(mjOp);
      GenMRMapJoinCtx mjCtx = opProcCtx.getMapJoinCtx(mjOp);
      if (mjCtx == null) {
        mjCtx = new GenMRMapJoinCtx(taskTmpDir, tt_desc, ts_op, null);
      } else {
        mjCtx.setTaskTmpDir(taskTmpDir);
        mjCtx.setTTDesc(tt_desc);
        mjCtx.setRootMapJoinOp(ts_op);
      }
      opProcCtx.setMapJoinCtx(mjOp, mjCtx);
      opProcCtx.getMapCurrCtx().put(parent, new GenMapRedCtx(childTask, null, null));
      setupBucketMapJoinInfo(cplan, mjOp, false);
    }

    currTopOp = null;
    String currAliasId = null;

    opProcCtx.setCurrTopOp(currTopOp);
    opProcCtx.setCurrAliasId(currAliasId);
    opProcCtx.setCurrTask(childTask);
  }

Ejemplo n.º 4

Mostrar archivo

Archivo: ReduceSinkDeDuplication.java Proyecto: phoenixhadoop/hive

    /**
     * Current RSDedup remove/replace child RS. For key columns, sorting order, and the number of
     * reducers, copy more specific part of configurations of child RS to that of parent RS. For
     * partitioning columns, if both child RS and parent RS have been assigned partitioning columns,
     * we will choose the more general partitioning columns. If parent RS has not been assigned any
     * partitioning column, we will use partitioning columns (if exist) of child RS.
     */
    protected boolean merge(ReduceSinkOperator cRS, ReduceSinkOperator pRS, int minReducer)
        throws SemanticException {
      int[] result = checkStatus(cRS, pRS, minReducer);
      if (result == null) {
        return false;
      }

      if (result[0] > 0) {
        // The sorting columns of the child RS are more specific than
        // those of the parent RS. Assign sorting columns of the child RS
        // to the parent RS.
        List<ExprNodeDesc> childKCs = cRS.getConf().getKeyCols();
        pRS.getConf().setKeyCols(ExprNodeDescUtils.backtrack(childKCs, cRS, pRS));
      }

      if (result[1] < 0) {
        // The partitioning columns of the parent RS are more specific than
        // those of the child RS.
        List<ExprNodeDesc> childPCs = cRS.getConf().getPartitionCols();
        if (childPCs != null && !childPCs.isEmpty()) {
          // If partitioning columns of the child RS are assigned,
          // assign these to the partitioning columns of the parent RS.
          pRS.getConf().setPartitionCols(ExprNodeDescUtils.backtrack(childPCs, cRS, pRS));
        }
      } else if (result[1] > 0) {
        // The partitioning columns of the child RS are more specific than
        // those of the parent RS.
        List<ExprNodeDesc> parentPCs = pRS.getConf().getPartitionCols();
        if (parentPCs == null || parentPCs.isEmpty()) {
          // If partitioning columns of the parent RS are not assigned,
          // assign partitioning columns of the child RS to the parent RS.
          ArrayList<ExprNodeDesc> childPCs = cRS.getConf().getPartitionCols();
          pRS.getConf().setPartitionCols(ExprNodeDescUtils.backtrack(childPCs, cRS, pRS));
        }
      }

      if (result[2] > 0) {
        // The sorting order of the child RS is more specific than
        // that of the parent RS. Assign the sorting order of the child RS
        // to the parent RS.
        if (result[0] <= 0) {
          // Sorting columns of the parent RS are more specific than those of the
          // child RS but Sorting order of the child RS is more specific than
          // that of the parent RS.
          throw new SemanticException(
              "Sorting columns and order don't match. "
                  + "Try set "
                  + HiveConf.ConfVars.HIVEOPTREDUCEDEDUPLICATION
                  + "=false;");
        }
        pRS.getConf().setOrder(cRS.getConf().getOrder());
      }

      if (result[3] > 0) {
        // The number of reducers of the child RS is more specific than
        // that of the parent RS. Assign the number of reducers of the child RS
        // to the parent RS.
        pRS.getConf().setNumReducers(cRS.getConf().getNumReducers());
      }

      if (result[4] > 0) {
        // This case happens only when pRS key is empty in which case we can use
        // number of distribution keys and key serialization info from cRS
        pRS.getConf().setNumDistributionKeys(cRS.getConf().getNumDistributionKeys());
        List<FieldSchema> fields =
            PlanUtils.getFieldSchemasFromColumnList(pRS.getConf().getKeyCols(), "reducesinkkey");
        TableDesc keyTable = PlanUtils.getReduceKeyTableDesc(fields, pRS.getConf().getOrder());
        ArrayList<String> outputKeyCols = Lists.newArrayList();
        for (int i = 0; i < fields.size(); i++) {
          outputKeyCols.add(fields.get(i).getName());
        }
        pRS.getConf().setOutputKeyColumnNames(outputKeyCols);
        pRS.getConf().setKeySerializeInfo(keyTable);
      }
      return true;
    }

Ejemplo n.º 5

Mostrar archivo

Archivo: SortedDynPartitionOptimizer.java Proyecto: hadoop-zuiwanyuan/hive

    public ReduceSinkOperator getReduceSinkOp(
        List<Integer> partitionPositions,
        List<Integer> sortPositions,
        List<Integer> sortOrder,
        List<Integer> sortNullOrder,
        ArrayList<ExprNodeDesc> allCols,
        ArrayList<ExprNodeDesc> bucketColumns,
        int numBuckets,
        Operator<? extends OperatorDesc> parent,
        AcidUtils.Operation writeType)
        throws SemanticException {

      // Order of KEY columns
      // 1) Partition columns
      // 2) Bucket number column
      // 3) Sort columns
      Set<Integer> keyColsPosInVal = Sets.newLinkedHashSet();
      ArrayList<ExprNodeDesc> keyCols = Lists.newArrayList();
      List<Integer> newSortOrder = Lists.newArrayList();
      List<Integer> newSortNullOrder = Lists.newArrayList();
      int numPartAndBuck = partitionPositions.size();

      keyColsPosInVal.addAll(partitionPositions);
      if (!bucketColumns.isEmpty()
          || writeType == Operation.DELETE
          || writeType == Operation.UPDATE) {
        keyColsPosInVal.add(-1);
        numPartAndBuck += 1;
      }
      keyColsPosInVal.addAll(sortPositions);

      // by default partition and bucket columns are sorted in ascending order
      Integer order = 1;
      if (sortOrder != null && !sortOrder.isEmpty()) {
        if (sortOrder.get(0).intValue() == 0) {
          order = 0;
        }
      }
      for (int i = 0; i < numPartAndBuck; i++) {
        newSortOrder.add(order);
      }
      newSortOrder.addAll(sortOrder);

      String orderStr = "";
      for (Integer i : newSortOrder) {
        if (i.intValue() == 1) {
          orderStr += "+";
        } else {
          orderStr += "-";
        }
      }

      // if partition and bucket columns are sorted in ascending order, by default
      // nulls come first; otherwise nulls come last
      Integer nullOrder = order == 1 ? 0 : 1;
      if (sortNullOrder != null && !sortNullOrder.isEmpty()) {
        if (sortNullOrder.get(0).intValue() == 0) {
          nullOrder = 0;
        } else {
          nullOrder = 1;
        }
      }
      for (int i = 0; i < numPartAndBuck; i++) {
        newSortNullOrder.add(nullOrder);
      }
      newSortNullOrder.addAll(sortNullOrder);

      String nullOrderStr = "";
      for (Integer i : newSortNullOrder) {
        if (i.intValue() == 0) {
          nullOrderStr += "a";
        } else {
          nullOrderStr += "z";
        }
      }

      Map<String, ExprNodeDesc> colExprMap = Maps.newHashMap();
      ArrayList<ExprNodeDesc> partCols = Lists.newArrayList();

      // we will clone here as RS will update bucket column key with its
      // corresponding with bucket number and hence their OIs
      for (Integer idx : keyColsPosInVal) {
        if (idx < 0) {
          ExprNodeConstantDesc bucketNumCol =
              new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, BUCKET_NUMBER_COL_NAME);
          keyCols.add(bucketNumCol);
          colExprMap.put(
              Utilities.ReduceField.KEY + ".'" + BUCKET_NUMBER_COL_NAME + "'", bucketNumCol);
        } else {
          keyCols.add(allCols.get(idx).clone());
        }
      }

      ArrayList<ExprNodeDesc> valCols = Lists.newArrayList();
      for (int i = 0; i < allCols.size(); i++) {
        if (!keyColsPosInVal.contains(i)) {
          valCols.add(allCols.get(i).clone());
        }
      }

      for (Integer idx : partitionPositions) {
        partCols.add(allCols.get(idx).clone());
      }

      // in the absence of SORTED BY clause, the sorted dynamic partition insert
      // should honor the ordering of records provided by ORDER BY in SELECT statement
      ReduceSinkOperator parentRSOp =
          OperatorUtils.findSingleOperatorUpstream(parent, ReduceSinkOperator.class);
      if (parentRSOp != null && parseCtx.getQueryProperties().hasOuterOrderBy()) {
        String parentRSOpOrder = parentRSOp.getConf().getOrder();
        String parentRSOpNullOrder = parentRSOp.getConf().getNullOrder();
        if (parentRSOpOrder != null && !parentRSOpOrder.isEmpty() && sortPositions.isEmpty()) {
          keyCols.addAll(parentRSOp.getConf().getKeyCols());
          orderStr += parentRSOpOrder;
          nullOrderStr += parentRSOpNullOrder;
        }
      }

      // map _col0 to KEY._col0, etc
      Map<String, String> nameMapping = new HashMap<>();
      ArrayList<String> keyColNames = Lists.newArrayList();
      for (ExprNodeDesc keyCol : keyCols) {
        String keyColName = keyCol.getExprString();
        keyColNames.add(keyColName);
        colExprMap.put(Utilities.ReduceField.KEY + "." + keyColName, keyCol);
        nameMapping.put(keyColName, Utilities.ReduceField.KEY + "." + keyColName);
      }
      ArrayList<String> valColNames = Lists.newArrayList();
      for (ExprNodeDesc valCol : valCols) {
        String colName = valCol.getExprString();
        valColNames.add(colName);
        colExprMap.put(Utilities.ReduceField.VALUE + "." + colName, valCol);
        nameMapping.put(colName, Utilities.ReduceField.VALUE + "." + colName);
      }

      // Create Key/Value TableDesc. When the operator plan is split into MR tasks,
      // the reduce operator will initialize Extract operator with information
      // from Key and Value TableDesc
      List<FieldSchema> fields =
          PlanUtils.getFieldSchemasFromColumnList(keyCols, keyColNames, 0, "");
      TableDesc keyTable = PlanUtils.getReduceKeyTableDesc(fields, orderStr, nullOrderStr);
      List<FieldSchema> valFields =
          PlanUtils.getFieldSchemasFromColumnList(valCols, valColNames, 0, "");
      TableDesc valueTable = PlanUtils.getReduceValueTableDesc(valFields);
      List<List<Integer>> distinctColumnIndices = Lists.newArrayList();

      // Number of reducers is set to default (-1)
      ReduceSinkDesc rsConf =
          new ReduceSinkDesc(
              keyCols,
              keyCols.size(),
              valCols,
              keyColNames,
              distinctColumnIndices,
              valColNames,
              -1,
              partCols,
              -1,
              keyTable,
              valueTable,
              writeType);
      rsConf.setBucketCols(bucketColumns);
      rsConf.setNumBuckets(numBuckets);

      ArrayList<ColumnInfo> signature = new ArrayList<>();
      for (int index = 0; index < parent.getSchema().getSignature().size(); index++) {
        ColumnInfo colInfo = new ColumnInfo(parent.getSchema().getSignature().get(index));
        colInfo.setInternalName(nameMapping.get(colInfo.getInternalName()));
        signature.add(colInfo);
      }
      ReduceSinkOperator op =
          (ReduceSinkOperator)
              OperatorFactory.getAndMakeChild(rsConf, new RowSchema(signature), parent);
      op.setColumnExprMap(colExprMap);
      return op;
    }

Ejemplo n.º 6

Mostrar archivo

Archivo: ColumnPrunerProcFactory.java Proyecto: cschenyuan/hive-hack

  private static void pruneJoinOperator(
      NodeProcessorCtx ctx,
      CommonJoinOperator op,
      JoinDesc conf,
      Map<String, ExprNodeDesc> columnExprMap,
      Map<Byte, List<Integer>> retainMap,
      boolean mapJoin)
      throws SemanticException {
    ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx;
    List<Operator<? extends OperatorDesc>> childOperators = op.getChildOperators();

    LOG.info("JOIN " + op.getIdentifier() + " oldExprs: " + conf.getExprs());
    List<String> childColLists = cppCtx.genColLists(op);
    if (childColLists == null) {
      return;
    }

    Map<Byte, List<String>> prunedColLists = new HashMap<Byte, List<String>>();
    for (byte tag : conf.getTagOrder()) {
      prunedColLists.put(tag, new ArrayList<String>());
    }

    // add the columns in join filters
    Set<Map.Entry<Byte, List<ExprNodeDesc>>> filters = conf.getFilters().entrySet();
    Iterator<Map.Entry<Byte, List<ExprNodeDesc>>> iter = filters.iterator();
    while (iter.hasNext()) {
      Map.Entry<Byte, List<ExprNodeDesc>> entry = iter.next();
      Byte tag = entry.getKey();
      for (ExprNodeDesc desc : entry.getValue()) {
        List<String> cols = prunedColLists.get(tag);
        cols = Utilities.mergeUniqElems(cols, desc.getCols());
        prunedColLists.put(tag, cols);
      }
    }

    RowResolver joinRR = cppCtx.getOpToParseCtxMap().get(op).getRowResolver();
    RowResolver newJoinRR = new RowResolver();
    ArrayList<String> outputCols = new ArrayList<String>();
    ArrayList<ColumnInfo> rs = new ArrayList<ColumnInfo>();
    Map<String, ExprNodeDesc> newColExprMap = new HashMap<String, ExprNodeDesc>();

    for (int i = 0; i < conf.getOutputColumnNames().size(); i++) {
      String internalName = conf.getOutputColumnNames().get(i);
      ExprNodeDesc desc = columnExprMap.get(internalName);
      Byte tag = conf.getReversedExprs().get(internalName);
      if (!childColLists.contains(internalName)) {
        int index = conf.getExprs().get(tag).indexOf(desc);
        if (index < 0) {
          continue;
        }
        conf.getExprs().get(tag).remove(desc);
        if (retainMap != null) {
          retainMap.get(tag).remove(index);
        }
      } else {
        List<String> prunedRSList = prunedColLists.get(tag);
        if (prunedRSList == null) {
          prunedRSList = new ArrayList<String>();
          prunedColLists.put(tag, prunedRSList);
        }
        prunedRSList = Utilities.mergeUniqElems(prunedRSList, desc.getCols());
        outputCols.add(internalName);
        newColExprMap.put(internalName, desc);
      }
    }

    if (mapJoin) {
      // regenerate the valueTableDesc
      List<TableDesc> valueTableDescs = new ArrayList<TableDesc>();
      for (int pos = 0; pos < op.getParentOperators().size(); pos++) {
        List<ExprNodeDesc> valueCols = conf.getExprs().get(Byte.valueOf((byte) pos));
        StringBuilder keyOrder = new StringBuilder();
        for (int i = 0; i < valueCols.size(); i++) {
          keyOrder.append("+");
        }

        TableDesc valueTableDesc =
            PlanUtils.getMapJoinValueTableDesc(
                PlanUtils.getFieldSchemasFromColumnList(valueCols, "mapjoinvalue"));

        valueTableDescs.add(valueTableDesc);
      }
      ((MapJoinDesc) conf).setValueTblDescs(valueTableDescs);

      Set<Map.Entry<Byte, List<ExprNodeDesc>>> exprs = ((MapJoinDesc) conf).getKeys().entrySet();
      Iterator<Map.Entry<Byte, List<ExprNodeDesc>>> iters = exprs.iterator();
      while (iters.hasNext()) {
        Map.Entry<Byte, List<ExprNodeDesc>> entry = iters.next();
        List<ExprNodeDesc> lists = entry.getValue();
        for (int j = 0; j < lists.size(); j++) {
          ExprNodeDesc desc = lists.get(j);
          Byte tag = entry.getKey();
          List<String> cols = prunedColLists.get(tag);
          cols = Utilities.mergeUniqElems(cols, desc.getCols());
          prunedColLists.put(tag, cols);
        }
      }
    }

    for (Operator<? extends OperatorDesc> child : childOperators) {
      if (child instanceof ReduceSinkOperator) {
        boolean[] flags =
            getPruneReduceSinkOpRetainFlags(childColLists, (ReduceSinkOperator) child);
        pruneReduceSinkOperator(flags, (ReduceSinkOperator) child, cppCtx);
      }
    }

    for (int i = 0; i < outputCols.size(); i++) {
      String internalName = outputCols.get(i);
      String[] nm = joinRR.reverseLookup(internalName);
      ColumnInfo col = joinRR.get(nm[0], nm[1]);
      newJoinRR.put(nm[0], nm[1], col);
      rs.add(col);
    }

    LOG.info("JOIN " + op.getIdentifier() + " newExprs: " + conf.getExprs());
    op.setColumnExprMap(newColExprMap);
    conf.setOutputColumnNames(outputCols);
    op.getSchema().setSignature(rs);
    cppCtx.getOpToParseCtxMap().get(op).setRowResolver(newJoinRR);
    cppCtx.getJoinPrunedColLists().put(op, prunedColLists);
  }