Exemplo n.º 1
0
  /**
   * Creates one {@link ExprNodeColumnDesc} per column of the input operator's schema, covering the
   * schema positions from {@code startPos} through {@code endPos} (both inclusive).
   *
   * @param inputOp input Hive operator whose schema signature supplies the columns
   * @param startPos first schema position to include; must be >= 0 and <= endPos
   * @param endPos last schema position to include; must be >= 0
   * @param addEmptyTabAlias when true, every descriptor gets an empty table alias instead of the
   *     column's own table alias
   * @param setColToNonVirtual when true, every descriptor is flagged as a non-virtual column
   * @return the column descriptors, in schema order
   */
  public static ArrayList<ExprNodeDesc> genExprNodeDesc(
      Operator inputOp,
      int startPos,
      int endPos,
      boolean addEmptyTabAlias,
      boolean setColToNonVirtual) {
    ArrayList<ExprNodeDesc> descriptors = new ArrayList<ExprNodeDesc>();
    List<ColumnInfo> schemaColumns = inputOp.getSchema().getSignature();

    for (int pos = startPos; pos <= endPos; pos++) {
      ColumnInfo column = schemaColumns.get(pos);
      String alias = addEmptyTabAlias ? "" : column.getTabAlias();
      boolean isVirtual = !setColToNonVirtual && column.getIsVirtualCol();
      descriptors.add(
          new ExprNodeColumnDesc(column.getType(), column.getInternalName(), alias, isVirtual));
    }

    return descriptors;
  }
  /**
   * Drops the value columns of a ReduceSink that are not flagged for retention and rebuilds the
   * operator's value column names, value expressions, schema signature and value serialization
   * info to match.
   *
   * @param retainFlags one flag per output value column; {@code false} marks the column for removal
   * @param reduce the ReduceSink operator that is pruned in place
   * @param cppCtx pruning context used to look up the operator's row resolver
   * @throws SemanticException propagated from the row resolver
   */
  private static void pruneReduceSinkOperator(
      boolean[] retainFlags, ReduceSinkOperator reduce, ColumnPrunerProcCtx cppCtx)
      throws SemanticException {
    ReduceSinkDesc rsDesc = reduce.getConf();
    Map<String, ExprNodeDesc> colExprMap = reduce.getColumnExprMap();
    LOG.info("RS " + reduce.getIdentifier() + " oldColExprMap: " + colExprMap);
    RowResolver resolver = cppCtx.getOpToParseCtxMap().get(reduce).getRowResolver();
    ArrayList<ColumnInfo> prunedSignature =
        new ArrayList<ColumnInfo>(resolver.getRowSchema().getSignature());

    List<String> valueNames = rsDesc.getOutputValueColumnNames();
    ArrayList<String> keptValueNames = new ArrayList<String>();

    List<ExprNodeDesc> keyExprs = rsDesc.getKeyCols();
    List<ExprNodeDesc> valueExprs = rsDesc.getValueCols();
    ArrayList<ExprNodeDesc> keptValueExprs = new ArrayList<ExprNodeDesc>();

    for (int idx = 0; idx < retainFlags.length; idx++) {
      String valueName = valueNames.get(idx);
      ExprNodeDesc valueExpr = valueExprs.get(idx);

      if (retainFlags[idx]) {
        keptValueNames.add(valueName);
        keptValueExprs.add(valueExpr);
        continue;
      }

      // Try the plain name first, then the name qualified with the VALUE prefix.
      String[] qualified = resolver.reverseLookup(valueName);
      if (qualified == null) {
        valueName = Utilities.ReduceField.VALUE.toString() + "." + valueName;
        qualified = resolver.reverseLookup(valueName);
      }

      // Several columns may refer to the same column name; once its ColumnInfo has been removed
      // from the row resolver there is nothing left to resolve for the later ones.
      if (qualified == null) {
        continue;
      }

      // A column that also appears among the key expressions of the RS keeps its information.
      boolean usedAsKey = ExprNodeDescUtils.indexOf(valueExpr, keyExprs) != -1;
      if (usedAsKey) {
        continue;
      }

      ColumnInfo removed = resolver.getFieldMap(qualified[0]).remove(qualified[1]);
      resolver.getInvRslvMap().remove(removed.getInternalName());
      colExprMap.remove(valueName);
      prunedSignature.remove(removed);
    }

    resolver.getRowSchema().setSignature(prunedSignature);
    reduce.getSchema().setSignature(prunedSignature);
    rsDesc.setOutputValueColumnNames(keptValueNames);
    rsDesc.setValueCols(keptValueExprs);

    TableDesc valueTableDesc =
        PlanUtils.getReduceValueTableDesc(
            PlanUtils.getFieldSchemasFromColumnList(
                rsDesc.getValueCols(), keptValueNames, 0, ""));
    rsDesc.setValueSerializeInfo(valueTableDesc);
    LOG.info("RS " + reduce.getIdentifier() + " newColExprMap: " + colExprMap);
  }
Exemplo n.º 3
0
  /**
   * Finds the zero-based position, within this resolver's row schema signature, of the column
   * with the given internal name.
   *
   * @param internalName internal column name to search for
   * @return the position of the first matching column, or -1 when no column matches
   */
  public int getPosition(String internalName) {
    int index = 0;
    for (ColumnInfo column : rowSchema.getSignature()) {
      if (column.getInternalName().equals(internalName)) {
        return index;
      }
      index++;
    }
    return -1;
  }
 /**
  * Builds a row resolver that contains only the columns of {@code oldRR} whose internal names
  * are listed in {@code prunedCols}, keeping the column order of the old row schema.
  *
  * @param prunedCols internal names of the columns to keep
  * @param oldRR row resolver to copy the retained mappings from
  * @param sig output list; every retained column is appended to it
  * @return the new, pruned row resolver
  * @throws SemanticException propagated from the row resolver
  */
 private RowResolver buildPrunedRR(
     List<String> prunedCols, RowResolver oldRR, ArrayList<ColumnInfo> sig)
     throws SemanticException {
   RowResolver prunedRR = new RowResolver();
   HashSet<String> retained = new HashSet<String>(prunedCols);

   for (ColumnInfo column : oldRR.getRowSchema().getSignature()) {
     String internalName = column.getInternalName();
     if (!retained.contains(internalName)) {
       continue;
     }
     String[] qualified = oldRR.reverseLookup(internalName);
     prunedRR.put(qualified[0], qualified[1], column);
     sig.add(column);
   }

   return prunedRR;
 }
 /**
  * Returns the requested columns in the order in which they appear in the operator's schema, so
  * that pruning preserves the column order of the input schema.
  *
  * @param op operator whose schema defines the column order
  * @param cols internal names of the columns to keep
  * @return a new list with the schema's column names that occur in {@code cols}, in schema order;
  *     {@code cols} itself when the operator has no schema
  * @throws SemanticException declared for callers; not thrown by the visible code
  */
 private static List<String> preserveColumnOrder(
     Operator<? extends OperatorDesc> op, List<String> cols) throws SemanticException {
   RowSchema schema = op.getSchema();
   if (schema == null) {
     return cols;
   }

   ArrayList<String> ordered = new ArrayList<String>();
   for (ColumnInfo column : schema.getSignature()) {
     String internalName = column.getInternalName();
     if (cols.contains(internalName)) {
       ordered.add(internalName);
     }
   }
   return ordered;
 }
  /**
   * Works out which columns a table scan has to provide and records the result on the scan
   * operator and its descriptor: the needed column ids and names, the referenced column names and
   * the virtual columns that are still in use.
   *
   * <p>Note that when the scan gathers stats, the name of the RAWDATASIZE virtual column is
   * appended to {@code cols} itself, i.e. the caller's list is modified.
   *
   * @param scanOp table scan operator to configure
   * @param inputRR row resolver used to resolve the column names
   * @param cols internal names of the columns required from the scan
   * @throws SemanticException propagated from the row resolver
   */
  public static void setupNeededColumns(
      TableScanOperator scanOp, RowResolver inputRR, List<String> cols) throws SemanticException {
    List<Integer> neededIds = new ArrayList<Integer>();
    List<String> neededNames = new ArrayList<String>();
    List<String> referencedNames = new ArrayList<String>();
    TableScanDesc scanDesc = scanOp.getConf();
    List<VirtualColumn> declaredVirtualCols = scanDesc.getVirtualCols();
    List<VirtualColumn> keptVirtualCols = new ArrayList<VirtualColumn>();

    // add virtual columns for ANALYZE TABLE
    if (scanDesc.isGatherStats()) {
      cols.add(VirtualColumn.RAWDATASIZE.getName());
    }

    for (String internalName : cols) {
      String[] qualified = inputRR.reverseLookup(internalName);
      if (qualified == null) {
        continue;
      }
      referencedNames.add(internalName);

      ColumnInfo column = inputRR.get(qualified[0], qualified[1]);
      if (column.getIsVirtualCol()) {
        // Partition columns are flagged as virtual too, but they must not end up in this list:
        // only columns declared as virtual columns on the scan descriptor are kept.
        for (VirtualColumn candidate : declaredVirtualCols) {
          if (candidate.getName().equals(column.getInternalName())) {
            keptVirtualCols.add(candidate);
          }
        }
        // Virtual columns are not passed to the reader.
        continue;
      }

      int position = inputRR.getPosition(internalName);
      if (position >= 0) {
        // record the needed column by id and by name
        neededIds.add(position);
        neededNames.add(internalName);
      }
    }

    scanDesc.setVirtualCols(keptVirtualCols);
    scanOp.setNeededColumnIDs(neededIds);
    scanOp.setNeededColumns(neededNames);
    scanOp.setReferencedColumns(referencedNames);
  }
Exemplo n.º 7
0
  /**
   * Registers a (table alias, column alias) mapping for a column without touching the row schema.
   *
   * <p>Several mappings to the same ColumnInfo are allowed. Only the first one is stored as the
   * inverse mapping of the column's internal name; a later one is stored as the alternate inverse
   * mapping instead.
   *
   * @param tab_alias table alias, may be null; lower-cased before use
   * @param col_alias column alias; lower-cased before use
   * @param colInfo column the aliases resolve to
   * @return true if an inverse mapping for the column's internal name already existed before this
   *     call
   */
  public boolean addMappingOnly(String tab_alias, String col_alias, ColumnInfo colInfo) {
    String tableKey = (tab_alias == null) ? null : tab_alias.toLowerCase();
    String columnKey = col_alias.toLowerCase();
    String internalName = colInfo.getInternalName();

    boolean alreadyMapped = invRslvMap.containsKey(internalName);

    LinkedHashMap<String, ColumnInfo> tableColumns = rslvMap.get(tableKey);
    if (tableColumns == null) {
      tableColumns = new LinkedHashMap<String, ColumnInfo>();
      rslvMap.put(tableKey, tableColumns);
    }

    ColumnInfo replaced = tableColumns.put(columnKey, colInfo);
    if (replaced != null) {
      LOG.warn(
          "Duplicate column info for "
              + tableKey
              + "."
              + columnKey
              + " was overwritten in RowResolver map: "
              + replaced
              + " by "
              + colInfo);
    }

    String[] qualifiedAlias = {tableKey, columnKey};
    if (alreadyMapped) {
      altInvRslvMap.put(internalName, qualifiedAlias);
    } else {
      invRslvMap.put(internalName, qualifiedAlias);
    }

    return alreadyMapped;
  }
Exemplo n.º 8
0
  // TODO: 1) How to handle collisions? 2) Should we be cloning ColumnInfo or not?
  /**
   * Copies columns from one row resolver into another, giving every copy a fresh internal name
   * derived from a running output column position.
   *
   * @param rrToAddTo resolver that receives the copied columns
   * @param rrToAddFrom resolver whose row schema is copied, in signature order
   * @param outputColPosRef optional in/out counter for the output column position; when null the
   *     numbering starts at 0 and is not reported back
   * @param numColumns maximum number of columns to copy; a negative value copies all of them
   * @return true if every column was added without running into a duplicate
   * @throws SemanticException propagated from the row resolvers
   */
  private static boolean add(
      RowResolver rrToAddTo, RowResolver rrToAddFrom, IntRef outputColPosRef, int numColumns)
      throws SemanticException {
    int nextOutputPos = (outputColPosRef != null) ? outputColPosRef.val : 0;
    boolean allUnique = true;
    int copied = 0;

    for (ColumnInfo sourceCol : rrToAddFrom.getRowSchema().getSignature()) {
      if (numColumns >= 0 && copied == numColumns) {
        break;
      }

      String sourceName = sourceCol.getInternalName();
      String[] primary = rrToAddFrom.reverseLookup(sourceName);
      String primaryTabAlias = primary[0];
      String primaryColAlias = primary[1];

      ColumnInfo copy = new ColumnInfo(sourceCol);
      copy.setInternalName(SemanticAnalyzer.getColumnInternalName(nextOutputPos));
      nextOutputPos++;

      if (!rrToAddTo.putWithCheck(primaryTabAlias, primaryColAlias, sourceName, copy)) {
        allUnique = false;
      }

      // Carry over the alternate mapping of the source column, if it has one.
      String[] alternate = rrToAddFrom.getAlternateMappings(sourceName);
      if (alternate != null) {
        rrToAddTo.put(alternate[0], alternate[1], copy);
      }

      copied++;
    }

    if (outputColPosRef != null) {
      outputColPosRef.val = nextOutputPos;
    }
    return allUnique;
  }
 /**
  * Restricts an operator's row resolver and schema signature to the given columns, preserving
  * the column order of the operator's current row resolver. Does nothing when the operator has
  * no schema.
  *
  * @param ctx processor context; cast to ColumnPrunerProcCtx to reach the operator's parse context
  * @param op operator to prune in place
  * @param cols internal names of the columns to keep
  * @throws SemanticException propagated from the row resolver
  */
 private static void pruneOperator(
     NodeProcessorCtx ctx, Operator<? extends OperatorDesc> op, List<String> cols)
     throws SemanticException {
   if (op.getSchema() == null) {
     return;
   }

   ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx;
   RowResolver currentRR = cppCtx.getOpToParseCtxMap().get(op).getRowResolver();
   RowResolver prunedRR = new RowResolver();
   ArrayList<ColumnInfo> prunedSignature = new ArrayList<ColumnInfo>();

   for (ColumnInfo column : currentRR.getRowSchema().getSignature()) {
     String internalName = column.getInternalName();
     if (!cols.contains(internalName)) {
       continue;
     }
     String[] qualified = currentRR.reverseLookup(internalName);
     prunedRR.put(qualified[0], qualified[1], column);
     prunedSignature.add(column);
   }

   cppCtx.getOpToParseCtxMap().get(op).setRowResolver(prunedRR);
   op.getSchema().setSignature(prunedSignature);
 }
Exemplo n.º 10
0
    /**
     * Resolves a qualified column reference into a column descriptor.
     *
     * <p>The table alias is read from the first grandchild of {@code expr}, the column name from
     * the constant in {@code nodeOutputs[1]}. When the column cannot be resolved, an
     * INVALID_COLUMN error is recorded on the context and {@code null} is returned.
     *
     * @param ctx type-check context; must be a JoinTypeCheckCtx
     * @param expr AST node of the qualified column reference
     * @param nodeOutputs outputs of the child nodes; element 1 holds the column name constant
     * @return the descriptor of the resolved column, or null if the column is unknown
     * @throws SemanticException propagated from the column lookup
     */
    @Override
    protected ExprNodeColumnDesc processQualifiedColRef(
        TypeCheckCtx ctx, ASTNode expr, Object... nodeOutputs) throws SemanticException {
      // NOTE: the alias is expected to be a valid, non-ambiguous table alias, because that has
      // already been checked in TOK_TABLE_OR_COL's process method.
      String tableAlias =
          BaseSemanticAnalyzer.unescapeIdentifier(expr.getChild(0).getChild(0).getText());
      JoinTypeCheckCtx joinCtx = (JoinTypeCheckCtx) ctx;
      String columnName = ((ExprNodeConstantDesc) nodeOutputs[1]).getValue().toString();

      ColumnInfo resolved = getColInfo(joinCtx, tableAlias, columnName, expr);
      if (resolved != null) {
        return new ExprNodeColumnDesc(
            resolved.getType(),
            resolved.getInternalName(),
            tableAlias,
            resolved.getIsVirtualCol());
      }

      ctx.setError(ErrorMsg.INVALID_COLUMN.getMsg(expr.getChild(1)), expr);
      return null;
    }
Exemplo n.º 11
0
 /**
  * Adds a column to this row resolver while checking for duplicate mappings. This exists because
  * CBO cannot cope with the Hive behavior of blindly overwriting an old mapping in the resolver.
  *
  * <p>The mapping is first tried under {@code colAlias}. If a different column already occupies
  * that alias and {@code internalName} is not null, the internal name is tried as the column
  * alias instead.
  *
  * @param tabAlias table alias of the mapping
  * @param colAlias preferred column alias of the mapping
  * @param internalName fallback column alias used on a conflict; may be null to skip the fallback
  * @param newCI column to register
  * @return true if the mapping was added, or an equivalent mapping was already present; false if
  *     a conflicting mapping blocked both the alias and the fallback
  * @throws SemanticException propagated from the lookup and insert operations
  */
 public boolean putWithCheck(
     String tabAlias, String colAlias, String internalName, ColumnInfo newCI)
     throws SemanticException {
   ColumnInfo byAlias = get(tabAlias, colAlias);
   if (byAlias == null) {
     put(tabAlias, colAlias, newCI);
     return true;
   }
   // Hive sometimes adds the very same mapping twice; that is not treated as a conflict.
   if (byAlias.isSameColumnForRR(newCI)) {
     return true;
   }

   LOG.warn(
       "Found duplicate column alias in RR: "
           + byAlias.toMappingString(tabAlias, colAlias)
           + " adding "
           + newCI.toMappingString(tabAlias, colAlias));
   if (internalName == null) {
     return false;
   }

   ColumnInfo byInternalName = get(tabAlias, internalName);
   if (byInternalName == null) {
     put(tabAlias, internalName, newCI);
     return true;
   }
   if (byInternalName.isSameColumnForRR(newCI)) {
     return true;
   }

   LOG.warn(
       "Failed to use internal name after finding a duplicate: "
           + byInternalName.toMappingString(tabAlias, internalName));
   return false;
 }
Exemplo n.º 12
0
  /**
   * Builds the row resolver for the select list: every column of the input is copied over, and
   * one extra column named PATHATTR_NAME is added whose type is a list of structs made up of the
   * input columns (array&lt;struct&gt;).
   *
   * <p>As a side effect, {@code evaluator.inputColumnNamesMap} is replaced by a fresh map from
   * each input column's internal name to the struct field name chosen for it.
   *
   * @param evaluator evaluator whose input column name map is (re)initialized
   * @param inpDef input definition that supplies the input row resolver
   * @return the row resolver describing the select list
   * @throws SemanticException propagated from the row resolver and the PTF translator
   */
  protected static RowResolver createSelectListRR(MatchPath evaluator, PTFInputDef inpDef)
      throws SemanticException {
    RowResolver selectRR = new RowResolver();
    RowResolver inputRR = inpDef.getOutputShape().getRr();

    evaluator.inputColumnNamesMap = new HashMap<String, String>();
    ArrayList<String> structFieldNames = new ArrayList<String>();
    ArrayList<ObjectInspector> structFieldOIs = new ArrayList<ObjectInspector>();

    for (ColumnInfo sourceCol : inputRR.getColumnInfos()) {
      ColumnInfo copy = new ColumnInfo(sourceCol);
      String[] qualified = inputRR.reverseLookup(sourceCol.getInternalName());
      ASTNode sourceExpr = PTFTranslator.getASTNode(sourceCol, inputRR);

      String fieldName;
      if (sourceExpr != null) {
        // Expression columns are registered by expression and named after its tree string.
        selectRR.putExpression(sourceExpr, copy);
        fieldName = sourceExpr.toStringTree().toLowerCase();
      } else {
        // Prefer the resolver's column alias, then the column's own alias, then its internal name.
        fieldName = (qualified != null) ? qualified[1] : copy.getAlias();
        if (fieldName == null) {
          fieldName = copy.getInternalName();
        }
        selectRR.put(copy.getTabAlias(), fieldName, copy);
      }

      evaluator.inputColumnNamesMap.put(copy.getInternalName(), fieldName);
      structFieldNames.add(fieldName);
      structFieldOIs.add(copy.getObjectInspector());
    }

    StandardListObjectInspector pathAttrOI =
        ObjectInspectorFactory.getStandardListObjectInspector(
            ObjectInspectorFactory.getStandardStructObjectInspector(
                structFieldNames, structFieldOIs));

    ColumnInfo pathColumn =
        new ColumnInfo(
            PATHATTR_NAME,
            TypeInfoUtils.getTypeInfoFromObjectInspector(pathAttrOI),
            null,
            false,
            false);
    selectRR.put(null, PATHATTR_NAME, pathColumn);

    return selectRR;
  }
Exemplo n.º 13
0
  /**
   * Gets the aliases of the columns that are not hidden virtual columns.
   *
   * <p>If {@code tableAlias} has a mapping of its own, the column aliases of that table are
   * returned. Otherwise all columns of this resolver are considered; their aliases are prefixed
   * with the table alias only when the resolver knows more than one table and the column has a
   * non-null table alias.
   *
   * @param tableAlias table alias whose columns are wanted
   * @param max the maximum number of columns to return; a non-positive value means no limit
   * @return a list of non-hidden column names, without duplicates, no greater in size than max
   */
  public List<String> getReferenceableColumnAliases(String tableAlias, int max) {
    Set<String> aliases = new LinkedHashSet<String>();
    int tableCount = rslvMap.size();
    int accepted = 0;

    Map<String, ColumnInfo> tableColumns = rslvMap.get(tableAlias);
    if (tableColumns != null) {
      for (Map.Entry<String, ColumnInfo> column : tableColumns.entrySet()) {
        if (max > 0 && accepted >= max) {
          break;
        }
        if (column.getValue().isHiddenVirtualCol()) {
          continue;
        }
        aliases.add(column.getKey());
        accepted++;
      }
      return new ArrayList<String>(aliases);
    }

    for (ColumnInfo column : getColumnInfos()) {
      if (max > 0 && accepted >= max) {
        break;
      }
      if (column.isHiddenVirtualCol()) {
        continue;
      }

      // Expression resolvers have no usable inverse mapping; fall back to the column's alias.
      String[] qualified = isExprResolver ? null : reverseLookup(column.getInternalName());
      if (qualified == null) {
        aliases.add(column.getAlias());
      } else if (qualified[0] == null || tableCount <= 1) {
        aliases.add(qualified[1]);
      } else {
        aliases.add(qualified[0] + "." + qualified[1]);
      }
      accepted++;
    }
    return new ArrayList<String>(aliases);
  }
Exemplo n.º 14
0
  /**
   * Debug helper that does the main work of dumping an operator tree: prints the operator's class,
   * its column expression map and its schema, a few operator-specific details, and then recurses
   * into the parent operators with the nesting level increased by one.
   *
   * <p>An operator without a column expression map or without a schema is handled gracefully: the
   * corresponding section is printed with its heading only.
   *
   * @param sinkOp operator to print; the traversal walks upwards through its parents
   * @param level nesting level passed to {@code println} for every line of this operator
   */
  @SuppressWarnings("unchecked")
  public static void analyzeHelper(Operator sinkOp, int level) {
    println(level, sinkOp.getClass());

    println(level, "Column Expr Map: ");
    Map<String, ExprNodeDesc> map = sinkOp.getColumnExprMap();
    if (map != null) {
      for (Entry<String, ExprNodeDesc> entry : map.entrySet()) {
        ExprNodeDesc value = entry.getValue();
        if (value instanceof ExprNodeColumnDesc) {
          ExprNodeColumnDesc columnDesc = (ExprNodeColumnDesc) value;
          println(level, entry.getKey() + ": " + columnDesc.getTabAlias() + columnDesc.getCols());
        } else if (value instanceof ExprNodeConstantDesc) {
          println(
              level, entry.getKey() + ":: " + ((ExprNodeConstantDesc) value).getExprString());
        } else {
          // Any other expression type is printed by its expression string only.
          println(level, value.getExprString());
        }
      }
    }

    println(level, "Schema: ");
    RowSchema schema = sinkOp.getSchema();
    // An operator may have no schema; skip the listing instead of failing the whole dump.
    if (schema != null) {
      for (ColumnInfo info : schema.getSignature()) {
        println(level, info.getTabAlias() + "[" + info.getInternalName() + "]");
      }
    }

    if (sinkOp instanceof FilterOperator) {
      println(level, ((FilterOperator) sinkOp).getConf().getPredicate().getExprString());
    }

    if (sinkOp instanceof LimitOperator) {
      println(level, ((LimitOperator) sinkOp).getConf().getClass());
    }

    List<Operator> parents = sinkOp.getParentOperators();
    if (parents != null) {
      for (Operator parent : parents) {
        analyzeHelper(parent, level + 1);
      }
    }
  }
    /**
     * Creates a ReduceSink operator, as a child of {@code parent}, that shuffles rows by the
     * partition columns, the bucket number and the sort columns.
     *
     * <p>The key columns are laid out in this order: partition columns, the bucket number column
     * (present when there are bucket columns or the write is an UPDATE or DELETE), then the sort
     * columns. All remaining columns of {@code allCols} become value columns.
     *
     * @param partitionPositions positions in {@code allCols} of the partition columns
     * @param sortPositions positions in {@code allCols} of the sort columns
     * @param sortOrder sort direction per sort column (1 is rendered as "+", any other value as
     *     "-"); may be null or empty
     * @param sortNullOrder null ordering per sort column (0 is rendered as "a", any other value as
     *     "z"); may be null or empty
     * @param allCols all columns flowing into the ReduceSink
     * @param bucketColumns bucket columns; stored on the ReduceSink descriptor
     * @param numBuckets number of buckets; stored on the ReduceSink descriptor
     * @param parent operator the new ReduceSink is attached to
     * @param writeType kind of write operation
     * @return the newly created ReduceSink operator
     * @throws SemanticException propagated from operator lookup and plan utilities
     */
    public ReduceSinkOperator getReduceSinkOp(
        List<Integer> partitionPositions,
        List<Integer> sortPositions,
        List<Integer> sortOrder,
        List<Integer> sortNullOrder,
        ArrayList<ExprNodeDesc> allCols,
        ArrayList<ExprNodeDesc> bucketColumns,
        int numBuckets,
        Operator<? extends OperatorDesc> parent,
        AcidUtils.Operation writeType)
        throws SemanticException {

      // Order of KEY columns
      // 1) Partition columns
      // 2) Bucket number column
      // 3) Sort columns
      Set<Integer> keyColsPosInVal = Sets.newLinkedHashSet();
      ArrayList<ExprNodeDesc> keyCols = Lists.newArrayList();
      List<Integer> newSortOrder = Lists.newArrayList();
      List<Integer> newSortNullOrder = Lists.newArrayList();
      int numPartAndBuck = partitionPositions.size();

      keyColsPosInVal.addAll(partitionPositions);
      if (!bucketColumns.isEmpty()
          || writeType == Operation.DELETE
          || writeType == Operation.UPDATE) {
        // -1 is the placeholder position of the bucket number column
        keyColsPosInVal.add(-1);
        numPartAndBuck += 1;
      }
      keyColsPosInVal.addAll(sortPositions);

      // by default partition and bucket columns are sorted in ascending order; they follow the
      // direction of the first sort column when one is given
      int order = 1;
      if (sortOrder != null && !sortOrder.isEmpty() && sortOrder.get(0).intValue() == 0) {
        order = 0;
      }
      for (int i = 0; i < numPartAndBuck; i++) {
        newSortOrder.add(order);
      }
      // sortOrder is optional: only append it when present, instead of failing on null
      if (sortOrder != null) {
        newSortOrder.addAll(sortOrder);
      }

      StringBuilder orderStr = new StringBuilder();
      for (Integer i : newSortOrder) {
        orderStr.append(i.intValue() == 1 ? '+' : '-');
      }

      // if partition and bucket columns are sorted in ascending order, by default
      // nulls come first; otherwise nulls come last
      int nullOrder = order == 1 ? 0 : 1;
      if (sortNullOrder != null && !sortNullOrder.isEmpty()) {
        nullOrder = sortNullOrder.get(0).intValue() == 0 ? 0 : 1;
      }
      for (int i = 0; i < numPartAndBuck; i++) {
        newSortNullOrder.add(nullOrder);
      }
      // sortNullOrder is optional as well
      if (sortNullOrder != null) {
        newSortNullOrder.addAll(sortNullOrder);
      }

      StringBuilder nullOrderStr = new StringBuilder();
      for (Integer i : newSortNullOrder) {
        nullOrderStr.append(i.intValue() == 0 ? 'a' : 'z');
      }

      Map<String, ExprNodeDesc> colExprMap = Maps.newHashMap();
      ArrayList<ExprNodeDesc> partCols = Lists.newArrayList();

      // we will clone here as RS will update bucket column key with its
      // corresponding with bucket number and hence their OIs
      for (Integer idx : keyColsPosInVal) {
        if (idx < 0) {
          ExprNodeConstantDesc bucketNumCol =
              new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, BUCKET_NUMBER_COL_NAME);
          keyCols.add(bucketNumCol);
          colExprMap.put(
              Utilities.ReduceField.KEY + ".'" + BUCKET_NUMBER_COL_NAME + "'", bucketNumCol);
        } else {
          keyCols.add(allCols.get(idx).clone());
        }
      }

      // every column that is not a key column travels as a value column
      ArrayList<ExprNodeDesc> valCols = Lists.newArrayList();
      for (int i = 0; i < allCols.size(); i++) {
        if (!keyColsPosInVal.contains(i)) {
          valCols.add(allCols.get(i).clone());
        }
      }

      for (Integer idx : partitionPositions) {
        partCols.add(allCols.get(idx).clone());
      }

      // in the absence of SORTED BY clause, the sorted dynamic partition insert
      // should honor the ordering of records provided by ORDER BY in SELECT statement
      ReduceSinkOperator parentRSOp =
          OperatorUtils.findSingleOperatorUpstream(parent, ReduceSinkOperator.class);
      if (parentRSOp != null && parseCtx.getQueryProperties().hasOuterOrderBy()) {
        String parentRSOpOrder = parentRSOp.getConf().getOrder();
        String parentRSOpNullOrder = parentRSOp.getConf().getNullOrder();
        if (parentRSOpOrder != null && !parentRSOpOrder.isEmpty() && sortPositions.isEmpty()) {
          keyCols.addAll(parentRSOp.getConf().getKeyCols());
          orderStr.append(parentRSOpOrder);
          nullOrderStr.append(parentRSOpNullOrder);
        }
      }

      // map _col0 to KEY._col0, etc
      Map<String, String> nameMapping = new HashMap<>();
      ArrayList<String> keyColNames = Lists.newArrayList();
      for (ExprNodeDesc keyCol : keyCols) {
        String keyColName = keyCol.getExprString();
        keyColNames.add(keyColName);
        colExprMap.put(Utilities.ReduceField.KEY + "." + keyColName, keyCol);
        nameMapping.put(keyColName, Utilities.ReduceField.KEY + "." + keyColName);
      }
      ArrayList<String> valColNames = Lists.newArrayList();
      for (ExprNodeDesc valCol : valCols) {
        String colName = valCol.getExprString();
        valColNames.add(colName);
        colExprMap.put(Utilities.ReduceField.VALUE + "." + colName, valCol);
        nameMapping.put(colName, Utilities.ReduceField.VALUE + "." + colName);
      }

      // Create Key/Value TableDesc. When the operator plan is split into MR tasks,
      // the reduce operator will initialize Extract operator with information
      // from Key and Value TableDesc
      List<FieldSchema> fields =
          PlanUtils.getFieldSchemasFromColumnList(keyCols, keyColNames, 0, "");
      TableDesc keyTable =
          PlanUtils.getReduceKeyTableDesc(fields, orderStr.toString(), nullOrderStr.toString());
      List<FieldSchema> valFields =
          PlanUtils.getFieldSchemasFromColumnList(valCols, valColNames, 0, "");
      TableDesc valueTable = PlanUtils.getReduceValueTableDesc(valFields);
      List<List<Integer>> distinctColumnIndices = Lists.newArrayList();

      // Number of reducers is set to default (-1)
      ReduceSinkDesc rsConf =
          new ReduceSinkDesc(
              keyCols,
              keyCols.size(),
              valCols,
              keyColNames,
              distinctColumnIndices,
              valColNames,
              -1,
              partCols,
              -1,
              keyTable,
              valueTable,
              writeType);
      rsConf.setBucketCols(bucketColumns);
      rsConf.setNumBuckets(numBuckets);

      // The output schema mirrors the parent's schema, with every internal name replaced by its
      // KEY./VALUE.-qualified counterpart from nameMapping.
      ArrayList<ColumnInfo> signature = new ArrayList<>();
      for (int index = 0; index < parent.getSchema().getSignature().size(); index++) {
        ColumnInfo colInfo = new ColumnInfo(parent.getSchema().getSignature().get(index));
        colInfo.setInternalName(nameMapping.get(colInfo.getInternalName()));
        signature.add(colInfo);
      }
      ReduceSinkOperator op =
          (ReduceSinkOperator)
              OperatorFactory.getAndMakeChild(rsConf, new RowSchema(signature), parent);
      op.setColumnExprMap(colExprMap);
      return op;
    }
Exemplo n.º 16
0
  /**
   * Sets up a merge plan for the output of a FileSink: a dummy table scan feeding a ReduceSink,
   * an Extract operator used as the reducer, and a new FileSink that writes to {@code finalName}.
   * The plan is combined with a move work into a conditional task, which is then linked in via
   * {@code LinkMoveTask}.
   *
   * @param fsOp FileSink operator whose output is to be merged
   * @param ctx map-reduce processing context supplying the current task and the parse context
   * @param finalName target name used for the new FileSink and for the move work
   * @throws SemanticException propagated from the operator and expression factories
   */
  private void createMapReduce4Merge(FileSinkOperator fsOp, GenMRProcContext ctx, String finalName)
      throws SemanticException {
    Task<? extends Serializable> currTask = ctx.getCurrTask();
    RowSchema inputRS = fsOp.getSchema();

    // create a reduce Sink operator - key is the first column
    // NOTE(review): keyCols is populated here but never read again in this method; the ReduceSink
    // descriptor below is built with a fresh empty key list. Confirm whether that is intended.
    ArrayList<ExprNodeDesc> keyCols = new ArrayList<ExprNodeDesc>();
    keyCols.add(TypeCheckProcFactory.DefaultExprProcessor.getFuncExprNodeDesc("rand"));

    // value is all the columns in the FileSink operator input
    ArrayList<ExprNodeDesc> valueCols = new ArrayList<ExprNodeDesc>();
    for (ColumnInfo ci : inputRS.getSignature()) {
      valueCols.add(
          new ExprNodeColumnDesc(
              ci.getType(), ci.getInternalName(), ci.getTabAlias(), ci.getIsVirtualCol()));
    }

    // create a dummy tableScan operator
    Operator<? extends Serializable> tsMerge = OperatorFactory.get(TableScanDesc.class, inputRS);

    // one generated internal column name per value column
    ArrayList<String> outputColumns = new ArrayList<String>();
    for (int i = 0; i < valueCols.size(); i++) {
      outputColumns.add(SemanticAnalyzer.getColumnInternalName(i));
    }

    ReduceSinkDesc rsDesc =
        PlanUtils.getReduceSinkDesc(
            new ArrayList<ExprNodeDesc>(), valueCols, outputColumns, false, -1, -1, -1);
    // attach the ReduceSink below the dummy table scan
    OperatorFactory.getAndMakeChild(rsDesc, inputRS, tsMerge);
    ParseContext parseCtx = ctx.getParseCtx();
    FileSinkDesc fsConf = fsOp.getConf();

    // Add the extract operator to get the value fields
    RowResolver out_rwsch = new RowResolver();
    RowResolver interim_rwsch = ctx.getParseCtx().getOpParseCtx().get(fsOp).getRowResolver();
    // running column position; its string form becomes the internal name of each output column
    Integer pos = Integer.valueOf(0);
    for (ColumnInfo colInfo : interim_rwsch.getColumnInfos()) {
      String[] info = interim_rwsch.reverseLookup(colInfo.getInternalName());
      out_rwsch.put(
          info[0],
          info[1],
          new ColumnInfo(
              pos.toString(),
              colInfo.getType(),
              info[0],
              colInfo.getIsVirtualCol(),
              colInfo.isHiddenVirtualCol()));
      pos = Integer.valueOf(pos.intValue() + 1);
    }

    // the Extract operator reads the VALUE field of the reduce input
    Operator<ExtractDesc> extract =
        OperatorFactory.getAndMakeChild(
            new ExtractDesc(
                new ExprNodeColumnDesc(
                    TypeInfoFactory.stringTypeInfo,
                    Utilities.ReduceField.VALUE.toString(),
                    "",
                    false)),
            new RowSchema(out_rwsch.getColumnInfos()));

    // Clone the table info for the new FileSink first, then strip the partition-columns property
    // from the original FileSink's table info (the clone taken above is not modified by this).
    TableDesc ts = (TableDesc) fsConf.getTableInfo().clone();
    fsConf
        .getTableInfo()
        .getProperties()
        .remove(org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_PARTITION_COLUMNS);

    FileSinkDesc newFSD =
        new FileSinkDesc(
            finalName, ts, parseCtx.getConf().getBoolVar(HiveConf.ConfVars.COMPRESSRESULT));
    FileSinkOperator newOutput =
        (FileSinkOperator) OperatorFactory.getAndMakeChild(newFSD, inputRS, extract);

    HiveConf conf = parseCtx.getConf();
    MapredWork cplan = createMergeTask(conf, tsMerge, fsConf);
    cplan.setReducer(extract);

    // NOTE: we should gather stats in MR1 (rather than the merge MR job)
    // since it is unknown if the merge MR will be triggered at execution time.

    // move work from the original FileSink directory to finalName
    MoveWork dummyMv =
        new MoveWork(
            null,
            null,
            null,
            new LoadFileDesc(fsConf.getDirName(), finalName, true, null, null),
            false);

    ConditionalTask cndTsk = createCondTask(conf, currTask, dummyMv, cplan, fsConf.getDirName());

    LinkMoveTask(ctx, newOutput, cndTsk);
  }
    /**
     * Column-pruning step for an operator with a single parent and a single relevant child.
     *
     * <p>If the child needs fewer columns than this operator's row schema provides, and the child
     * is not already a SelectOperator, a new SelectOperator projecting just the needed columns is
     * inserted between this operator and that child. In every case the operator's own pruned
     * column list is set to all internal column names of its parent's row schema.
     *
     * @param nd the operator being visited
     * @param stack traversal stack; not used here
     * @param ctx processor context; must be a ColumnPrunerProcCtx
     * @param nodeOutputs outputs of previously processed nodes; not used here
     * @return always null
     * @throws SemanticException propagated from the row resolver
     */
    @Override
    @SuppressWarnings("unchecked")
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs)
        throws SemanticException {

      ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx;
      Operator<? extends OperatorDesc> op = (Operator<? extends OperatorDesc>) nd;
      RowResolver inputRR = cppCtx.getParseContext().getOpParseCtx().get(op).getRowResolver();

      Operator<? extends OperatorDesc> child = op.getChildOperators().get(0);
      List<String> prunedCols = cppCtx.getPrunedColList(child);

      Operator<? extends OperatorDesc> parent = op.getParentOperators().get(0);
      RowResolver parentRR = cppCtx.getParseContext().getOpParseCtx().get(parent).getRowResolver();
      List<String> parentColNames = new ArrayList<String>();
      for (ColumnInfo parentCol : parentRR.getRowSchema().getSignature()) {
        parentColNames.add(parentCol.getInternalName());
      }

      boolean childNeedsFewerCols =
          prunedCols.size() != inputRR.getRowSchema().getSignature().size();
      if (childNeedsFewerCols && !(child instanceof SelectOperator)) {
        ArrayList<ExprNodeDesc> selectExprs = new ArrayList<ExprNodeDesc>();
        ArrayList<String> selectOutputs = new ArrayList<String>();
        Map<String, ExprNodeDesc> selectColExprMap = new HashMap<String, ExprNodeDesc>();
        RowResolver selectRR = new RowResolver();

        for (String internalName : prunedCols) {
          String[] qualified = inputRR.reverseLookup(internalName);
          String tabAlias = qualified[0];
          String colAlias = qualified[1];
          ColumnInfo source = inputRR.get(tabAlias, colAlias);

          ExprNodeDesc projection =
              new ExprNodeColumnDesc(
                  source.getType(), source.getInternalName(), tabAlias, source.getIsVirtualCol());
          selectExprs.add(projection);
          selectOutputs.add(internalName);

          ColumnInfo projected =
              new ColumnInfo(
                  internalName,
                  source.getType(),
                  tabAlias,
                  source.getIsVirtualCol(),
                  source.isHiddenVirtualCol());
          selectRR.put(tabAlias, colAlias, projected);
          selectColExprMap.put(internalName, projection);
        }
        SelectDesc selectDesc = new SelectDesc(selectExprs, selectOutputs, false);

        // Splice the new select in between this operator and its former first child.
        op.removeChild(child);
        SelectOperator sel =
            (SelectOperator)
                OperatorFactory.getAndMakeChild(
                    selectDesc, new RowSchema(selectRR.getColumnInfos()), op);
        OperatorFactory.makeChild(sel, child);

        OpParseContext selectParseCtx = new OpParseContext(selectRR);
        cppCtx.getParseContext().getOpParseCtx().put(sel, selectParseCtx);

        sel.setColumnExprMap(selectColExprMap);
      }

      cppCtx.getPrunedColLists().put(op, parentColNames);
      return null;
    }