Esempio n. 1
0
  /**
   * Build ExprNodeColumnDesc for the projections in the input operator from sartpos to endpos(both
   * included). Operator must have an associated colExprMap.
   *
   * @param inputOp Input Hive Operator
   * @param startPos starting position in the input operator schema; must be >=0 and <= endPos
   * @param endPos end position in the input operator schema; must be >=0.
   * @return List of ExprNodeDesc
   */
  public static ArrayList<ExprNodeDesc> genExprNodeDesc(
      Operator inputOp,
      int startPos,
      int endPos,
      boolean addEmptyTabAlias,
      boolean setColToNonVirtual) {
    ArrayList<ExprNodeDesc> exprColLst = new ArrayList<ExprNodeDesc>();
    List<ColumnInfo> colInfoLst = inputOp.getSchema().getSignature();

    String tabAlias;
    boolean vc;
    ColumnInfo ci;
    for (int i = startPos; i <= endPos; i++) {
      ci = colInfoLst.get(i);
      tabAlias = ci.getTabAlias();
      if (addEmptyTabAlias) {
        tabAlias = "";
      }
      vc = ci.getIsVirtualCol();
      if (setColToNonVirtual) {
        vc = false;
      }
      exprColLst.add(new ExprNodeColumnDesc(ci.getType(), ci.getInternalName(), tabAlias, vc));
    }

    return exprColLst;
  }
  public static void setupNeededColumns(
      TableScanOperator scanOp, RowResolver inputRR, List<String> cols) throws SemanticException {
    List<Integer> neededColumnIds = new ArrayList<Integer>();
    List<String> neededColumnNames = new ArrayList<String>();
    List<String> referencedColumnNames = new ArrayList<String>();
    TableScanDesc desc = scanOp.getConf();
    List<VirtualColumn> virtualCols = desc.getVirtualCols();
    List<VirtualColumn> newVirtualCols = new ArrayList<VirtualColumn>();

    // add virtual columns for ANALYZE TABLE
    if (scanOp.getConf().isGatherStats()) {
      cols.add(VirtualColumn.RAWDATASIZE.getName());
    }

    for (String column : cols) {
      String[] tabCol = inputRR.reverseLookup(column);
      if (tabCol == null) {
        continue;
      }
      referencedColumnNames.add(column);
      ColumnInfo colInfo = inputRR.get(tabCol[0], tabCol[1]);
      if (colInfo.getIsVirtualCol()) {
        // part is also a virtual column, but part col should not in this
        // list.
        for (int j = 0; j < virtualCols.size(); j++) {
          VirtualColumn vc = virtualCols.get(j);
          if (vc.getName().equals(colInfo.getInternalName())) {
            newVirtualCols.add(vc);
          }
        }
        // no need to pass virtual columns to reader.
        continue;
      }
      int position = inputRR.getPosition(column);
      if (position >= 0) {
        // get the needed columns by id and name
        neededColumnIds.add(position);
        neededColumnNames.add(column);
      }
    }

    desc.setVirtualCols(newVirtualCols);
    scanOp.setNeededColumnIDs(neededColumnIds);
    scanOp.setNeededColumns(neededColumnNames);
    scanOp.setReferencedColumns(referencedColumnNames);
  }
    @Override
    protected ExprNodeColumnDesc processQualifiedColRef(
        TypeCheckCtx ctx, ASTNode expr, Object... nodeOutputs) throws SemanticException {
      String tableAlias =
          BaseSemanticAnalyzer.unescapeIdentifier(expr.getChild(0).getChild(0).getText());
      // NOTE: tableAlias must be a valid non-ambiguous table alias,
      // because we've checked that in TOK_TABLE_OR_COL's process method.
      ColumnInfo colInfo =
          getColInfo(
              (JoinTypeCheckCtx) ctx,
              tableAlias,
              ((ExprNodeConstantDesc) nodeOutputs[1]).getValue().toString(),
              expr);

      if (colInfo == null) {
        ctx.setError(ErrorMsg.INVALID_COLUMN.getMsg(expr.getChild(1)), expr);
        return null;
      }
      return new ExprNodeColumnDesc(
          colInfo.getType(), colInfo.getInternalName(), tableAlias, colInfo.getIsVirtualCol());
    }
Esempio n. 4
0
  private void createMapReduce4Merge(FileSinkOperator fsOp, GenMRProcContext ctx, String finalName)
      throws SemanticException {
    Task<? extends Serializable> currTask = ctx.getCurrTask();
    RowSchema inputRS = fsOp.getSchema();

    // create a reduce Sink operator - key is the first column
    ArrayList<ExprNodeDesc> keyCols = new ArrayList<ExprNodeDesc>();
    keyCols.add(TypeCheckProcFactory.DefaultExprProcessor.getFuncExprNodeDesc("rand"));

    // value is all the columns in the FileSink operator input
    ArrayList<ExprNodeDesc> valueCols = new ArrayList<ExprNodeDesc>();
    for (ColumnInfo ci : inputRS.getSignature()) {
      valueCols.add(
          new ExprNodeColumnDesc(
              ci.getType(), ci.getInternalName(), ci.getTabAlias(), ci.getIsVirtualCol()));
    }

    // create a dummy tableScan operator
    Operator<? extends Serializable> tsMerge = OperatorFactory.get(TableScanDesc.class, inputRS);

    ArrayList<String> outputColumns = new ArrayList<String>();
    for (int i = 0; i < valueCols.size(); i++) {
      outputColumns.add(SemanticAnalyzer.getColumnInternalName(i));
    }

    ReduceSinkDesc rsDesc =
        PlanUtils.getReduceSinkDesc(
            new ArrayList<ExprNodeDesc>(), valueCols, outputColumns, false, -1, -1, -1);
    OperatorFactory.getAndMakeChild(rsDesc, inputRS, tsMerge);
    ParseContext parseCtx = ctx.getParseCtx();
    FileSinkDesc fsConf = fsOp.getConf();

    // Add the extract operator to get the value fields
    RowResolver out_rwsch = new RowResolver();
    RowResolver interim_rwsch = ctx.getParseCtx().getOpParseCtx().get(fsOp).getRowResolver();
    Integer pos = Integer.valueOf(0);
    for (ColumnInfo colInfo : interim_rwsch.getColumnInfos()) {
      String[] info = interim_rwsch.reverseLookup(colInfo.getInternalName());
      out_rwsch.put(
          info[0],
          info[1],
          new ColumnInfo(
              pos.toString(),
              colInfo.getType(),
              info[0],
              colInfo.getIsVirtualCol(),
              colInfo.isHiddenVirtualCol()));
      pos = Integer.valueOf(pos.intValue() + 1);
    }

    Operator<ExtractDesc> extract =
        OperatorFactory.getAndMakeChild(
            new ExtractDesc(
                new ExprNodeColumnDesc(
                    TypeInfoFactory.stringTypeInfo,
                    Utilities.ReduceField.VALUE.toString(),
                    "",
                    false)),
            new RowSchema(out_rwsch.getColumnInfos()));

    TableDesc ts = (TableDesc) fsConf.getTableInfo().clone();
    fsConf
        .getTableInfo()
        .getProperties()
        .remove(org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_PARTITION_COLUMNS);

    FileSinkDesc newFSD =
        new FileSinkDesc(
            finalName, ts, parseCtx.getConf().getBoolVar(HiveConf.ConfVars.COMPRESSRESULT));
    FileSinkOperator newOutput =
        (FileSinkOperator) OperatorFactory.getAndMakeChild(newFSD, inputRS, extract);

    HiveConf conf = parseCtx.getConf();
    MapredWork cplan = createMergeTask(conf, tsMerge, fsConf);
    cplan.setReducer(extract);

    // NOTE: we should gather stats in MR1 (rather than the merge MR job)
    // since it is unknown if the merge MR will be triggered at execution time.

    MoveWork dummyMv =
        new MoveWork(
            null,
            null,
            null,
            new LoadFileDesc(fsConf.getDirName(), finalName, true, null, null),
            false);

    ConditionalTask cndTsk = createCondTask(conf, currTask, dummyMv, cplan, fsConf.getDirName());

    LinkMoveTask(ctx, newOutput, cndTsk);
  }
    @Override
    @SuppressWarnings("unchecked")
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs)
        throws SemanticException {

      ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx;
      Operator<? extends OperatorDesc> op = (Operator<? extends OperatorDesc>) nd;
      RowResolver inputRR = cppCtx.getParseContext().getOpParseCtx().get(op).getRowResolver();

      List<String> prunedCols = cppCtx.getPrunedColList(op.getChildOperators().get(0));
      Operator<? extends OperatorDesc> parent = op.getParentOperators().get(0);
      RowResolver parentRR = cppCtx.getParseContext().getOpParseCtx().get(parent).getRowResolver();
      List<ColumnInfo> sig = parentRR.getRowSchema().getSignature();
      List<String> colList = new ArrayList<String>();
      for (ColumnInfo cI : sig) {
        colList.add(cI.getInternalName());
      }

      if (prunedCols.size() != inputRR.getRowSchema().getSignature().size()
          && !(op.getChildOperators().get(0) instanceof SelectOperator)) {
        ArrayList<ExprNodeDesc> exprs = new ArrayList<ExprNodeDesc>();
        ArrayList<String> outputs = new ArrayList<String>();
        Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
        RowResolver outputRS = new RowResolver();
        for (String internalName : prunedCols) {
          String[] nm = inputRR.reverseLookup(internalName);
          ColumnInfo valueInfo = inputRR.get(nm[0], nm[1]);
          ExprNodeDesc colDesc =
              new ExprNodeColumnDesc(
                  valueInfo.getType(),
                  valueInfo.getInternalName(),
                  nm[0],
                  valueInfo.getIsVirtualCol());
          exprs.add(colDesc);
          outputs.add(internalName);
          outputRS.put(
              nm[0],
              nm[1],
              new ColumnInfo(
                  internalName,
                  valueInfo.getType(),
                  nm[0],
                  valueInfo.getIsVirtualCol(),
                  valueInfo.isHiddenVirtualCol()));
          colExprMap.put(internalName, colDesc);
        }
        SelectDesc select = new SelectDesc(exprs, outputs, false);

        Operator<? extends OperatorDesc> child = op.getChildOperators().get(0);
        op.removeChild(child);
        SelectOperator sel =
            (SelectOperator)
                OperatorFactory.getAndMakeChild(
                    select, new RowSchema(outputRS.getColumnInfos()), op);
        OperatorFactory.makeChild(sel, child);

        OpParseContext parseCtx = new OpParseContext(outputRS);
        cppCtx.getParseContext().getOpParseCtx().put(sel, parseCtx);

        sel.setColumnExprMap(colExprMap);
      }

      cppCtx.getPrunedColLists().put(op, colList);
      return null;
    }