Example #1
  /**
   * Get a list of aliases for non-hidden columns.
   *
   * @param tableAlias the table alias whose columns are requested; if it is not a known alias,
   *     all non-hidden columns of this resolver are considered
   * @param max the maximum number of column aliases to return; a non-positive value means no limit
   * @return a list of non-hidden column aliases no greater in size than max
   */
  public List<String> getReferenceableColumnAliases(String tableAlias, int max) {
    int count = 0;
    Set<String> columnNames = new LinkedHashSet<String>();

    int tables = rslvMap.size();

    // Fast path: the caller supplied a known table alias, so walk that table's column map directly.
    Map<String, ColumnInfo> mapping = rslvMap.get(tableAlias);
    if (mapping != null) {
      for (Map.Entry<String, ColumnInfo> entry : mapping.entrySet()) {
        if (max > 0 && count >= max) {
          break;
        }
        ColumnInfo columnInfo = entry.getValue();
        if (!columnInfo.isHiddenVirtualCol()) {
          columnNames.add(entry.getKey());
          count++;
        }
      }
    } else {
      // Unknown (or null) alias: fall back to scanning every column and reverse-mapping its name.
      for (ColumnInfo columnInfo : getColumnInfos()) {
        if (max > 0 && count >= max) {
          break;
        }
        if (!columnInfo.isHiddenVirtualCol()) {
          String[] inverse = !isExprResolver ? reverseLookup(columnInfo.getInternalName()) : null;
          if (inverse != null) {
            columnNames.add(
                inverse[0] == null || tables <= 1 ? inverse[1] : inverse[0] + "." + inverse[1]);
          } else {
            columnNames.add(columnInfo.getAlias());
          }
          count++;
        }
      }
    }
    return new ArrayList<String>(columnNames);
  }
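For orientation, here is a minimal, hypothetical usage sketch of getReferenceableColumnAliases. The table alias, column names, and types are invented, and it assumes Hive's RowResolver, ColumnInfo, and TypeInfoFactory APIs (hive-exec) are on the classpath:

import java.util.List;
import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.parse.RowResolver;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class ReferenceableAliasesSketch {
  public static void main(String[] args) throws Exception {
    // Hypothetical resolver with two visible columns registered under table alias "t".
    RowResolver rr = new RowResolver();
    rr.put("t", "id", new ColumnInfo("_col0", TypeInfoFactory.longTypeInfo, "t", false));
    rr.put("t", "name", new ColumnInfo("_col1", TypeInfoFactory.stringTypeInfo, "t", false));

    // Ask for at most 10 non-hidden aliases of table "t"; a non-positive max would mean "no limit".
    List<String> aliases = rr.getReferenceableColumnAliases("t", 10);
    System.out.println(aliases); // expected: [id, name]
  }
}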
Example #2
  private void createMapReduce4Merge(FileSinkOperator fsOp, GenMRProcContext ctx, String finalName)
      throws SemanticException {
    Task<? extends Serializable> currTask = ctx.getCurrTask();
    RowSchema inputRS = fsOp.getSchema();

    // create a reduce Sink operator - key is the first column
    ArrayList<ExprNodeDesc> keyCols = new ArrayList<ExprNodeDesc>();
    keyCols.add(TypeCheckProcFactory.DefaultExprProcessor.getFuncExprNodeDesc("rand"));

    // value is all the columns in the FileSink operator input
    ArrayList<ExprNodeDesc> valueCols = new ArrayList<ExprNodeDesc>();
    for (ColumnInfo ci : inputRS.getSignature()) {
      valueCols.add(
          new ExprNodeColumnDesc(
              ci.getType(), ci.getInternalName(), ci.getTabAlias(), ci.getIsVirtualCol()));
    }

    // create a dummy tableScan operator
    Operator<? extends Serializable> tsMerge = OperatorFactory.get(TableScanDesc.class, inputRS);

    ArrayList<String> outputColumns = new ArrayList<String>();
    for (int i = 0; i < valueCols.size(); i++) {
      outputColumns.add(SemanticAnalyzer.getColumnInternalName(i));
    }

    ReduceSinkDesc rsDesc =
        PlanUtils.getReduceSinkDesc(
            new ArrayList<ExprNodeDesc>(), valueCols, outputColumns, false, -1, -1, -1);
    OperatorFactory.getAndMakeChild(rsDesc, inputRS, tsMerge);
    ParseContext parseCtx = ctx.getParseCtx();
    FileSinkDesc fsConf = fsOp.getConf();

    // Add the extract operator to get the value fields
    RowResolver out_rwsch = new RowResolver();
    RowResolver interim_rwsch = ctx.getParseCtx().getOpParseCtx().get(fsOp).getRowResolver();
    Integer pos = Integer.valueOf(0);
    for (ColumnInfo colInfo : interim_rwsch.getColumnInfos()) {
      String[] info = interim_rwsch.reverseLookup(colInfo.getInternalName());
      out_rwsch.put(
          info[0],
          info[1],
          new ColumnInfo(
              pos.toString(),
              colInfo.getType(),
              info[0],
              colInfo.getIsVirtualCol(),
              colInfo.isHiddenVirtualCol()));
      pos = Integer.valueOf(pos.intValue() + 1);
    }

    Operator<ExtractDesc> extract =
        OperatorFactory.getAndMakeChild(
            new ExtractDesc(
                new ExprNodeColumnDesc(
                    TypeInfoFactory.stringTypeInfo,
                    Utilities.ReduceField.VALUE.toString(),
                    "",
                    false)),
            new RowSchema(out_rwsch.getColumnInfos()));

    TableDesc ts = (TableDesc) fsConf.getTableInfo().clone();
    fsConf
        .getTableInfo()
        .getProperties()
        .remove(org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_PARTITION_COLUMNS);

    // New FileSink that writes the merged result to finalName, honoring the compression setting.
    FileSinkDesc newFSD =
        new FileSinkDesc(
            finalName, ts, parseCtx.getConf().getBoolVar(HiveConf.ConfVars.COMPRESSRESULT));
    FileSinkOperator newOutput =
        (FileSinkOperator) OperatorFactory.getAndMakeChild(newFSD, inputRS, extract);

    HiveConf conf = parseCtx.getConf();
    MapredWork cplan = createMergeTask(conf, tsMerge, fsConf);
    cplan.setReducer(extract);

    // NOTE: we should gather stats in MR1 (rather than the merge MR job)
    // since it is unknown if the merge MR will be triggered at execution time.

    // Dummy MoveWork used only to build the conditional merge/move task below.
    MoveWork dummyMv =
        new MoveWork(
            null,
            null,
            null,
            new LoadFileDesc(fsConf.getDirName(), finalName, true, null, null),
            false);

    ConditionalTask cndTsk = createCondTask(conf, currTask, dummyMv, cplan, fsConf.getDirName());

    LinkMoveTask(ctx, newOutput, cndTsk);
  }
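As a side note, the merge FileSinkDesc above takes its compression setting from HiveConf.ConfVars.COMPRESSRESULT. A tiny, hypothetical sketch of reading that same flag (class name invented):

import org.apache.hadoop.hive.conf.HiveConf;

public class CompressResultSketch {
  public static void main(String[] args) {
    HiveConf conf = new HiveConf();
    // COMPRESSRESULT corresponds to hive.exec.compress.output.
    boolean compress = conf.getBoolVar(HiveConf.ConfVars.COMPRESSRESULT);
    System.out.println("compress merged output: " + compress);
  }
}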
    @Override
    @SuppressWarnings("unchecked")
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs)
        throws SemanticException {

      ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx;
      Operator<? extends OperatorDesc> op = (Operator<? extends OperatorDesc>) nd;
      RowResolver inputRR = cppCtx.getParseContext().getOpParseCtx().get(op).getRowResolver();

      // Columns the (single) child actually needs, versus everything the parent produces.
      List<String> prunedCols = cppCtx.getPrunedColList(op.getChildOperators().get(0));
      Operator<? extends OperatorDesc> parent = op.getParentOperators().get(0);
      RowResolver parentRR = cppCtx.getParseContext().getOpParseCtx().get(parent).getRowResolver();
      List<ColumnInfo> sig = parentRR.getRowSchema().getSignature();
      List<String> colList = new ArrayList<String>();
      for (ColumnInfo cI : sig) {
        colList.add(cI.getInternalName());
      }

      // If the child needs fewer columns than this operator emits and is not already a
      // SelectOperator, interpose a pruning SELECT that forwards only the pruned columns.
      if (prunedCols.size() != inputRR.getRowSchema().getSignature().size()
          && !(op.getChildOperators().get(0) instanceof SelectOperator)) {
        ArrayList<ExprNodeDesc> exprs = new ArrayList<ExprNodeDesc>();
        ArrayList<String> outputs = new ArrayList<String>();
        Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
        RowResolver outputRS = new RowResolver();
        for (String internalName : prunedCols) {
          String[] nm = inputRR.reverseLookup(internalName);
          ColumnInfo valueInfo = inputRR.get(nm[0], nm[1]);
          ExprNodeDesc colDesc =
              new ExprNodeColumnDesc(
                  valueInfo.getType(),
                  valueInfo.getInternalName(),
                  nm[0],
                  valueInfo.getIsVirtualCol());
          exprs.add(colDesc);
          outputs.add(internalName);
          outputRS.put(
              nm[0],
              nm[1],
              new ColumnInfo(
                  internalName,
                  valueInfo.getType(),
                  nm[0],
                  valueInfo.getIsVirtualCol(),
                  valueInfo.isHiddenVirtualCol()));
          colExprMap.put(internalName, colDesc);
        }
        SelectDesc select = new SelectDesc(exprs, outputs, false);

        // Splice the new SELECT between op and its original child.
        Operator<? extends OperatorDesc> child = op.getChildOperators().get(0);
        op.removeChild(child);
        SelectOperator sel =
            (SelectOperator)
                OperatorFactory.getAndMakeChild(
                    select, new RowSchema(outputRS.getColumnInfos()), op);
        OperatorFactory.makeChild(sel, child);

        OpParseContext parseCtx = new OpParseContext(outputRS);
        cppCtx.getParseContext().getOpParseCtx().put(sel, parseCtx);

        sel.setColumnExprMap(colExprMap);
      }

      cppCtx.getPrunedColLists().put(op, colList);
      return null;
    }
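Finally, the pruning branch above relies on RowResolver.reverseLookup to map an internal name back to a {table alias, column alias} pair before re-registering the column. A minimal, hypothetical round-trip sketch (aliases and internal names invented):

import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.parse.RowResolver;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class ReverseLookupSketch {
  public static void main(String[] args) throws Exception {
    RowResolver inputRR = new RowResolver();
    inputRR.put("src", "key",
        new ColumnInfo("_col0", TypeInfoFactory.stringTypeInfo, "src", false));

    // reverseLookup maps the internal name back to {table alias, column alias}.
    String[] nm = inputRR.reverseLookup("_col0");
    System.out.println(nm[0] + "." + nm[1]); // expected: src.key

    // Re-register the surviving column in a fresh resolver, as the pruner does.
    RowResolver outputRS = new RowResolver();
    outputRS.put(nm[0], nm[1],
        new ColumnInfo("_col0", TypeInfoFactory.stringTypeInfo, nm[0], false, false));
  }
}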