Example #1
 // Check whether the same filter already exists in the chain of parent FilterOperators
 private boolean filterExists(ReduceSinkOperator target, ExprNodeDesc replaced) {
   Operator<?> operator = target.getParentOperators().get(0);
   for (; operator instanceof FilterOperator; operator = operator.getParentOperators().get(0)) {
     ExprNodeDesc predicate = ((FilterOperator) operator).getConf().getPredicate();
     if (ExprNodeDescUtils.containsPredicate(predicate, replaced)) {
       return true;
     }
   }
   return false;
 }
 // Remove the reduce sink operator
 // Use BucketizedHiveInputFormat so that one mapper processes exactly one file
 private void removeReduceSink(
     ReduceSinkOperator rsOp, TableScanOperator tsOp, FileSinkOperator fsOp) {
   Operator<? extends OperatorDesc> parRSOp = rsOp.getParentOperators().get(0);
   parRSOp.getChildOperators().set(0, fsOp);
   fsOp.getParentOperators().set(0, parRSOp);
   fsOp.getConf().setMultiFileSpray(false);
   fsOp.getConf().setTotalFiles(1);
   fsOp.getConf().setNumFiles(1);
   fsOp.getConf().setRemovedReduceSinkBucketSort(true);
   tsOp.setUseBucketizedHiveInputFormat(true);
 }
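
filterExists walks up the chain of parent FilterOperators and asks ExprNodeDescUtils.containsPredicate whether the pushed-down predicate is already present. A minimal sketch of that containment test follows, assuming conjunctions are AND function nodes recognizable via FunctionRegistry.isOpAnd and comparable with ExprNodeDesc.isSame; it is an illustration, not Hive's exact implementation.

  // Sketch: search a predicate tree for an equivalent sub-predicate.
  // An AND node contains the predicate if any of its conjuncts does.
  static boolean containsPredicateSketch(ExprNodeDesc source, ExprNodeDesc predicate) {
    if (source.isSame(predicate)) {
      return true; // exact structural match at this node
    }
    if (FunctionRegistry.isOpAnd(source)) {
      for (ExprNodeDesc child : source.getChildren()) {
        if (containsPredicateSketch(child, predicate)) {
          return true;
        }
      }
    }
    return false;
  }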
Example #3
  @Override
  public ParseContext transform(ParseContext pctx) throws SemanticException {
    pGraphContext = pctx;

    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
    opRules.put(
        new RuleRegExp(
            "R1",
            "("
                + FilterOperator.getOperatorName()
                + "%"
                + ReduceSinkOperator.getOperatorName()
                + "%"
                + JoinOperator.getOperatorName()
                + "%)"),
        new JoinTransitive());

    // The dispatcher fires the processor corresponding to the closest matching
    // rule and passes the context along
    TransitiveContext context = new TransitiveContext();
    Dispatcher disp = new DefaultRuleDispatcher(null, opRules, context);
    GraphWalker ogw = new LevelOrderWalker(disp, 2);

    // Create a list of top operator nodes
    List<Node> topNodes = new ArrayList<Node>();
    topNodes.addAll(pGraphContext.getTopOps().values());
    ogw.startWalking(topNodes, null);

    Map<ReduceSinkOperator, List<ExprNodeDesc>> newFilters = context.getNewfilters();

    // Insert the new filters between each RS and its parent
    for (Map.Entry<ReduceSinkOperator, List<ExprNodeDesc>> entry : newFilters.entrySet()) {
      ReduceSinkOperator reducer = entry.getKey();
      Operator<?> parent = reducer.getParentOperators().get(0);

      List<ExprNodeDesc> exprs = entry.getValue();
      if (parent instanceof FilterOperator) {
        exprs = ExprNodeDescUtils.split(((FilterOperator) parent).getConf().getPredicate(), exprs);
        ExprNodeDesc merged = ExprNodeDescUtils.mergePredicates(exprs);
        ((FilterOperator) parent).getConf().setPredicate(merged);
      } else {
        ExprNodeDesc merged = ExprNodeDescUtils.mergePredicates(exprs);
        RowSchema parentRS = parent.getSchema();
        // createFilter splices the new FilterOperator between parent and
        // reducer as a side effect, so the returned operator is not used here.
        Operator<FilterDesc> newFilter = createFilter(reducer, parent, parentRS, merged);
      }
    }

    return pGraphContext;
  }
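
In the else branch above, newFilter is never read: the helper is expected to splice the new FilterOperator into the operator tree as a side effect. A plausible sketch of such a createFilter helper follows, assuming OperatorFactory.get(desc, schema) plus the replaceChild/replaceParent methods on Operator; exact signatures vary across Hive versions (newer ones also take a CompilationOpContext).

  // Sketch: build a FilterOperator over the merged predicate and splice it
  // between parent and target. Signatures are assumptions, not Hive's exact API.
  private static Operator<FilterDesc> createFilter(Operator<?> target, Operator<?> parent,
      RowSchema parentRS, ExprNodeDesc mergedPredicate) {
    Operator<FilterDesc> filter = OperatorFactory.get(
        new FilterDesc(mergedPredicate, false), new RowSchema(parentRS.getSignature()));
    filter.getParentOperators().add(parent);
    filter.getChildOperators().add(target);
    // Re-point the existing edges so rows now flow through the new filter.
    parent.replaceChild(target, filter);
    target.replaceParent(parent, filter);
    return filter;
  }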
Example #4
  /**
   * Initialize the current plan by adding it to root tasks.
   *
   * @param op the reduce sink operator encountered
   * @param opProcCtx processing context
   */
  public static void initPlan(ReduceSinkOperator op, GenMRProcContext opProcCtx)
      throws SemanticException {
    Operator<? extends Serializable> reducer = op.getChildOperators().get(0);
    Map<Operator<? extends Serializable>, GenMapRedCtx> mapCurrCtx = opProcCtx.getMapCurrCtx();
    GenMapRedCtx mapredCtx = mapCurrCtx.get(op.getParentOperators().get(0));
    Task<? extends Serializable> currTask = mapredCtx.getCurrTask();
    MapredWork plan = (MapredWork) currTask.getWork();
    HashMap<Operator<? extends Serializable>, Task<? extends Serializable>> opTaskMap =
        opProcCtx.getOpTaskMap();
    Operator<? extends Serializable> currTopOp = opProcCtx.getCurrTopOp();

    opTaskMap.put(reducer, currTask);
    plan.setReducer(reducer);
    ReduceSinkDesc desc = op.getConf();

    plan.setNumReduceTasks(desc.getNumReducers());

    List<Task<? extends Serializable>> rootTasks = opProcCtx.getRootTasks();

    if (!rootTasks.contains(currTask)) {
      rootTasks.add(currTask);
    }
    if (reducer.getClass() == JoinOperator.class) {
      plan.setNeedsTagging(true);
    }

    assert currTopOp != null;
    List<Operator<? extends Serializable>> seenOps = opProcCtx.getSeenOps();
    String currAliasId = opProcCtx.getCurrAliasId();

    if (!seenOps.contains(currTopOp)) {
      seenOps.add(currTopOp);
      setTaskPlan(currAliasId, currTopOp, plan, false, opProcCtx);
    }

    // Clear the current top operator and alias; they have been folded into
    // this task's plan and should not be reused by later operators.
    currTopOp = null;
    currAliasId = null;

    opProcCtx.setCurrTask(currTask);
    opProcCtx.setCurrTopOp(currTopOp);
    opProcCtx.setCurrAliasId(currAliasId);
  }
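
initPlan is normally invoked from a rule processor the first time a walker reaches a ReduceSinkOperator whose reducer has no task yet. The sketch below shows that calling pattern, loosely modeled on Hive's GenMRRedSink1; the class name is hypothetical, and the real processor also handles the plan-splitting cases omitted here.

  // Hypothetical processor: start a new map-reduce plan for an unseen reducer.
  public class RedSinkProcessorSketch implements NodeProcessor {
    @Override
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
        Object... nodeOutputs) throws SemanticException {
      ReduceSinkOperator op = (ReduceSinkOperator) nd;
      GenMRProcContext ctx = (GenMRProcContext) procCtx;
      Operator<? extends Serializable> reducer = op.getChildOperators().get(0);
      // Only initialize a plan if no task owns this reducer yet; otherwise the
      // existing plan would have to be split or reused (not shown).
      if (ctx.getOpTaskMap().get(reducer) == null) {
        GenMapRedUtils.initPlan(op, ctx);
      }
      return null;
    }
  }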
Example #5
  /**
   * Initialize the current union plan.
   *
   * @param op the reduce sink operator encountered
   * @param opProcCtx processing context
   */
  public static void initUnionPlan(ReduceSinkOperator op, GenMRProcContext opProcCtx)
      throws SemanticException {
    Operator<? extends Serializable> reducer = op.getChildOperators().get(0);
    Map<Operator<? extends Serializable>, GenMapRedCtx> mapCurrCtx = opProcCtx.getMapCurrCtx();
    GenMapRedCtx mapredCtx = mapCurrCtx.get(op.getParentOperators().get(0));
    Task<? extends Serializable> currTask = mapredCtx.getCurrTask();
    MapredWork plan = (MapredWork) currTask.getWork();
    HashMap<Operator<? extends Serializable>, Task<? extends Serializable>> opTaskMap =
        opProcCtx.getOpTaskMap();

    opTaskMap.put(reducer, currTask);
    plan.setReducer(reducer);
    ReduceSinkDesc desc = op.getConf();

    plan.setNumReduceTasks(desc.getNumReducers());

    if (reducer.getClass() == JoinOperator.class) {
      plan.setNeedsTagging(true);
    }

    initUnionPlan(opProcCtx, currTask, false);
  }
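
initUnionPlan repeats the reducer-attachment steps of initPlan but skips the root-task and top-operator bookkeeping, delegating the map-side wiring to the three-argument overload. The shared steps could be read as a single helper; the sketch below is a hypothetical refactoring for clarity, not code that exists in Hive.

  // Hypothetical helper: the steps common to initPlan and initUnionPlan above.
  private static void attachReducer(ReduceSinkOperator op,
      Task<? extends Serializable> currTask, GenMRProcContext opProcCtx) {
    Operator<? extends Serializable> reducer = op.getChildOperators().get(0);
    MapredWork plan = (MapredWork) currTask.getWork();
    opProcCtx.getOpTaskMap().put(reducer, currTask);
    plan.setReducer(reducer);
    plan.setNumReduceTasks(op.getConf().getNumReducers());
    // A join reducer receives rows from several parents, so rows must be
    // tagged with their origin for the join to reassemble them.
    if (reducer.getClass() == JoinOperator.class) {
      plan.setNeedsTagging(true);
    }
  }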