// Remove RS and SEL introduced by enforce bucketing/sorting config // Convert PARENT -> RS -> SEL -> FS to PARENT -> FS private boolean removeRSInsertedByEnforceBucketing(FileSinkOperator fsOp) { Set<ReduceSinkOperator> reduceSinks = OperatorUtils.findOperatorsUpstream(fsOp, ReduceSinkOperator.class); Operator<? extends OperatorDesc> rsToRemove = null; List<ReduceSinkOperator> rsOps = parseCtx.getReduceSinkOperatorsAddedByEnforceBucketingSorting(); boolean found = false; // iterate through all RS and locate the one introduce by enforce bucketing for (ReduceSinkOperator reduceSink : reduceSinks) { for (ReduceSinkOperator rsOp : rsOps) { if (reduceSink.equals(rsOp)) { rsToRemove = reduceSink; found = true; break; } } if (found) { break; } } // iF RS is found remove it and its child (EX) and connect its parent // and grand child if (found) { Operator<? extends OperatorDesc> rsParent = rsToRemove.getParentOperators().get(0); Operator<? extends OperatorDesc> rsChild = rsToRemove.getChildOperators().get(0); Operator<? extends OperatorDesc> rsGrandChild = rsChild.getChildOperators().get(0); if (rsChild instanceof SelectOperator) { // if schema size cannot be matched, then it could be because of constant folding // converting partition column expression to constant expression. The constant // expression will then get pruned by column pruner since it will not reference to // any columns. if (rsParent.getSchema().getSignature().size() != rsChild.getSchema().getSignature().size()) { return false; } rsParent.getChildOperators().clear(); rsParent.getChildOperators().add(rsGrandChild); rsGrandChild.getParentOperators().clear(); rsGrandChild.getParentOperators().add(rsParent); LOG.info( "Removed " + rsToRemove.getOperatorId() + " and " + rsChild.getOperatorId() + " as it was introduced by enforce bucketing/sorting."); } } return true; }