private ILogicalExpression makeCondition( List<Mutable<ILogicalExpression>> predList, IOptimizationContext context) { if (predList.size() > 1) { IFunctionInfo finfo = context.getMetadataProvider().lookupFunction(AlgebricksBuiltinFunctions.AND); return new ScalarFunctionCallExpression(finfo, predList); } else { return predList.get(0).getValue(); } }
private int removeFromAssigns( AbstractLogicalOperator op, Set<LogicalVariable> toRemove, IOptimizationContext context) throws AlgebricksException { switch (op.getOperatorTag()) { case ASSIGN: { AssignOperator assign = (AssignOperator) op; if (removeUnusedVarsAndExprs(toRemove, assign.getVariables(), assign.getExpressions())) { context.computeAndSetTypeEnvironmentForOperator(assign); } return assign.getVariables().size(); } case AGGREGATE: { AggregateOperator agg = (AggregateOperator) op; if (removeUnusedVarsAndExprs(toRemove, agg.getVariables(), agg.getExpressions())) { context.computeAndSetTypeEnvironmentForOperator(agg); } return agg.getVariables().size(); } case UNNEST: { UnnestOperator uOp = (UnnestOperator) op; LogicalVariable pVar = uOp.getPositionalVariable(); if (pVar != null && toRemove.contains(pVar)) { uOp.setPositionalVariable(null); } break; } case UNIONALL: { UnionAllOperator unionOp = (UnionAllOperator) op; if (removeUnusedVarsFromUnionAll(unionOp, toRemove)) { context.computeAndSetTypeEnvironmentForOperator(unionOp); } return unionOp.getVariableMappings().size(); } } return -1; }
@Override public boolean rewritePre(Mutable<ILogicalOperator> opRef, IOptimizationContext context) throws AlgebricksException { if (context.checkIfInDontApplySet(this, opRef.getValue())) { return false; } Set<LogicalVariable> toRemove = new HashSet<LogicalVariable>(); collectUnusedAssignedVars((AbstractLogicalOperator) opRef.getValue(), toRemove, true, context); boolean smthToRemove = !toRemove.isEmpty(); if (smthToRemove) { removeUnusedAssigns(opRef, toRemove, context); } return !toRemove.isEmpty(); }
@Override public boolean rewritePost(Mutable<ILogicalOperator> opRef, IOptimizationContext context) { AbstractLogicalOperator op = (AbstractLogicalOperator) opRef.getValue(); if (op.getOperatorTag() != LogicalOperatorTag.GROUP) { return false; } if (context.checkIfInDontApplySet(this, op)) { return false; } vars.clear(); boolean modified = false; GroupByOperator groupOp = (GroupByOperator) op; Iterator<Pair<LogicalVariable, Mutable<ILogicalExpression>>> iter = groupOp.getDecorList().iterator(); while (iter.hasNext()) { Pair<LogicalVariable, Mutable<ILogicalExpression>> decor = iter.next(); if (decor.first != null || decor.second.getValue().getExpressionTag() != LogicalExpressionTag.VARIABLE) { continue; } VariableReferenceExpression varRefExpr = (VariableReferenceExpression) decor.second.getValue(); LogicalVariable var = varRefExpr.getVariableReference(); if (vars.contains(var)) { iter.remove(); modified = true; } else { vars.add(var); } } if (modified) { context.addToDontApplySet(this, op); } return modified; }
@Override public boolean rewritePost(Mutable<ILogicalOperator> opRef, IOptimizationContext context) throws AlgebricksException { AbstractLogicalOperator op = (AbstractLogicalOperator) opRef.getValue(); if (op.getOperatorTag() != LogicalOperatorTag.INNERJOIN) { return false; } AbstractBinaryJoinOperator join = (AbstractBinaryJoinOperator) op; ILogicalExpression expr = join.getCondition().getValue(); if (expr.getExpressionTag() != LogicalExpressionTag.FUNCTION_CALL) { return false; } AbstractFunctionCallExpression fexp = (AbstractFunctionCallExpression) expr; FunctionIdentifier fi = fexp.getFunctionIdentifier(); if (!fi.equals(AlgebricksBuiltinFunctions.AND)) { return false; } List<Mutable<ILogicalExpression>> eqVarVarComps = new ArrayList<Mutable<ILogicalExpression>>(); List<Mutable<ILogicalExpression>> otherPredicates = new ArrayList<Mutable<ILogicalExpression>>(); for (Mutable<ILogicalExpression> arg : fexp.getArguments()) { if (isEqVarVar(arg.getValue())) { eqVarVarComps.add(arg); } else { otherPredicates.add(arg); } } if (eqVarVarComps.isEmpty() || otherPredicates.isEmpty()) { return false; } // pull up ILogicalExpression pulledCond = makeCondition(otherPredicates, context); SelectOperator select = new SelectOperator(new MutableObject<ILogicalExpression>(pulledCond), false, null); ILogicalExpression newJoinCond = makeCondition(eqVarVarComps, context); join.getCondition().setValue(newJoinCond); select.getInputs().add(new MutableObject<ILogicalOperator>(join)); opRef.setValue(select); context.computeAndSetTypeEnvironmentForOperator(select); return true; }
private void collectUnusedAssignedVars( AbstractLogicalOperator op, Set<LogicalVariable> toRemove, boolean first, IOptimizationContext context) throws AlgebricksException { if (!first) { context.addToDontApplySet(this, op); } for (Mutable<ILogicalOperator> c : op.getInputs()) { collectUnusedAssignedVars((AbstractLogicalOperator) c.getValue(), toRemove, false, context); } if (op.hasNestedPlans()) { AbstractOperatorWithNestedPlans opWithNested = (AbstractOperatorWithNestedPlans) op; for (ILogicalPlan plan : opWithNested.getNestedPlans()) { for (Mutable<ILogicalOperator> r : plan.getRoots()) { collectUnusedAssignedVars( (AbstractLogicalOperator) r.getValue(), toRemove, false, context); } } } boolean removeUsedVars = true; switch (op.getOperatorTag()) { case ASSIGN: { AssignOperator assign = (AssignOperator) op; toRemove.addAll(assign.getVariables()); break; } case AGGREGATE: { AggregateOperator agg = (AggregateOperator) op; toRemove.addAll(agg.getVariables()); break; } case UNNEST: { UnnestOperator uOp = (UnnestOperator) op; LogicalVariable pVar = uOp.getPositionalVariable(); if (pVar != null) { toRemove.add(pVar); } break; } case UNIONALL: { UnionAllOperator unionOp = (UnionAllOperator) op; for (Triple<LogicalVariable, LogicalVariable, LogicalVariable> varMapping : unionOp.getVariableMappings()) { toRemove.add(varMapping.third); } removeUsedVars = false; break; } } if (removeUsedVars) { List<LogicalVariable> used = new LinkedList<LogicalVariable>(); VariableUtilities.getUsedVariables(op, used); toRemove.removeAll(used); } }
@Override public boolean rewritePost(Mutable<ILogicalOperator> opRef, IOptimizationContext context) throws AlgebricksException { AbstractLogicalOperator unnest = (AbstractLogicalOperator) opRef.getValue(); if (unnest.getOperatorTag() != LogicalOperatorTag.UNNEST) { return false; } UnnestOperator unnestOpRef = (UnnestOperator) opRef.getValue(); Mutable<ILogicalOperator> unionOp = unnest.getInputs().get(0); AbstractLogicalOperator unionAbstractOp = (AbstractLogicalOperator) unionOp.getValue(); if (unionAbstractOp.getOperatorTag() != LogicalOperatorTag.UNIONALL) { return false; } LogicalVariable unnestVar1 = context.newVar(); UnnestOperator unnest1 = new UnnestOperator( unnestVar1, new MutableObject<ILogicalExpression>( unnestOpRef.getExpressionRef().getValue().cloneExpression())); LogicalVariable unnestVar2 = context.newVar(); UnnestOperator unnest2 = new UnnestOperator( unnestVar2, new MutableObject<ILogicalExpression>( unnestOpRef.getExpressionRef().getValue().cloneExpression())); // Getting the two topmost branched and adding them as an input to the unnests: Mutable<ILogicalOperator> branch1 = unionAbstractOp.getInputs().get(0); ILogicalOperator agg1 = branch1.getValue(); List<LogicalVariable> agg1_var = new ArrayList<LogicalVariable>(); VariableUtilities.getLiveVariables(agg1, agg1_var); Mutable<ILogicalOperator> branch2 = unionAbstractOp.getInputs().get(1); ILogicalOperator agg2 = branch2.getValue(); List<LogicalVariable> agg2_var = new ArrayList<LogicalVariable>(); VariableUtilities.getLiveVariables(agg2, agg2_var); // Modifying the unnest so it has the right variable List<LogicalVariable> var_unnest_1 = new ArrayList<LogicalVariable>(); unnest1.getExpressionRef().getValue().getUsedVariables(var_unnest_1); unnest1.getExpressionRef().getValue().substituteVar(var_unnest_1.get(0), agg1_var.get(0)); List<LogicalVariable> var_unnest2 = new ArrayList<LogicalVariable>(); unnest2.getExpressionRef().getValue().getUsedVariables(var_unnest2); unnest2.getExpressionRef().getValue().substituteVar(var_unnest2.get(0), agg2_var.get(0)); unnest1.getInputs().add(branch1); unnest2.getInputs().add(branch2); context.computeAndSetTypeEnvironmentForOperator(unnest1); context.computeAndSetTypeEnvironmentForOperator(unnest2); // creating a new union operator with the updated logical variables List<Triple<LogicalVariable, LogicalVariable, LogicalVariable>> varMap = new ArrayList<Triple<LogicalVariable, LogicalVariable, LogicalVariable>>(1); Triple<LogicalVariable, LogicalVariable, LogicalVariable> union_triple_vars = new Triple<LogicalVariable, LogicalVariable, LogicalVariable>( unnestVar1, unnestVar2, unnestOpRef.getVariables().get(0)); varMap.add(union_triple_vars); UnionAllOperator unionOpFinal = new UnionAllOperator(varMap); unionOpFinal.getInputs().add(new MutableObject<ILogicalOperator>(unnest1)); unionOpFinal.getInputs().add(new MutableObject<ILogicalOperator>(unnest2)); context.computeAndSetTypeEnvironmentForOperator(unionOpFinal); opRef.setValue(unionOpFinal); return true; }
@Override public boolean rewritePost(Mutable<ILogicalOperator> opRef, IOptimizationContext context) throws AlgebricksException { AbstractLogicalOperator op = (AbstractLogicalOperator) opRef.getValue(); // current opperator is join if (op.getOperatorTag() != LogicalOperatorTag.INNERJOIN && op.getOperatorTag() != LogicalOperatorTag.LEFTOUTERJOIN) { return false; } // Find GET_ITEM function. AbstractBinaryJoinOperator joinOp = (AbstractBinaryJoinOperator) op; Mutable<ILogicalExpression> expRef = joinOp.getCondition(); Mutable<ILogicalExpression> getItemExprRef = getSimilarityExpression(expRef); if (getItemExprRef == null) { return false; } // Check if the GET_ITEM function is on one of the supported similarity-check functions. AbstractFunctionCallExpression getItemFuncExpr = (AbstractFunctionCallExpression) getItemExprRef.getValue(); Mutable<ILogicalExpression> argRef = getItemFuncExpr.getArguments().get(0); AbstractFunctionCallExpression simFuncExpr = (AbstractFunctionCallExpression) argRef.getValue(); if (!simFuncs.contains(simFuncExpr.getFunctionIdentifier())) { return false; } // Skip this rule based on annotations. if (simFuncExpr.getAnnotations().containsKey(IndexedNLJoinExpressionAnnotation.INSTANCE)) { return false; } List<Mutable<ILogicalOperator>> inputOps = joinOp.getInputs(); ILogicalOperator leftInputOp = inputOps.get(0).getValue(); ILogicalOperator rightInputOp = inputOps.get(1).getValue(); List<Mutable<ILogicalExpression>> inputExps = simFuncExpr.getArguments(); ILogicalExpression inputExp0 = inputExps.get(0).getValue(); ILogicalExpression inputExp1 = inputExps.get(1).getValue(); // left and right expressions are variables if (inputExp0.getExpressionTag() != LogicalExpressionTag.VARIABLE || inputExp1.getExpressionTag() != LogicalExpressionTag.VARIABLE) { return false; } LogicalVariable inputVar0 = ((VariableReferenceExpression) inputExp0).getVariableReference(); LogicalVariable inputVar1 = ((VariableReferenceExpression) inputExp1).getVariableReference(); LogicalVariable leftInputVar; LogicalVariable rightInputVar; liveVars.clear(); VariableUtilities.getLiveVariables(leftInputOp, liveVars); if (liveVars.contains(inputVar0)) { leftInputVar = inputVar0; rightInputVar = inputVar1; } else { leftInputVar = inputVar1; rightInputVar = inputVar0; } List<LogicalVariable> leftInputPKs = context.findPrimaryKey(leftInputVar); List<LogicalVariable> rightInputPKs = context.findPrimaryKey(rightInputVar); // Bail if primary keys could not be inferred. if (leftInputPKs == null || rightInputPKs == null) { return false; } // primary key has only one variable if (leftInputPKs.size() != 1 || rightInputPKs.size() != 1) { return false; } IAType leftType = (IAType) context.getOutputTypeEnvironment(leftInputOp).getVarType(leftInputVar); IAType rightType = (IAType) context.getOutputTypeEnvironment(rightInputOp).getVarType(rightInputVar); // left-hand side and right-hand side of "~=" has the same type IAType left2 = TypeHelper.getNonOptionalType(leftType); IAType right2 = TypeHelper.getNonOptionalType(rightType); if (!left2.deepEqual(right2)) { return false; } // // -- - FIRE - -- // AqlMetadataProvider metadataProvider = ((AqlMetadataProvider) context.getMetadataProvider()); FunctionIdentifier funcId = FuzzyUtils.getTokenizer(leftType.getTypeTag()); String tokenizer; if (funcId == null) { tokenizer = ""; } else { tokenizer = funcId.getName(); } float simThreshold = FuzzyUtils.getSimThreshold(metadataProvider); String simFunction = FuzzyUtils.getSimFunction(metadataProvider); // finalize AQL+ query String prepareJoin; switch (joinOp.getJoinKind()) { case INNER: { prepareJoin = "join" + AQLPLUS; break; } case LEFT_OUTER: { // TODO To make it work for Left Outer Joins, we should permute // the #LEFT and #RIGHT at the top of the AQL+ query. But, when // doing this, the // fuzzyjoin/user-vis-int-vis-user-lot-aqlplus_1.aql (the one // doing 3-way fuzzy joins) gives a different result. But even // if we don't change the FuzzyJoinRule, permuting the for // clauses in fuzzyjoin/user-vis-int-vis-user-lot-aqlplus_1.aql // leads to different results, which suggests there is some // other sort of bug. return false; // prepareJoin = "loj" + AQLPLUS; // break; } default: { throw new IllegalStateException(); } } String aqlPlus = String.format( Locale.US, prepareJoin, tokenizer, tokenizer, simFunction, simThreshold, tokenizer, tokenizer, simFunction, simThreshold, simFunction, simThreshold, simThreshold); LogicalVariable leftPKVar = leftInputPKs.get(0); LogicalVariable rightPKVar = rightInputPKs.get(0); Counter counter = new Counter(context.getVarCounter()); AQLPlusParser parser = new AQLPlusParser(new StringReader(aqlPlus)); parser.initScope(); parser.setVarCounter(counter); List<Clause> clauses; try { clauses = parser.Clauses(); } catch (ParseException e) { throw new AlgebricksException(e); } // The translator will compile metadata internally. Run this compilation // under the same transaction id as the "outer" compilation. AqlPlusExpressionToPlanTranslator translator = new AqlPlusExpressionToPlanTranslator( metadataProvider.getJobId(), metadataProvider, counter, null, null); context.setVarCounter(counter.get()); LogicalOperatorDeepCopyWithNewVariablesVisitor deepCopyVisitor = new LogicalOperatorDeepCopyWithNewVariablesVisitor(context); translator.addOperatorToMetaScope(new Identifier("#LEFT"), leftInputOp); translator.addVariableToMetaScope(new Identifier("$$LEFT"), leftInputVar); translator.addVariableToMetaScope(new Identifier("$$LEFTPK"), leftPKVar); translator.addOperatorToMetaScope(new Identifier("#RIGHT"), rightInputOp); translator.addVariableToMetaScope(new Identifier("$$RIGHT"), rightInputVar); translator.addVariableToMetaScope(new Identifier("$$RIGHTPK"), rightPKVar); translator.addOperatorToMetaScope( new Identifier("#LEFT_1"), deepCopyVisitor.deepCopy(leftInputOp, null)); translator.addVariableToMetaScope( new Identifier("$$LEFT_1"), deepCopyVisitor.varCopy(leftInputVar)); translator.addVariableToMetaScope( new Identifier("$$LEFTPK_1"), deepCopyVisitor.varCopy(leftPKVar)); deepCopyVisitor.updatePrimaryKeys(context); deepCopyVisitor.reset(); // translator.addOperatorToMetaScope(new Identifier("#LEFT_2"), // deepCopyVisitor.deepCopy(leftInputOp, null)); // translator.addVariableToMetaScope(new Identifier("$$LEFT_2"), // deepCopyVisitor.varCopy(leftInputVar)); // translator.addVariableToMetaScope(new Identifier("$$LEFTPK_2"), // deepCopyVisitor.varCopy(leftPKVar)); // deepCopyVisitor.updatePrimaryKeys(context); // deepCopyVisitor.reset(); // // translator.addOperatorToMetaScope(new Identifier("#LEFT_3"), // deepCopyVisitor.deepCopy(leftInputOp, null)); // translator.addVariableToMetaScope(new Identifier("$$LEFT_3"), // deepCopyVisitor.varCopy(leftInputVar)); // translator.addVariableToMetaScope(new Identifier("$$LEFTPK_3"), // deepCopyVisitor.varCopy(leftPKVar)); // deepCopyVisitor.updatePrimaryKeys(context); // deepCopyVisitor.reset(); translator.addOperatorToMetaScope( new Identifier("#RIGHT_1"), deepCopyVisitor.deepCopy(rightInputOp, null)); translator.addVariableToMetaScope( new Identifier("$$RIGHT_1"), deepCopyVisitor.varCopy(rightInputVar)); translator.addVariableToMetaScope( new Identifier("$$RIGHTPK_1"), deepCopyVisitor.varCopy(rightPKVar)); deepCopyVisitor.updatePrimaryKeys(context); deepCopyVisitor.reset(); // TODO pick side to run Stage 1, currently always picks RIGHT side translator.addOperatorToMetaScope( new Identifier("#RIGHT_2"), deepCopyVisitor.deepCopy(rightInputOp, null)); translator.addVariableToMetaScope( new Identifier("$$RIGHT_2"), deepCopyVisitor.varCopy(rightInputVar)); translator.addVariableToMetaScope( new Identifier("$$RIGHTPK_2"), deepCopyVisitor.varCopy(rightPKVar)); deepCopyVisitor.updatePrimaryKeys(context); deepCopyVisitor.reset(); translator.addOperatorToMetaScope( new Identifier("#RIGHT_3"), deepCopyVisitor.deepCopy(rightInputOp, null)); translator.addVariableToMetaScope( new Identifier("$$RIGHT_3"), deepCopyVisitor.varCopy(rightInputVar)); translator.addVariableToMetaScope( new Identifier("$$RIGHTPK_3"), deepCopyVisitor.varCopy(rightPKVar)); deepCopyVisitor.updatePrimaryKeys(context); deepCopyVisitor.reset(); ILogicalPlan plan; try { plan = translator.translate(clauses); } catch (AsterixException e) { throw new AlgebricksException(e); } context.setVarCounter(counter.get()); ILogicalOperator outputOp = plan.getRoots().get(0).getValue(); SelectOperator extraSelect = null; if (getItemExprRef != expRef) { // more than one join condition getItemExprRef.setValue(ConstantExpression.TRUE); switch (joinOp.getJoinKind()) { case INNER: { extraSelect = new SelectOperator(expRef, false, null); extraSelect.getInputs().add(new MutableObject<ILogicalOperator>(outputOp)); outputOp = extraSelect; break; } case LEFT_OUTER: { if (((AbstractLogicalOperator) outputOp).getOperatorTag() != LogicalOperatorTag.LEFTOUTERJOIN) { throw new IllegalStateException(); } LeftOuterJoinOperator topJoin = (LeftOuterJoinOperator) outputOp; topJoin.getCondition().setValue(expRef.getValue()); break; } default: { throw new IllegalStateException(); } } } opRef.setValue(outputOp); OperatorPropertiesUtil.typeOpRec(opRef, context); return true; }
@Override public boolean rewritePost(Mutable<ILogicalOperator> opRef, IOptimizationContext context) throws AlgebricksException { if (!opRef.getValue().getOperatorTag().equals(LogicalOperatorTag.ASSIGN)) { return false; } AssignOperator assignUnion = (AssignOperator) opRef.getValue(); if (assignUnion.getExpressions().get(0).getValue().getExpressionTag() != LogicalExpressionTag.FUNCTION_CALL) return false; AbstractFunctionCallExpression u = (AbstractFunctionCallExpression) assignUnion.getExpressions().get(0).getValue(); if (!AsterixBuiltinFunctions.UNION.equals(u.getFunctionIdentifier())) { return false; } // Retrieving the logical variables for the union from the two aggregates which are inputs to // the join Mutable<ILogicalOperator> join = assignUnion.getInputs().get(0); LogicalOperatorTag tag1 = join.getValue().getOperatorTag(); if (tag1 != LogicalOperatorTag.INNERJOIN && tag1 != LogicalOperatorTag.LEFTOUTERJOIN) { return false; } AbstractBinaryJoinOperator join1 = (AbstractBinaryJoinOperator) join.getValue(); ILogicalExpression cond1 = join1.getCondition().getValue(); // don't try to push a product down if (!OperatorPropertiesUtil.isAlwaysTrueCond(cond1)) { return false; } List<Mutable<ILogicalOperator>> joinInputs = join.getValue().getInputs(); Mutable<ILogicalOperator> left_branch = joinInputs.get(0); Mutable<ILogicalOperator> right_branch = joinInputs.get(1); List<LogicalVariable> input1Var = new ArrayList<LogicalVariable>(); VariableUtilities.getProducedVariables(left_branch.getValue(), input1Var); List<LogicalVariable> input2Var = new ArrayList<LogicalVariable>(); VariableUtilities.getProducedVariables(right_branch.getValue(), input2Var); List<Triple<LogicalVariable, LogicalVariable, LogicalVariable>> varMap = new ArrayList<Triple<LogicalVariable, LogicalVariable, LogicalVariable>>(1); Triple<LogicalVariable, LogicalVariable, LogicalVariable> triple = new Triple<LogicalVariable, LogicalVariable, LogicalVariable>( input1Var.get(0), input2Var.get(0), assignUnion.getVariables().get(0)); varMap.add(triple); UnionAllOperator unionOp = new UnionAllOperator(varMap); unionOp.getInputs().add(left_branch); unionOp.getInputs().add(right_branch); context.computeAndSetTypeEnvironmentForOperator(unionOp); opRef.setValue(unionOp); return true; }