/**
 * Chains every join in {@code joinList} into one hash-join operator.
 *
 * <p>The first join is built directly from the operators registered in {@code operMap}. Each
 * following join is layered on top of the join built so far, together with the operator of the
 * table that is treated as not yet joined: the right table when the left table name is already in
 * {@code joinedHistory}, otherwise the left table.
 *
 * <p>Expects {@code joinList} to hold at least one join.
 *
 * @return the operator produced for the last join in {@code joinList}
 */
public Operator pipeline() {
  EqualJoin seed = joinList.get(0);

  TimeCalc.begin(seed.toString());
  OperatorHashJoin chained = decideWhichToCreate(seed, operMap);
  TimeCalc.end(seed.toString() + " Finish create the first hash join.");
  rememberJoinedTables(seed);

  for (int idx = 1; idx < joinList.size(); idx++) {
    EqualJoin step = joinList.get(idx);
    Operator leftInput = operMap.get(step.getLeftTableName());
    Operator rightInput = operMap.get(step.getRightTableName());

    // If the left table is already part of the pipeline, the right one is the newcomer;
    // in every other case the left one is taken as the newcomer.
    Operator newcomer = joinedHistory.contains(step.getLeftTableName()) ? rightInput : leftInput;

    TimeCalc.begin(step.toString());
    chained = decideWhichToCreate(step, chained, newcomer);
    TimeCalc.end(step.toString() + " Finish pipeline one hash join " + step);
    rememberJoinedTables(step);
  }

  return chained;
}

/** Records both table names of {@code join} in {@code joinedHistory}. */
private void rememberJoinedTables(EqualJoin join) {
  joinedHistory.add(join.getLeftTableName());
  joinedHistory.add(join.getRightTableName());
}
/** * put the length of the largest difference of join to the first based on the max difference, the * first join in the list, reorder it by the finding the one which overlap the previous join, each * join should have one table overlapped in the previous join in order to pipeline them * * @param joins */ private void reorder(List<EqualJoin> joins) { // int n = joins.size(); int maxIndex = 0; long min = Integer.MIN_VALUE; // find the index of the max difference between left and right for (int i = 0; i < n; i++) { EqualJoin ej = joins.get(i); long diff = ej.getLeftSize() - ej.getRightSize(); if (diff < 0) diff = diff * (-1); if (diff > min) { maxIndex = i; min = diff; } } // swap the max diff value to the first swap(joins, 0, maxIndex); // reorder based one the first Set<String> previous = new HashSet<String>(n + 1); previous.add(joins.get(0).getLeftTableName()); previous.add(joins.get(0).getRightTableName()); for (int i = 1; i < n; i++) { EqualJoin current = joins.get(i); boolean left = previous.contains(current.getLeftTableName()); boolean right = previous.contains(current.getRightTableName()); if (!left && !right) { // both not contains, then switch for (int j = i + 1; j < n; j++) { EqualJoin after = joins.get(j); boolean leftAfter = previous.contains(after.getLeftTableName()); boolean rightAfter = previous.contains(after.getRightTableName()); if (leftAfter || rightAfter) { swap(joins, i, j); previous.add(after.getLeftTableName()); previous.add(after.getRightTableName()); break; } } // end for j } else { previous.add(current.getLeftTableName()); previous.add(current.getRightTableName()); } } // end for i }
/**
 * Builds the hash join for the first join of the pipeline.
 *
 * <p>Both input operators are looked up in {@code operMap} by the join's left and right table
 * names. An {@code OperatorHashJoinExternal} is created only when the length of <em>both</em>
 * inputs exceeds {@code Config.FileThreshold_MB}; in every other case an
 * {@code OperatorHashJoinMem} is created.
 *
 * @param ej the join to build an operator for
 * @param operMap operators keyed by table name
 * @return the newly created hash-join operator
 */
private OperatorHashJoin decideWhichToCreate(EqualJoin ej, Map<String, Operator> operMap) {
  Operator leftInput = operMap.get(ej.getLeftTableName());
  Operator rightInput = operMap.get(ej.getRightTableName());

  // Short-circuit on purpose: the right length is only read when the left one is over the limit.
  boolean bothOverThreshold =
      leftInput.getLength() > Config.FileThreshold_MB
          && rightInput.getLength() > Config.FileThreshold_MB;

  if (!bothOverThreshold) {
    return new OperatorHashJoinMem(ej, leftInput, rightInput);
  }
  return new OperatorHashJoinExternal(ej, leftInput, rightInput);
}
public JoinManager(List<EqualJoin> joins, Map<String, Operator> operMapIn, File swapDirIn) { joinedHistory = new HashSet<String>(); joinList = joins; operMap = operMapIn; // find and set size for (EqualJoin ej : joins) { ej.setLeftSize(operMap.get(ej.getLeftTableName()).getLength()); ej.setRightSize(operMap.get(ej.getRightTableName()).getLength()); } // move the biggest last, and // make each of them has an overlap, // so that easy to pipeline reorder(joinList); }