private static QueryPlan getHintedQueryPlan(
    PhoenixStatement statement,
    SelectStatement select,
    List<PTable> indexes,
    List<? extends PDatum> targetColumns,
    ParallelIteratorFactory parallelIteratorFactory,
    List<QueryPlan> plans)
    throws SQLException {
  QueryPlan dataPlan = plans.get(0);
  String indexHint = select.getHint().getHint(Hint.INDEX);
  if (indexHint == null) {
    return null;
  }
  int startIndex = 0;
  String alias = dataPlan.getTableRef().getTableAlias();
  String prefix =
      HintNode.PREFIX
          + (alias == null ? dataPlan.getTableRef().getTable().getName().getString() : alias)
          + HintNode.SEPARATOR;
  while (startIndex < indexHint.length()) {
    startIndex = indexHint.indexOf(prefix, startIndex);
    if (startIndex < 0) {
      return null;
    }
    startIndex += prefix.length();
    boolean done = false; // true when SUFFIX found
    while (startIndex < indexHint.length() && !done) {
      int endIndex;
      int endIndex1 = indexHint.indexOf(HintNode.SEPARATOR, startIndex);
      int endIndex2 = indexHint.indexOf(HintNode.SUFFIX, startIndex);
      if (endIndex1 < 0 && endIndex2 < 0) { // a missing SUFFIX shouldn't happen
        endIndex = indexHint.length();
      } else if (endIndex1 < 0) {
        done = true;
        endIndex = endIndex2;
      } else if (endIndex2 < 0) {
        endIndex = endIndex1;
      } else {
        endIndex = Math.min(endIndex1, endIndex2);
        done = endIndex2 == endIndex;
      }
      String indexName = indexHint.substring(startIndex, endIndex);
      int indexPos = getIndexPosition(indexes, indexName);
      if (indexPos >= 0) {
        // The hinted index is applicable, so return its plan.
        PTable index = indexes.get(indexPos);
        indexes.remove(indexPos);
        QueryPlan plan =
            addPlan(
                statement, select, index, targetColumns, parallelIteratorFactory, dataPlan, true);
        if (plan != null) {
          return plan;
        }
      }
      startIndex = endIndex + 1;
    }
  }
  return null;
}
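/*
 * Illustrative sketch only, not part of the class above: it isolates the string scan that
 * getHintedQueryPlan performs over the INDEX hint text. The constant values and the sample hint
 * string below are assumptions made for this example (the real values come from HintNode).
 */
class IndexHintParseSketch {
  static final String PREFIX = "(";
  static final String SEPARATOR = " ";
  static final String SUFFIX = ")";

  // Returns the index names hinted for the given table name or alias.
  static java.util.List<String> hintedIndexes(String indexHint, String tableOrAlias) {
    java.util.List<String> names = new java.util.ArrayList<>();
    String prefix = PREFIX + tableOrAlias + SEPARATOR;
    int start = indexHint.indexOf(prefix);
    if (start < 0) {
      return names;
    }
    start += prefix.length();
    boolean done = false;
    while (start < indexHint.length() && !done) {
      int sep = indexHint.indexOf(SEPARATOR, start);
      int end = indexHint.indexOf(SUFFIX, start);
      if (sep >= 0 && (end < 0 || sep < end)) {
        end = sep; // another index name follows
      } else {
        done = true; // SUFFIX (or end of string) reached
        if (end < 0) {
          end = indexHint.length();
        }
      }
      names.add(indexHint.substring(start, end));
      start = end + 1;
    }
    return names;
  }

  public static void main(String[] args) {
    // A query hinted with /*+ INDEX(emp idx_salary idx_name) */ would, under the assumptions
    // above, carry the hint text "(emp idx_salary idx_name)".
    System.out.println(hintedIndexes("(emp idx_salary idx_name)", "emp"));
    // -> [idx_salary, idx_name]
  }
}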
@Override
public ExplainPlan getExplainPlan() throws SQLException {
  List<String> steps = Lists.newArrayList();
  steps.add("SORT-MERGE-JOIN (" + type.toString().toUpperCase() + ") TABLES");
  for (String step : lhsPlan.getExplainPlan().getPlanSteps()) {
    steps.add(" " + step);
  }
  steps.add("AND" + (rhsSchema.getFieldCount() == 0 ? " (SKIP MERGE)" : ""));
  for (String step : rhsPlan.getExplainPlan().getPlanSteps()) {
    steps.add(" " + step);
  }
  return new ExplainPlan(steps);
}
public SortMergeJoinPlan(
    StatementContext context,
    FilterableStatement statement,
    TableRef table,
    JoinType type,
    QueryPlan lhsPlan,
    QueryPlan rhsPlan,
    List<Expression> lhsKeyExpressions,
    List<Expression> rhsKeyExpressions,
    PTable joinedTable,
    PTable lhsTable,
    PTable rhsTable,
    int rhsFieldPosition,
    boolean isSingleValueOnly) {
  if (type == JoinType.Right) {
    throw new IllegalArgumentException("JoinType should not be " + type);
  }
  this.context = context;
  this.statement = statement;
  this.table = table;
  this.type = type;
  this.lhsPlan = lhsPlan;
  this.rhsPlan = rhsPlan;
  this.lhsKeyExpressions = lhsKeyExpressions;
  this.rhsKeyExpressions = rhsKeyExpressions;
  this.joinedSchema = buildSchema(joinedTable);
  this.lhsSchema = buildSchema(lhsTable);
  this.rhsSchema = buildSchema(rhsTable);
  this.rhsFieldPosition = rhsFieldPosition;
  this.isSingleValueOnly = isSingleValueOnly;
  this.tableRefs =
      Sets.newHashSetWithExpectedSize(
          lhsPlan.getSourceRefs().size() + rhsPlan.getSourceRefs().size());
  this.tableRefs.addAll(lhsPlan.getSourceRefs());
  this.tableRefs.addAll(rhsPlan.getSourceRefs());
  this.thresholdBytes =
      context
          .getConnection()
          .getQueryServices()
          .getProps()
          .getInt(
              QueryServices.SPOOL_THRESHOLD_BYTES_ATTRIB,
              QueryServicesOptions.DEFAULT_SPOOL_THRESHOLD_BYTES);
}
public QueryPlan optimize(PhoenixStatement statement, QueryPlan dataPlan) throws SQLException {
  if (dataPlan.getTableRef() == null) {
    return dataPlan;
  }
  return optimize(dataPlan, statement, Collections.<PColumn>emptyList(), null);
}
/**
 * Orders the candidate plans from best to worst. Since we don't keep stats yet, we use the
 * following simple algorithm:
 *
 * <p>1) If the query is a point lookup (i.e. we have a set of exact row keys), choose among
 * those plans. 2) If the query has an ORDER BY and a LIMIT, choose the plan that has all the
 * ORDER BY expressions in the same order as the row key columns. 3) If more than one plan meets
 * (1) and (2), choose the plan with: a) the most row key columns that may be used to form the
 * start/stop scan key, b) a GROUP BY that preserves ordering, and c) the data table plan.
 *
 * @param plans the list of candidate plans
 * @return the list of plans ordered from best to worst
 */
private List<QueryPlan> orderPlansBestToWorst(
    SelectStatement select, List<QueryPlan> plans, boolean stopAtBestPlan) {
  final QueryPlan dataPlan = plans.get(0);
  if (plans.size() == 1) {
    return plans;
  }

  /*
   * If we have a plan (or plans) that are just point lookups (i.e. fully qualified row keys),
   * then favor those first.
   */
  List<QueryPlan> candidates = Lists.newArrayListWithExpectedSize(plans.size());
  if (stopAtBestPlan) {
    // If we're stopping at the best plan, only consider point lookups if there are any.
    for (QueryPlan plan : plans) {
      if (plan.getContext().getScanRanges().isPointLookup()) {
        candidates.add(plan);
      }
    }
  } else {
    candidates.addAll(plans);
  }

  /*
   * If we have a plan (or plans) that removes the ORDER BY, choose from among those, as the
   * ORDER BY is typically the most expensive operation. Once we have stats, if there's a limit
   * on the query, we might choose a different plan. For example, if the limit were a very large
   * number and the combination of applying other filters on the row key were estimated to select
   * fewer rows, we'd choose that one.
   */
  List<QueryPlan> stillCandidates = plans;
  List<QueryPlan> bestCandidates = candidates;
  if (!candidates.isEmpty()) {
    stillCandidates = candidates;
    bestCandidates = Lists.<QueryPlan>newArrayListWithExpectedSize(candidates.size());
  }
  for (QueryPlan plan : stillCandidates) {
    // If the ORDER BY was optimized out (or was not present at all)
    if (plan.getOrderBy().getOrderByExpressions().isEmpty()) {
      bestCandidates.add(plan);
    }
  }
  if (bestCandidates.isEmpty()) {
    bestCandidates.addAll(stillCandidates);
  }

  int nViewConstants = 0;
  PTable dataTable = dataPlan.getTableRef().getTable();
  if (dataTable.getType() == PTableType.VIEW) {
    for (PColumn column : dataTable.getColumns()) {
      if (column.getViewConstant() != null) {
        nViewConstants++;
      }
    }
  }
  final int boundRanges = nViewConstants;
  final int comparisonOfDataVersusIndexTable =
      select.getHint().hasHint(Hint.USE_DATA_OVER_INDEX_TABLE) ? -1 : 1;
  Collections.sort(
      bestCandidates,
      new Comparator<QueryPlan>() {
        @Override
        public int compare(QueryPlan plan1, QueryPlan plan2) {
          PTable table1 = plan1.getTableRef().getTable();
          PTable table2 = plan2.getTableRef().getTable();
          // For shared indexes (i.e. indexes on views and local indexes),
          // a) add back any view constants, as these won't be in the index, and
          // b) ignore the viewIndexId, which will be part of the row key columns.
          int c =
              (plan2.getContext().getScanRanges().getBoundPkColumnCount()
                      + (table2.getViewIndexId() == null ? 0 : (boundRanges - 1)))
                  - (plan1.getContext().getScanRanges().getBoundPkColumnCount()
                      + (table1.getViewIndexId() == null ? 0 : (boundRanges - 1)));
          if (c != 0) return c;
          if (plan1.getGroupBy() != null && plan2.getGroupBy() != null) {
            if (plan1.getGroupBy().isOrderPreserving()
                != plan2.getGroupBy().isOrderPreserving()) {
              return plan1.getGroupBy().isOrderPreserving() ? -1 : 1;
            }
          }
          // Use the smaller table (the table with the fewest KV columns).
          c =
              (table1.getColumns().size() - table1.getPKColumns().size())
                  - (table2.getColumns().size() - table2.getPKColumns().size());
          if (c != 0) return c;
          // If all things are equal, don't choose a local index, as it forces a scan
          // on every region (unless there's no start/stop key).
          if (table1.getIndexType() == IndexType.LOCAL) {
            return plan1.getContext().getScanRanges().getRanges().isEmpty() ? -1 : 1;
          }
          if (table2.getIndexType() == IndexType.LOCAL) {
            return plan2.getContext().getScanRanges().getRanges().isEmpty() ? 1 : -1;
          }
          // All things being equal, just choose the table based on the
          // Hint.USE_DATA_OVER_INDEX_TABLE hint.
          if (table1.getType() == PTableType.INDEX) {
            return comparisonOfDataVersusIndexTable;
          }
          if (table2.getType() == PTableType.INDEX) {
            return -comparisonOfDataVersusIndexTable;
          }
          return 0;
        }
      });
  return bestCandidates;
}
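/*
 * Illustrative sketch only, not part of the class above: it condenses the first three tie-breaks
 * used by orderPlansBestToWorst into a single comparator over a hypothetical PlanStatsSketch
 * value, so the ranking order (most bound row-key columns, then an order-preserving GROUP BY,
 * then the narrowest table) is easy to see in isolation. The type and its fields are assumptions
 * made for this example; the local-index and data-vs-index tie-breaks are omitted.
 */
final class PlanStatsSketch {
  final int boundPkColumns; // row-key columns usable to form the start/stop scan key
  final boolean orderPreservingGroupBy; // whether the GROUP BY preserves row-key order
  final int nonPkColumns; // "width" of the table: fewer KV columns means a cheaper scan

  PlanStatsSketch(int boundPkColumns, boolean orderPreservingGroupBy, int nonPkColumns) {
    this.boundPkColumns = boundPkColumns;
    this.orderPreservingGroupBy = orderPreservingGroupBy;
    this.nonPkColumns = nonPkColumns;
  }

  // Best plan first: a) most bound PK columns (descending, hence the negation),
  // b) order-preserving GROUP BY, c) fewest non-PK (KV) columns.
  // Usage: candidates.sort(PlanStatsSketch.BEST_FIRST);
  static final java.util.Comparator<PlanStatsSketch> BEST_FIRST =
      java.util.Comparator.<PlanStatsSketch>comparingInt(p -> -p.boundPkColumns)
          .thenComparingInt(p -> p.orderPreservingGroupBy ? 0 : 1)
          .thenComparingInt(p -> p.nonPkColumns);
}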
private static QueryPlan addPlan(
    PhoenixStatement statement,
    SelectStatement select,
    PTable index,
    List<? extends PDatum> targetColumns,
    ParallelIteratorFactory parallelIteratorFactory,
    QueryPlan dataPlan,
    boolean isHinted)
    throws SQLException {
  int nColumns = dataPlan.getProjector().getColumnCount();
  String tableAlias = dataPlan.getTableRef().getTableAlias();
  String alias =
      tableAlias == null ? null : '"' + tableAlias + '"'; // double quote in case it's case sensitive
  String schemaName = index.getParentSchemaName().getString();
  schemaName = schemaName.length() == 0 ? null : '"' + schemaName + '"';
  String tableName = '"' + index.getTableName().getString() + '"';
  TableNode table = FACTORY.namedTable(alias, FACTORY.table(schemaName, tableName));
  SelectStatement indexSelect = FACTORY.select(select, table);
  ColumnResolver resolver =
      FromCompiler.getResolverForQuery(indexSelect, statement.getConnection());
  // We will or will not do tuple projection according to the data plan.
  boolean isProjected =
      dataPlan.getContext().getResolver().getTables().get(0).getTable().getType()
          == PTableType.PROJECTED;
  // Check the index state of the now potentially updated index table to make sure it's active.
  if (PIndexState.ACTIVE.equals(resolver.getTables().get(0).getTable().getIndexState())) {
    try {
      // Translate nodes that match indexed expressions to the associated column parse node.
      indexSelect =
          ParseNodeRewriter.rewrite(
              indexSelect,
              new IndexExpressionParseNodeRewriter(
                  index, statement.getConnection(), indexSelect.getUdfParseNodes()));
      QueryCompiler compiler =
          new QueryCompiler(
              statement,
              indexSelect,
              resolver,
              targetColumns,
              parallelIteratorFactory,
              dataPlan.getContext().getSequenceManager(),
              isProjected);
      QueryPlan plan = compiler.compile();
      // If the query has no WHERE clause and some of the columns to project are missing from the
      // index, we would have to fetch the missing columns from the main table for each row in the
      // local index. That amounts to a full scan of both the local index and the data table,
      // which is inefficient, so we don't use the index. If all the columns to project are
      // present in the index, we can use the index even if the query has no WHERE clause.
      if (index.getIndexType() == IndexType.LOCAL
          && indexSelect.getWhere() == null
          && !plan.getContext().getDataColumns().isEmpty()) {
        return null;
      }
      // Checking the number of columns handles the wildcard cases correctly, as in that case the
      // index must contain all columns from the data table in order to be used.
      if (plan.getTableRef().getTable().getIndexState() == PIndexState.ACTIVE) {
        if (plan.getProjector().getColumnCount() == nColumns) {
          return plan;
        } else if (index.getIndexType() == IndexType.GLOBAL) {
          throw new ColumnNotFoundException("*");
        }
      }
    } catch (ColumnNotFoundException e) {
      /* This means that a column is being used that's not in our index.
       * Since we currently don't keep stats, we don't know the selectivity of the index.
       * For now, if this is a hinted plan, we will try rewriting the query as a subquery;
       * otherwise we simply don't use this index (as opposed to trying to join back from
       * the index table to the data table).
       */
      SelectStatement dataSelect = (SelectStatement) dataPlan.getStatement();
      ParseNode where = dataSelect.getWhere();
      if (isHinted && where != null) {
        StatementContext context = new StatementContext(statement, resolver);
        WhereConditionRewriter whereRewriter =
            new WhereConditionRewriter(FromCompiler.getResolver(dataPlan.getTableRef()), context);
        where = where.accept(whereRewriter);
        if (where != null) {
          PTable dataTable = dataPlan.getTableRef().getTable();
          List<PColumn> pkColumns = dataTable.getPKColumns();
          List<AliasedNode> aliasedNodes =
              Lists.<AliasedNode>newArrayListWithExpectedSize(pkColumns.size());
          List<ParseNode> nodes = Lists.<ParseNode>newArrayListWithExpectedSize(pkColumns.size());
          boolean isSalted = dataTable.getBucketNum() != null;
          boolean isTenantSpecific =
              dataTable.isMultiTenant() && statement.getConnection().getTenantId() != null;
          int posOffset = (isSalted ? 1 : 0) + (isTenantSpecific ? 1 : 0);
          for (int i = posOffset; i < pkColumns.size(); i++) {
            PColumn column = pkColumns.get(i);
            String indexColName = IndexUtil.getIndexColumnName(column);
            ParseNode indexColNode =
                new ColumnParseNode(null, '"' + indexColName + '"', indexColName);
            PDataType indexColType = IndexUtil.getIndexColumnDataType(column);
            PDataType dataColType = column.getDataType();
            if (indexColType != dataColType) {
              indexColNode = FACTORY.cast(indexColNode, dataColType, null, null);
            }
            aliasedNodes.add(FACTORY.aliasedNode(null, indexColNode));
            nodes.add(new ColumnParseNode(null, '"' + column.getName().getString() + '"'));
          }
          SelectStatement innerSelect =
              FACTORY.select(
                  indexSelect.getFrom(),
                  indexSelect.getHint(),
                  false,
                  aliasedNodes,
                  where,
                  null,
                  null,
                  null,
                  null,
                  null,
                  indexSelect.getBindCount(),
                  false,
                  indexSelect.hasSequence(),
                  Collections.<SelectStatement>emptyList(),
                  indexSelect.getUdfParseNodes());
          ParseNode outerWhere =
              FACTORY.in(
                  nodes.size() == 1 ? nodes.get(0) : FACTORY.rowValueConstructor(nodes),
                  FACTORY.subquery(innerSelect, false),
                  false,
                  true);
          ParseNode extractedCondition = whereRewriter.getExtractedCondition();
          if (extractedCondition != null) {
            outerWhere = FACTORY.and(Lists.newArrayList(outerWhere, extractedCondition));
          }
          HintNode hint =
              HintNode.combine(
                  HintNode.subtract(
                      indexSelect.getHint(),
                      new Hint[] {Hint.INDEX, Hint.NO_CHILD_PARENT_JOIN_OPTIMIZATION}),
                  FACTORY.hint("NO_INDEX"));
          SelectStatement query = FACTORY.select(dataSelect, hint, outerWhere);
          ColumnResolver queryResolver =
              FromCompiler.getResolverForQuery(query, statement.getConnection());
          query = SubqueryRewriter.transform(query, queryResolver, statement.getConnection());
          queryResolver = FromCompiler.getResolverForQuery(query, statement.getConnection());
          query = StatementNormalizer.normalize(query, queryResolver);
          QueryPlan plan =
              new QueryCompiler(
                      statement,
                      query,
                      queryResolver,
                      targetColumns,
                      parallelIteratorFactory,
                      dataPlan.getContext().getSequenceManager(),
                      isProjected)
                  .compile();
          return plan;
        }
      }
    }
  }
  return null;
}
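/*
 * Illustrative example only; the table, column, and index names are hypothetical. When a hinted
 * index does not cover every referenced column, addPlan above retries by rewriting the statement
 * as an IN-subquery on the data table's primary key, roughly:
 *
 *   Original:   SELECT v2 FROM t WHERE v1 = 'a'     -- v2 is not covered by the index on (v1)
 *   Rewritten:  SELECT v2 FROM t                    -- with a NO_INDEX hint added
 *               WHERE pk IN (SELECT pk FROM idx_t WHERE v1 = 'a')
 *
 * The inner query runs against the index and projects the data table's PK columns (using the
 * index column names from IndexUtil.getIndexColumnName, cast back to the data table's types
 * where they differ); any conditions the index cannot evaluate are pulled out by
 * WhereConditionRewriter and ANDed back into the outer WHERE clause.
 */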
private List<QueryPlan> getApplicablePlans(
    QueryPlan dataPlan,
    PhoenixStatement statement,
    List<? extends PDatum> targetColumns,
    ParallelIteratorFactory parallelIteratorFactory,
    boolean stopAtBestPlan)
    throws SQLException {
  SelectStatement select = (SelectStatement) dataPlan.getStatement();
  // Exit early if we have a point lookup, as we can't do better than that.
  if (!useIndexes || (dataPlan.getContext().getScanRanges().isPointLookup() && stopAtBestPlan)) {
    return Collections.singletonList(dataPlan);
  }
  // For single-query tuple projection, indexes are inherited from the original table to the
  // projected table; otherwise they are not. So passing the projected table here is enough to
  // tell whether this is a single query or part of a join query.
  List<PTable> indexes =
      Lists.newArrayList(
          dataPlan.getContext().getResolver().getTables().get(0).getTable().getIndexes());
  if (indexes.isEmpty()
      || dataPlan.isDegenerate()
      || dataPlan.getTableRef().hasDynamicCols()
      || select.getHint().hasHint(Hint.NO_INDEX)) {
    return Collections.singletonList(dataPlan);
  }
  // targetColumns is set for UPSERT SELECT to ensure that the proper type conversion takes
  // place. For a SELECT it is empty, so we set targetColumns to match the projection from the
  // dataPlan, ensuring that the metadata when an index is used matches the metadata when the
  // data table is used.
  if (targetColumns.isEmpty()) {
    List<? extends ColumnProjector> projectors = dataPlan.getProjector().getColumnProjectors();
    List<PDatum> targetDatums = Lists.newArrayListWithExpectedSize(projectors.size());
    for (ColumnProjector projector : projectors) {
      targetDatums.add(projector.getExpression());
    }
    targetColumns = targetDatums;
  }
  SelectStatement translatedIndexSelect =
      IndexStatementRewriter.translate(select, FromCompiler.getResolver(dataPlan.getTableRef()));
  List<QueryPlan> plans = Lists.newArrayListWithExpectedSize(1 + indexes.size());
  plans.add(dataPlan);
  QueryPlan hintedPlan =
      getHintedQueryPlan(
          statement,
          translatedIndexSelect,
          indexes,
          targetColumns,
          parallelIteratorFactory,
          plans);
  if (hintedPlan != null) {
    if (stopAtBestPlan) {
      return Collections.singletonList(hintedPlan);
    }
    plans.add(0, hintedPlan);
  }
  for (PTable index : indexes) {
    QueryPlan plan =
        addPlan(
            statement,
            translatedIndexSelect,
            index,
            targetColumns,
            parallelIteratorFactory,
            dataPlan,
            false);
    if (plan != null) {
      // The query can't possibly return anything, so just return this plan.
      if (plan.isDegenerate()) {
        return Collections.singletonList(plan);
      }
      plans.add(plan);
    }
  }
  return hintedPlan == null ? orderPlansBestToWorst(select, plans, stopAtBestPlan) : plans;
}
@Override
public long getEstimatedSize() {
  return lhsPlan.getEstimatedSize() + rhsPlan.getEstimatedSize();
}
@Override
public ResultIterator iterator(ParallelScanGrouper scanGrouper, Scan scan) throws SQLException {
  return type == JoinType.Semi || type == JoinType.Anti
      ? new SemiAntiJoinIterator(lhsPlan.iterator(scanGrouper), rhsPlan.iterator(scanGrouper))
      : new BasicJoinIterator(lhsPlan.iterator(scanGrouper), rhsPlan.iterator(scanGrouper));
}