private static StatementContext compileStatement( String query, Scan scan, List<Object> binds, Integer limit, Set<Expression> extractedNodes) throws SQLException { SQLParser parser = new SQLParser(query); SelectStatement statement = parser.parseQuery(); statement = RHSLiteralStatementRewriter.normalizeWhereClause(statement); PhoenixConnection pconn = DriverManager.getConnection(getUrl(), TEST_PROPERTIES).unwrap(PhoenixConnection.class); ColumnResolver resolver = FromCompiler.getResolver(statement, pconn); StatementContext context = new StatementContext(pconn, resolver, binds, statement.getBindCount(), scan); Integer actualLimit = LimitCompiler.getLimit(context, statement.getLimit()); assertEquals(limit, actualLimit); GroupBy groupBy = GroupByCompiler.getGroupBy(statement, context); statement = HavingCompiler.moveToWhereClause(statement, context, groupBy); WhereCompiler.compileWhereClause( context, statement.getWhere(), extractedNodes, statement.getHint()); return context; }
/** * Builds the projection for the scan * * @param context query context kept between compilation of different query clauses * @param statement TODO * @param groupBy compiled GROUP BY clause * @param targetColumns list of columns, parallel to aliasedNodes, that are being set for an * UPSERT SELECT statement. Used to coerce expression types to the expected target type. * @return projector used to access row values during scan * @throws SQLException */ public static RowProjector compile( StatementContext context, SelectStatement statement, GroupBy groupBy, List<? extends PDatum> targetColumns) throws SQLException { List<AliasedNode> aliasedNodes = statement.getSelect(); // Setup projected columns in Scan SelectClauseVisitor selectVisitor = new SelectClauseVisitor(context, groupBy); List<ExpressionProjector> projectedColumns = new ArrayList<ExpressionProjector>(); TableRef tableRef = context.getResolver().getTables().get(0); PTable table = tableRef.getTable(); boolean isWildcard = false; Scan scan = context.getScan(); int index = 0; List<Expression> projectedExpressions = Lists.newArrayListWithExpectedSize(aliasedNodes.size()); List<byte[]> projectedFamilies = Lists.newArrayListWithExpectedSize(aliasedNodes.size()); for (AliasedNode aliasedNode : aliasedNodes) { ParseNode node = aliasedNode.getNode(); // TODO: visitor? if (node instanceof WildcardParseNode) { if (statement.isAggregate()) { ExpressionCompiler.throwNonAggExpressionInAggException(node.toString()); } isWildcard = true; if (tableRef.getTable().getType() == PTableType.INDEX && ((WildcardParseNode) node).isRewrite()) { projectAllIndexColumns(context, tableRef, projectedExpressions, projectedColumns); } else { projectAllTableColumns(context, tableRef, projectedExpressions, projectedColumns); } } else if (node instanceof FamilyWildcardParseNode) { // Project everything for SELECT cf.* // TODO: support cf.* expressions for multiple tables the same way with *. String cfName = ((FamilyWildcardParseNode) node).getName(); // Delay projecting to scan, as when any other column in the column family gets // added to the scan, it overwrites that we want to project the entire column // family. Instead, we do the projection at the end. // TODO: consider having a ScanUtil.addColumn and ScanUtil.addFamily to work // around this, as this code depends on this function being the last place where // columns are projected (which is currently true, but could change). projectedFamilies.add(Bytes.toBytes(cfName)); if (tableRef.getTable().getType() == PTableType.INDEX && ((FamilyWildcardParseNode) node).isRewrite()) { projectIndexColumnFamily( context, cfName, tableRef, projectedExpressions, projectedColumns); } else { projectTableColumnFamily( context, cfName, tableRef, projectedExpressions, projectedColumns); } } else { Expression expression = node.accept(selectVisitor); projectedExpressions.add(expression); if (index < targetColumns.size()) { PDatum targetColumn = targetColumns.get(index); if (targetColumn.getDataType() != expression.getDataType()) { PDataType targetType = targetColumn.getDataType(); // Check if coerce allowed using more relaxed isCastableTo check, since we promote // INTEGER to LONG // during expression evaluation and then convert back to INTEGER on UPSERT SELECT (and // we don't have // (an actual value we can specifically check against). if (expression.getDataType() != null && !expression.getDataType().isCastableTo(targetType)) { throw new ArgumentTypeMismatchException( targetType, expression.getDataType(), "column: " + targetColumn); } expression = CoerceExpression.create(expression, targetType); } } if (node instanceof BindParseNode) { context.getBindManager().addParamMetaData((BindParseNode) node, expression); } if (!node.isStateless()) { if (!selectVisitor.isAggregate() && statement.isAggregate()) { ExpressionCompiler.throwNonAggExpressionInAggException(expression.toString()); } } String columnAlias = aliasedNode.getAlias() != null ? aliasedNode.getAlias() : SchemaUtil.normalizeIdentifier(aliasedNode.getNode().getAlias()); boolean isCaseSensitive = (columnAlias != null && (aliasedNode.isCaseSensitve() || SchemaUtil.isCaseSensitive(columnAlias))) || selectVisitor.isCaseSensitive; String name = columnAlias == null ? expression.toString() : columnAlias; projectedColumns.add( new ExpressionProjector( name, table.getName().getString(), expression, isCaseSensitive)); } selectVisitor.reset(); index++; } table = context.getCurrentTable().getTable(); // switch to current table for scan projection // TODO make estimatedByteSize more accurate by counting the joined columns. int estimatedKeySize = table.getRowKeySchema().getEstimatedValueLength(); int estimatedByteSize = 0; for (Map.Entry<byte[], NavigableSet<byte[]>> entry : scan.getFamilyMap().entrySet()) { PColumnFamily family = table.getColumnFamily(entry.getKey()); if (entry.getValue() == null) { for (PColumn column : family.getColumns()) { Integer byteSize = column.getByteSize(); estimatedByteSize += SizedUtil.KEY_VALUE_SIZE + estimatedKeySize + (byteSize == null ? RowKeySchema.ESTIMATED_VARIABLE_LENGTH_SIZE : byteSize); } } else { for (byte[] cq : entry.getValue()) { PColumn column = family.getColumn(cq); Integer byteSize = column.getByteSize(); estimatedByteSize += SizedUtil.KEY_VALUE_SIZE + estimatedKeySize + (byteSize == null ? RowKeySchema.ESTIMATED_VARIABLE_LENGTH_SIZE : byteSize); } } } selectVisitor.compile(); // Since we don't have the empty key value in read-only tables, // we must project everything. boolean isProjectEmptyKeyValue = table.getType() != PTableType.VIEW && table.getViewType() != ViewType.MAPPED && !isWildcard; if (isProjectEmptyKeyValue) { for (byte[] family : projectedFamilies) { projectColumnFamily(table, scan, family); } } else { /* * TODO: this could be optimized by detecting: * - if a column is projected that's not in the where clause * - if a column is grouped by that's not in the where clause * - if we're not using IS NULL or CASE WHEN expressions */ projectAllColumnFamilies(table, scan); } return new RowProjector(projectedColumns, estimatedByteSize, isProjectEmptyKeyValue); }
public MutationPlan compile(UpsertStatement upsert, List<Object> binds) throws SQLException { final PhoenixConnection connection = statement.getConnection(); ConnectionQueryServices services = connection.getQueryServices(); final int maxSize = services .getProps() .getInt( QueryServices.MAX_MUTATION_SIZE_ATTRIB, QueryServicesOptions.DEFAULT_MAX_MUTATION_SIZE); final ColumnResolver resolver = FromCompiler.getResolver(upsert, connection); final TableRef tableRef = resolver.getTables().get(0); PTable table = tableRef.getTable(); if (table.getType() == PTableType.VIEW) { throw new ReadOnlyTableException("Mutations not allowed for a view (" + tableRef + ")"); } Scan scan = new Scan(); final StatementContext context = new StatementContext(connection, resolver, binds, upsert.getBindCount(), scan); // Setup array of column indexes parallel to values that are going to be set List<ColumnName> columnNodes = upsert.getColumns(); List<PColumn> allColumns = table.getColumns(); int[] columnIndexesToBe; int[] pkSlotIndexesToBe; PColumn[] targetColumns; // Allow full row upsert if no columns or only dynamic one are specified and values count match if (columnNodes.isEmpty() || columnNodes.size() == upsert.getTable().getDynamicColumns().size()) { columnIndexesToBe = new int[allColumns.size()]; pkSlotIndexesToBe = new int[columnIndexesToBe.length]; targetColumns = new PColumn[columnIndexesToBe.length]; int j = table.getBucketNum() == null ? 0 : 1; // Skip over the salting byte. for (int i = 0; i < allColumns.size(); i++) { columnIndexesToBe[i] = i; targetColumns[i] = allColumns.get(i); if (SchemaUtil.isPKColumn(allColumns.get(i))) { pkSlotIndexesToBe[i] = j++; } } } else { columnIndexesToBe = new int[columnNodes.size()]; pkSlotIndexesToBe = new int[columnIndexesToBe.length]; targetColumns = new PColumn[columnIndexesToBe.length]; Arrays.fill( columnIndexesToBe, -1); // TODO: necessary? So we'll get an AIOB exception if it's not replaced Arrays.fill( pkSlotIndexesToBe, -1); // TODO: necessary? So we'll get an AIOB exception if it's not replaced BitSet pkColumnsSet = new BitSet(table.getPKColumns().size()); for (int i = 0; i < columnNodes.size(); i++) { ColumnName colName = columnNodes.get(i); ColumnRef ref = resolver.resolveColumn(null, colName.getFamilyName(), colName.getColumnName()); columnIndexesToBe[i] = ref.getColumnPosition(); targetColumns[i] = ref.getColumn(); if (SchemaUtil.isPKColumn(ref.getColumn())) { pkColumnsSet.set(pkSlotIndexesToBe[i] = ref.getPKSlotPosition()); } } int i = table.getBucketNum() == null ? 0 : 1; for (; i < table.getPKColumns().size(); i++) { PColumn pkCol = table.getPKColumns().get(i); if (!pkColumnsSet.get(i)) { if (!pkCol.isNullable()) { throw new ConstraintViolationException( table.getName().getString() + "." + pkCol.getName().getString() + " may not be null"); } } } } List<ParseNode> valueNodes = upsert.getValues(); QueryPlan plan = null; RowProjector projector = null; int nValuesToSet; boolean runOnServer = false; if (valueNodes == null) { SelectStatement select = upsert.getSelect(); assert (select != null); TableRef selectTableRef = FromCompiler.getResolver(select, connection).getTables().get(0); boolean sameTable = tableRef.equals(selectTableRef); // Pass scan through if same table in upsert and select so that projection is computed // correctly QueryCompiler compiler = new QueryCompiler(connection, 0, sameTable ? scan : new Scan(), targetColumns); plan = compiler.compile(select, binds); projector = plan.getProjector(); nValuesToSet = projector.getColumnCount(); // Cannot auto commit if doing aggregation or topN or salted // Salted causes problems because the row may end up living on a different region runOnServer = plan.getGroupBy() == null && sameTable && select.getOrderBy().isEmpty() && table.getBucketNum() == null; } else { nValuesToSet = valueNodes.size(); } final QueryPlan queryPlan = plan; // Resize down to allow a subset of columns to be specifiable if (columnNodes.isEmpty()) { columnIndexesToBe = Arrays.copyOf(columnIndexesToBe, nValuesToSet); pkSlotIndexesToBe = Arrays.copyOf(pkSlotIndexesToBe, nValuesToSet); } if (nValuesToSet != columnIndexesToBe.length) { throw new SQLExceptionInfo.Builder(SQLExceptionCode.UPSERT_COLUMN_NUMBERS_MISMATCH) .setMessage( "Numbers of columns: " + columnIndexesToBe.length + ". Number of values: " + nValuesToSet) .build() .buildException(); } final int[] columnIndexes = columnIndexesToBe; final int[] pkSlotIndexes = pkSlotIndexesToBe; // TODO: break this up into multiple functions //////////////////////////////////////////////////////////////////// // UPSERT SELECT ///////////////////////////////////////////////////////////////////// if (valueNodes == null) { /* We can run the upsert in a coprocessor if: * 1) the into table matches from table * 2) the select query isn't doing aggregation * 3) autoCommit is on * 4) not topN * Otherwise, run the query to pull the data from the server * and populate the MutationState (upto a limit). */ final boolean isAutoCommit = connection.getAutoCommit(); runOnServer &= isAutoCommit; //////////////////////////////////////////////////////////////////// // UPSERT SELECT run server-side (maybe) ///////////////////////////////////////////////////////////////////// if (runOnServer) { // At most this array will grow bigger by the number of PK columns int[] allColumnsIndexes = Arrays.copyOf(columnIndexes, columnIndexes.length + nValuesToSet); int[] reverseColumnIndexes = new int[table.getColumns().size()]; List<Expression> projectedExpressions = Lists.newArrayListWithExpectedSize(reverseColumnIndexes.length); Arrays.fill(reverseColumnIndexes, -1); for (int i = 0; i < nValuesToSet; i++) { projectedExpressions.add(projector.getColumnProjector(i).getExpression()); reverseColumnIndexes[columnIndexes[i]] = i; } /* * Order projected columns and projected expressions with PK columns * leading order by slot position */ int offset = table.getBucketNum() == null ? 0 : 1; for (int i = 0; i < table.getPKColumns().size() - offset; i++) { PColumn column = table.getPKColumns().get(i + offset); int pos = reverseColumnIndexes[column.getPosition()]; if (pos == -1) { // Last PK column may be fixed width and nullable // We don't want to insert a null expression b/c // it's not valid to set a fixed width type to null. if (column.getDataType().isFixedWidth()) { continue; } // Add literal null for missing PK columns pos = projectedExpressions.size(); Expression literalNull = LiteralExpression.newConstant(null, column.getDataType()); projectedExpressions.add(literalNull); allColumnsIndexes[pos] = column.getPosition(); } // Swap select expression at pos with i Collections.swap(projectedExpressions, i, pos); // Swap column indexes and reverse column indexes too int tempPos = allColumnsIndexes[i]; allColumnsIndexes[i] = allColumnsIndexes[pos]; allColumnsIndexes[pos] = tempPos; reverseColumnIndexes[tempPos] = reverseColumnIndexes[i]; reverseColumnIndexes[i] = i; } // If any pk slots are changing, be conservative and don't run this server side. // If the row ends up living in a different region, we'll get an error otherwise. for (int i = 0; i < table.getPKColumns().size(); i++) { PColumn column = table.getPKColumns().get(i); Expression source = projectedExpressions.get(i); if (source == null || !source.equals( new ColumnRef(tableRef, column.getPosition()).newColumnExpression())) { // TODO: we could check the region boundaries to see if the pk will still be in it. runOnServer = false; // bail on running server side, since PK may be changing break; } } //////////////////////////////////////////////////////////////////// // UPSERT SELECT run server-side ///////////////////////////////////////////////////////////////////// if (runOnServer) { // Iterate through columns being projected List<PColumn> projectedColumns = Lists.newArrayListWithExpectedSize(projectedExpressions.size()); for (int i = 0; i < projectedExpressions.size(); i++) { // Must make new column if position has changed PColumn column = allColumns.get(allColumnsIndexes[i]); projectedColumns.add(column.getPosition() == i ? column : new PColumnImpl(column, i)); } // Build table from projectedColumns PTable projectedTable = PTableImpl.makePTable( table.getName(), table.getType(), table.getTimeStamp(), table.getSequenceNumber(), table.getPKName(), table.getBucketNum(), projectedColumns); // Remove projection of empty column, since it can lead to problems when building another // projection // using this same scan. TODO: move projection code to a later stage, like // QueryPlan.newScanner to // prevent having to do this. ScanUtil.removeEmptyColumnFamily(context.getScan(), table); List<AliasedNode> select = Collections.<AliasedNode>singletonList( NODE_FACTORY.aliasedNode( null, NODE_FACTORY.function( CountAggregateFunction.NORMALIZED_NAME, LiteralParseNode.STAR))); // Ignore order by - it has no impact final RowProjector aggProjector = ProjectionCompiler.getRowProjector( context, select, false, GroupBy.EMPTY_GROUP_BY, OrderBy.EMPTY_ORDER_BY, null); /* * Transfer over PTable representing subset of columns selected, but all PK columns. * Move columns setting PK first in pkSlot order, adding LiteralExpression of null for any missing ones. * Transfer over List<Expression> for projection. * In region scan, evaluate expressions in order, collecting first n columns for PK and collection non PK in mutation Map * Create the PRow and get the mutations, adding them to the batch */ scan.setAttribute( UngroupedAggregateRegionObserver.UPSERT_SELECT_TABLE, UngroupedAggregateRegionObserver.serialize(projectedTable)); scan.setAttribute( UngroupedAggregateRegionObserver.UPSERT_SELECT_EXPRS, UngroupedAggregateRegionObserver.serialize(projectedExpressions)); final QueryPlan aggPlan = new AggregatePlan( context, tableRef, projector, null, GroupBy.EMPTY_GROUP_BY, false, null, OrderBy.EMPTY_ORDER_BY); return new MutationPlan() { @Override public PhoenixConnection getConnection() { return connection; } @Override public ParameterMetaData getParameterMetaData() { return context.getBindManager().getParameterMetaData(); } @Override public MutationState execute() throws SQLException { Scanner scanner = aggPlan.getScanner(); ResultIterator iterator = scanner.iterator(); try { Tuple row = iterator.next(); ImmutableBytesWritable ptr = context.getTempPtr(); final long mutationCount = (Long) aggProjector.getColumnProjector(0).getValue(row, PDataType.LONG, ptr); return new MutationState(maxSize, connection) { @Override public long getUpdateCount() { return mutationCount; } }; } finally { iterator.close(); } } @Override public ExplainPlan getExplainPlan() throws SQLException { List<String> queryPlanSteps = aggPlan.getExplainPlan().getPlanSteps(); List<String> planSteps = Lists.newArrayListWithExpectedSize(queryPlanSteps.size() + 1); planSteps.add("UPSERT ROWS"); planSteps.addAll(queryPlanSteps); return new ExplainPlan(planSteps); } }; } } //////////////////////////////////////////////////////////////////// // UPSERT SELECT run client-side ///////////////////////////////////////////////////////////////////// final int batchSize = Math.min(connection.getMutateBatchSize(), maxSize); return new MutationPlan() { @Override public PhoenixConnection getConnection() { return connection; } @Override public ParameterMetaData getParameterMetaData() { return context.getBindManager().getParameterMetaData(); } @Override public MutationState execute() throws SQLException { byte[][] values = new byte[columnIndexes.length][]; Scanner scanner = queryPlan.getScanner(); int estSize = scanner.getEstimatedSize(); int rowCount = 0; Map<ImmutableBytesPtr, Map<PColumn, byte[]>> mutation = Maps.newHashMapWithExpectedSize(estSize); ResultSet rs = new PhoenixResultSet(scanner, statement); PTable table = tableRef.getTable(); PColumn column; while (rs.next()) { for (int i = 0; i < values.length; i++) { column = table.getColumns().get(columnIndexes[i]); byte[] byteValue = rs.getBytes(i + 1); Object value = rs.getObject(i + 1); int rsPrecision = rs.getMetaData().getPrecision(i + 1); Integer precision = rsPrecision == 0 ? null : rsPrecision; int rsScale = rs.getMetaData().getScale(i + 1); Integer scale = rsScale == 0 ? null : rsScale; // If ColumnModifier from expression in SELECT doesn't match the // column being projected into then invert the bits. if (column.getColumnModifier() == ColumnModifier.SORT_DESC) { byte[] tempByteValue = Arrays.copyOf(byteValue, byteValue.length); byteValue = ColumnModifier.SORT_DESC.apply(byteValue, tempByteValue, 0, byteValue.length); } // We are guaranteed that the two column will have compatible types, // as we checked that before. if (!column .getDataType() .isSizeCompatible( column.getDataType(), value, byteValue, precision, column.getMaxLength(), scale, column.getScale())) { throw new SQLExceptionInfo.Builder(SQLExceptionCode.DATA_INCOMPATIBLE_WITH_TYPE) .setColumnName(column.getName().getString()) .build() .buildException(); } values[i] = column .getDataType() .coerceBytes( byteValue, value, column.getDataType(), precision, scale, column.getMaxLength(), column.getScale()); } setValues(values, pkSlotIndexes, columnIndexes, table, mutation); rowCount++; // Commit a batch if auto commit is true and we're at our batch size if (isAutoCommit && rowCount % batchSize == 0) { MutationState state = new MutationState(tableRef, mutation, 0, maxSize, connection); connection.getMutationState().join(state); connection.commit(); mutation.clear(); } } // If auto commit is true, this last batch will be committed upon return return new MutationState( tableRef, mutation, rowCount / batchSize * batchSize, maxSize, connection); } @Override public ExplainPlan getExplainPlan() throws SQLException { List<String> queryPlanSteps = queryPlan.getExplainPlan().getPlanSteps(); List<String> planSteps = Lists.newArrayListWithExpectedSize(queryPlanSteps.size() + 1); planSteps.add("UPSERT SELECT"); planSteps.addAll(queryPlanSteps); return new ExplainPlan(planSteps); } }; } //////////////////////////////////////////////////////////////////// // UPSERT VALUES ///////////////////////////////////////////////////////////////////// int nodeIndex = 0; // Allocate array based on size of all columns in table, // since some values may not be set (if they're nullable). UpsertValuesCompiler expressionBuilder = new UpsertValuesCompiler(context); final byte[][] values = new byte[nValuesToSet][]; for (ParseNode valueNode : valueNodes) { if (!valueNode.isConstant()) { throw new SQLExceptionInfo.Builder(SQLExceptionCode.VALUE_IN_UPSERT_NOT_CONSTANT) .build() .buildException(); } PColumn column = allColumns.get(columnIndexes[nodeIndex]); expressionBuilder.setColumn(column); LiteralExpression literalExpression = (LiteralExpression) valueNode.accept(expressionBuilder); byte[] byteValue = literalExpression.getBytes(); if (literalExpression.getDataType() != null) { // If ColumnModifier from expression in SELECT doesn't match the // column being projected into then invert the bits. if (literalExpression.getColumnModifier() != column.getColumnModifier()) { byte[] tempByteValue = Arrays.copyOf(byteValue, byteValue.length); byteValue = ColumnModifier.SORT_DESC.apply(byteValue, tempByteValue, 0, byteValue.length); } if (!literalExpression .getDataType() .isCoercibleTo(column.getDataType(), literalExpression.getValue())) { throw new TypeMismatchException( literalExpression.getDataType(), column.getDataType(), "expression: " + literalExpression.toString() + " in column " + column); } if (!column .getDataType() .isSizeCompatible( literalExpression.getDataType(), literalExpression.getValue(), byteValue, literalExpression.getMaxLength(), column.getMaxLength(), literalExpression.getScale(), column.getScale())) { throw new SQLExceptionInfo.Builder(SQLExceptionCode.DATA_INCOMPATIBLE_WITH_TYPE) .setColumnName(column.getName().getString()) .setMessage("value=" + literalExpression.toString()) .build() .buildException(); } } byteValue = column .getDataType() .coerceBytes( byteValue, literalExpression.getValue(), literalExpression.getDataType(), literalExpression.getMaxLength(), literalExpression.getScale(), column.getMaxLength(), column.getScale()); values[nodeIndex] = byteValue; nodeIndex++; } return new MutationPlan() { @Override public PhoenixConnection getConnection() { return connection; } @Override public ParameterMetaData getParameterMetaData() { return context.getBindManager().getParameterMetaData(); } @Override public MutationState execute() { Map<ImmutableBytesPtr, Map<PColumn, byte[]>> mutation = Maps.newHashMapWithExpectedSize(1); setValues(values, pkSlotIndexes, columnIndexes, tableRef.getTable(), mutation); return new MutationState(tableRef, mutation, 0, maxSize, connection); } @Override public ExplainPlan getExplainPlan() throws SQLException { return new ExplainPlan(Collections.singletonList("PUT SINGLE ROW")); } }; }