/** * Checks that the column permutation + select list + static partition exprs + dynamic partition * exprs collectively cover exactly all columns in the target table (not more of fewer). */ private void checkColumnCoverage( ArrayList<Column> selectExprTargetColumns, Set<String> mentionedColumnNames, int numSelectListExprs, int numStaticPartitionExprs) throws AnalysisException { boolean isHBaseTable = (table_ instanceof HBaseTable); int numClusteringCols = isHBaseTable ? 0 : table_.getNumClusteringCols(); // Check that all columns are mentioned by the permutation and partition clauses if (selectExprTargetColumns.size() + numStaticPartitionExprs != table_.getColumns().size()) { // We've already ruled out too many columns in the permutation and partition clauses // by checking that there are no duplicates and that every column mentioned actually // exists. So all columns aren't mentioned in the query. If the unmentioned columns // include partition columns, this is an error. List<String> missingColumnNames = Lists.newArrayList(); for (Column column : table_.getColumns()) { if (!mentionedColumnNames.contains(column.getName())) { // HBase tables have a single row-key column which is always in position 0. It // must be mentioned, since it is invalid to set it to NULL (which would // otherwise happen by default). if (isHBaseTable && column.getPosition() == 0) { throw new AnalysisException( "Row-key column '" + column.getName() + "' must be explicitly mentioned in column permutation."); } if (column.getPosition() < numClusteringCols) { missingColumnNames.add(column.getName()); } } } if (!missingColumnNames.isEmpty()) { throw new AnalysisException( "Not enough partition columns mentioned in query. Missing columns are: " + Joiner.on(", ").join(missingColumnNames)); } } // Expect the selectListExpr to have entries for every target column if (selectExprTargetColumns.size() != numSelectListExprs) { String comparator = (selectExprTargetColumns.size() < numSelectListExprs) ? "fewer" : "more"; String partitionClause = (partitionKeyValues_ == null) ? "returns" : "and PARTITION clause return"; // If there was no column permutation provided, the error is that the select-list // has the wrong number of expressions compared to the number of columns in the // table. If there was a column permutation, then the mismatch is between the // select-list and the permutation itself. if (columnPermutation_ == null) { int totalColumnsMentioned = numSelectListExprs + numStaticPartitionExprs; throw new AnalysisException( String.format( "Target table '%s' has %s columns (%s) than the SELECT / VALUES clause %s" + " (%s)", table_.getFullName(), comparator, table_.getColumns().size(), partitionClause, totalColumnsMentioned)); } else { String partitionPrefix = (partitionKeyValues_ == null) ? "mentions" : "and PARTITION clause mention"; throw new AnalysisException( String.format( "Column permutation %s %s columns (%s) than " + "the SELECT / VALUES clause %s (%s)", partitionPrefix, comparator, selectExprTargetColumns.size(), partitionClause, numSelectListExprs)); } } }
/** * Performs three final parts of the analysis: 1. Checks type compatibility between all * expressions and their targets * * <p>2. Populates partitionKeyExprs with type-compatible expressions, in Hive partition-column * order, for all partition columns * * <p>3. Populates resultExprs_ with type-compatible expressions, in Hive column order, for all * expressions in the select-list. Unmentioned columns are assigned NULL literal expressions. * * <p>If necessary, adds casts to the expressions to make them compatible with the type of the * corresponding column. * * @throws AnalysisException If an expression is not compatible with its target column */ private void prepareExpressions( List<Column> selectExprTargetColumns, List<Expr> selectListExprs, Table tbl, Analyzer analyzer) throws AnalysisException { // Temporary lists of partition key exprs and names in an arbitrary order. List<Expr> tmpPartitionKeyExprs = new ArrayList<Expr>(); List<String> tmpPartitionKeyNames = new ArrayList<String>(); int numClusteringCols = (tbl instanceof HBaseTable) ? 0 : tbl.getNumClusteringCols(); // Check dynamic partition columns for type compatibility. for (int i = 0; i < selectListExprs.size(); ++i) { Column targetColumn = selectExprTargetColumns.get(i); Expr compatibleExpr = checkTypeCompatibility(targetColumn, selectListExprs.get(i)); if (targetColumn.getPosition() < numClusteringCols) { // This is a dynamic clustering column tmpPartitionKeyExprs.add(compatibleExpr); tmpPartitionKeyNames.add(targetColumn.getName()); } selectListExprs.set(i, compatibleExpr); } // Check static partition columns, dynamic entries in partitionKeyValues will already // be in selectExprTargetColumns and therefore are ignored in this loop if (partitionKeyValues_ != null) { for (PartitionKeyValue pkv : partitionKeyValues_) { if (pkv.isStatic()) { // tableColumns is guaranteed to exist after the earlier analysis checks Column tableColumn = table_.getColumn(pkv.getColName()); Expr compatibleExpr = checkTypeCompatibility(tableColumn, pkv.getValue()); tmpPartitionKeyExprs.add(compatibleExpr); tmpPartitionKeyNames.add(pkv.getColName()); } } } // Reorder the partition key exprs and names to be consistent with the target table // declaration. We need those exprs in the original order to create the corresponding // Hdfs folder structure correctly. for (Column c : table_.getColumns()) { for (int j = 0; j < tmpPartitionKeyNames.size(); ++j) { if (c.getName().equals(tmpPartitionKeyNames.get(j))) { partitionKeyExprs_.add(tmpPartitionKeyExprs.get(j)); break; } } } Preconditions.checkState(partitionKeyExprs_.size() == numClusteringCols); // Make sure we have stats for partitionKeyExprs for (Expr expr : partitionKeyExprs_) { expr.analyze(analyzer); } // Finally, 'undo' the permutation so that the selectListExprs are in Hive column // order, and add NULL expressions to all missing columns. for (Column tblColumn : table_.getColumnsInHiveOrder()) { boolean matchFound = false; for (int i = 0; i < selectListExprs.size(); ++i) { if (selectExprTargetColumns.get(i).getName().equals(tblColumn.getName())) { resultExprs_.add(selectListExprs.get(i)); matchFound = true; break; } } // If no match is found, either the column is a clustering column with a static // value, or it was unmentioned and therefore should have a NULL select-list // expression. if (!matchFound) { if (tblColumn.getPosition() >= numClusteringCols) { // Unmentioned non-clustering columns get NULL literals with the appropriate // target type because Parquet cannot handle NULL_TYPE (IMPALA-617). resultExprs_.add(NullLiteral.create(tblColumn.getType())); } } } // TODO: Check that HBase row-key columns are not NULL? See IMPALA-406 if (needsGeneratedQueryStatement_) { // Build a query statement that returns NULL for every column List<SelectListItem> selectListItems = Lists.newArrayList(); for (Expr e : resultExprs_) { selectListItems.add(new SelectListItem(e, null)); } SelectList selectList = new SelectList(selectListItems); queryStmt_ = new SelectStmt(selectList, null, null, null, null, null, null); queryStmt_.analyze(analyzer); } }
@Override public void analyze(Analyzer analyzer) throws AnalysisException { if (isExplain_) analyzer.setIsExplain(); try { if (withClause_ != null) withClause_.analyze(analyzer); } catch (AnalysisException e) { // Ignore AnalysisExceptions if tables are missing to ensure the maximum number // of missing tables can be collected before failing analyze(). if (analyzer.getMissingTbls().isEmpty()) throw e; } List<Expr> selectListExprs = null; if (!needsGeneratedQueryStatement_) { try { // Use a child analyzer for the query stmt to properly scope WITH-clause // views and to ignore irrelevant ORDER BYs. Analyzer queryStmtAnalyzer = new Analyzer(analyzer); queryStmt_.analyze(queryStmtAnalyzer); if (analyzer.containsSubquery()) { Preconditions.checkState(queryStmt_ instanceof SelectStmt); StmtRewriter.rewriteStatement((SelectStmt) queryStmt_, queryStmtAnalyzer); queryStmt_ = queryStmt_.clone(); queryStmtAnalyzer = new Analyzer(analyzer); queryStmt_.analyze(queryStmtAnalyzer); } selectListExprs = Expr.cloneList(queryStmt_.getBaseTblResultExprs()); } catch (AnalysisException e) { if (analyzer.getMissingTbls().isEmpty()) throw e; } } else { selectListExprs = Lists.newArrayList(); } // Set target table and perform table-type specific analysis and auth checking. // Also checks if the target table is missing. setTargetTable(analyzer); // Abort analysis if there are any missing tables beyond this point. if (!analyzer.getMissingTbls().isEmpty()) { throw new AnalysisException("Found missing tables. Aborting analysis."); } boolean isHBaseTable = (table_ instanceof HBaseTable); int numClusteringCols = isHBaseTable ? 0 : table_.getNumClusteringCols(); // Analysis of the INSERT statement from this point is basically the act of matching // the set of output columns (which come from a column permutation, perhaps // implicitly, and the PARTITION clause) to the set of input columns (which come from // the select-list and any statically-valued columns in the PARTITION clause). // // First, we compute the set of mentioned columns, and reject statements that refer to // non-existent columns, or duplicates (we must check both the column permutation, and // the set of partition keys). Next, we check that all partition columns are // mentioned. During this process we build the map from select-list expr index to // column in the targeted table. // // Then we check that the select-list contains exactly the right number of expressions // for all mentioned columns which are not statically-valued partition columns (which // get their expressions from partitionKeyValues). // // Finally, prepareExpressions analyzes the expressions themselves, and confirms that // they are type-compatible with the target columns. Where columns are not mentioned // (and by this point, we know that missing columns are not partition columns), // prepareExpressions assigns them a NULL literal expressions. // An null permutation clause is the same as listing all non-partition columns in // order. List<String> analysisColumnPermutation = columnPermutation_; if (analysisColumnPermutation == null) { analysisColumnPermutation = Lists.newArrayList(); ArrayList<Column> tableColumns = table_.getColumns(); for (int i = numClusteringCols; i < tableColumns.size(); ++i) { analysisColumnPermutation.add(tableColumns.get(i).getName()); } } // selectExprTargetColumns maps from select expression index to a column in the target // table. It will eventually include all mentioned columns that aren't static-valued // partition columns. ArrayList<Column> selectExprTargetColumns = Lists.newArrayList(); // Tracks the name of all columns encountered in either the permutation clause or the // partition clause to detect duplicates. Set<String> mentionedColumnNames = Sets.newHashSet(); for (String columnName : analysisColumnPermutation) { Column column = table_.getColumn(columnName); if (column == null) { throw new AnalysisException("Unknown column '" + columnName + "' in column permutation"); } if (!mentionedColumnNames.add(columnName)) { throw new AnalysisException("Duplicate column '" + columnName + "' in column permutation"); } selectExprTargetColumns.add(column); } int numStaticPartitionExprs = 0; if (partitionKeyValues_ != null) { for (PartitionKeyValue pkv : partitionKeyValues_) { Column column = table_.getColumn(pkv.getColName()); if (column == null) { throw new AnalysisException( "Unknown column '" + pkv.getColName() + "' in partition clause"); } if (column.getPosition() >= numClusteringCols) { throw new AnalysisException( "Column '" + pkv.getColName() + "' is not a partition column"); } if (!mentionedColumnNames.add(pkv.getColName())) { throw new AnalysisException( "Duplicate column '" + pkv.getColName() + "' in partition clause"); } if (!pkv.isDynamic()) { numStaticPartitionExprs++; } else { selectExprTargetColumns.add(column); } } } // Checks that exactly all columns in the target table are assigned an expr. checkColumnCoverage( selectExprTargetColumns, mentionedColumnNames, selectListExprs.size(), numStaticPartitionExprs); // Make sure static partition key values only contain const exprs. if (partitionKeyValues_ != null) { for (PartitionKeyValue kv : partitionKeyValues_) { kv.analyze(analyzer); } } // Populate partitionKeyExprs from partitionKeyValues and selectExprTargetColumns prepareExpressions(selectExprTargetColumns, selectListExprs, table_, analyzer); // Analyze plan hints at the end to prefer reporting other error messages first // (e.g., the PARTITION clause is not applicable to unpartitioned and HBase tables). analyzePlanHints(analyzer); }