/**
 * Expands "*" for a single tuple descriptor. For every column of the descriptor's table,
 * visited in Hive column order, a slot ref qualified by the given alias is appended to
 * resultExprs and the lower-cased column name is appended to colLabels.
 *
 * @param analyzer analyzer of the enclosing statement; not consulted by this method
 * @param alias table alias used to qualify each generated slot ref
 * @param desc tuple descriptor whose table supplies the columns
 * @throws AnalysisException as declared by the signature
 */
private void expandStar(Analyzer analyzer, String alias, TupleDescriptor desc)
    throws AnalysisException {
  for (Column column : desc.getTable().getColumnsInHiveOrder()) {
    String columnName = column.getName();
    // Every slot ref receives its own TableName; the first constructor argument is null.
    TableName qualifier = new TableName(null, alias);
    resultExprs.add(new SlotRef(qualifier, columnName));
    colLabels.add(columnName.toLowerCase());
  }
}
/** Checks for type compatibility of column and expr. Returns compatible (possibly cast) expr. */ private Expr checkTypeCompatibility(Column column, Expr expr) throws AnalysisException { // Check for compatible type, and add casts to the selectListExprs if necessary. // We don't allow casting to a lower precision type. Type colType = column.getType(); Type exprType = expr.getType(); // Trivially compatible. if (colType.equals(exprType)) return expr; Type compatibleType = Type.getAssignmentCompatibleType(colType, exprType); // Incompatible types. if (!compatibleType.isValid()) { throw new AnalysisException( String.format( "Target table '%s' is incompatible with SELECT / PARTITION expressions.\n" + "Expression '%s' (type: %s) is not compatible with column '%s' (type: %s)", targetTableName_, expr.toSql(), exprType, column.getName(), colType)); } // Loss of precision when inserting into the table. if (!compatibleType.equals(colType) && !compatibleType.isNull()) { throw new AnalysisException( String.format( "Possible loss of precision for target table '%s'.\n" + "Expression '%s' (type: %s) would need to be cast to %s" + " for column '%s'", targetTableName_, expr.toSql(), exprType, colType, column.getName())); } // Add a cast to the selectListExpr to the higher type. return expr.castTo(compatibleType); }
/**
 * Expands "*" for a single tuple descriptor. For every column of the descriptor's table,
 * visited in Hive column order, a slot ref on {@code tblName} is created, analyzed, and
 * appended to resultExprs_; the lower-cased column name is appended to colLabels_.
 *
 * @param analyzer analyzer used to analyze each generated slot ref
 * @param tblName table name that qualifies each generated slot ref
 * @param desc tuple descriptor whose table supplies the columns
 * @throws AnalysisException if analysis of a generated slot ref fails
 * @throws AuthorizationException as declared by the signature
 */
private void expandStar(Analyzer analyzer, TableName tblName, TupleDescriptor desc)
    throws AnalysisException, AuthorizationException {
  for (Column column : desc.getTable().getColumnsInHiveOrder()) {
    String columnName = column.getName();
    SlotRef columnRef = new SlotRef(tblName, columnName);
    // The ref is analyzed before it is published in the result lists.
    columnRef.analyze(analyzer);
    resultExprs_.add(columnRef);
    colLabels_.add(columnName.toLowerCase());
  }
}
/* * Builds results for a DESCRIBE <table> command. This consists of the column * definition for each column in the table. */ private static TDescribeTableResult describeTableMinimal(Table table) { TDescribeTableResult descResult = new TDescribeTableResult(); descResult.results = Lists.newArrayList(); // Get description of all the table's columns (includes partition columns). for (Column column : table.getColumnsInHiveOrder()) { TColumnValue colNameCol = new TColumnValue(); colNameCol.setString_val(column.getName()); TColumnValue dataTypeCol = new TColumnValue(); dataTypeCol.setString_val(column.getType().toString().toLowerCase()); TColumnValue commentCol = new TColumnValue(); commentCol.setString_val(column.getComment() != null ? column.getComment() : ""); descResult.results.add( new TResultRow(Lists.newArrayList(colNameCol, dataTypeCol, commentCol))); } return descResult; }
/** * Performs three final parts of the analysis: 1. Checks type compatibility between all * expressions and their targets * * <p>2. Populates partitionKeyExprs with type-compatible expressions, in Hive partition-column * order, for all partition columns * * <p>3. Populates resultExprs_ with type-compatible expressions, in Hive column order, for all * expressions in the select-list. Unmentioned columns are assigned NULL literal expressions. * * <p>If necessary, adds casts to the expressions to make them compatible with the type of the * corresponding column. * * @throws AnalysisException If an expression is not compatible with its target column */ private void prepareExpressions( List<Column> selectExprTargetColumns, List<Expr> selectListExprs, Table tbl, Analyzer analyzer) throws AnalysisException { // Temporary lists of partition key exprs and names in an arbitrary order. List<Expr> tmpPartitionKeyExprs = new ArrayList<Expr>(); List<String> tmpPartitionKeyNames = new ArrayList<String>(); int numClusteringCols = (tbl instanceof HBaseTable) ? 0 : tbl.getNumClusteringCols(); // Check dynamic partition columns for type compatibility. 
for (int i = 0; i < selectListExprs.size(); ++i) { Column targetColumn = selectExprTargetColumns.get(i); Expr compatibleExpr = checkTypeCompatibility(targetColumn, selectListExprs.get(i)); if (targetColumn.getPosition() < numClusteringCols) { // This is a dynamic clustering column tmpPartitionKeyExprs.add(compatibleExpr); tmpPartitionKeyNames.add(targetColumn.getName()); } selectListExprs.set(i, compatibleExpr); } // Check static partition columns, dynamic entries in partitionKeyValues will already // be in selectExprTargetColumns and therefore are ignored in this loop if (partitionKeyValues_ != null) { for (PartitionKeyValue pkv : partitionKeyValues_) { if (pkv.isStatic()) { // tableColumns is guaranteed to exist after the earlier analysis checks Column tableColumn = table_.getColumn(pkv.getColName()); Expr compatibleExpr = checkTypeCompatibility(tableColumn, pkv.getValue()); tmpPartitionKeyExprs.add(compatibleExpr); tmpPartitionKeyNames.add(pkv.getColName()); } } } // Reorder the partition key exprs and names to be consistent with the target table // declaration. We need those exprs in the original order to create the corresponding // Hdfs folder structure correctly. for (Column c : table_.getColumns()) { for (int j = 0; j < tmpPartitionKeyNames.size(); ++j) { if (c.getName().equals(tmpPartitionKeyNames.get(j))) { partitionKeyExprs_.add(tmpPartitionKeyExprs.get(j)); break; } } } Preconditions.checkState(partitionKeyExprs_.size() == numClusteringCols); // Make sure we have stats for partitionKeyExprs for (Expr expr : partitionKeyExprs_) { expr.analyze(analyzer); } // Finally, 'undo' the permutation so that the selectListExprs are in Hive column // order, and add NULL expressions to all missing columns. 
for (Column tblColumn : table_.getColumnsInHiveOrder()) { boolean matchFound = false; for (int i = 0; i < selectListExprs.size(); ++i) { if (selectExprTargetColumns.get(i).getName().equals(tblColumn.getName())) { resultExprs_.add(selectListExprs.get(i)); matchFound = true; break; } } // If no match is found, either the column is a clustering column with a static // value, or it was unmentioned and therefore should have a NULL select-list // expression. if (!matchFound) { if (tblColumn.getPosition() >= numClusteringCols) { // Unmentioned non-clustering columns get NULL literals with the appropriate // target type because Parquet cannot handle NULL_TYPE (IMPALA-617). resultExprs_.add(NullLiteral.create(tblColumn.getType())); } } } // TODO: Check that HBase row-key columns are not NULL? See IMPALA-406 if (needsGeneratedQueryStatement_) { // Build a query statement that returns NULL for every column List<SelectListItem> selectListItems = Lists.newArrayList(); for (Expr e : resultExprs_) { selectListItems.add(new SelectListItem(e, null)); } SelectList selectList = new SelectList(selectListItems); queryStmt_ = new SelectStmt(selectList, null, null, null, null, null, null); queryStmt_.analyze(analyzer); } }
/** * Checks that the column permutation + select list + static partition exprs + dynamic partition * exprs collectively cover exactly all columns in the target table (not more of fewer). */ private void checkColumnCoverage( ArrayList<Column> selectExprTargetColumns, Set<String> mentionedColumnNames, int numSelectListExprs, int numStaticPartitionExprs) throws AnalysisException { boolean isHBaseTable = (table_ instanceof HBaseTable); int numClusteringCols = isHBaseTable ? 0 : table_.getNumClusteringCols(); // Check that all columns are mentioned by the permutation and partition clauses if (selectExprTargetColumns.size() + numStaticPartitionExprs != table_.getColumns().size()) { // We've already ruled out too many columns in the permutation and partition clauses // by checking that there are no duplicates and that every column mentioned actually // exists. So all columns aren't mentioned in the query. If the unmentioned columns // include partition columns, this is an error. List<String> missingColumnNames = Lists.newArrayList(); for (Column column : table_.getColumns()) { if (!mentionedColumnNames.contains(column.getName())) { // HBase tables have a single row-key column which is always in position 0. It // must be mentioned, since it is invalid to set it to NULL (which would // otherwise happen by default). if (isHBaseTable && column.getPosition() == 0) { throw new AnalysisException( "Row-key column '" + column.getName() + "' must be explicitly mentioned in column permutation."); } if (column.getPosition() < numClusteringCols) { missingColumnNames.add(column.getName()); } } } if (!missingColumnNames.isEmpty()) { throw new AnalysisException( "Not enough partition columns mentioned in query. Missing columns are: " + Joiner.on(", ").join(missingColumnNames)); } } // Expect the selectListExpr to have entries for every target column if (selectExprTargetColumns.size() != numSelectListExprs) { String comparator = (selectExprTargetColumns.size() < numSelectListExprs) ? 
"fewer" : "more"; String partitionClause = (partitionKeyValues_ == null) ? "returns" : "and PARTITION clause return"; // If there was no column permutation provided, the error is that the select-list // has the wrong number of expressions compared to the number of columns in the // table. If there was a column permutation, then the mismatch is between the // select-list and the permutation itself. if (columnPermutation_ == null) { int totalColumnsMentioned = numSelectListExprs + numStaticPartitionExprs; throw new AnalysisException( String.format( "Target table '%s' has %s columns (%s) than the SELECT / VALUES clause %s" + " (%s)", table_.getFullName(), comparator, table_.getColumns().size(), partitionClause, totalColumnsMentioned)); } else { String partitionPrefix = (partitionKeyValues_ == null) ? "mentions" : "and PARTITION clause mention"; throw new AnalysisException( String.format( "Column permutation %s %s columns (%s) than " + "the SELECT / VALUES clause %s (%s)", partitionPrefix, comparator, selectExprTargetColumns.size(), partitionClause, numSelectListExprs)); } } }
/** * Sets table_ based on targetTableName_ and performs table-type specific analysis: - Partition * clause is invalid for unpartitioned Hdfs tables and HBase tables - Overwrite is invalid for * HBase tables - Check INSERT privileges as well as write access to Hdfs paths - Cannot insert * into a view Adds table_ to the analyzer's descriptor table if analysis succeeds. */ private void setTargetTable(Analyzer analyzer) throws AnalysisException { // If the table has not yet been set, load it from the Catalog. This allows for // callers to set a table to analyze that may not actually be created in the Catalog. // One example use case is CREATE TABLE AS SELECT which must run analysis on the // INSERT before the table has actually been created. if (table_ == null) { if (!targetTableName_.isFullyQualified()) { targetTableName_ = new TableName(analyzer.getDefaultDb(), targetTableName_.getTbl()); } table_ = analyzer.getTable(targetTableName_, Privilege.INSERT); } else { targetTableName_ = new TableName(table_.getDb().getName(), table_.getName()); PrivilegeRequestBuilder pb = new PrivilegeRequestBuilder(); analyzer.registerPrivReq( pb.onTable(table_.getDb().getName(), table_.getName()) .allOf(Privilege.INSERT) .toRequest()); } // We do not support inserting into views. if (table_ instanceof View) { throw new AnalysisException( String.format("Impala does not support inserting into views: %s", table_.getFullName())); } boolean isHBaseTable = (table_ instanceof HBaseTable); int numClusteringCols = isHBaseTable ? 0 : table_.getNumClusteringCols(); if (partitionKeyValues_ != null && numClusteringCols == 0) { if (isHBaseTable) { throw new AnalysisException( "PARTITION clause is not valid for INSERT into " + "HBase tables. '" + targetTableName_ + "' is an HBase table"); } else { // Unpartitioned table, but INSERT has PARTITION clause throw new AnalysisException( "PARTITION clause is only valid for INSERT into " + "partitioned table. 
'" + targetTableName_ + "' is not partitioned"); } } if (table_ instanceof HdfsTable) { HdfsTable hdfsTable = (HdfsTable) table_; if (!hdfsTable.hasWriteAccess()) { throw new AnalysisException( String.format( "Unable to INSERT into target table " + "(%s) because Impala does not have WRITE access to at least one HDFS path" + ": %s", targetTableName_, hdfsTable.getFirstLocationWithoutWriteAccess())); } for (int colIdx = 0; colIdx < numClusteringCols; ++colIdx) { Column col = hdfsTable.getColumns().get(colIdx); // Hive has a number of issues handling BOOLEAN partition columns (see HIVE-6590). // Instead of working around the Hive bugs, INSERT is disabled for BOOLEAN // partitions in Impala. Once the Hive JIRA is resolved, we can remove this // analysis check. if (col.getType() == Type.BOOLEAN) { throw new AnalysisException( String.format( "INSERT into table with BOOLEAN " + "partition column (%s) is not supported: %s", col.getName(), targetTableName_)); } } } if (isHBaseTable && overwrite_) { throw new AnalysisException("HBase doesn't have a way to perform INSERT OVERWRITE"); } // Add target table to descriptor table. analyzer.getDescTbl().addReferencedTable(table_); }
@Override public void analyze(Analyzer analyzer) throws AnalysisException { if (isExplain_) analyzer.setIsExplain(); try { if (withClause_ != null) withClause_.analyze(analyzer); } catch (AnalysisException e) { // Ignore AnalysisExceptions if tables are missing to ensure the maximum number // of missing tables can be collected before failing analyze(). if (analyzer.getMissingTbls().isEmpty()) throw e; } List<Expr> selectListExprs = null; if (!needsGeneratedQueryStatement_) { try { // Use a child analyzer for the query stmt to properly scope WITH-clause // views and to ignore irrelevant ORDER BYs. Analyzer queryStmtAnalyzer = new Analyzer(analyzer); queryStmt_.analyze(queryStmtAnalyzer); if (analyzer.containsSubquery()) { Preconditions.checkState(queryStmt_ instanceof SelectStmt); StmtRewriter.rewriteStatement((SelectStmt) queryStmt_, queryStmtAnalyzer); queryStmt_ = queryStmt_.clone(); queryStmtAnalyzer = new Analyzer(analyzer); queryStmt_.analyze(queryStmtAnalyzer); } selectListExprs = Expr.cloneList(queryStmt_.getBaseTblResultExprs()); } catch (AnalysisException e) { if (analyzer.getMissingTbls().isEmpty()) throw e; } } else { selectListExprs = Lists.newArrayList(); } // Set target table and perform table-type specific analysis and auth checking. // Also checks if the target table is missing. setTargetTable(analyzer); // Abort analysis if there are any missing tables beyond this point. if (!analyzer.getMissingTbls().isEmpty()) { throw new AnalysisException("Found missing tables. Aborting analysis."); } boolean isHBaseTable = (table_ instanceof HBaseTable); int numClusteringCols = isHBaseTable ? 0 : table_.getNumClusteringCols(); // Analysis of the INSERT statement from this point is basically the act of matching // the set of output columns (which come from a column permutation, perhaps // implicitly, and the PARTITION clause) to the set of input columns (which come from // the select-list and any statically-valued columns in the PARTITION clause). 
// // First, we compute the set of mentioned columns, and reject statements that refer to // non-existent columns, or duplicates (we must check both the column permutation, and // the set of partition keys). Next, we check that all partition columns are // mentioned. During this process we build the map from select-list expr index to // column in the targeted table. // // Then we check that the select-list contains exactly the right number of expressions // for all mentioned columns which are not statically-valued partition columns (which // get their expressions from partitionKeyValues). // // Finally, prepareExpressions analyzes the expressions themselves, and confirms that // they are type-compatible with the target columns. Where columns are not mentioned // (and by this point, we know that missing columns are not partition columns), // prepareExpressions assigns them a NULL literal expressions. // An null permutation clause is the same as listing all non-partition columns in // order. List<String> analysisColumnPermutation = columnPermutation_; if (analysisColumnPermutation == null) { analysisColumnPermutation = Lists.newArrayList(); ArrayList<Column> tableColumns = table_.getColumns(); for (int i = numClusteringCols; i < tableColumns.size(); ++i) { analysisColumnPermutation.add(tableColumns.get(i).getName()); } } // selectExprTargetColumns maps from select expression index to a column in the target // table. It will eventually include all mentioned columns that aren't static-valued // partition columns. ArrayList<Column> selectExprTargetColumns = Lists.newArrayList(); // Tracks the name of all columns encountered in either the permutation clause or the // partition clause to detect duplicates. 
Set<String> mentionedColumnNames = Sets.newHashSet(); for (String columnName : analysisColumnPermutation) { Column column = table_.getColumn(columnName); if (column == null) { throw new AnalysisException("Unknown column '" + columnName + "' in column permutation"); } if (!mentionedColumnNames.add(columnName)) { throw new AnalysisException("Duplicate column '" + columnName + "' in column permutation"); } selectExprTargetColumns.add(column); } int numStaticPartitionExprs = 0; if (partitionKeyValues_ != null) { for (PartitionKeyValue pkv : partitionKeyValues_) { Column column = table_.getColumn(pkv.getColName()); if (column == null) { throw new AnalysisException( "Unknown column '" + pkv.getColName() + "' in partition clause"); } if (column.getPosition() >= numClusteringCols) { throw new AnalysisException( "Column '" + pkv.getColName() + "' is not a partition column"); } if (!mentionedColumnNames.add(pkv.getColName())) { throw new AnalysisException( "Duplicate column '" + pkv.getColName() + "' in partition clause"); } if (!pkv.isDynamic()) { numStaticPartitionExprs++; } else { selectExprTargetColumns.add(column); } } } // Checks that exactly all columns in the target table are assigned an expr. checkColumnCoverage( selectExprTargetColumns, mentionedColumnNames, selectListExprs.size(), numStaticPartitionExprs); // Make sure static partition key values only contain const exprs. if (partitionKeyValues_ != null) { for (PartitionKeyValue kv : partitionKeyValues_) { kv.analyze(analyzer); } } // Populate partitionKeyExprs from partitionKeyValues and selectExprTargetColumns prepareExpressions(selectExprTargetColumns, selectListExprs, table_, analyzer); // Analyze plan hints at the end to prefer reporting other error messages first // (e.g., the PARTITION clause is not applicable to unpartitioned and HBase tables). analyzePlanHints(analyzer); }