@Override
public void analyze(Analyzer analyzer)
    throws AnalysisException, AuthorizationException {
  super.analyze(analyzer);
  Table t = getTargetTable();
  String tableName = getDb() + "." + getTbl();

  // Verify there are no conflicts with partition columns.
  for (FieldSchema fs : t.getMetaStoreTable().getPartitionKeys()) {
    if (fs.getName().toLowerCase().equals(colName.toLowerCase())) {
      throw new AnalysisException("Cannot modify partition column: " + colName);
    }
    if (fs.getName().toLowerCase().equals(newColDef.getColName().toLowerCase())) {
      throw new AnalysisException(
          "Column name conflicts with existing partition column: " +
          newColDef.getColName());
    }
  }

  // Verify the column being modified exists in the table.
  if (t.getColumn(colName) == null) {
    throw new AnalysisException(String.format(
        "Column '%s' does not exist in table: %s", colName, tableName));
  }

  // Check that the new column def's name is valid.
  newColDef.analyze();

  // Verify that if the column name is being changed, the new name doesn't conflict
  // with an existing column.
  if (!colName.toLowerCase().equals(newColDef.getColName().toLowerCase()) &&
      t.getColumn(newColDef.getColName()) != null) {
    throw new AnalysisException("Column already exists: " + newColDef.getColName());
  }
}
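// Aside: a minimal, self-contained sketch of the case-insensitive conflict check
// performed above. The names here (PartitionConflictSketch, checkNoPartitionConflict)
// are hypothetical and not part of Impala; names are compared ignoring case, mirroring
// the toLowerCase() comparisons in the method above.
import java.util.Arrays;
import java.util.List;

class PartitionConflictSketch {
  // Throws if 'newName' collides (ignoring case) with any partition column name.
  static void checkNoPartitionConflict(List<String> partitionCols, String newName) {
    for (String pc : partitionCols) {
      if (pc.equalsIgnoreCase(newName)) {
        throw new IllegalArgumentException(
            "Column name conflicts with existing partition column: " + newName);
      }
    }
  }

  public static void main(String[] args) {
    checkNoPartitionConflict(Arrays.asList("year", "month"), "id");   // ok
    checkNoPartitionConflict(Arrays.asList("year", "month"), "YEAR"); // throws
  }
}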
/**
 * Builds a TDescribeTableResult that contains the result of a DESCRIBE FORMATTED
 * <table> command. For the formatted describe output the goal is to be exactly the
 * same as what Hive (via HiveServer2) outputs, for compatibility reasons. To do this,
 * Hive's MetadataFormatUtils class is used to build the results.
 */
private static TDescribeTableResult describeTableFormatted(Table table) {
  TDescribeTableResult descResult = new TDescribeTableResult();
  descResult.results = Lists.newArrayList();

  org.apache.hadoop.hive.metastore.api.Table msTable =
      table.getMetaStoreTable().deepCopy();
  // Fix up the metastore table so the output of DESCRIBE FORMATTED matches Hive's.
  // This is to distinguish between empty comments and no comments (value is null).
  for (FieldSchema fs : msTable.getSd().getCols()) {
    fs.setComment(table.getColumn(fs.getName()).getComment());
  }
  for (FieldSchema fs : msTable.getPartitionKeys()) {
    fs.setComment(table.getColumn(fs.getName()).getComment());
  }

  // To avoid initializing any of the SerDe classes in the metastore table Thrift
  // struct, create the ql.metadata.Table object by calling the empty c'tor and
  // then calling setTTable().
  org.apache.hadoop.hive.ql.metadata.Table hiveTable =
      new org.apache.hadoop.hive.ql.metadata.Table();
  hiveTable.setTTable(msTable);

  StringBuilder sb = new StringBuilder();
  // First add all the columns (includes partition columns).
  sb.append(MetaDataFormatUtils.getAllColumnsInformation(
      msTable.getSd().getCols(), msTable.getPartitionKeys()));
  // Add the extended table metadata information.
  sb.append(MetaDataFormatUtils.getTableInformation(hiveTable));

  for (String line : sb.toString().split("\n")) {
    // To match Hive's HiveServer2 output, split each line into multiple column
    // values based on the field delimiter.
    String[] columns = line.split(MetaDataFormatUtils.FIELD_DELIM);
    TResultRow resultRow = new TResultRow();
    for (int i = 0; i < NUM_DESC_FORMATTED_RESULT_COLS; ++i) {
      TColumnValue colVal = new TColumnValue();
      colVal.setString_val(null);
      if (columns.length > i) {
        // Add the column value.
        colVal.setString_val(columns[i]);
      }
      resultRow.addToColVals(colVal);
    }
    descResult.results.add(resultRow);
  }
  return descResult;
}
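// Aside: the null-padding in the inner loop above is needed because Java's
// String.split() drops trailing empty fields, so short lines must be padded out to
// the fixed column count. A hypothetical standalone version of the same technique
// (FixedWidthRowSketch and NUM_RESULT_COLS are illustrative names, not Impala code):
import java.util.ArrayList;
import java.util.List;

class FixedWidthRowSketch {
  static final int NUM_RESULT_COLS = 3; // DESCRIBE FORMATTED emits 3 columns per row

  static List<String> toRow(String line, String fieldDelim) {
    String[] fields = line.split(fieldDelim);
    List<String> row = new ArrayList<>();
    for (int i = 0; i < NUM_RESULT_COLS; ++i) {
      // Pad missing trailing fields with null, matching the TColumnValue handling.
      row.add(i < fields.length ? fields[i] : null);
    }
    return row;
  }

  public static void main(String[] args) {
    System.out.println(toRow("col_name\tdata_type\tcomment", "\t")); // 3 fields
    System.out.println(toRow("# Detailed Table Information", "\t")); // padded w/ nulls
  }
}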
/**
 * Builds results for a DESCRIBE <table> command. This consists of the column
 * definition for each column in the table.
 */
private static TDescribeTableResult describeTableMinimal(Table table) {
  TDescribeTableResult descResult = new TDescribeTableResult();
  descResult.results = Lists.newArrayList();

  // Get description of all the table's columns (includes partition columns).
  for (Column column : table.getColumnsInHiveOrder()) {
    TColumnValue colNameCol = new TColumnValue();
    colNameCol.setString_val(column.getName());
    TColumnValue dataTypeCol = new TColumnValue();
    dataTypeCol.setString_val(column.getType().toString().toLowerCase());
    TColumnValue commentCol = new TColumnValue();
    commentCol.setString_val(column.getComment() != null ? column.getComment() : "");
    descResult.results.add(
        new TResultRow(Lists.newArrayList(colNameCol, dataTypeCol, commentCol)));
  }
  return descResult;
}
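// Aside: a sketch of what "Hive order" means here, assuming (as the usage above
// suggests) that getColumnsInHiveOrder() returns the non-partition columns followed
// by the partition columns, which is the order DESCRIBE prints. HiveOrderSketch and
// columnsInHiveOrder are illustrative names, not Impala code:
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

class HiveOrderSketch {
  static List<String> columnsInHiveOrder(List<String> nonPartitionCols,
                                         List<String> partitionCols) {
    List<String> ordered = new ArrayList<>(nonPartitionCols);
    ordered.addAll(partitionCols);
    return ordered;
  }

  public static void main(String[] args) {
    System.out.println(columnsInHiveOrder(
        Arrays.asList("id", "name"), Arrays.asList("year", "month")));
    // prints [id, name, year, month]
  }
}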
/**
 * Performs three final parts of the analysis:
 * 1. Checks type compatibility between all expressions and their targets.
 * 2. Populates partitionKeyExprs with type-compatible expressions, in Hive
 *    partition-column order, for all partition columns.
 * 3. Populates resultExprs_ with type-compatible expressions, in Hive column order,
 *    for all expressions in the select-list. Unmentioned columns are assigned NULL
 *    literal expressions.
 *
 * If necessary, adds casts to the expressions to make them compatible with the type
 * of the corresponding column.
 *
 * @throws AnalysisException If an expression is not compatible with its target column
 */
private void prepareExpressions(List<Column> selectExprTargetColumns,
    List<Expr> selectListExprs, Table tbl, Analyzer analyzer)
    throws AnalysisException {
  // Temporary lists of partition key exprs and names in an arbitrary order.
  List<Expr> tmpPartitionKeyExprs = new ArrayList<Expr>();
  List<String> tmpPartitionKeyNames = new ArrayList<String>();

  int numClusteringCols = (tbl instanceof HBaseTable) ? 0 : tbl.getNumClusteringCols();

  // Check dynamic partition columns for type compatibility.
  for (int i = 0; i < selectListExprs.size(); ++i) {
    Column targetColumn = selectExprTargetColumns.get(i);
    Expr compatibleExpr = checkTypeCompatibility(targetColumn, selectListExprs.get(i));
    if (targetColumn.getPosition() < numClusteringCols) {
      // This is a dynamic clustering column.
      tmpPartitionKeyExprs.add(compatibleExpr);
      tmpPartitionKeyNames.add(targetColumn.getName());
    }
    selectListExprs.set(i, compatibleExpr);
  }

  // Check static partition columns; dynamic entries in partitionKeyValues will
  // already be in selectExprTargetColumns and are therefore ignored in this loop.
  if (partitionKeyValues_ != null) {
    for (PartitionKeyValue pkv : partitionKeyValues_) {
      if (pkv.isStatic()) {
        // The column is guaranteed to exist after the earlier analysis checks.
        Column tableColumn = table_.getColumn(pkv.getColName());
        Expr compatibleExpr = checkTypeCompatibility(tableColumn, pkv.getValue());
        tmpPartitionKeyExprs.add(compatibleExpr);
        tmpPartitionKeyNames.add(pkv.getColName());
      }
    }
  }

  // Reorder the partition key exprs and names to be consistent with the target table
  // declaration. We need those exprs in the original order to create the
  // corresponding Hdfs folder structure correctly.
  for (Column c : table_.getColumns()) {
    for (int j = 0; j < tmpPartitionKeyNames.size(); ++j) {
      if (c.getName().equals(tmpPartitionKeyNames.get(j))) {
        partitionKeyExprs_.add(tmpPartitionKeyExprs.get(j));
        break;
      }
    }
  }
  Preconditions.checkState(partitionKeyExprs_.size() == numClusteringCols);

  // Make sure we have stats for partitionKeyExprs.
  for (Expr expr : partitionKeyExprs_) {
    expr.analyze(analyzer);
  }

  // Finally, 'undo' the permutation so that the selectListExprs are in Hive column
  // order, and add NULL expressions to all missing columns.
  for (Column tblColumn : table_.getColumnsInHiveOrder()) {
    boolean matchFound = false;
    for (int i = 0; i < selectListExprs.size(); ++i) {
      if (selectExprTargetColumns.get(i).getName().equals(tblColumn.getName())) {
        resultExprs_.add(selectListExprs.get(i));
        matchFound = true;
        break;
      }
    }
    // If no match is found, either the column is a clustering column with a static
    // value, or it was unmentioned and therefore should have a NULL select-list
    // expression.
    if (!matchFound) {
      if (tblColumn.getPosition() >= numClusteringCols) {
        // Unmentioned non-clustering columns get NULL literals with the appropriate
        // target type because Parquet cannot handle NULL_TYPE (IMPALA-617).
        resultExprs_.add(NullLiteral.create(tblColumn.getType()));
      }
    }
  }
  // TODO: Check that HBase row-key columns are not NULL? See IMPALA-406.
  if (needsGeneratedQueryStatement_) {
    // Build a query statement that returns NULL for every column.
    List<SelectListItem> selectListItems = Lists.newArrayList();
    for (Expr e : resultExprs_) {
      selectListItems.add(new SelectListItem(e, null));
    }
    SelectList selectList = new SelectList(selectListItems);
    queryStmt_ = new SelectStmt(selectList, null, null, null, null, null, null);
    queryStmt_.analyze(analyzer);
  }
}
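// Aside: the partition-key reorder loop in prepareExpressions() collects exprs in
// arbitrary (clause) order, then re-emits them in the table's declared column order
// so the Hdfs partition directories nest correctly. A simplified, self-contained
// version of that technique, with strings standing in for Exprs (all names here are
// illustrative, not Impala code):
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

class PartitionOrderSketch {
  static List<String> reorder(List<String> tableCols,   // declared table order
                              List<String> tmpNames,    // arbitrary clause order
                              List<String> tmpExprs) {  // parallel to tmpNames
    List<String> ordered = new ArrayList<>();
    for (String col : tableCols) {
      for (int j = 0; j < tmpNames.size(); ++j) {
        if (col.equals(tmpNames.get(j))) {
          ordered.add(tmpExprs.get(j));
          break;
        }
      }
    }
    return ordered;
  }

  public static void main(String[] args) {
    // Table declared as (id, year, month); the PARTITION clause listed month first.
    System.out.println(reorder(
        Arrays.asList("id", "year", "month"),
        Arrays.asList("month", "year"),
        Arrays.asList("12", "2014")));  // prints [2014, 12]
  }
}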
/**
 * Checks that the column permutation + select list + static partition exprs +
 * dynamic partition exprs collectively cover exactly all columns in the target
 * table (not more or fewer).
 */
private void checkColumnCoverage(ArrayList<Column> selectExprTargetColumns,
    Set<String> mentionedColumnNames, int numSelectListExprs,
    int numStaticPartitionExprs) throws AnalysisException {
  boolean isHBaseTable = (table_ instanceof HBaseTable);
  int numClusteringCols = isHBaseTable ? 0 : table_.getNumClusteringCols();

  // Check that all columns are mentioned by the permutation and partition clauses.
  if (selectExprTargetColumns.size() + numStaticPartitionExprs !=
      table_.getColumns().size()) {
    // We've already ruled out too many columns in the permutation and partition
    // clauses by checking that there are no duplicates and that every column
    // mentioned actually exists. So the only remaining possibility is that not all
    // columns are mentioned in the query. If the unmentioned columns include
    // partition columns, this is an error.
    List<String> missingColumnNames = Lists.newArrayList();
    for (Column column : table_.getColumns()) {
      if (!mentionedColumnNames.contains(column.getName())) {
        // HBase tables have a single row-key column which is always in position 0.
        // It must be mentioned, since it is invalid to set it to NULL (which would
        // otherwise happen by default).
        if (isHBaseTable && column.getPosition() == 0) {
          throw new AnalysisException("Row-key column '" + column.getName() +
              "' must be explicitly mentioned in column permutation.");
        }
        if (column.getPosition() < numClusteringCols) {
          missingColumnNames.add(column.getName());
        }
      }
    }

    if (!missingColumnNames.isEmpty()) {
      throw new AnalysisException(
          "Not enough partition columns mentioned in query. Missing columns are: " +
          Joiner.on(", ").join(missingColumnNames));
    }
  }

  // Expect the selectListExprs to have an entry for every target column.
  if (selectExprTargetColumns.size() != numSelectListExprs) {
    String comparator =
        (selectExprTargetColumns.size() < numSelectListExprs) ? "fewer" : "more";
    String partitionClause =
        (partitionKeyValues_ == null) ? "returns" : "and PARTITION clause return";
    // If there was no column permutation provided, the error is that the select-list
    // has the wrong number of expressions compared to the number of columns in the
    // table. If there was a column permutation, then the mismatch is between the
    // select-list and the permutation itself.
    if (columnPermutation_ == null) {
      int totalColumnsMentioned = numSelectListExprs + numStaticPartitionExprs;
      throw new AnalysisException(String.format(
          "Target table '%s' has %s columns (%s) than the SELECT / VALUES clause %s" +
          " (%s)", table_.getFullName(), comparator, table_.getColumns().size(),
          partitionClause, totalColumnsMentioned));
    } else {
      String partitionPrefix =
          (partitionKeyValues_ == null) ? "mentions" : "and PARTITION clause mention";
      throw new AnalysisException(String.format(
          "Column permutation %s %s columns (%s) than " +
          "the SELECT / VALUES clause %s (%s)",
          partitionPrefix, comparator, selectExprTargetColumns.size(),
          partitionClause, numSelectListExprs));
    }
  }
}
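// Aside: the first check above is pure counting. Because duplicates and unknown
// columns were rejected earlier, the sum of permutation/select targets and static
// partition values can never exceed the table's column count, so inequality can only
// mean under-coverage. A hypothetical standalone version (CoverageSketch is an
// illustrative name, not Impala code):
class CoverageSketch {
  static boolean coversAllColumns(int numTargetCols, int numStaticPartitionExprs,
                                  int numTableCols) {
    // Each mentioned column is distinct and exists, so the sum is at most
    // numTableCols; strict inequality means some column was left unmentioned.
    return numTargetCols + numStaticPartitionExprs == numTableCols;
  }

  public static void main(String[] args) {
    // 2 select targets + 1 static partition value over a 3-column table: covered.
    System.out.println(coversAllColumns(2, 1, 3)); // true
    System.out.println(coversAllColumns(2, 0, 3)); // false -> check partition cols
  }
}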
/**
 * Sets table_ based on targetTableName_ and performs table-type specific analysis:
 * - Partition clause is invalid for unpartitioned Hdfs tables and HBase tables
 * - Overwrite is invalid for HBase tables
 * - Check INSERT privileges as well as write access to Hdfs paths
 * - Cannot insert into a view
 * Adds table_ to the analyzer's descriptor table if analysis succeeds.
 */
private void setTargetTable(Analyzer analyzer) throws AnalysisException {
  // If the table has not yet been set, load it from the Catalog. This allows for
  // callers to set a table to analyze that may not actually be created in the
  // Catalog. One example use case is CREATE TABLE AS SELECT which must run analysis
  // on the INSERT before the table has actually been created.
  if (table_ == null) {
    if (!targetTableName_.isFullyQualified()) {
      targetTableName_ =
          new TableName(analyzer.getDefaultDb(), targetTableName_.getTbl());
    }
    table_ = analyzer.getTable(targetTableName_, Privilege.INSERT);
  } else {
    targetTableName_ = new TableName(table_.getDb().getName(), table_.getName());
    PrivilegeRequestBuilder pb = new PrivilegeRequestBuilder();
    analyzer.registerPrivReq(pb.onTable(table_.getDb().getName(), table_.getName())
        .allOf(Privilege.INSERT).toRequest());
  }

  // We do not support inserting into views.
  if (table_ instanceof View) {
    throw new AnalysisException(String.format(
        "Impala does not support inserting into views: %s", table_.getFullName()));
  }

  boolean isHBaseTable = (table_ instanceof HBaseTable);
  int numClusteringCols = isHBaseTable ? 0 : table_.getNumClusteringCols();

  if (partitionKeyValues_ != null && numClusteringCols == 0) {
    if (isHBaseTable) {
      throw new AnalysisException("PARTITION clause is not valid for INSERT into " +
          "HBase tables. '" + targetTableName_ + "' is an HBase table");
    } else {
      // Unpartitioned table, but INSERT has a PARTITION clause.
      throw new AnalysisException("PARTITION clause is only valid for INSERT into " +
          "partitioned table. '" + targetTableName_ + "' is not partitioned");
    }
  }

  if (table_ instanceof HdfsTable) {
    HdfsTable hdfsTable = (HdfsTable) table_;
    if (!hdfsTable.hasWriteAccess()) {
      throw new AnalysisException(String.format("Unable to INSERT into target table " +
          "(%s) because Impala does not have WRITE access to at least one HDFS path" +
          ": %s", targetTableName_, hdfsTable.getFirstLocationWithoutWriteAccess()));
    }
    for (int colIdx = 0; colIdx < numClusteringCols; ++colIdx) {
      Column col = hdfsTable.getColumns().get(colIdx);
      // Hive has a number of issues handling BOOLEAN partition columns (see
      // HIVE-6590). Instead of working around the Hive bugs, INSERT is disabled for
      // BOOLEAN partitions in Impala. Once the Hive JIRA is resolved, we can remove
      // this analysis check.
      if (col.getType() == Type.BOOLEAN) {
        throw new AnalysisException(String.format("INSERT into table with BOOLEAN " +
            "partition column (%s) is not supported: %s", col.getName(),
            targetTableName_));
      }
    }
  }

  if (isHBaseTable && overwrite_) {
    throw new AnalysisException("HBase doesn't have a way to perform INSERT OVERWRITE");
  }

  // Add target table to descriptor table.
  analyzer.getDescTbl().addReferencedTable(table_);
}
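// Aside: the first branch above resolves an unqualified table name against the
// session's default database before the catalog lookup. A minimal sketch of that
// qualification step (TableNameSketch and qualify are illustrative names, not
// Impala code; a null db stands in for "not fully qualified"):
class TableNameSketch {
  final String db;
  final String tbl;

  TableNameSketch(String db, String tbl) { this.db = db; this.tbl = tbl; }

  boolean isFullyQualified() { return db != null; }

  static TableNameSketch qualify(TableNameSketch name, String defaultDb) {
    // Leave fully-qualified names alone; otherwise prepend the default database.
    return name.isFullyQualified() ? name : new TableNameSketch(defaultDb, name.tbl);
  }

  public static void main(String[] args) {
    TableNameSketch t = qualify(new TableNameSketch(null, "alltypes"), "functional");
    System.out.println(t.db + "." + t.tbl); // functional.alltypes
  }
}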
@Override
public void analyze(Analyzer analyzer) throws AnalysisException {
  if (isExplain_) analyzer.setIsExplain();
  try {
    if (withClause_ != null) withClause_.analyze(analyzer);
  } catch (AnalysisException e) {
    // Ignore AnalysisExceptions if tables are missing to ensure the maximum number
    // of missing tables can be collected before failing analyze().
    if (analyzer.getMissingTbls().isEmpty()) throw e;
  }

  List<Expr> selectListExprs = null;
  if (!needsGeneratedQueryStatement_) {
    try {
      // Use a child analyzer for the query stmt to properly scope WITH-clause
      // views and to ignore irrelevant ORDER BYs.
      Analyzer queryStmtAnalyzer = new Analyzer(analyzer);
      queryStmt_.analyze(queryStmtAnalyzer);
      if (analyzer.containsSubquery()) {
        Preconditions.checkState(queryStmt_ instanceof SelectStmt);
        StmtRewriter.rewriteStatement((SelectStmt) queryStmt_, queryStmtAnalyzer);
        // Re-analyze the rewritten statement with a new analyzer.
        queryStmt_ = queryStmt_.clone();
        queryStmtAnalyzer = new Analyzer(analyzer);
        queryStmt_.analyze(queryStmtAnalyzer);
      }
      selectListExprs = Expr.cloneList(queryStmt_.getBaseTblResultExprs());
    } catch (AnalysisException e) {
      if (analyzer.getMissingTbls().isEmpty()) throw e;
    }
  } else {
    selectListExprs = Lists.newArrayList();
  }

  // Set target table and perform table-type specific analysis and auth checking.
  // Also checks if the target table is missing.
  setTargetTable(analyzer);

  // Abort analysis if there are any missing tables beyond this point.
  if (!analyzer.getMissingTbls().isEmpty()) {
    throw new AnalysisException("Found missing tables. Aborting analysis.");
  }

  boolean isHBaseTable = (table_ instanceof HBaseTable);
  int numClusteringCols = isHBaseTable ? 0 : table_.getNumClusteringCols();

  // Analysis of the INSERT statement from this point is basically the act of
  // matching the set of output columns (which come from a column permutation,
  // perhaps implicitly, and the PARTITION clause) to the set of input columns
  // (which come from the select-list and any statically-valued columns in the
  // PARTITION clause).
  //
  // First, we compute the set of mentioned columns, and reject statements that
  // refer to non-existent columns, or duplicates (we must check both the column
  // permutation, and the set of partition keys). Next, we check that all partition
  // columns are mentioned. During this process we build the map from select-list
  // expr index to column in the targeted table.
  //
  // Then we check that the select-list contains exactly the right number of
  // expressions for all mentioned columns which are not statically-valued partition
  // columns (which get their expressions from partitionKeyValues).
  //
  // Finally, prepareExpressions analyzes the expressions themselves, and confirms
  // that they are type-compatible with the target columns. Where columns are not
  // mentioned (and by this point, we know that missing columns are not partition
  // columns), prepareExpressions assigns them NULL literal expressions.

  // A null permutation clause is the same as listing all non-partition columns in
  // order.
  List<String> analysisColumnPermutation = columnPermutation_;
  if (analysisColumnPermutation == null) {
    analysisColumnPermutation = Lists.newArrayList();
    ArrayList<Column> tableColumns = table_.getColumns();
    for (int i = numClusteringCols; i < tableColumns.size(); ++i) {
      analysisColumnPermutation.add(tableColumns.get(i).getName());
    }
  }

  // selectExprTargetColumns maps from select expression index to a column in the
  // target table. It will eventually include all mentioned columns that aren't
  // static-valued partition columns.
  ArrayList<Column> selectExprTargetColumns = Lists.newArrayList();
  // Tracks the names of all columns encountered in either the permutation clause or
  // the partition clause to detect duplicates.
  Set<String> mentionedColumnNames = Sets.newHashSet();
  for (String columnName : analysisColumnPermutation) {
    Column column = table_.getColumn(columnName);
    if (column == null) {
      throw new AnalysisException(
          "Unknown column '" + columnName + "' in column permutation");
    }
    if (!mentionedColumnNames.add(columnName)) {
      throw new AnalysisException(
          "Duplicate column '" + columnName + "' in column permutation");
    }
    selectExprTargetColumns.add(column);
  }

  int numStaticPartitionExprs = 0;
  if (partitionKeyValues_ != null) {
    for (PartitionKeyValue pkv : partitionKeyValues_) {
      Column column = table_.getColumn(pkv.getColName());
      if (column == null) {
        throw new AnalysisException(
            "Unknown column '" + pkv.getColName() + "' in partition clause");
      }
      if (column.getPosition() >= numClusteringCols) {
        throw new AnalysisException(
            "Column '" + pkv.getColName() + "' is not a partition column");
      }
      if (!mentionedColumnNames.add(pkv.getColName())) {
        throw new AnalysisException(
            "Duplicate column '" + pkv.getColName() + "' in partition clause");
      }
      if (!pkv.isDynamic()) {
        numStaticPartitionExprs++;
      } else {
        selectExprTargetColumns.add(column);
      }
    }
  }

  // Checks that exactly all columns in the target table are assigned an expr.
  checkColumnCoverage(selectExprTargetColumns, mentionedColumnNames,
      selectListExprs.size(), numStaticPartitionExprs);

  // Make sure static partition key values only contain const exprs.
  if (partitionKeyValues_ != null) {
    for (PartitionKeyValue kv : partitionKeyValues_) {
      kv.analyze(analyzer);
    }
  }

  // Populate partitionKeyExprs from partitionKeyValues and selectExprTargetColumns.
  prepareExpressions(selectExprTargetColumns, selectListExprs, table_, analyzer);

  // Analyze plan hints at the end to prefer reporting other error messages first
  // (e.g., the PARTITION clause is not applicable to unpartitioned and HBase
  // tables).
  analyzePlanHints(analyzer);
}
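// Aside: the duplicate checks above lean on Set.add() returning false when the
// element is already present, folding the membership test and the insert into one
// call. A minimal standalone illustration (DuplicateCheckSketch is a hypothetical
// name, not Impala code):
import java.util.HashSet;
import java.util.Set;

class DuplicateCheckSketch {
  public static void main(String[] args) {
    Set<String> mentioned = new HashSet<>();
    for (String col : new String[] {"id", "year", "id"}) {
      if (!mentioned.add(col)) {
        // Reached on the second "id", mirroring the duplicate-column errors above.
        System.out.println("Duplicate column '" + col + "' in column permutation");
      }
    }
  }
}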