@Override
public void analyze(Analyzer analyzer) throws AnalysisException,
    AuthorizationException {
  super.analyze(analyzer);
  Table t = getTargetTable();
  String tableName = getDb() + "." + getTbl();

  // Verify there are no conflicts with partition columns.
  for (FieldSchema fs: t.getMetaStoreTable().getPartitionKeys()) {
    if (fs.getName().toLowerCase().equals(colName.toLowerCase())) {
      throw new AnalysisException("Cannot modify partition column: " + colName);
    }
    if (fs.getName().toLowerCase().equals(newColDef.getColName().toLowerCase())) {
      throw new AnalysisException(
          "Column name conflicts with existing partition column: " +
          newColDef.getColName());
    }
  }

  // Verify the column being modified exists in the table.
  if (t.getColumn(colName) == null) {
    throw new AnalysisException(String.format(
        "Column '%s' does not exist in table: %s", colName, tableName));
  }

  // Check that the new column def's name is valid.
  newColDef.analyze();

  // Verify that if the column name is being changed, the new name doesn't conflict
  // with an existing column.
  if (!colName.toLowerCase().equals(newColDef.getColName().toLowerCase()) &&
      t.getColumn(newColDef.getColName()) != null) {
    throw new AnalysisException("Column already exists: " + newColDef.getColName());
  }
}
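// Illustrative sketch (not from the original source): assuming a hypothetical
// table alltypes partitioned by (year INT, month INT), the checks above would
// reject statements such as:
//
//   ALTER TABLE alltypes CHANGE year y BIGINT;
//     -> "Cannot modify partition column: year"
//   ALTER TABLE alltypes CHANGE id month INT;
//     -> "Column name conflicts with existing partition column: month"
//   ALTER TABLE alltypes CHANGE bogus_col c INT;
//     -> "Column 'bogus_col' does not exist in table: ..."
//   ALTER TABLE alltypes CHANGE id bool_col INT;  // bool_col already exists
//     -> "Column already exists: bool_col"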
/**
 * Builds a TDescribeTableResult that contains the result of a DESCRIBE FORMATTED
 * <table> command. For the formatted describe output the goal is to be exactly the
 * same as what Hive (via HiveServer2) outputs, for compatibility reasons. To do this,
 * Hive's MetadataFormatUtils class is used to build the results.
 */
private static TDescribeTableResult describeTableFormatted(Table table) {
  TDescribeTableResult descResult = new TDescribeTableResult();
  descResult.results = Lists.newArrayList();

  org.apache.hadoop.hive.metastore.api.Table msTable =
      table.getMetaStoreTable().deepCopy();
  // Fixup the metastore table so the output of DESCRIBE FORMATTED matches Hive's.
  // This is to distinguish between empty comments and no comments (value is null).
  for (FieldSchema fs: msTable.getSd().getCols()) {
    fs.setComment(table.getColumn(fs.getName()).getComment());
  }
  for (FieldSchema fs: msTable.getPartitionKeys()) {
    fs.setComment(table.getColumn(fs.getName()).getComment());
  }

  // To avoid initializing any of the SerDe classes in the metastore table Thrift
  // struct, create the ql.metadata.Table object by calling the empty c'tor and
  // then calling setTTable().
  org.apache.hadoop.hive.ql.metadata.Table hiveTable =
      new org.apache.hadoop.hive.ql.metadata.Table();
  hiveTable.setTTable(msTable);
  StringBuilder sb = new StringBuilder();
  // First add all the columns (includes partition columns).
  sb.append(MetaDataFormatUtils.getAllColumnsInformation(msTable.getSd().getCols(),
      msTable.getPartitionKeys()));
  // Add the extended table metadata information.
  sb.append(MetaDataFormatUtils.getTableInformation(hiveTable));

  for (String line: sb.toString().split("\n")) {
    // To match Hive's HiveServer2 output, split each line into multiple column
    // values based on the field delimiter.
    String[] columns = line.split(MetaDataFormatUtils.FIELD_DELIM);
    TResultRow resultRow = new TResultRow();
    for (int i = 0; i < NUM_DESC_FORMATTED_RESULT_COLS; ++i) {
      TColumnValue colVal = new TColumnValue();
      colVal.setString_val(null);
      if (columns.length > i) {
        // Add the column value.
        colVal.setString_val(columns[i]);
      }
      resultRow.addToColVals(colVal);
    }
    descResult.results.add(resultRow);
  }
  return descResult;
}
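// Standalone sketch (not part of the original source) of the row-building step
// above: each line of the formatted output is split on a field delimiter and
// padded with nulls up to a fixed result-column count. The delimiter value,
// column count, and class/method names below are assumptions standing in for
// MetaDataFormatUtils.FIELD_DELIM, NUM_DESC_FORMATTED_RESULT_COLS, and the
// Thrift row types used above.
import java.util.ArrayList;
import java.util.List;

class DescribeFormattedRowSketch {
  static final String FIELD_DELIM = "\t";  // assumption: tab-delimited fields
  static final int NUM_RESULT_COLS = 3;    // assumption: three output columns

  // Splits one formatted line into exactly NUM_RESULT_COLS values, padding any
  // missing trailing fields with null, mirroring the loop above.
  static List<String> toRow(String line) {
    String[] columns = line.split(FIELD_DELIM);
    List<String> row = new ArrayList<String>();
    for (int i = 0; i < NUM_RESULT_COLS; ++i) {
      row.add(columns.length > i ? columns[i] : null);
    }
    return row;
  }
}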
/**
 * Performs three final parts of the analysis:
 * 1. Checks type compatibility between all expressions and their targets.
 * 2. Populates partitionKeyExprs with type-compatible expressions, in Hive
 *    partition-column order, for all partition columns.
 * 3. Populates resultExprs_ with type-compatible expressions, in Hive column order,
 *    for all expressions in the select-list. Unmentioned columns are assigned NULL
 *    literal expressions.
 *
 * If necessary, adds casts to the expressions to make them compatible with the type
 * of the corresponding column.
 *
 * @throws AnalysisException If an expression is not compatible with its target column
 */
private void prepareExpressions(List<Column> selectExprTargetColumns,
    List<Expr> selectListExprs, Table tbl, Analyzer analyzer)
    throws AnalysisException {
  // Temporary lists of partition key exprs and names in an arbitrary order.
  List<Expr> tmpPartitionKeyExprs = new ArrayList<Expr>();
  List<String> tmpPartitionKeyNames = new ArrayList<String>();

  int numClusteringCols = (tbl instanceof HBaseTable) ? 0 : tbl.getNumClusteringCols();

  // Check dynamic partition columns for type compatibility.
  for (int i = 0; i < selectListExprs.size(); ++i) {
    Column targetColumn = selectExprTargetColumns.get(i);
    Expr compatibleExpr = checkTypeCompatibility(targetColumn, selectListExprs.get(i));
    if (targetColumn.getPosition() < numClusteringCols) {
      // This is a dynamic clustering column.
      tmpPartitionKeyExprs.add(compatibleExpr);
      tmpPartitionKeyNames.add(targetColumn.getName());
    }
    selectListExprs.set(i, compatibleExpr);
  }

  // Check static partition columns; dynamic entries in partitionKeyValues will
  // already be in selectExprTargetColumns and are therefore ignored in this loop.
  if (partitionKeyValues_ != null) {
    for (PartitionKeyValue pkv: partitionKeyValues_) {
      if (pkv.isStatic()) {
        // tableColumn is guaranteed to exist after the earlier analysis checks.
        Column tableColumn = table_.getColumn(pkv.getColName());
        Expr compatibleExpr = checkTypeCompatibility(tableColumn, pkv.getValue());
        tmpPartitionKeyExprs.add(compatibleExpr);
        tmpPartitionKeyNames.add(pkv.getColName());
      }
    }
  }

  // Reorder the partition key exprs and names to be consistent with the target table
  // declaration. We need those exprs in the original order to create the
  // corresponding Hdfs folder structure correctly.
  for (Column c: table_.getColumns()) {
    for (int j = 0; j < tmpPartitionKeyNames.size(); ++j) {
      if (c.getName().equals(tmpPartitionKeyNames.get(j))) {
        partitionKeyExprs_.add(tmpPartitionKeyExprs.get(j));
        break;
      }
    }
  }

  Preconditions.checkState(partitionKeyExprs_.size() == numClusteringCols);
  // Make sure we have stats for partitionKeyExprs.
  for (Expr expr: partitionKeyExprs_) {
    expr.analyze(analyzer);
  }

  // Finally, 'undo' the permutation so that the selectListExprs are in Hive column
  // order, and add NULL expressions to all missing columns.
  for (Column tblColumn: table_.getColumnsInHiveOrder()) {
    boolean matchFound = false;
    for (int i = 0; i < selectListExprs.size(); ++i) {
      if (selectExprTargetColumns.get(i).getName().equals(tblColumn.getName())) {
        resultExprs_.add(selectListExprs.get(i));
        matchFound = true;
        break;
      }
    }
    // If no match is found, either the column is a clustering column with a static
    // value, or it was unmentioned and therefore should have a NULL select-list
    // expression.
    if (!matchFound) {
      if (tblColumn.getPosition() >= numClusteringCols) {
        // Unmentioned non-clustering columns get NULL literals with the appropriate
        // target type because Parquet cannot handle NULL_TYPE (IMPALA-617).
        resultExprs_.add(NullLiteral.create(tblColumn.getType()));
      }
    }
  }
  // TODO: Check that HBase row-key columns are not NULL? See IMPALA-406.
  if (needsGeneratedQueryStatement_) {
    // Build a query statement that returns NULL for every column.
    List<SelectListItem> selectListItems = Lists.newArrayList();
    for (Expr e: resultExprs_) {
      selectListItems.add(new SelectListItem(e, null));
    }
    SelectList selectList = new SelectList(selectListItems);
    queryStmt_ = new SelectStmt(selectList, null, null, null, null, null, null);
    queryStmt_.analyze(analyzer);
  }
}
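// Standalone sketch (not from the original source) of the reordering step in
// prepareExpressions: partition-key exprs collected in arbitrary (mention) order
// are re-emitted in table-declaration order so the corresponding HDFS partition
// folder structure is created correctly. All names here are hypothetical, and
// String stands in for the Expr/Column types used above.
import java.util.ArrayList;
import java.util.List;

class PartitionKeyOrderSketch {
  // tableColumnOrder: partition column names in table-declaration order.
  // tmpNames/tmpExprs: parallel lists in arbitrary order, as collected above.
  static List<String> reorder(List<String> tableColumnOrder,
      List<String> tmpNames, List<String> tmpExprs) {
    List<String> ordered = new ArrayList<String>();
    for (String col: tableColumnOrder) {
      for (int j = 0; j < tmpNames.size(); ++j) {
        if (col.equals(tmpNames.get(j))) {
          // Emit the expr for this column as soon as its declared slot is reached.
          ordered.add(tmpExprs.get(j));
          break;
        }
      }
    }
    return ordered;
  }
}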
@Override
public void analyze(Analyzer analyzer) throws AnalysisException {
  if (isExplain_) analyzer.setIsExplain();
  try {
    if (withClause_ != null) withClause_.analyze(analyzer);
  } catch (AnalysisException e) {
    // Ignore AnalysisExceptions if tables are missing to ensure the maximum number
    // of missing tables can be collected before failing analyze().
    if (analyzer.getMissingTbls().isEmpty()) throw e;
  }

  List<Expr> selectListExprs = null;
  if (!needsGeneratedQueryStatement_) {
    try {
      // Use a child analyzer for the query stmt to properly scope WITH-clause
      // views and to ignore irrelevant ORDER BYs.
      Analyzer queryStmtAnalyzer = new Analyzer(analyzer);
      queryStmt_.analyze(queryStmtAnalyzer);
      if (analyzer.containsSubquery()) {
        Preconditions.checkState(queryStmt_ instanceof SelectStmt);
        StmtRewriter.rewriteStatement((SelectStmt) queryStmt_, queryStmtAnalyzer);
        queryStmt_ = queryStmt_.clone();
        queryStmtAnalyzer = new Analyzer(analyzer);
        queryStmt_.analyze(queryStmtAnalyzer);
      }
      selectListExprs = Expr.cloneList(queryStmt_.getBaseTblResultExprs());
    } catch (AnalysisException e) {
      if (analyzer.getMissingTbls().isEmpty()) throw e;
    }
  } else {
    selectListExprs = Lists.newArrayList();
  }

  // Set target table and perform table-type specific analysis and auth checking.
  // Also checks if the target table is missing.
  setTargetTable(analyzer);

  // Abort analysis if there are any missing tables beyond this point.
  if (!analyzer.getMissingTbls().isEmpty()) {
    throw new AnalysisException("Found missing tables. Aborting analysis.");
  }

  boolean isHBaseTable = (table_ instanceof HBaseTable);
  int numClusteringCols = isHBaseTable ? 0 : table_.getNumClusteringCols();

  // Analysis of the INSERT statement from this point is basically the act of
  // matching the set of output columns (which come from a column permutation,
  // perhaps implicitly, and the PARTITION clause) to the set of input columns
  // (which come from the select-list and any statically-valued columns in the
  // PARTITION clause).
  //
  // First, we compute the set of mentioned columns, and reject statements that
  // refer to non-existent columns, or duplicates (we must check both the column
  // permutation, and the set of partition keys). Next, we check that all partition
  // columns are mentioned. During this process we build the map from select-list
  // expr index to column in the targeted table.
  //
  // Then we check that the select-list contains exactly the right number of
  // expressions for all mentioned columns which are not statically-valued
  // partition columns (which get their expressions from partitionKeyValues).
  //
  // Finally, prepareExpressions analyzes the expressions themselves, and confirms
  // that they are type-compatible with the target columns. Where columns are not
  // mentioned (and by this point, we know that missing columns are not partition
  // columns), prepareExpressions assigns them a NULL literal expression.

  // A null permutation clause is the same as listing all non-partition columns in
  // order.
  List<String> analysisColumnPermutation = columnPermutation_;
  if (analysisColumnPermutation == null) {
    analysisColumnPermutation = Lists.newArrayList();
    ArrayList<Column> tableColumns = table_.getColumns();
    for (int i = numClusteringCols; i < tableColumns.size(); ++i) {
      analysisColumnPermutation.add(tableColumns.get(i).getName());
    }
  }

  // selectExprTargetColumns maps from select expression index to a column in the
  // target table. It will eventually include all mentioned columns that aren't
  // static-valued partition columns.
  ArrayList<Column> selectExprTargetColumns = Lists.newArrayList();

  // Tracks the name of all columns encountered in either the permutation clause or
  // the partition clause to detect duplicates.
  Set<String> mentionedColumnNames = Sets.newHashSet();
  for (String columnName: analysisColumnPermutation) {
    Column column = table_.getColumn(columnName);
    if (column == null) {
      throw new AnalysisException(
          "Unknown column '" + columnName + "' in column permutation");
    }
    if (!mentionedColumnNames.add(columnName)) {
      throw new AnalysisException(
          "Duplicate column '" + columnName + "' in column permutation");
    }
    selectExprTargetColumns.add(column);
  }

  int numStaticPartitionExprs = 0;
  if (partitionKeyValues_ != null) {
    for (PartitionKeyValue pkv: partitionKeyValues_) {
      Column column = table_.getColumn(pkv.getColName());
      if (column == null) {
        throw new AnalysisException(
            "Unknown column '" + pkv.getColName() + "' in partition clause");
      }
      if (column.getPosition() >= numClusteringCols) {
        throw new AnalysisException(
            "Column '" + pkv.getColName() + "' is not a partition column");
      }
      if (!mentionedColumnNames.add(pkv.getColName())) {
        throw new AnalysisException(
            "Duplicate column '" + pkv.getColName() + "' in partition clause");
      }
      if (!pkv.isDynamic()) {
        numStaticPartitionExprs++;
      } else {
        selectExprTargetColumns.add(column);
      }
    }
  }

  // Checks that exactly all columns in the target table are assigned an expr.
  checkColumnCoverage(selectExprTargetColumns, mentionedColumnNames,
      selectListExprs.size(), numStaticPartitionExprs);

  // Make sure static partition key values only contain const exprs.
  if (partitionKeyValues_ != null) {
    for (PartitionKeyValue kv: partitionKeyValues_) {
      kv.analyze(analyzer);
    }
  }

  // Populate partitionKeyExprs from partitionKeyValues and selectExprTargetColumns.
  prepareExpressions(selectExprTargetColumns, selectListExprs, table_, analyzer);

  // Analyze plan hints at the end to prefer reporting other error messages first
  // (e.g., the PARTITION clause is not applicable to unpartitioned and HBase
  // tables).
  analyzePlanHints(analyzer);
}
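// Illustrative examples (not from the original source) of INSERT statements the
// checks above reject, assuming a hypothetical table t(c1 INT, c2 INT)
// partitioned by (p INT):
//
//   INSERT INTO t (c1, bogus) SELECT 1, 2;
//     -> "Unknown column 'bogus' in column permutation"
//   INSERT INTO t (c1, c1) SELECT 1, 2;
//     -> "Duplicate column 'c1' in column permutation"
//   INSERT INTO t PARTITION (c1=1) SELECT 1, 2;
//     -> "Column 'c1' is not a partition column"
//   INSERT INTO t PARTITION (p=1, p=2) SELECT 1, 2;
//     -> "Duplicate column 'p' in partition clause"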