예제 #1
0
  /**
   * Verifies that the column permutation, the select list and the static and dynamic partition
   * exprs together account for every column of the target table, no more and no fewer.
   *
   * <p>Two checks run in order. First, when the mentioned columns do not add up to the table's
   * column count, an unmentioned HBase row-key column (position 0) or any unmentioned clustering
   * column is reported. Second, the number of select-list exprs must equal the number of columns
   * that take their value from the select list.
   *
   * @param selectExprTargetColumns target columns that receive their value from the select list
   * @param mentionedColumnNames names of the columns mentioned in the query
   * @param numSelectListExprs number of expressions in the SELECT / VALUES clause
   * @param numStaticPartitionExprs number of statically-valued partition columns
   * @throws AnalysisException if a required column is unmentioned or the counts disagree
   */
  private void checkColumnCoverage(
      ArrayList<Column> selectExprTargetColumns,
      Set<String> mentionedColumnNames,
      int numSelectListExprs,
      int numStaticPartitionExprs)
      throws AnalysisException {
    final boolean targetIsHBase = table_ instanceof HBaseTable;
    // HBase tables are treated as having no clustering columns.
    final int clusteringColCount = targetIsHBase ? 0 : table_.getNumClusteringCols();
    final int numTargetCols = selectExprTargetColumns.size();

    // Duplicates and unknown names are assumed to have been rejected by the caller, so a
    // count mismatch here can only mean that some table columns were left out.
    if (numTargetCols + numStaticPartitionExprs != table_.getColumns().size()) {
      List<String> unmentionedPartitionCols = Lists.newArrayList();
      for (Column candidate : table_.getColumns()) {
        if (mentionedColumnNames.contains(candidate.getName())) {
          continue;
        }
        int position = candidate.getPosition();
        // The HBase row-key always sits at position 0 and may not silently default to NULL,
        // so leaving it out is an error.
        if (targetIsHBase && position == 0) {
          throw new AnalysisException(
              "Row-key column '"
                  + candidate.getName()
                  + "' must be explicitly mentioned in column permutation.");
        }
        // Only omitted partition (clustering) columns are an error; other omitted columns
        // are tolerated by this check.
        if (position < clusteringColCount) {
          unmentionedPartitionCols.add(candidate.getName());
        }
      }

      if (!unmentionedPartitionCols.isEmpty()) {
        String missing = Joiner.on(", ").join(unmentionedPartitionCols);
        throw new AnalysisException(
            "Not enough partition columns mentioned in query. Missing columns are: " + missing);
      }
    }

    // Every select-list-fed target column needs exactly one select-list expression.
    if (numTargetCols == numSelectListExprs) {
      return;
    }

    final boolean hasPartitionClause = partitionKeyValues_ != null;
    String comparator;
    if (numTargetCols < numSelectListExprs) {
      comparator = "fewer";
    } else {
      comparator = "more";
    }
    String partitionClause = hasPartitionClause ? "and PARTITION clause return" : "returns";

    String message;
    if (columnPermutation_ != null) {
      // An explicit permutation was given: the mismatch is between it and the select list.
      String partitionPrefix = hasPartitionClause ? "and PARTITION clause mention" : "mentions";
      message =
          String.format(
              "Column permutation %s %s columns (%s) than the SELECT / VALUES clause %s (%s)",
              partitionPrefix,
              comparator,
              numTargetCols,
              partitionClause,
              numSelectListExprs);
    } else {
      // No permutation: the select list is compared against the table's full column count.
      int totalColumnsMentioned = numSelectListExprs + numStaticPartitionExprs;
      message =
          String.format(
              "Target table '%s' has %s columns (%s) than the SELECT / VALUES clause %s (%s)",
              table_.getFullName(),
              comparator,
              table_.getColumns().size(),
              partitionClause,
              totalColumnsMentioned);
    }
    throw new AnalysisException(message);
  }
예제 #2
0
  /**
   * Completes the analysis of the statement's expressions in three steps:
   *
   * <ol>
   *   <li>Checks every expression for type compatibility with its target column, replacing it
   *       with the (possibly cast) compatible expression.
   *   <li>Fills partitionKeyExprs_ with the compatible partition expressions, ordered as the
   *       partition columns are declared in the target table.
   *   <li>Fills resultExprs_ with the compatible select-list expressions in Hive column order,
   *       substituting typed NULL literals for unmentioned non-clustering columns.
   * </ol>
   *
   * <p>When needsGeneratedQueryStatement_ is set, a SELECT statement over resultExprs_ is also
   * built and analyzed.
   *
   * @param selectExprTargetColumns target column for each select-list expression, by index
   * @param selectListExprs select-list expressions; updated in place with compatible exprs
   * @param tbl table whose clustering-column count is used (zero for HBase tables)
   * @param analyzer analyzer used for the partition key exprs and the generated statement
   * @throws AnalysisException If an expression is not compatible with its target column
   */
  private void prepareExpressions(
      List<Column> selectExprTargetColumns,
      List<Expr> selectListExprs,
      Table tbl,
      Analyzer analyzer)
      throws AnalysisException {
    // Partition key exprs and their column names, collected in encounter order. The two
    // lists are kept index-aligned.
    List<String> collectedKeyNames = new ArrayList<String>();
    List<Expr> collectedKeyExprs = new ArrayList<Expr>();

    final int clusteringColCount = (tbl instanceof HBaseTable) ? 0 : tbl.getNumClusteringCols();

    // Step 1a: select-list expressions, including those feeding dynamic partition columns.
    final int numSelectExprs = selectListExprs.size();
    for (int idx = 0; idx < numSelectExprs; ++idx) {
      Column target = selectExprTargetColumns.get(idx);
      Expr castExpr = checkTypeCompatibility(target, selectListExprs.get(idx));
      boolean isDynamicClusteringCol = target.getPosition() < clusteringColCount;
      if (isDynamicClusteringCol) {
        collectedKeyExprs.add(castExpr);
        collectedKeyNames.add(target.getName());
      }
      selectListExprs.set(idx, castExpr);
    }

    // Step 1b: static partition values. Dynamic entries were already handled above through
    // selectExprTargetColumns, so they are skipped here.
    if (partitionKeyValues_ != null) {
      for (PartitionKeyValue keyValue : partitionKeyValues_) {
        if (!keyValue.isStatic()) {
          continue;
        }
        // The column is expected to exist thanks to the earlier analysis checks.
        Column partitionCol = table_.getColumn(keyValue.getColName());
        Expr castExpr = checkTypeCompatibility(partitionCol, keyValue.getValue());
        collectedKeyExprs.add(castExpr);
        collectedKeyNames.add(keyValue.getColName());
      }
    }

    // Step 2: emit the partition key exprs in table declaration order, which is the order
    // needed to create the corresponding Hdfs folder structure correctly.
    for (Column declared : table_.getColumns()) {
      int keyIdx = collectedKeyNames.indexOf(declared.getName());
      if (keyIdx >= 0) {
        partitionKeyExprs_.add(collectedKeyExprs.get(keyIdx));
      }
    }

    Preconditions.checkState(partitionKeyExprs_.size() == clusteringColCount);
    // Make sure we have stats for partitionKeyExprs
    for (Expr keyExpr : partitionKeyExprs_) {
      keyExpr.analyze(analyzer);
    }

    // Step 3: undo the permutation so the result exprs follow Hive column order.
    for (Column hiveCol : table_.getColumnsInHiveOrder()) {
      String hiveColName = hiveCol.getName();
      int matchIdx = -1;
      for (int idx = 0; idx < selectListExprs.size() && matchIdx < 0; ++idx) {
        if (selectExprTargetColumns.get(idx).getName().equals(hiveColName)) {
          matchIdx = idx;
        }
      }

      if (matchIdx >= 0) {
        resultExprs_.add(selectListExprs.get(matchIdx));
      } else if (hiveCol.getPosition() >= clusteringColCount) {
        // An unmentioned non-clustering column: give it a NULL literal carrying the
        // column's own type, because Parquet cannot handle NULL_TYPE (IMPALA-617).
        resultExprs_.add(NullLiteral.create(hiveCol.getType()));
      }
      // Otherwise this is a clustering column with a static value; it contributes no
      // result expr.
    }

    // TODO: Check that HBase row-key columns are not NULL? See IMPALA-406
    if (!needsGeneratedQueryStatement_) {
      return;
    }

    // Build and analyze a query statement whose select list is exactly resultExprs_.
    List<SelectListItem> generatedItems = Lists.newArrayList();
    for (Expr resultExpr : resultExprs_) {
      generatedItems.add(new SelectListItem(resultExpr, null));
    }
    SelectList generatedSelectList = new SelectList(generatedItems);
    queryStmt_ = new SelectStmt(generatedSelectList, null, null, null, null, null, null);
    queryStmt_.analyze(analyzer);
  }
예제 #3
0
  /**
   * Analyzes this INSERT statement: the WITH clause and query statement, the target table, the
   * mapping from mentioned columns to select-list expressions, and finally the expressions and
   * plan hints.
   *
   * <p>While missing tables are being recorded by the analyzer, AnalysisExceptions from the
   * WITH clause and query statement are suppressed so that as many missing tables as possible
   * are collected; analysis is then aborted once the target table has been set.
   *
   * @param analyzer the analyzer for this statement
   * @throws AnalysisException if any part of the statement fails analysis
   */
  @Override
  public void analyze(Analyzer analyzer) throws AnalysisException {
    if (isExplain_) {
      analyzer.setIsExplain();
    }

    if (withClause_ != null) {
      try {
        withClause_.analyze(analyzer);
      } catch (AnalysisException e) {
        // Swallow the failure only if missing tables were recorded, so that the maximum
        // number of missing tables can be collected before analyze() fails.
        if (analyzer.getMissingTbls().isEmpty()) {
          throw e;
        }
      }
    }

    List<Expr> selectListExprs = null;
    if (needsGeneratedQueryStatement_) {
      // The query statement is generated later, so there are no select-list exprs yet.
      selectListExprs = Lists.newArrayList();
    } else {
      try {
        // A child analyzer scopes WITH-clause views properly and ignores irrelevant
        // ORDER BYs.
        Analyzer childAnalyzer = new Analyzer(analyzer);
        queryStmt_.analyze(childAnalyzer);

        if (analyzer.containsSubquery()) {
          // Rewrite the statement, then re-analyze a clone with a fresh child analyzer.
          Preconditions.checkState(queryStmt_ instanceof SelectStmt);
          StmtRewriter.rewriteStatement((SelectStmt) queryStmt_, childAnalyzer);
          queryStmt_ = queryStmt_.clone();
          childAnalyzer = new Analyzer(analyzer);
          queryStmt_.analyze(childAnalyzer);
        }

        selectListExprs = Expr.cloneList(queryStmt_.getBaseTblResultExprs());
      } catch (AnalysisException e) {
        if (analyzer.getMissingTbls().isEmpty()) {
          throw e;
        }
      }
    }

    // Resolves the target table and performs table-type specific analysis and auth
    // checking. Also checks if the target table is missing.
    setTargetTable(analyzer);

    // No further analysis is attempted once any table is known to be missing.
    if (!analyzer.getMissingTbls().isEmpty()) {
      throw new AnalysisException("Found missing tables. Aborting analysis.");
    }

    final boolean targetIsHBase = table_ instanceof HBaseTable;
    final int clusteringColCount = targetIsHBase ? 0 : table_.getNumClusteringCols();

    // The remainder matches output columns (from the column permutation, possibly implicit,
    // plus the PARTITION clause) against input expressions (from the select list plus any
    // static values in the PARTITION clause):
    //
    //  1. Collect the mentioned columns, rejecting unknown names and duplicates in both the
    //     permutation and the partition keys, and build the mapping from select-list expr
    //     index to target column.
    //  2. Check that all partition columns are mentioned and that the select list has
    //     exactly one expr per mentioned column that is not a static partition column.
    //  3. Let prepareExpressions check type compatibility and assign NULL literals to the
    //     unmentioned columns, which by then are known not to be partition columns.

    // A missing permutation clause is equivalent to listing every non-partition column in
    // table order.
    List<String> effectivePermutation = columnPermutation_;
    if (effectivePermutation == null) {
      effectivePermutation = Lists.newArrayList();
      ArrayList<Column> allColumns = table_.getColumns();
      for (int pos = clusteringColCount; pos < allColumns.size(); ++pos) {
        effectivePermutation.add(allColumns.get(pos).getName());
      }
    }

    // Select expr index -> target column. Ends up holding every mentioned column that is
    // not a static-valued partition column.
    ArrayList<Column> selectExprTargetColumns = Lists.newArrayList();

    // Every column name seen in the permutation or partition clause; used to spot
    // duplicates.
    Set<String> mentionedColumnNames = Sets.newHashSet();
    for (String permutedName : effectivePermutation) {
      Column permutedCol = table_.getColumn(permutedName);
      if (permutedCol == null) {
        throw new AnalysisException("Unknown column '" + permutedName + "' in column permutation");
      }
      boolean firstMention = mentionedColumnNames.add(permutedName);
      if (!firstMention) {
        throw new AnalysisException(
            "Duplicate column '" + permutedName + "' in column permutation");
      }
      selectExprTargetColumns.add(permutedCol);
    }

    int numStaticPartitionExprs = 0;
    if (partitionKeyValues_ != null) {
      for (PartitionKeyValue keyValue : partitionKeyValues_) {
        String keyColName = keyValue.getColName();
        Column keyCol = table_.getColumn(keyColName);
        if (keyCol == null) {
          throw new AnalysisException("Unknown column '" + keyColName + "' in partition clause");
        }
        if (keyCol.getPosition() >= clusteringColCount) {
          throw new AnalysisException("Column '" + keyColName + "' is not a partition column");
        }
        if (!mentionedColumnNames.add(keyColName)) {
          throw new AnalysisException("Duplicate column '" + keyColName + "' in partition clause");
        }

        if (keyValue.isDynamic()) {
          // Dynamic partition columns take their value from the select list.
          selectExprTargetColumns.add(keyCol);
        } else {
          numStaticPartitionExprs++;
        }
      }
    }

    // Checks that exactly all columns in the target table are assigned an expr.
    checkColumnCoverage(
        selectExprTargetColumns,
        mentionedColumnNames,
        selectListExprs.size(),
        numStaticPartitionExprs);

    // Make sure static partition key values only contain const exprs.
    if (partitionKeyValues_ != null) {
      for (PartitionKeyValue keyValue : partitionKeyValues_) {
        keyValue.analyze(analyzer);
      }
    }

    // Populate partitionKeyExprs from partitionKeyValues and selectExprTargetColumns
    prepareExpressions(selectExprTargetColumns, selectListExprs, table_, analyzer);

    // Plan hints come last so that other error messages are reported first (e.g., the
    // PARTITION clause is not applicable to unpartitioned and HBase tables).
    analyzePlanHints(analyzer);
  }