Esempio n. 1
0
  /**
   * Validates the column change described by {@code colName} and {@code newColDef} against
   * the target table, after running the superclass analysis.
   *
   * <p>The checks are performed in this order:
   *
   * <ol>
   *   <li>Neither the column being modified nor the new column name may match a partition
   *       key of the metastore table.
   *   <li>The column being modified must exist in the target table.
   *   <li>The new column definition must pass its own {@code analyze()}.
   *   <li>If the column is being renamed, the new name must not match an existing column.
   * </ol>
   *
   * <p>Column names are compared case-insensitively using {@link String#equalsIgnoreCase},
   * which does not depend on the JVM's default locale. Lower-casing both sides with the
   * default locale can make names such as "ID" and "id" compare as different (for example
   * under a Turkish locale), which would let a conflicting name slip through these checks.
   *
   * @param analyzer the analyzer passed through to the superclass analysis
   * @throws AnalysisException if any of the checks above fails
   * @throws AuthorizationException declared for the superclass analysis
   */
  @Override
  public void analyze(Analyzer analyzer) throws AnalysisException, AuthorizationException {
    super.analyze(analyzer);
    Table t = getTargetTable();
    String tableName = getDb() + "." + getTbl();

    // Verify there are no conflicts with partition columns.
    for (FieldSchema fs : t.getMetaStoreTable().getPartitionKeys()) {
      String partitionColName = fs.getName();
      if (partitionColName.equalsIgnoreCase(colName)) {
        throw new AnalysisException("Cannot modify partition column: " + colName);
      }
      if (partitionColName.equalsIgnoreCase(newColDef.getColName())) {
        throw new AnalysisException(
            "Column name conflicts with existing partition column: " + newColDef.getColName());
      }
    }

    // Verify the column being modified exists in the table.
    if (t.getColumn(colName) == null) {
      throw new AnalysisException(
          String.format("Column '%s' does not exist in table: %s", colName, tableName));
    }

    // Check that the new column def's name is valid.
    newColDef.analyze();

    // Verify that if the column name is being changed, the new name doesn't conflict
    // with an existing column. A pure case change of the same name is not a rename.
    boolean isRename = !colName.equalsIgnoreCase(newColDef.getColName());
    if (isRename && t.getColumn(newColDef.getColName()) != null) {
      throw new AnalysisException("Column already exists: " + newColDef.getColName());
    }
  }
  /**
   * Produces the result of a DESCRIBE FORMATTED {@code <table>} command as a
   * TDescribeTableResult. The formatted output is meant to be exactly what Hive (via
   * HiveServer2) prints, for compatibility reasons, so the text itself is generated by
   * Hive's MetaDataFormatUtils class and then cut into rows and column values here.
   */
  private static TDescribeTableResult describeTableFormatted(Table table) {
    TDescribeTableResult result = new TDescribeTableResult();
    result.results = Lists.newArrayList();

    // Work on a deep copy of the metastore table and overwrite every column comment with
    // the one from 'table', so that DESCRIBE FORMATTED matches Hive's output. This is to
    // distinguish between empty comments and no comments (value is null).
    org.apache.hadoop.hive.metastore.api.Table msTableCopy =
        table.getMetaStoreTable().deepCopy();
    for (FieldSchema dataCol : msTableCopy.getSd().getCols()) {
      dataCol.setComment(table.getColumn(dataCol.getName()).getComment());
    }
    for (FieldSchema partitionKey : msTableCopy.getPartitionKeys()) {
      partitionKey.setComment(table.getColumn(partitionKey.getName()).getComment());
    }

    // The ql.metadata.Table is built with the empty c'tor followed by setTTable() to
    // avoid initializing any of the SerDe classes in the metastore table Thrift struct.
    org.apache.hadoop.hive.ql.metadata.Table hiveTable =
        new org.apache.hadoop.hive.ql.metadata.Table();
    hiveTable.setTTable(msTableCopy);

    // All columns come first (partition columns included), followed by the extended
    // table metadata information.
    String formattedText =
        new StringBuilder()
            .append(
                MetaDataFormatUtils.getAllColumnsInformation(
                    msTableCopy.getSd().getCols(), msTableCopy.getPartitionKeys()))
            .append(MetaDataFormatUtils.getTableInformation(hiveTable))
            .toString();

    for (String outputLine : formattedText.split("\n")) {
      // To match Hive's HiveServer2 output, every line is broken into column values at
      // the field delimiter.
      String[] fields = outputLine.split(MetaDataFormatUtils.FIELD_DELIM);
      TResultRow row = new TResultRow();
      for (int colIdx = 0; colIdx < NUM_DESC_FORMATTED_RESULT_COLS; ++colIdx) {
        TColumnValue cell = new TColumnValue();
        // Every row gets the same number of cells; a cell stays null when the line has
        // fewer fields than result columns.
        cell.setString_val(null);
        if (colIdx < fields.length) {
          cell.setString_val(fields[colIdx]);
        }
        row.addToColVals(cell);
      }
      result.results.add(row);
    }
    return result;
  }
Esempio n. 3
0
  /**
   * Carries out the last three steps of the analysis:
   *
   * <p>1. Checks every expression for type compatibility with its target column.
   *
   * <p>2. Fills partitionKeyExprs_ with type-compatible expressions for all partition columns,
   * in Hive partition-column order.
   *
   * <p>3. Fills resultExprs_ with type-compatible expressions for all expressions in the
   * select-list, in Hive column order. Columns that were not mentioned receive NULL literal
   * expressions.
   *
   * <p>Casts are added to expressions where needed to make them compatible with the type of
   * the corresponding column.
   *
   * @throws AnalysisException If an expression is not compatible with its target column
   */
  private void prepareExpressions(
      List<Column> selectExprTargetColumns,
      List<Expr> selectListExprs,
      Table tbl,
      Analyzer analyzer)
      throws AnalysisException {
    // Partition key exprs and their column names, collected in arbitrary order first.
    List<Expr> unorderedKeyExprs = Lists.newArrayList();
    List<String> unorderedKeyNames = Lists.newArrayList();

    final int numClusteringCols;
    if (tbl instanceof HBaseTable) {
      numClusteringCols = 0;
    } else {
      numClusteringCols = tbl.getNumClusteringCols();
    }

    // Type-check each select-list expr against its target column, remembering those
    // that feed a dynamic partition (clustering) column.
    for (int exprIdx = 0; exprIdx < selectListExprs.size(); ++exprIdx) {
      Column target = selectExprTargetColumns.get(exprIdx);
      Expr castExpr = checkTypeCompatibility(target, selectListExprs.get(exprIdx));
      boolean isDynamicClusteringCol = target.getPosition() < numClusteringCols;
      if (isDynamicClusteringCol) {
        unorderedKeyExprs.add(castExpr);
        unorderedKeyNames.add(target.getName());
      }
      selectListExprs.set(exprIdx, castExpr);
    }

    // Type-check the static partition values. Dynamic entries of partitionKeyValues_ are
    // already part of selectExprTargetColumns, so they are skipped here.
    if (partitionKeyValues_ != null) {
      for (PartitionKeyValue keyValue : partitionKeyValues_) {
        if (!keyValue.isStatic()) {
          continue;
        }
        // The column is guaranteed to exist after the earlier analysis checks.
        Column partitionCol = table_.getColumn(keyValue.getColName());
        unorderedKeyExprs.add(checkTypeCompatibility(partitionCol, keyValue.getValue()));
        unorderedKeyNames.add(keyValue.getColName());
      }
    }

    // Bring the partition key exprs into the order in which the target table declares
    // its columns. That order is required to create the corresponding Hdfs folder
    // structure correctly.
    final int numCollectedKeys = unorderedKeyNames.size();
    for (Column declaredCol : table_.getColumns()) {
      int keyIdx = 0;
      while (keyIdx < numCollectedKeys
          && !declaredCol.getName().equals(unorderedKeyNames.get(keyIdx))) {
        ++keyIdx;
      }
      if (keyIdx < numCollectedKeys) {
        partitionKeyExprs_.add(unorderedKeyExprs.get(keyIdx));
      }
    }

    Preconditions.checkState(partitionKeyExprs_.size() == numClusteringCols);
    // Make sure we have stats for partitionKeyExprs
    for (Expr partitionKeyExpr : partitionKeyExprs_) {
      partitionKeyExpr.analyze(analyzer);
    }

    // Finally, 'undo' the permutation: emit the select-list exprs in Hive column order
    // and fill in NULL expressions for all missing columns.
    for (Column hiveCol : table_.getColumnsInHiveOrder()) {
      int matchIdx = 0;
      while (matchIdx < selectListExprs.size()
          && !selectExprTargetColumns.get(matchIdx).getName().equals(hiveCol.getName())) {
        ++matchIdx;
      }
      if (matchIdx < selectListExprs.size()) {
        resultExprs_.add(selectListExprs.get(matchIdx));
      } else if (hiveCol.getPosition() >= numClusteringCols) {
        // Without a match the column is either a clustering column with a static value
        // (nothing to add) or an unmentioned non-clustering column. The latter gets a
        // NULL literal of the appropriate target type because Parquet cannot handle
        // NULL_TYPE (IMPALA-617).
        resultExprs_.add(NullLiteral.create(hiveCol.getType()));
      }
    }

    // TODO: Check that HBase row-key columns are not NULL? See IMPALA-406
    if (needsGeneratedQueryStatement_) {
      // Build a query statement that returns NULL for every column
      List<SelectListItem> generatedItems = Lists.newArrayList();
      for (Expr resultExpr : resultExprs_) {
        generatedItems.add(new SelectListItem(resultExpr, null));
      }
      queryStmt_ =
          new SelectStmt(new SelectList(generatedItems), null, null, null, null, null, null);
      queryStmt_.analyze(analyzer);
    }
  }
Esempio n. 4
0
  /**
   * Analyzes this INSERT statement: the WITH clause and query statement (if any), the target
   * table, the column permutation and PARTITION clause, and finally the expressions and plan
   * hints.
   *
   * @throws AnalysisException if analysis fails, including when tables are found missing
   */
  @Override
  public void analyze(Analyzer analyzer) throws AnalysisException {
    if (isExplain_) {
      analyzer.setIsExplain();
    }
    try {
      if (withClause_ != null) {
        withClause_.analyze(analyzer);
      }
    } catch (AnalysisException e) {
      // An AnalysisException is only swallowed when tables are missing, so that as many
      // missing tables as possible are collected before analyze() fails.
      if (analyzer.getMissingTbls().isEmpty()) {
        throw e;
      }
    }

    List<Expr> selectListExprs = null;
    if (needsGeneratedQueryStatement_) {
      selectListExprs = Lists.newArrayList();
    } else {
      try {
        // The query stmt gets a child analyzer so that WITH-clause views are scoped
        // properly and irrelevant ORDER BYs are ignored.
        Analyzer queryStmtAnalyzer = new Analyzer(analyzer);
        queryStmt_.analyze(queryStmtAnalyzer);

        if (analyzer.containsSubquery()) {
          Preconditions.checkState(queryStmt_ instanceof SelectStmt);
          StmtRewriter.rewriteStatement((SelectStmt) queryStmt_, queryStmtAnalyzer);
          // Re-analyze a clone of the rewritten statement with a fresh child analyzer.
          queryStmt_ = queryStmt_.clone();
          queryStmt_.analyze(new Analyzer(analyzer));
        }

        selectListExprs = Expr.cloneList(queryStmt_.getBaseTblResultExprs());
      } catch (AnalysisException e) {
        // Same policy as for the WITH clause: keep going while tables are missing.
        if (analyzer.getMissingTbls().isEmpty()) {
          throw e;
        }
      }
    }

    // Set target table and perform table-type specific analysis and auth checking.
    // Also checks if the target table is missing.
    setTargetTable(analyzer);

    // Abort analysis if there are any missing tables beyond this point.
    if (!analyzer.getMissingTbls().isEmpty()) {
      throw new AnalysisException("Found missing tables. Aborting analysis.");
    }

    final int numClusteringCols =
        (table_ instanceof HBaseTable) ? 0 : table_.getNumClusteringCols();

    // From here on, analyzing the INSERT statement amounts to matching the output columns
    // (given by a column permutation, possibly implicit, plus the PARTITION clause) with
    // the input columns (given by the select-list plus any statically-valued columns in
    // the PARTITION clause).
    //
    // Step one computes the set of mentioned columns and rejects statements that name
    // non-existent or duplicate columns (both the column permutation and the set of
    // partition keys are checked). After that we check that all partition columns are
    // mentioned. Along the way the map from select-list expr index to target table
    // column is built.
    //
    // Step two checks that the select-list has exactly the right number of expressions
    // for all mentioned columns that are not statically-valued partition columns (those
    // take their expressions from partitionKeyValues).
    //
    // Step three is prepareExpressions, which analyzes the expressions themselves and
    // confirms they are type-compatible with the target columns. Columns that are not
    // mentioned (known by then not to be partition columns) are assigned NULL literal
    // expressions there.

    // A null permutation clause is the same as listing all non-partition columns in
    // order.
    List<String> permutation = columnPermutation_;
    if (permutation == null) {
      permutation = Lists.newArrayList();
      ArrayList<Column> allTableColumns = table_.getColumns();
      for (int colIdx = numClusteringCols; colIdx < allTableColumns.size(); ++colIdx) {
        permutation.add(allTableColumns.get(colIdx).getName());
      }
    }

    // selectExprTargetColumns maps from select expression index to a column in the target
    // table. It will eventually include all mentioned columns that aren't static-valued
    // partition columns.
    ArrayList<Column> selectExprTargetColumns = Lists.newArrayList();

    // Names of all columns seen so far in the permutation clause or the partition
    // clause; used to detect duplicates.
    Set<String> mentionedColumnNames = Sets.newHashSet();
    for (String permutedName : permutation) {
      Column permutedCol = table_.getColumn(permutedName);
      if (permutedCol == null) {
        throw new AnalysisException(
            String.format("Unknown column '%s' in column permutation", permutedName));
      }
      if (!mentionedColumnNames.add(permutedName)) {
        throw new AnalysisException(
            String.format("Duplicate column '%s' in column permutation", permutedName));
      }
      selectExprTargetColumns.add(permutedCol);
    }

    int numStaticPartitionExprs = 0;
    if (partitionKeyValues_ != null) {
      for (PartitionKeyValue keyValue : partitionKeyValues_) {
        Column partitionCol = table_.getColumn(keyValue.getColName());
        if (partitionCol == null) {
          throw new AnalysisException(
              String.format("Unknown column '%s' in partition clause", keyValue.getColName()));
        }
        if (partitionCol.getPosition() >= numClusteringCols) {
          throw new AnalysisException(
              String.format("Column '%s' is not a partition column", keyValue.getColName()));
        }
        if (!mentionedColumnNames.add(keyValue.getColName())) {
          throw new AnalysisException(
              String.format("Duplicate column '%s' in partition clause", keyValue.getColName()));
        }
        // Dynamic partition columns take their value from the select-list; static ones
        // are only counted.
        if (keyValue.isDynamic()) {
          selectExprTargetColumns.add(partitionCol);
        } else {
          numStaticPartitionExprs++;
        }
      }
    }

    // Checks that exactly all columns in the target table are assigned an expr.
    checkColumnCoverage(
        selectExprTargetColumns,
        mentionedColumnNames,
        selectListExprs.size(),
        numStaticPartitionExprs);

    // Make sure static partition key values only contain const exprs.
    if (partitionKeyValues_ != null) {
      for (PartitionKeyValue keyValue : partitionKeyValues_) {
        keyValue.analyze(analyzer);
      }
    }

    // Populate partitionKeyExprs from partitionKeyValues and selectExprTargetColumns
    prepareExpressions(selectExprTargetColumns, selectListExprs, table_, analyzer);
    // Plan hints are analyzed last so that other error messages are reported first
    // (e.g., the PARTITION clause is not applicable to unpartitioned and HBase tables).
    analyzePlanHints(analyzer);
  }