Java Table.getNumClusteringCols Examples

Programming Language: Java

Namespace/Package Name: com.cloudera.impala.catalog

Class/Type: Table

Method/Function: getNumClusteringCols

Examples at hotexamples.com: 4

Java Table.getNumClusteringCols - 4 examples found. These are the top-rated real-world Java examples of com.cloudera.impala.catalog.Table.getNumClusteringCols, extracted from open-source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

getColumn(4)

getNumClusteringCols(4)

getColumns(3)

getColumnsInHiveOrder(2)

getFullName(2)

getMetaStoreTable(2)

getDb(1)

getName(1)

Example #1

Show file

File: InsertStmt.java Project: AtScaleInc/Impala

  /**
   * Completes the analysis of the insert expressions in three steps:
   *
   * <ol>
   *   <li>Runs checkTypeCompatibility on every select-list expression and every static partition
   *       value against its target column.
   *   <li>Populates partitionKeyExprs_ with the resulting expressions, ordered like
   *       table_.getColumns().
   *   <li>Populates resultExprs_ with the select-list expressions in the order of
   *       table_.getColumnsInHiveOrder(). Unmentioned non-clustering columns are assigned NULL
   *       literals of the column's type.
   * </ol>
   *
   * <p>Each checked expression is replaced by whatever checkTypeCompatibility returns, which is
   * expected to carry any casts needed to match the type of the corresponding column.
   *
   * @param selectExprTargetColumns target column of the select-list expression at the same index
   * @param selectListExprs select-list expressions; every entry is replaced in place by its
   *     type-checked form
   * @param tbl table that supplies the number of clustering columns (0 is used for HBase tables)
   * @param analyzer analyzer used for the partition key exprs and the generated query statement
   * @throws AnalysisException If an expression is not compatible with its target column
   */
  private void prepareExpressions(
      List<Column> selectExprTargetColumns,
      List<Expr> selectListExprs,
      Table tbl,
      Analyzer analyzer)
      throws AnalysisException {
    // Partition key exprs and their column names, collected in encounter order. The two
    // lists are kept index-aligned.
    List<Expr> unorderedKeyExprs = Lists.newArrayList();
    List<String> unorderedKeyNames = Lists.newArrayList();

    // HBase tables are treated as having no clustering columns.
    int numClusteringCols = 0;
    if (!(tbl instanceof HBaseTable)) {
      numClusteringCols = tbl.getNumClusteringCols();
    }

    // Type-check every select-list expr; those that target a clustering column are the
    // dynamic partition keys.
    for (int exprIdx = 0; exprIdx < selectListExprs.size(); ++exprIdx) {
      Column target = selectExprTargetColumns.get(exprIdx);
      Expr checkedExpr = checkTypeCompatibility(target, selectListExprs.get(exprIdx));
      boolean isDynamicPartitionKey = target.getPosition() < numClusteringCols;
      if (isDynamicPartitionKey) {
        unorderedKeyExprs.add(checkedExpr);
        unorderedKeyNames.add(target.getName());
      }
      selectListExprs.set(exprIdx, checkedExpr);
    }

    // Type-check the static partition values. Dynamic entries of partitionKeyValues_ are
    // already covered by selectExprTargetColumns and are skipped here.
    if (partitionKeyValues_ != null) {
      for (PartitionKeyValue keyValue : partitionKeyValues_) {
        if (!keyValue.isStatic()) {
          continue;
        }
        // The column is guaranteed to exist after the earlier analysis checks.
        Column partitionColumn = table_.getColumn(keyValue.getColName());
        Expr checkedValue = checkTypeCompatibility(partitionColumn, keyValue.getValue());
        unorderedKeyExprs.add(checkedValue);
        unorderedKeyNames.add(keyValue.getColName());
      }
    }

    // Emit the partition key exprs in table-declaration order; that order is needed to
    // create the corresponding Hdfs folder structure correctly.
    for (Column declaredColumn : table_.getColumns()) {
      int keyIdx = 0;
      while (keyIdx < unorderedKeyNames.size()
          && !declaredColumn.getName().equals(unorderedKeyNames.get(keyIdx))) {
        ++keyIdx;
      }
      if (keyIdx < unorderedKeyNames.size()) {
        partitionKeyExprs_.add(unorderedKeyExprs.get(keyIdx));
      }
    }

    Preconditions.checkState(partitionKeyExprs_.size() == numClusteringCols);
    // Analyze the ordered partition key exprs (needed so that stats are available for them).
    for (Expr partitionKeyExpr : partitionKeyExprs_) {
      partitionKeyExpr.analyze(analyzer);
    }

    // Finally, 'undo' the permutation: walk the table columns in Hive order and pick the
    // select-list expr that targets each one, filling the gaps with NULLs.
    for (Column hiveColumn : table_.getColumnsInHiveOrder()) {
      int matchIdx = -1;
      for (int exprIdx = 0; exprIdx < selectListExprs.size() && matchIdx < 0; ++exprIdx) {
        if (selectExprTargetColumns.get(exprIdx).getName().equals(hiveColumn.getName())) {
          matchIdx = exprIdx;
        }
      }

      if (matchIdx >= 0) {
        resultExprs_.add(selectListExprs.get(matchIdx));
      } else if (hiveColumn.getPosition() >= numClusteringCols) {
        // Unmentioned non-clustering columns get NULL literals with the appropriate
        // target type because Parquet cannot handle NULL_TYPE (IMPALA-617).
        resultExprs_.add(NullLiteral.create(hiveColumn.getType()));
      }
      // Otherwise the column is a clustering column with a static value: nothing to add.
    }

    // TODO: Check that HBase row-key columns are not NULL? See IMPALA-406
    if (needsGeneratedQueryStatement_) {
      // Build a query statement whose select list is exactly resultExprs_.
      List<SelectListItem> generatedItems = Lists.newArrayList();
      for (Expr resultExpr : resultExprs_) {
        generatedItems.add(new SelectListItem(resultExpr, null));
      }
      queryStmt_ =
          new SelectStmt(new SelectList(generatedItems), null, null, null, null, null, null);
      queryStmt_.analyze(analyzer);
    }
  }

Example #2

Show file

File: InsertStmt.java Project: AtScaleInc/Impala

  /**
   * Sets table_ based on targetTableName_ (or the other way round if table_ was preset) and
   * performs table-type specific analysis:
   *
   * <ul>
   *   <li>Inserting into a view is rejected.
   *   <li>A PARTITION clause is invalid for unpartitioned tables and HBase tables.
   *   <li>The INSERT privilege is requested, and Hdfs tables must have write access.
   *   <li>Hdfs tables with a BOOLEAN partition column are rejected.
   *   <li>OVERWRITE is invalid for HBase tables.
   * </ul>
   *
   * <p>Adds table_ to the analyzer's descriptor table if analysis succeeds.
   *
   * @param analyzer analyzer used to load the table, register privilege requests and record the
   *     referenced table
   * @throws AnalysisException if any of the checks above fails
   */
  private void setTargetTable(Analyzer analyzer) throws AnalysisException {
    if (table_ != null) {
      // The caller preset the table, which may not exist in the Catalog yet (e.g. CREATE
      // TABLE AS SELECT analyzes the INSERT before the table has been created). Derive
      // the name from the table and register the INSERT privilege request explicitly.
      targetTableName_ = new TableName(table_.getDb().getName(), table_.getName());
      analyzer.registerPrivReq(
          new PrivilegeRequestBuilder()
              .onTable(table_.getDb().getName(), table_.getName())
              .allOf(Privilege.INSERT)
              .toRequest());
    } else {
      // Load the table from the Catalog, qualifying the name with the default db first.
      if (!targetTableName_.isFullyQualified()) {
        targetTableName_ = new TableName(analyzer.getDefaultDb(), targetTableName_.getTbl());
      }
      table_ = analyzer.getTable(targetTableName_, Privilege.INSERT);
    }

    // We do not support inserting into views.
    if (table_ instanceof View) {
      throw new AnalysisException(
          String.format("Impala does not support inserting into views: %s", table_.getFullName()));
    }

    // HBase tables are treated as having no clustering columns.
    boolean targetIsHBase = table_ instanceof HBaseTable;
    int numClusteringCols = 0;
    if (!targetIsHBase) {
      numClusteringCols = table_.getNumClusteringCols();
    }

    // A PARTITION clause requires a partitioned, non-HBase target.
    if (partitionKeyValues_ != null && numClusteringCols == 0) {
      String reason =
          targetIsHBase
              ? "PARTITION clause is not valid for INSERT into HBase tables. '"
                  + targetTableName_
                  + "' is an HBase table"
              : "PARTITION clause is only valid for INSERT into partitioned table. '"
                  + targetTableName_
                  + "' is not partitioned";
      throw new AnalysisException(reason);
    }

    if (table_ instanceof HdfsTable) {
      HdfsTable hdfsTarget = (HdfsTable) table_;
      if (!hdfsTarget.hasWriteAccess()) {
        throw new AnalysisException(
            String.format(
                "Unable to INSERT into target table (%s) because Impala does not have "
                    + "WRITE access to at least one HDFS path: %s",
                targetTableName_, hdfsTarget.getFirstLocationWithoutWriteAccess()));
      }

      // The clustering (partition) columns are the first numClusteringCols columns.
      for (int pos = 0; pos < numClusteringCols; ++pos) {
        Column partitionCol = hdfsTarget.getColumns().get(pos);
        if (partitionCol.getType() != Type.BOOLEAN) {
          continue;
        }
        // Hive has a number of issues handling BOOLEAN partition columns (see HIVE-6590).
        // Instead of working around the Hive bugs, INSERT is disabled for BOOLEAN
        // partitions in Impala. Once the Hive JIRA is resolved, we can remove this
        // analysis check.
        throw new AnalysisException(
            String.format(
                "INSERT into table with BOOLEAN partition column (%s) is not supported: %s",
                partitionCol.getName(), targetTableName_));
      }
    }

    if (targetIsHBase && overwrite_) {
      throw new AnalysisException("HBase doesn't have a way to perform INSERT OVERWRITE");
    }

    // Add target table to descriptor table.
    analyzer.getDescTbl().addReferencedTable(table_);
  }

Example #3

Show file

File: InsertStmt.java Project: AtScaleInc/Impala

  /**
   * Checks that the column permutation + select list + static partition exprs + dynamic partition
   * exprs collectively cover exactly all columns in the target table (not more or fewer).
   *
   * @param selectExprTargetColumns columns that receive their value from a select-list expression
   * @param mentionedColumnNames names of all columns mentioned in the permutation or the partition
   *     clause
   * @param numSelectListExprs number of expressions in the select list
   * @param numStaticPartitionExprs number of statically-valued partition columns
   * @throws AnalysisException if an HBase row-key column or a partition column is unmentioned, or
   *     if the number of select-list expressions does not match the number of target columns
   */
  private void checkColumnCoverage(
      ArrayList<Column> selectExprTargetColumns,
      Set<String> mentionedColumnNames,
      int numSelectListExprs,
      int numStaticPartitionExprs)
      throws AnalysisException {
    // HBase tables are treated as having no clustering columns.
    boolean targetIsHBase = table_ instanceof HBaseTable;
    int numClusteringCols = 0;
    if (!targetIsHBase) {
      numClusteringCols = table_.getNumClusteringCols();
    }

    // Check that all columns are mentioned by the permutation and partition clauses.
    int numCoveredColumns = selectExprTargetColumns.size() + numStaticPartitionExprs;
    if (numCoveredColumns != table_.getColumns().size()) {
      // We've already ruled out too many columns in the permutation and partition clauses
      // by checking that there are no duplicates and that every column mentioned actually
      // exists. So some columns aren't mentioned in the query. That is an error only if
      // the unmentioned columns include partition columns (or the HBase row key).
      List<String> unmentionedPartitionCols = Lists.newArrayList();
      for (Column candidate : table_.getColumns()) {
        if (mentionedColumnNames.contains(candidate.getName())) {
          continue;
        }
        // HBase tables have a single row-key column which is always in position 0. It
        // must be mentioned, since it is invalid to set it to NULL (which would
        // otherwise happen by default).
        if (targetIsHBase && candidate.getPosition() == 0) {
          throw new AnalysisException(
              "Row-key column '"
                  + candidate.getName()
                  + "' must be explicitly mentioned in column permutation.");
        }
        if (candidate.getPosition() < numClusteringCols) {
          unmentionedPartitionCols.add(candidate.getName());
        }
      }

      if (!unmentionedPartitionCols.isEmpty()) {
        throw new AnalysisException(
            "Not enough partition columns mentioned in query. Missing columns are: "
                + Joiner.on(", ").join(unmentionedPartitionCols));
      }
    }

    // Expect the select list to have one entry for every target column.
    if (selectExprTargetColumns.size() == numSelectListExprs) {
      return;
    }

    String comparator;
    if (selectExprTargetColumns.size() < numSelectListExprs) {
      comparator = "fewer";
    } else {
      comparator = "more";
    }

    String partitionClause;
    if (partitionKeyValues_ == null) {
      partitionClause = "returns";
    } else {
      partitionClause = "and PARTITION clause return";
    }

    // With a column permutation, the mismatch is between the select list and the
    // permutation itself. Without one, the select list has the wrong number of
    // expressions compared to the number of columns in the table.
    if (columnPermutation_ != null) {
      String partitionPrefix;
      if (partitionKeyValues_ == null) {
        partitionPrefix = "mentions";
      } else {
        partitionPrefix = "and PARTITION clause mention";
      }
      throw new AnalysisException(
          String.format(
              "Column permutation %s %s columns (%s) than the SELECT / VALUES clause %s (%s)",
              partitionPrefix,
              comparator,
              selectExprTargetColumns.size(),
              partitionClause,
              numSelectListExprs));
    }

    int totalColumnsMentioned = numSelectListExprs + numStaticPartitionExprs;
    throw new AnalysisException(
        String.format(
            "Target table '%s' has %s columns (%s) than the SELECT / VALUES clause %s (%s)",
            table_.getFullName(),
            comparator,
            table_.getColumns().size(),
            partitionClause,
            totalColumnsMentioned));
  }

Example #4

Show file

File: InsertStmt.java Project: AtScaleInc/Impala

  /**
   * Analyzes this INSERT statement: analyzes the WITH clause and the query statement (unless a
   * query statement is to be generated), resolves and checks the target table, validates the
   * column permutation and PARTITION clause against the target table's columns, and finally
   * prepares the result and partition-key expressions and the plan hints.
   *
   * @param analyzer the analyzer for this statement; a child analyzer is created from it for the
   *     query statement
   * @throws AnalysisException if any analysis step fails, including when missing tables were
   *     recorded on the analyzer
   */
  @Override
  public void analyze(Analyzer analyzer) throws AnalysisException {
    if (isExplain_) analyzer.setIsExplain();
    try {
      if (withClause_ != null) withClause_.analyze(analyzer);
    } catch (AnalysisException e) {
      // Ignore AnalysisExceptions if tables are missing to ensure the maximum number
      // of missing tables can be collected before failing analyze().
      if (analyzer.getMissingTbls().isEmpty()) throw e;
    }

    // Stays null only if query analysis failed while missing tables were recorded; that
    // exception is swallowed below so that more missing tables can be collected first.
    List<Expr> selectListExprs = null;
    if (!needsGeneratedQueryStatement_) {
      try {
        // Use a child analyzer for the query stmt to properly scope WITH-clause
        // views and to ignore irrelevant ORDER BYs.
        Analyzer queryStmtAnalyzer = new Analyzer(analyzer);
        queryStmt_.analyze(queryStmtAnalyzer);

        if (analyzer.containsSubquery()) {
          // Rewrite the statement, then re-analyze a clone of the rewritten statement
          // with a fresh child analyzer.
          Preconditions.checkState(queryStmt_ instanceof SelectStmt);
          StmtRewriter.rewriteStatement((SelectStmt) queryStmt_, queryStmtAnalyzer);
          queryStmt_ = queryStmt_.clone();
          queryStmtAnalyzer = new Analyzer(analyzer);
          queryStmt_.analyze(queryStmtAnalyzer);
        }

        selectListExprs = Expr.cloneList(queryStmt_.getBaseTblResultExprs());
      } catch (AnalysisException e) {
        // Same policy as for the WITH clause: only rethrow if no tables are missing.
        if (analyzer.getMissingTbls().isEmpty()) throw e;
      }
    } else {
      // No user-supplied query statement: start from an empty select list.
      selectListExprs = Lists.newArrayList();
    }

    // Set target table and perform table-type specific analysis and auth checking.
    // Also checks if the target table is missing.
    setTargetTable(analyzer);

    // Abort analysis if there are any missing tables beyond this point.
    if (!analyzer.getMissingTbls().isEmpty()) {
      throw new AnalysisException("Found missing tables. Aborting analysis.");
    }

    // HBase tables are treated as having no clustering columns.
    boolean isHBaseTable = (table_ instanceof HBaseTable);
    int numClusteringCols = isHBaseTable ? 0 : table_.getNumClusteringCols();

    // Analysis of the INSERT statement from this point is basically the act of matching
    // the set of output columns (which come from a column permutation, perhaps
    // implicitly, and the PARTITION clause) to the set of input columns (which come from
    // the select-list and any statically-valued columns in the PARTITION clause).
    //
    // First, we compute the set of mentioned columns, and reject statements that refer to
    // non-existent columns, or duplicates (we must check both the column permutation, and
    // the set of partition keys). Next, we check that all partition columns are
    // mentioned. During this process we build the map from select-list expr index to
    // column in the targeted table.
    //
    // Then we check that the select-list contains exactly the right number of expressions
    // for all mentioned columns which are not statically-valued partition columns (which
    // get their expressions from partitionKeyValues).
    //
    // Finally, prepareExpressions analyzes the expressions themselves, and confirms that
    // they are type-compatible with the target columns. Where columns are not mentioned
    // (and by this point, we know that missing columns are not partition columns),
    // prepareExpressions assigns them NULL literal expressions.

    // A null permutation clause is the same as listing all non-partition columns in
    // order.
    List<String> analysisColumnPermutation = columnPermutation_;
    if (analysisColumnPermutation == null) {
      analysisColumnPermutation = Lists.newArrayList();
      ArrayList<Column> tableColumns = table_.getColumns();
      // Start after the clustering columns, which occupy the first positions.
      for (int i = numClusteringCols; i < tableColumns.size(); ++i) {
        analysisColumnPermutation.add(tableColumns.get(i).getName());
      }
    }

    // selectExprTargetColumns maps from select expression index to a column in the target
    // table. It will eventually include all mentioned columns that aren't static-valued
    // partition columns.
    ArrayList<Column> selectExprTargetColumns = Lists.newArrayList();

    // Tracks the name of all columns encountered in either the permutation clause or the
    // partition clause to detect duplicates.
    Set<String> mentionedColumnNames = Sets.newHashSet();
    for (String columnName : analysisColumnPermutation) {
      Column column = table_.getColumn(columnName);
      if (column == null) {
        throw new AnalysisException("Unknown column '" + columnName + "' in column permutation");
      }

      if (!mentionedColumnNames.add(columnName)) {
        throw new AnalysisException("Duplicate column '" + columnName + "' in column permutation");
      }
      selectExprTargetColumns.add(column);
    }

    // Validate the PARTITION clause: every key must name an existing, not yet mentioned
    // clustering column. Dynamic keys take their value from the select list and are
    // therefore appended to selectExprTargetColumns; static keys are only counted.
    int numStaticPartitionExprs = 0;
    if (partitionKeyValues_ != null) {
      for (PartitionKeyValue pkv : partitionKeyValues_) {
        Column column = table_.getColumn(pkv.getColName());
        if (column == null) {
          throw new AnalysisException(
              "Unknown column '" + pkv.getColName() + "' in partition clause");
        }

        if (column.getPosition() >= numClusteringCols) {
          throw new AnalysisException(
              "Column '" + pkv.getColName() + "' is not a partition column");
        }

        if (!mentionedColumnNames.add(pkv.getColName())) {
          throw new AnalysisException(
              "Duplicate column '" + pkv.getColName() + "' in partition clause");
        }
        if (!pkv.isDynamic()) {
          numStaticPartitionExprs++;
        } else {
          selectExprTargetColumns.add(column);
        }
      }
    }

    // Checks that exactly all columns in the target table are assigned an expr.
    checkColumnCoverage(
        selectExprTargetColumns,
        mentionedColumnNames,
        selectListExprs.size(),
        numStaticPartitionExprs);

    // Make sure static partition key values only contain const exprs.
    if (partitionKeyValues_ != null) {
      for (PartitionKeyValue kv : partitionKeyValues_) {
        kv.analyze(analyzer);
      }
    }

    // Populate partitionKeyExprs from partitionKeyValues and selectExprTargetColumns
    prepareExpressions(selectExprTargetColumns, selectListExprs, table_, analyzer);
    // Analyze plan hints at the end to prefer reporting other error messages first
    // (e.g., the PARTITION clause is not applicable to unpartitioned and HBase tables).
    analyzePlanHints(analyzer);
  }