static Map<String, String> createPtnKeyValueMap(Table table, Partition ptn) throws IOException {
    List<String> values = ptn.getValues();
    if (values.size() != table.getPartitionKeys().size()) {
      throw new IOException(
          "Partition values in partition inconsistent with table definition, table "
              + table.getTableName()
              + " has "
              + table.getPartitionKeys().size()
              + " partition keys, partition has "
              + values.size()
              + "partition values");
    }

    Map<String, String> ptnKeyValues = new HashMap<String, String>();

    int i = 0;
    for (FieldSchema schema : table.getPartitionKeys()) {
      // CONCERN : the way this mapping goes, the order *needs* to be
      // preserved for table.getPartitionKeys() and ptn.getValues()
      ptnKeyValues.put(schema.getName().toLowerCase(), values.get(i));
      i++;
    }

    return ptnKeyValues;
  }
  private Referenceable replaceTableQFName(
      HiveEventContext event,
      Table oldTable,
      Table newTable,
      final Referenceable tableEntity,
      final String oldTableQFName,
      final String newTableQFName)
      throws HiveException {
    tableEntity.set(AtlasClient.NAME, oldTable.getTableName().toLowerCase());
    tableEntity.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, oldTableQFName);

    // Replace table entity with new name
    final Referenceable newEntity = new Referenceable(HiveDataTypes.HIVE_TABLE.getName());
    newEntity.set(AtlasClient.NAME, newTable.getTableName().toLowerCase());
    newEntity.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, newTableQFName);

    ArrayList<String> alias_list = new ArrayList<>();
    alias_list.add(oldTable.getTableName().toLowerCase());
    newEntity.set(HiveDataModelGenerator.TABLE_ALIAS_LIST, alias_list);
    event.addMessage(
        new HookNotification.EntityPartialUpdateRequest(
            event.getUser(),
            HiveDataTypes.HIVE_TABLE.getName(),
            AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME,
            oldTableQFName,
            newEntity));

    return newEntity;
  }
  /**
   * @param db
   * @param table
   * @param filter
   * @param path
   * @param jobConf
   * @return
   */
  public static boolean setDataStorageLocation(
      String db, String table, String filter, String path, JobConf jobConf) {
    Preconditions.checkNotNull(table, "Table name must not be null");

    HiveMetaStoreClient client = null;
    List<String> locations = new ArrayList<String>();

    try {
      client = getHiveMetaStoreClient(jobConf);

      Table hiveTable = HCatUtil.getTable(client, db, table);
      hiveTable.setDataLocation(new URI(path));

      client.alter_table(db, table, hiveTable.getTTable());
    } catch (IOException e) {
      logError("Error occured when getting hiveconf", e);
    } catch (URISyntaxException e) {
      logError("Error occured when convert path to URI", e);
    } catch (MetaException e) {
      logError("Error occured when getting HiveMetaStoreClient", e);
    } catch (NoSuchObjectException e) {
      logError("Table doesn't exist in HCatalog: " + table, e);
    } catch (TException e) {
      logError("Error occured when getting Table", e);
    } finally {
      HCatUtil.closeHiveClientQuietly(client);
    }

    return true;
  }
 /**
  * Method to fetch table data
  *
  * @param table table name
  * @param database database
  * @return list of columns in comma seperated way
  * @throws Exception if any error occurs
  */
 private List<String> getTableData(String table, String database) throws Exception {
   HiveConf conf = new HiveConf();
   conf.addResource("hive-site.xml");
   ArrayList<String> results = new ArrayList<String>();
   ArrayList<String> temp = new ArrayList<String>();
   Hive hive = Hive.get(conf);
   org.apache.hadoop.hive.ql.metadata.Table tbl = hive.getTable(database, table);
   FetchWork work;
   if (!tbl.getPartCols().isEmpty()) {
     List<Partition> partitions = hive.getPartitions(tbl);
     List<PartitionDesc> partDesc = new ArrayList<PartitionDesc>();
     List<String> partLocs = new ArrayList<String>();
     for (Partition part : partitions) {
       partLocs.add(part.getLocation());
       partDesc.add(Utilities.getPartitionDesc(part));
     }
     work = new FetchWork(partLocs, partDesc, Utilities.getTableDesc(tbl));
     work.setLimit(100);
   } else {
     work = new FetchWork(tbl.getDataLocation().toString(), Utilities.getTableDesc(tbl));
   }
   FetchTask task = new FetchTask();
   task.setWork(work);
   task.initialize(conf, null, null);
   task.fetch(temp);
   for (String str : temp) {
     results.add(str.replace("\t", ","));
   }
   return results;
 }
 @Override
 public LongWatermark getPreviousHighWatermark(Table table) {
   if (this.tableWatermarks.containsKey(table.getCompleteName())) {
     return this.tableWatermarks.get(table.getCompleteName());
   }
   return new LongWatermark(0);
 }
Exemple #6
0
 /**
  * Retrieve skewed column name for a table.
  *
  * @param alias table alias
  * @return
  */
 public List<String> getSkewedColumnNames(String alias) {
   List<String> skewedColNames = null;
   if (null != qbm && null != qbm.getAliasToTable() && qbm.getAliasToTable().size() > 0) {
     Table tbl = getMetaData().getTableForAlias(alias);
     skewedColNames = tbl.getSkewedColNames();
   }
   return skewedColNames;
 }
  public static void createHiveTable(String db, String table, List<FieldSchema> columns)
      throws Exception {
    Table tbl1 = new Table(db, table);
    tbl1.setFields(columns);

    Hive.get().createTable(tbl1);
    Assert.assertNotNull(Hive.get().getTable(db, table));
    log.info("Created table : " + table);
  }
  /** Clear out any side effects of running tests */
  public void clearTablesCreatedDuringTests() throws Exception {
    if (System.getenv(QTEST_LEAVE_FILES) != null) {
      return;
    }

    // Delete any tables other than the source tables
    // and any databases other than the default database.
    for (String dbName : db.getAllDatabases()) {
      SessionState.get().setCurrentDatabase(dbName);
      for (String tblName : db.getAllTables()) {
        if (!DEFAULT_DATABASE_NAME.equals(dbName)) {
          Table tblObj = db.getTable(tblName);
          // dropping index table can not be dropped directly. Dropping the base
          // table will automatically drop all its index table
          if (tblObj.isIndexTable()) {
            continue;
          }
          db.dropTable(dbName, tblName);
        } else {
          // this table is defined in srcTables, drop all indexes on it
          List<Index> indexes = db.getIndexes(dbName, tblName, (short) -1);
          if (indexes != null && indexes.size() > 0) {
            for (Index index : indexes) {
              db.dropIndex(dbName, tblName, index.getIndexName(), true, true);
            }
          }
        }
      }
      if (!DEFAULT_DATABASE_NAME.equals(dbName)) {
        // Drop cascade, may need to drop functions
        db.dropDatabase(dbName, true, true, true);
      }
    }

    // delete remaining directories for external tables (can affect stats for following tests)
    try {
      Path p = new Path(testWarehouse);
      FileSystem fileSystem = p.getFileSystem(conf);
      if (fileSystem.exists(p)) {
        for (FileStatus status : fileSystem.listStatus(p)) {
          if (status.isDir()) {
            fileSystem.delete(status.getPath(), true);
          }
        }
      }
    } catch (IllegalArgumentException e) {
      // ignore.. provides invalid url sometimes intentionally
    }
    SessionState.get().setCurrentDatabase(DEFAULT_DATABASE_NAME);

    List<String> roleNames = db.getAllRoleNames();
    for (String roleName : roleNames) {
      if (!"PUBLIC".equalsIgnoreCase(roleName) && !"ADMIN".equalsIgnoreCase(roleName)) {
        db.dropRole(roleName);
      }
    }
  }
 /**
  * return the partition columns from a table instance
  *
  * @param table the instance to extract partition columns from
  * @return HCatSchema instance which contains the partition columns
  * @throws IOException
  */
 public static HCatSchema getPartitionColumns(Table table) throws IOException {
   HCatSchema cols = new HCatSchema(new LinkedList<HCatFieldSchema>());
   if (table.getPartitionKeys().size() != 0) {
     for (FieldSchema fs : table.getPartitionKeys()) {
       cols.append(HCatSchemaUtils.getHCatFieldSchema(fs));
     }
   }
   return cols;
 }
  /**
   * @param db
   * @param table
   * @param filter
   * @param jobConf
   * @return A list of locations
   */
  public static List<String> getDataStorageLocation(
      String db, String table, String filter, JobConf jobConf) {
    Preconditions.checkNotNull(table, "Table name must not be null");

    HiveMetaStoreClient client = null;
    List<String> locations = new ArrayList<String>();

    try {
      client = getHiveMetaStoreClient(jobConf);
      Table hiveTable = HCatUtil.getTable(client, db, table);

      if (hiveTable.isPartitioned()) {
        List<Partition> parts = null;
        if (null != StringUtils.stripToNull(filter)) {
          parts = client.listPartitionsByFilter(db, table, filter, (short) -1);
        } else {
          parts = client.listPartitions(db, table, (short) -1);
        }

        if (parts.size() > 0) {
          // Return more than one partitions when filter is
          // something
          // like ds >= 1234
          for (Partition part : parts) {
            locations.addAll(getFilesInHivePartition(part, jobConf));
          }
        } else {
          logError(
              "Table "
                  + hiveTable.getTableName()
                  + " doesn't have the specified partition:"
                  + filter,
              null);
        }

      } else {
        locations.add(hiveTable.getTTable().getSd().getLocation());
      }
    } catch (IOException e) {
      logError("Error occured when getting hiveconf", e);
    } catch (MetaException e) {
      logError("Error occured when getting HiveMetaStoreClient", e);
    } catch (NoSuchObjectException e) {
      logError("Table doesn't exist in HCatalog: " + table, e);
    } catch (TException e) {
      logError("Error occured when getting Table", e);
    } finally {
      HCatUtil.closeHiveClientQuietly(client);
    }

    return locations;
  }
  public static HCatSchema getTableSchemaWithPtnCols(Table table) throws IOException {
    HCatSchema tableSchema = new HCatSchema(HCatUtil.getHCatFieldSchemaList(table.getCols()));

    if (table.getPartitionKeys().size() != 0) {

      // add partition keys to table schema
      // NOTE : this assumes that we do not ever have ptn keys as columns
      // inside the table schema as well!
      for (FieldSchema fs : table.getPartitionKeys()) {
        tableSchema.append(HCatSchemaUtils.getHCatFieldSchema(fs));
      }
    }
    return tableSchema;
  }
Exemple #12
0
  /**
   * Determines if one can insert into partition(s), or there's a conflict with archive. It can be
   * because partition is itself archived or it is to be created inside existing archive. The second
   * case is when partition doesn't exist yet, but it would be inside of an archive if it existed.
   * This one is quite tricky to check, we need to find at least one partition inside of the parent
   * directory. If it is archived and archiving level tells that the archival was done of directory
   * partition is in it means we cannot insert; otherwise we can. This method works both for full
   * specifications and partial ones - in second case it checks if any partition that could possibly
   * match such specification is inside archive.
   *
   * @param db - Hive object
   * @param tbl - table where partition is
   * @param partSpec - partition specification with possible nulls in case of dynamic partiton
   *     inserts
   * @return null if partition can be inserted, string with colliding archive name when it can't
   * @throws HiveException
   */
  public static String conflictingArchiveNameOrNull(
      Hive db, Table tbl, LinkedHashMap<String, String> partSpec) throws HiveException {

    List<FieldSchema> partKeys = tbl.getPartitionKeys();
    int partSpecLevel = 0;
    for (FieldSchema partKey : partKeys) {
      if (!partSpec.containsKey(partKey.getName())) {
        break;
      }
      partSpecLevel++;
    }

    if (partSpecLevel != partSpec.size()) {
      throw new HiveException("partspec " + partSpec + " is wrong for table " + tbl.getTableName());
    }

    Map<String, String> spec = new HashMap<String, String>(partSpec);
    List<String> reversedKeys = new LinkedList<String>();
    for (FieldSchema fs : tbl.getPartCols()) {
      if (spec.containsKey(fs.getName())) {
        reversedKeys.add(0, fs.getName());
      }
    }

    for (String rk : reversedKeys) {
      List<Partition> parts = db.getPartitions(tbl, spec, (short) 1);
      if (parts.size() != 0) {
        Partition p = parts.get(0);
        if (!isArchived(p)) {
          // if archiving was done at this or at upper level, every matched
          // partition would be archived, so it not being archived means
          // no archiving was done neither at this nor at upper level
          return null;
        } else if (getArchivingLevel(p) > spec.size()) {
          // if archiving was done at this or at upper level its level
          // would be lesser or equal to specification size
          // it is not, which means no archiving at this or upper level
          return null;
        } else {
          return getPartialName(p, getArchivingLevel(p));
        }
      }
      spec.remove(rk);
    }
    return null;
  }
  private void handleExternalTables(
      final HiveMetaStoreBridge dgiBridge,
      final HiveEventContext event,
      final LinkedHashMap<Type, Referenceable> tables)
      throws HiveException, MalformedURLException {
    List<Referenceable> entities = new ArrayList<>();
    final Entity hiveEntity = getEntityByType(event.getOutputs(), Type.TABLE);
    Table hiveTable = hiveEntity.getTable();
    // Refresh to get the correct location
    hiveTable = dgiBridge.hiveClient.getTable(hiveTable.getDbName(), hiveTable.getTableName());

    final String location = lower(hiveTable.getDataLocation().toString());
    if (hiveTable != null && TableType.EXTERNAL_TABLE.equals(hiveTable.getTableType())) {
      LOG.info("Registering external table process {} ", event.getQueryStr());
      final ReadEntity dfsEntity = new ReadEntity();
      dfsEntity.setTyp(Type.DFS_DIR);
      dfsEntity.setName(location);

      SortedMap<Entity, Referenceable> inputs =
          new TreeMap<Entity, Referenceable>(entityComparator) {
            {
              put(dfsEntity, dgiBridge.fillHDFSDataSet(location));
            }
          };

      SortedMap<Entity, Referenceable> outputs =
          new TreeMap<Entity, Referenceable>(entityComparator) {
            {
              put(hiveEntity, tables.get(Type.TABLE));
            }
          };

      Referenceable processReferenceable =
          getProcessReferenceable(dgiBridge, event, inputs, outputs);
      String tableQualifiedName =
          dgiBridge.getTableQualifiedName(dgiBridge.getClusterName(), hiveTable);

      if (isCreateOp(event)) {
        processReferenceable.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, tableQualifiedName);
      }
      entities.addAll(tables.values());
      entities.add(processReferenceable);
      event.addMessage(new HookNotification.EntityUpdateRequest(event.getUser(), entities));
    }
  }
  /*
   * Builds a TDescribeTableResult that contains the result of a DESCRIBE FORMATTED
   * <table> command. For the formatted describe output the goal is to be exactly the
   * same as what Hive (via HiveServer2) outputs, for compatibility reasons. To do this,
   * Hive's MetadataFormatUtils class is used to build the results.
   */
  private static TDescribeTableResult describeTableFormatted(Table table) {
    TDescribeTableResult descResult = new TDescribeTableResult();
    descResult.results = Lists.newArrayList();

    org.apache.hadoop.hive.metastore.api.Table msTable = table.getMetaStoreTable().deepCopy();
    // Fixup the metastore table so the output of DESCRIBE FORMATTED matches Hive's.
    // This is to distinguish between empty comments and no comments (value is null).
    for (FieldSchema fs : msTable.getSd().getCols())
      fs.setComment(table.getColumn(fs.getName()).getComment());
    for (FieldSchema fs : msTable.getPartitionKeys()) {
      fs.setComment(table.getColumn(fs.getName()).getComment());
    }

    // To avoid initializing any of the SerDe classes in the metastore table Thrift
    // struct, create the ql.metadata.Table object by calling the empty c'tor and
    // then calling setTTable().
    org.apache.hadoop.hive.ql.metadata.Table hiveTable =
        new org.apache.hadoop.hive.ql.metadata.Table();
    hiveTable.setTTable(msTable);
    StringBuilder sb = new StringBuilder();
    // First add all the columns (includes partition columns).
    sb.append(
        MetaDataFormatUtils.getAllColumnsInformation(
            msTable.getSd().getCols(), msTable.getPartitionKeys()));
    // Add the extended table metadata information.
    sb.append(MetaDataFormatUtils.getTableInformation(hiveTable));

    for (String line : sb.toString().split("\n")) {
      // To match Hive's HiveServer2 output, split each line into multiple column
      // values based on the field delimiter.
      String[] columns = line.split(MetaDataFormatUtils.FIELD_DELIM);
      TResultRow resultRow = new TResultRow();
      for (int i = 0; i < NUM_DESC_FORMATTED_RESULT_COLS; ++i) {
        TColumnValue colVal = new TColumnValue();
        colVal.setString_val(null);
        if (columns.length > i) {
          // Add the column value.
          colVal.setString_val(columns[i]);
        }
        resultRow.addToColVals(colVal);
      }
      descResult.results.add(resultRow);
    }
    return descResult;
  }
 /**
  * Validate alter table description.
  *
  * @throws SemanticException
  */
 public void validate() throws SemanticException {
   if (null != table) {
     /* Validate skewed information. */
     ValidationUtility.validateSkewedInformation(
         ParseUtils.validateColumnNameUniqueness(table.getCols()),
         this.getSkewedColNames(),
         this.getSkewedColValues());
   }
 }
Exemple #16
0
 /**
  * This block of code iterates over the topToTable map from ParseContext to determine if the query
  * has a scan over multiple tables.
  *
  * @return
  */
 boolean ifQueryHasMultipleTables() {
   Map<TableScanOperator, Table> topToTable = parseContext.getTopToTable();
   Iterator<Table> valuesItr = topToTable.values().iterator();
   Set<String> tableNameSet = new HashSet<String>();
   while (valuesItr.hasNext()) {
     Table table = valuesItr.next();
     tableNameSet.add(table.getTableName());
   }
   if (tableNameSet.size() > 1) {
     LOG.debug(
         "Query has more than one table "
             + "that is not supported with "
             + getName()
             + " optimization.");
     return true;
   }
   return false;
 }
Exemple #17
0
 /**
  * This code block iterates over indexes on the table and populates the indexToKeys map for all
  * the indexes that satisfy the rewrite criteria.
  *
  * @param indexTables
  * @return
  * @throws SemanticException
  */
 Map<Index, Set<String>> getIndexToKeysMap(List<Index> indexTables) throws SemanticException {
   Index index = null;
   Hive hiveInstance = hiveDb;
   Map<Index, Set<String>> indexToKeysMap = new LinkedHashMap<Index, Set<String>>();
   for (int idxCtr = 0; idxCtr < indexTables.size(); idxCtr++) {
     final Set<String> indexKeyNames = new LinkedHashSet<String>();
     index = indexTables.get(idxCtr);
     // Getting index key columns
     StorageDescriptor sd = index.getSd();
     List<FieldSchema> idxColList = sd.getCols();
     for (FieldSchema fieldSchema : idxColList) {
       indexKeyNames.add(fieldSchema.getName());
     }
     assert indexKeyNames.size() == 1;
     // Check that the index schema is as expected. This code block should
     // catch problems of this rewrite breaking when the AggregateIndexHandler
     // index is changed.
     List<String> idxTblColNames = new ArrayList<String>();
     try {
       Table idxTbl = hiveInstance.getTable(index.getDbName(), index.getIndexTableName());
       for (FieldSchema idxTblCol : idxTbl.getCols()) {
         idxTblColNames.add(idxTblCol.getName());
       }
     } catch (HiveException e) {
       LOG.error(
           "Got exception while locating index table, "
               + "skipping "
               + getName()
               + " optimization");
       LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
       throw new SemanticException(e.getMessage(), e);
     }
     assert (idxTblColNames.contains(IDX_BUCKET_COL));
     assert (idxTblColNames.contains(IDX_OFFSETS_ARRAY_COL));
     // we add all index tables which can be used for rewrite
     // and defer the decision of using a particular index for later
     // this is to allow choosing a index if a better mechanism is
     // designed later to chose a better rewrite
     indexToKeysMap.put(index, indexKeyNames);
   }
   return indexToKeysMap;
 }
    // Return true if the table is bucketed/sorted by the specified positions
    // The number of buckets, the sort order should also match along with the
    // columns which are bucketed/sorted
    private boolean checkTable(
        Table table,
        List<Integer> bucketPositionsDest,
        List<Integer> sortPositionsDest,
        List<Integer> sortOrderDest,
        int numBucketsDest) {
      // The bucketing and sorting positions should exactly match
      int numBuckets = table.getNumBuckets();
      if (numBucketsDest != numBuckets) {
        return false;
      }

      List<Integer> tableBucketPositions =
          getBucketPositions(table.getBucketCols(), table.getCols());
      ObjectPair<List<Integer>, List<Integer>> tableSortPositionsOrder =
          getSortPositionsOrder(table.getSortCols(), table.getCols());
      return bucketPositionsDest.equals(tableBucketPositions)
          && sortPositionsDest.equals(tableSortPositionsOrder.getFirst())
          && sortOrderDest.equals(tableSortPositionsOrder.getSecond());
    }
Exemple #19
0
 /**
  * Creates path where partitions matching prefix should lie in filesystem
  *
  * @param tbl table in which partition is
  * @return expected location of partitions matching prefix in filesystem
  */
 public Path createPath(Table tbl) throws HiveException {
   String prefixSubdir;
   try {
     prefixSubdir = Warehouse.makePartName(fields, values);
   } catch (MetaException e) {
     throw new HiveException("Unable to get partitions directories prefix", e);
   }
   Path tableDir = tbl.getDataLocation();
   if (tableDir == null) {
     throw new HiveException("Table has no location set");
   }
   return new Path(tableDir, prefixSubdir);
 }
 @BeforeTest
 @Override
 public void beforeTest() throws Exception {
   super.beforeTest();
   List<FieldSchema> factColumns = new ArrayList<>();
   factColumns.add(new FieldSchema("continent", "int", ""));
   factColumns.add(new FieldSchema("country", "int", ""));
   factColumns.add(new FieldSchema("region", "int", ""));
   factColumns.add(new FieldSchema("city", "int", ""));
   factColumns.add(new FieldSchema("count", "double", ""));
   factColumns.add(new FieldSchema("added", "double", ""));
   try {
     HiveConf hiveConf = new HiveConf();
     SessionState.start(hiveConf);
     Hive.get().dropTable("default.wikipedia");
     createHiveTable("default", "wikipedia", factColumns);
     Table tbl = CubeMetastoreClient.getInstance(hiveConf).getHiveTable("wikipedia");
     tbl.setProperty("druid.table.time.dimension", "time");
   } catch (HiveException e) {
     log.error("Exception while creating hive table", e);
   }
 }
  private void renameTable(HiveMetaStoreBridge dgiBridge, HiveEventContext event) throws Exception {
    // crappy, no easy of getting new name
    assert event.getInputs() != null && event.getInputs().size() == 1;
    assert event.getOutputs() != null && event.getOutputs().size() > 0;

    // Update entity if not exists
    ReadEntity oldEntity = event.getInputs().iterator().next();
    Table oldTable = oldEntity.getTable();

    for (WriteEntity writeEntity : event.getOutputs()) {
      if (writeEntity.getType() == Entity.Type.TABLE) {
        Table newTable = writeEntity.getTable();
        // Hive sends with both old and new table names in the outputs which is weird. So skipping
        // that with the below check
        if (!newTable.getDbName().equals(oldTable.getDbName())
            || !newTable.getTableName().equals(oldTable.getTableName())) {
          final String oldQualifiedName =
              dgiBridge.getTableQualifiedName(dgiBridge.getClusterName(), oldTable);
          final String newQualifiedName =
              dgiBridge.getTableQualifiedName(dgiBridge.getClusterName(), newTable);

          // Create/update old table entity - create entity with oldQFNme and old tableName if it
          // doesnt exist. If exists, will update
          // We always use the new entity while creating the table since some flags, attributes of
          // the table are not set in inputEntity and Hive.getTable(oldTableName) also fails since
          // the table doesnt exist in hive anymore
          final LinkedHashMap<Type, Referenceable> tables =
              createOrUpdateEntities(dgiBridge, event, writeEntity, true);
          Referenceable tableEntity = tables.get(Type.TABLE);

          // Reset regular column QF Name to old Name and create a new partial notification request
          // to replace old column QFName to newName to retain any existing traits
          replaceColumnQFName(
              event,
              (List<Referenceable>) tableEntity.get(HiveDataModelGenerator.COLUMNS),
              oldQualifiedName,
              newQualifiedName);

          // Reset partition key column QF Name to old Name and create a new partial notification
          // request to replace old column QFName to newName to retain any existing traits
          replaceColumnQFName(
              event,
              (List<Referenceable>) tableEntity.get(HiveDataModelGenerator.PART_COLS),
              oldQualifiedName,
              newQualifiedName);

          // Reset SD QF Name to old Name and create a new partial notification request to replace
          // old SD QFName to newName to retain any existing traits
          replaceSDQFName(event, tableEntity, oldQualifiedName, newQualifiedName);

          // Reset Table QF Name to old Name and create a new partial notification request to
          // replace old Table QFName to newName
          replaceTableQFName(
              event, oldTable, newTable, tableEntity, oldQualifiedName, newQualifiedName);
        }
      }
    }
  }
Exemple #22
0
    /**
     * Extract partial prefix specification from table and key-value map
     *
     * @param tbl table in which partition is
     * @param partSpec specification of partition
     * @return extracted specification
     */
    public static PartSpecInfo create(Table tbl, Map<String, String> partSpec)
        throws HiveException {
      // we have to check if we receive prefix of partition keys so in table
      // scheme like table/ds=2011-01-02/hr=13/
      // ARCHIVE PARTITION (ds='2011-01-02') will work and
      // ARCHIVE PARTITION(hr='13') won't
      List<FieldSchema> prefixFields = new ArrayList<FieldSchema>();
      List<String> prefixValues = new ArrayList<String>();
      List<FieldSchema> partCols = tbl.getPartCols();
      Iterator<String> itrPsKeys = partSpec.keySet().iterator();
      for (FieldSchema fs : partCols) {
        if (!itrPsKeys.hasNext()) {
          break;
        }
        if (!itrPsKeys.next().toLowerCase().equals(fs.getName().toLowerCase())) {
          throw new HiveException("Invalid partition specifiation: " + partSpec);
        }
        prefixFields.add(fs);
        prefixValues.add(partSpec.get(fs.getName()));
      }

      return new PartSpecInfo(prefixFields, prefixValues);
    }
  private void renameColumn(HiveMetaStoreBridge dgiBridge, HiveEventContext event)
      throws Exception {
    assert event.getInputs() != null && event.getInputs().size() == 1;
    assert event.getOutputs() != null && event.getOutputs().size() > 0;

    Table oldTable = event.getInputs().iterator().next().getTable();
    List<FieldSchema> oldColList = oldTable.getAllCols();
    Table outputTbl = event.getOutputs().iterator().next().getTable();
    outputTbl = dgiBridge.hiveClient.getTable(outputTbl.getDbName(), outputTbl.getTableName());
    List<FieldSchema> newColList = outputTbl.getAllCols();
    assert oldColList.size() == newColList.size();

    Pair<String, String> changedColNamePair = findChangedColNames(oldColList, newColList);
    String oldColName = changedColNamePair.getLeft();
    String newColName = changedColNamePair.getRight();
    for (WriteEntity writeEntity : event.getOutputs()) {
      if (writeEntity.getType() == Type.TABLE) {
        Table newTable = writeEntity.getTable();
        createOrUpdateEntities(dgiBridge, event, writeEntity, true, oldTable);
        final String newQualifiedTableName =
            dgiBridge.getTableQualifiedName(dgiBridge.getClusterName(), newTable);
        String oldColumnQFName =
            HiveMetaStoreBridge.getColumnQualifiedName(newQualifiedTableName, oldColName);
        String newColumnQFName =
            HiveMetaStoreBridge.getColumnQualifiedName(newQualifiedTableName, newColName);
        Referenceable newColEntity = new Referenceable(HiveDataTypes.HIVE_COLUMN.getName());
        newColEntity.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, newColumnQFName);

        event.addMessage(
            new HookNotification.EntityPartialUpdateRequest(
                event.getUser(),
                HiveDataTypes.HIVE_COLUMN.getName(),
                AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME,
                oldColumnQFName,
                newColEntity));
      }
    }
    handleEventOutputs(dgiBridge, event, Type.TABLE);
  }
  /**
   * Validate partition schema, checks if the column types match between the partition and the
   * existing table schema. Returns the list of columns present in the partition but not in the
   * table.
   *
   * @param table the table
   * @param partitionSchema the partition schema
   * @return the list of newly added fields
   * @throws IOException Signals that an I/O exception has occurred.
   */
  public static List<FieldSchema> validatePartitionSchema(Table table, HCatSchema partitionSchema)
      throws IOException {
    Map<String, FieldSchema> partitionKeyMap = new HashMap<String, FieldSchema>();

    for (FieldSchema field : table.getPartitionKeys()) {
      partitionKeyMap.put(field.getName().toLowerCase(), field);
    }

    List<FieldSchema> tableCols = table.getCols();
    List<FieldSchema> newFields = new ArrayList<FieldSchema>();

    for (int i = 0; i < partitionSchema.getFields().size(); i++) {

      FieldSchema field = HCatSchemaUtils.getFieldSchema(partitionSchema.getFields().get(i));

      FieldSchema tableField;
      if (i < tableCols.size()) {
        tableField = tableCols.get(i);

        if (!tableField.getName().equalsIgnoreCase(field.getName())) {
          throw new HCatException(
              ErrorType.ERROR_SCHEMA_COLUMN_MISMATCH,
              "Expected column <"
                  + tableField.getName()
                  + "> at position "
                  + (i + 1)
                  + ", found column <"
                  + field.getName()
                  + ">");
        }
      } else {
        tableField = partitionKeyMap.get(field.getName().toLowerCase());

        if (tableField != null) {
          throw new HCatException(
              ErrorType.ERROR_SCHEMA_PARTITION_KEY, "Key <" + field.getName() + ">");
        }
      }

      if (tableField == null) {
        // field present in partition but not in table
        newFields.add(field);
      } else {
        // field present in both. validate type has not changed
        TypeInfo partitionType = TypeInfoUtils.getTypeInfoFromTypeString(field.getType());
        TypeInfo tableType = TypeInfoUtils.getTypeInfoFromTypeString(tableField.getType());

        if (!partitionType.equals(tableType)) {
          throw new HCatException(
              ErrorType.ERROR_SCHEMA_TYPE_MISMATCH,
              "Column <"
                  + field.getName()
                  + ">, expected <"
                  + tableType.getTypeName()
                  + ">, got <"
                  + partitionType.getTypeName()
                  + ">");
        }
      }
    }

    return newFields;
  }
 /** called from {@link SemanticAnalyzer#genTablePlan} */
 public static void genTableName(RowResolver rwsch, Table tab) {
   rwsch.tableOriginalName = tab.getTableName();
   System.out.println("======Gen Table Name===== " + rwsch.tableOriginalName);
 }
  private LinkedHashMap<Type, Referenceable> createOrUpdateEntities(
      HiveMetaStoreBridge dgiBridge,
      HiveEventContext event,
      Entity entity,
      boolean skipTempTables,
      Table existTable)
      throws Exception {
    Database db = null;
    Table table = null;
    Partition partition = null;
    LinkedHashMap<Type, Referenceable> result = new LinkedHashMap<>();
    List<Referenceable> entities = new ArrayList<>();

    switch (entity.getType()) {
      case DATABASE:
        db = entity.getDatabase();
        break;

      case TABLE:
        table = entity.getTable();
        db = dgiBridge.hiveClient.getDatabase(table.getDbName());
        break;

      case PARTITION:
        partition = entity.getPartition();
        table = partition.getTable();
        db = dgiBridge.hiveClient.getDatabase(table.getDbName());
        break;
    }

    db = dgiBridge.hiveClient.getDatabase(db.getName());
    Referenceable dbEntity = dgiBridge.createDBInstance(db);

    entities.add(dbEntity);
    result.put(Type.DATABASE, dbEntity);

    Referenceable tableEntity = null;

    if (table != null) {
      if (existTable != null) {
        table = existTable;
      } else {
        table = dgiBridge.hiveClient.getTable(table.getDbName(), table.getTableName());
      }
      // If its an external table, even though the temp table skip flag is on,
      // we create the table since we need the HDFS path to temp table lineage.
      if (skipTempTables
          && table.isTemporary()
          && !TableType.EXTERNAL_TABLE.equals(table.getTableType())) {
        LOG.debug(
            "Skipping temporary table registration {} since it is not an external table {} ",
            table.getTableName(),
            table.getTableType().name());

      } else {
        tableEntity = dgiBridge.createTableInstance(dbEntity, table);
        entities.add(tableEntity);
        result.put(Type.TABLE, tableEntity);
      }
    }

    event.addMessage(new HookNotification.EntityUpdateRequest(event.getUser(), entities));
    return result;
  }
Exemple #27
0
  /**
   * We traverse the current operator tree to check for conditions in which the optimization cannot
   * be applied.
   *
   * <p>At the end, we check if all conditions have passed for rewrite. If yes, we determine if the
   * the index is usable for rewrite. Else, we log the condition which did not meet the rewrite
   * criterion.
   *
   * @return
   * @throws SemanticException
   */
  boolean shouldApplyOptimization() throws SemanticException {
    boolean canApply = false;
    if (ifQueryHasMultipleTables()) {
      // We do not apply this optimization for this case as of now.
      return false;
    } else {
      /*
       * This code iterates over each TableScanOperator from the topOps map from ParseContext.
       * For each operator tree originating from this top TableScanOperator, we determine
       * if the optimization can be applied. If yes, we add the name of the top table to
       * the tsOpToProcess to apply rewrite later on.
       * */
      Map<TableScanOperator, Table> topToTable = parseContext.getTopToTable();
      Iterator<TableScanOperator> topOpItr = topToTable.keySet().iterator();
      while (topOpItr.hasNext()) {

        TableScanOperator topOp = topOpItr.next();
        Table table = topToTable.get(topOp);
        baseTableName = table.getTableName();
        Map<Table, List<Index>> indexes = getIndexesForRewrite();
        if (indexes == null) {
          LOG.debug(
              "Error getting valid indexes for rewrite, "
                  + "skipping "
                  + getName()
                  + " optimization");
          return false;
        }

        if (indexes.size() == 0) {
          LOG.debug(
              "No Valid Index Found to apply Rewrite, "
                  + "skipping "
                  + getName()
                  + " optimization");
          return false;
        } else {
          // we need to check if the base table has confirmed or unknown partitions
          if (parseContext.getOpToPartList() != null && parseContext.getOpToPartList().size() > 0) {
            // if base table has partitions, we need to check if index is built for
            // all partitions. If not, then we do not apply the optimization
            if (checkIfIndexBuiltOnAllTablePartitions(topOp, indexes)) {
              // check if rewrite can be applied for operator tree
              // if partitions condition returns true
              canApply = checkIfRewriteCanBeApplied(topOp, table, indexes);
            } else {
              LOG.debug(
                  "Index is not built for all table partitions, "
                      + "skipping "
                      + getName()
                      + " optimization");
              return false;
            }
          } else {
            // check if rewrite can be applied for operator tree
            // if there are no partitions on base table
            canApply = checkIfRewriteCanBeApplied(topOp, table, indexes);
          }
        }
      }
    }
    return canApply;
  }
Exemple #28
0
    /**
     * Returns the skewed values in all the tables which are going to be scanned. If the join is on
     * columns c1, c2 and c3 on tables T1 and T2, T1 is skewed on c1 and c4 with the skew values
     * ((1,2),(3,4)), whereas T2 is skewed on c1, c2 with skew values ((5,6),(7,8)), the resulting
     * map would be: <(c1) -> ((1), (3)), (c1,c2) -> ((5,6),(7,8))>
     *
     * @param op The join operator being optimized
     * @param tableScanOpsForJoin table scan operators which are parents of the join operator
     * @return map<join keys intersection skewedkeys, list of skewed values>.
     */
    private Map<List<ExprNodeDesc>, List<List<String>>> getSkewedValues(
        Operator<? extends OperatorDesc> op, List<TableScanOperator> tableScanOpsForJoin) {

      Map<List<ExprNodeDesc>, List<List<String>>> skewDataReturn =
          new HashMap<List<ExprNodeDesc>, List<List<String>>>();

      Map<List<ExprNodeDescEqualityWrapper>, List<List<String>>> skewData =
          new HashMap<List<ExprNodeDescEqualityWrapper>, List<List<String>>>();

      // The join keys are available in the reduceSinkOperators before join
      for (Operator<? extends OperatorDesc> reduceSinkOp : op.getParentOperators()) {
        ReduceSinkDesc rsDesc = ((ReduceSinkOperator) reduceSinkOp).getConf();

        if (rsDesc.getKeyCols() != null) {
          Table table = null;
          // Find the skew information corresponding to the table
          List<String> skewedColumns = null;
          List<List<String>> skewedValueList = null;

          // The join columns which are also skewed
          List<ExprNodeDescEqualityWrapper> joinKeysSkewedCols =
              new ArrayList<ExprNodeDescEqualityWrapper>();

          // skewed Keys which intersect with join keys
          List<Integer> positionSkewedKeys = new ArrayList<Integer>();

          // Update the joinKeys appropriately.
          for (ExprNodeDesc keyColDesc : rsDesc.getKeyCols()) {
            ExprNodeColumnDesc keyCol = null;

            // If the key column is not a column, then dont apply this optimization.
            // This will be fixed as part of https://issues.apache.org/jira/browse/HIVE-3445
            // for type conversion UDFs.
            if (keyColDesc instanceof ExprNodeColumnDesc) {
              keyCol = (ExprNodeColumnDesc) keyColDesc;
              if (table == null) {
                table = getTable(parseContext, reduceSinkOp, tableScanOpsForJoin);
                skewedColumns = table == null ? null : table.getSkewedColNames();
                // No skew on the table to take care of
                if ((skewedColumns == null) || (skewedColumns.isEmpty())) {
                  continue;
                }

                skewedValueList = table == null ? null : table.getSkewedColValues();
              }
              int pos = skewedColumns.indexOf(keyCol.getColumn());
              if ((pos >= 0) && (!positionSkewedKeys.contains(pos))) {
                positionSkewedKeys.add(pos);
                ExprNodeColumnDesc keyColClone = (ExprNodeColumnDesc) keyCol.clone();
                keyColClone.setTabAlias(null);
                joinKeysSkewedCols.add(new ExprNodeDescEqualityWrapper(keyColClone));
              }
            }
          }

          // If the skew keys match the join keys, then add it to the list
          if ((skewedColumns != null) && (!skewedColumns.isEmpty())) {
            if (!joinKeysSkewedCols.isEmpty()) {
              // If the join keys matches the skewed keys, use the table skewed keys
              List<List<String>> skewedJoinValues;
              if (skewedColumns.size() == positionSkewedKeys.size()) {
                skewedJoinValues = skewedValueList;
              } else {
                skewedJoinValues = getSkewedJoinValues(skewedValueList, positionSkewedKeys);
              }

              List<List<String>> oldSkewedJoinValues = skewData.get(joinKeysSkewedCols);
              if (oldSkewedJoinValues == null) {
                oldSkewedJoinValues = new ArrayList<List<String>>();
              }
              for (List<String> skewValue : skewedJoinValues) {
                if (!oldSkewedJoinValues.contains(skewValue)) {
                  oldSkewedJoinValues.add(skewValue);
                }
              }

              skewData.put(joinKeysSkewedCols, oldSkewedJoinValues);
            }
          }
        }
      }

      // convert skewData to contain ExprNodeDesc in the keys
      for (Map.Entry<List<ExprNodeDescEqualityWrapper>, List<List<String>>> mapEntry :
          skewData.entrySet()) {
        List<ExprNodeDesc> skewedKeyJoinCols = new ArrayList<ExprNodeDesc>();
        for (ExprNodeDescEqualityWrapper key : mapEntry.getKey()) {
          skewedKeyJoinCols.add(key.getExprNodeDesc());
        }
        skewDataReturn.put(skewedKeyJoinCols, mapEntry.getValue());
      }

      return skewDataReturn;
    }
Exemple #29
0
  private List<HiveLockObj> getLockObjects(
      QueryPlan plan, Database db, Table t, Partition p, HiveLockMode mode) throws LockException {
    List<HiveLockObj> locks = new LinkedList<HiveLockObj>();

    HiveLockObject.HiveLockObjectData lockData =
        new HiveLockObject.HiveLockObjectData(
            plan.getQueryId(),
            String.valueOf(System.currentTimeMillis()),
            "IMPLICIT",
            plan.getQueryStr());

    if (db != null) {
      locks.add(new HiveLockObj(new HiveLockObject(db.getName(), lockData), mode));
      return locks;
    }

    if (t != null) {
      locks.add(new HiveLockObj(new HiveLockObject(t, lockData), mode));
      mode = HiveLockMode.SHARED;
      locks.add(new HiveLockObj(new HiveLockObject(t.getDbName(), lockData), mode));
      return locks;
    }

    if (p != null) {
      if (!(p instanceof DummyPartition)) {
        locks.add(new HiveLockObj(new HiveLockObject(p, lockData), mode));
      }

      // All the parents are locked in shared mode
      mode = HiveLockMode.SHARED;

      // For dummy partitions, only partition name is needed
      String name = p.getName();

      if (p instanceof DummyPartition) {
        name = p.getName().split("@")[2];
      }

      String partialName = "";
      String[] partns = name.split("/");
      int len = p instanceof DummyPartition ? partns.length : partns.length - 1;
      Map<String, String> partialSpec = new LinkedHashMap<String, String>();
      for (int idx = 0; idx < len; idx++) {
        String partn = partns[idx];
        partialName += partn;
        String[] nameValue = partn.split("=");
        assert (nameValue.length == 2);
        partialSpec.put(nameValue[0], nameValue[1]);
        try {
          locks.add(
              new HiveLockObj(
                  new HiveLockObject(
                      new DummyPartition(
                          p.getTable(),
                          p.getTable().getDbName()
                              + "/"
                              + MetaStoreUtils.encodeTableName(p.getTable().getTableName())
                              + "/"
                              + partialName,
                          partialSpec),
                      lockData),
                  mode));
          partialName += "/";
        } catch (HiveException e) {
          throw new LockException(e.getMessage());
        }
      }

      locks.add(new HiveLockObj(new HiveLockObject(p.getTable(), lockData), mode));
      locks.add(new HiveLockObj(new HiveLockObject(p.getTable().getDbName(), lockData), mode));
    }
    return locks;
  }
  @Override
  protected void authorizeDDLWork(HiveSemanticAnalyzerHookContext cntxt, Hive hive, DDLWork work)
      throws HiveException {
    // DB opereations, none of them are enforced by Hive right now.

    ShowDatabasesDesc showDatabases = work.getShowDatabasesDesc();
    if (showDatabases != null) {
      authorize(
          HiveOperation.SHOWDATABASES.getInputRequiredPrivileges(),
          HiveOperation.SHOWDATABASES.getOutputRequiredPrivileges());
    }

    DropDatabaseDesc dropDb = work.getDropDatabaseDesc();
    if (dropDb != null) {
      Database db = cntxt.getHive().getDatabase(dropDb.getDatabaseName());
      authorize(db, Privilege.DROP);
    }

    DescDatabaseDesc descDb = work.getDescDatabaseDesc();
    if (descDb != null) {
      Database db = cntxt.getHive().getDatabase(descDb.getDatabaseName());
      authorize(db, Privilege.SELECT);
    }

    SwitchDatabaseDesc switchDb = work.getSwitchDatabaseDesc();
    if (switchDb != null) {
      Database db = cntxt.getHive().getDatabase(switchDb.getDatabaseName());
      authorize(db, Privilege.SELECT);
    }

    ShowTablesDesc showTables = work.getShowTblsDesc();
    if (showTables != null) {
      String dbName =
          showTables.getDbName() == null
              ? SessionState.get().getCurrentDatabase()
              : showTables.getDbName();
      authorize(cntxt.getHive().getDatabase(dbName), Privilege.SELECT);
    }

    ShowTableStatusDesc showTableStatus = work.getShowTblStatusDesc();
    if (showTableStatus != null) {
      String dbName =
          showTableStatus.getDbName() == null
              ? SessionState.get().getCurrentDatabase()
              : showTableStatus.getDbName();
      authorize(cntxt.getHive().getDatabase(dbName), Privilege.SELECT);
    }

    // TODO: add alter database support in HCat

    // Table operations.

    DropTableDesc dropTable = work.getDropTblDesc();
    if (dropTable != null) {
      if (dropTable.getPartSpecs() == null) {
        // drop table is already enforced by Hive. We only check for table level location even if
        // the
        // table is partitioned.
      } else {
        // this is actually a ALTER TABLE DROP PARITITION statement
        for (DropTableDesc.PartSpec partSpec : dropTable.getPartSpecs()) {
          // partitions are not added as write entries in drop partitions in Hive
          Table table =
              hive.getTable(SessionState.get().getCurrentDatabase(), dropTable.getTableName());
          List<Partition> partitions = null;
          try {
            partitions = hive.getPartitionsByFilter(table, partSpec.getPartSpec().getExprString());
          } catch (Exception e) {
            throw new HiveException(e);
          }

          for (Partition part : partitions) {
            authorize(part, Privilege.DROP);
          }
        }
      }
    }

    AlterTableDesc alterTable = work.getAlterTblDesc();
    if (alterTable != null) {
      Table table =
          hive.getTable(SessionState.get().getCurrentDatabase(), alterTable.getOldName(), false);

      Partition part = null;
      if (alterTable.getPartSpec() != null) {
        part = hive.getPartition(table, alterTable.getPartSpec(), false);
      }

      String newLocation = alterTable.getNewLocation();

      /* Hcat requires ALTER_DATA privileges for ALTER TABLE LOCATION statements
       * for the old table/partition location and the new location.
       */
      if (alterTable.getOp() == AlterTableDesc.AlterTableTypes.ALTERLOCATION) {
        if (part != null) {
          authorize(part, Privilege.ALTER_DATA); // authorize for the old
          // location, and new location
          part.setLocation(newLocation);
          authorize(part, Privilege.ALTER_DATA);
        } else {
          authorize(table, Privilege.ALTER_DATA); // authorize for the old
          // location, and new location
          table.getTTable().getSd().setLocation(newLocation);
          authorize(table, Privilege.ALTER_DATA);
        }
      }
      // other alter operations are already supported by Hive
    }

    // we should be careful when authorizing table based on just the
    // table name. If columns have separate authorization domain, it
    // must be honored
    DescTableDesc descTable = work.getDescTblDesc();
    if (descTable != null) {
      String tableName = extractTableName(descTable.getTableName());
      authorizeTable(cntxt.getHive(), tableName, Privilege.SELECT);
    }

    ShowPartitionsDesc showParts = work.getShowPartsDesc();
    if (showParts != null) {
      String tableName = extractTableName(showParts.getTabName());
      authorizeTable(cntxt.getHive(), tableName, Privilege.SELECT);
    }
  }