public static void dropAllStoreObjects(RawStore store) throws MetaException, InvalidObjectException, InvalidInputException { try { Deadline.registerIfNot(100000); List<Function> funcs = store.getAllFunctions(); for (Function func : funcs) { store.dropFunction(func.getDbName(), func.getFunctionName()); } List<String> dbs = store.getAllDatabases(); for (int i = 0; i < dbs.size(); i++) { String db = dbs.get(i); List<String> tbls = store.getAllTables(db); for (String tbl : tbls) { Deadline.startTimer("getPartition"); List<Partition> parts = store.getPartitions(db, tbl, 100); for (Partition part : parts) { store.dropPartition(db, tbl, part.getValues()); } store.dropTable(db, tbl); } store.dropDatabase(db); } List<String> roles = store.listRoleNames(); for (String role : roles) { store.removeRole(role); } } catch (NoSuchObjectException e) { } }
@Override public void renamePartition( String db, String tableName, List<String> partitionValues, Partition partition) throws HiveMetastoreException { HiveObjectSpec tableSpec = new HiveObjectSpec(db, tableName); Table table = specToTable.get(tableSpec); String renameFromPartitionName = getPartitionName(table, partitionValues); String renameToPartitionName = getPartitionName(table, partition.getValues()); HiveObjectSpec renameFromSpec = new HiveObjectSpec(db, tableName, renameFromPartitionName); HiveObjectSpec renameToSpec = new HiveObjectSpec(db, tableName, renameToPartitionName); if (specToPartition.containsKey(renameToSpec)) { throw new HiveMetastoreException("Partition already exists: " + renameToSpec); } if (!specToPartition.containsKey(renameFromSpec)) { throw new HiveMetastoreException("Partition doesn't exist: " + renameFromPartitionName); } Partition removed = specToPartition.remove(renameFromSpec); removed.setValues(new ArrayList<>(partition.getValues())); specToPartition.put(renameToSpec, removed); }
protected static List<String> getFilesInHivePartition(Partition part, JobConf jobConf) { List<String> result = newArrayList(); String ignoreFileRegex = jobConf.get(HCatTap.IGNORE_FILE_IN_PARTITION_REGEX, ""); Pattern ignoreFilePattern = Pattern.compile(ignoreFileRegex); try { Path partitionDirPath = new Path(part.getSd().getLocation()); FileStatus[] partitionContent = partitionDirPath.getFileSystem(jobConf).listStatus(partitionDirPath); for (FileStatus currStatus : partitionContent) { if (!currStatus.isDir()) { if (!ignoreFilePattern.matcher(currStatus.getPath().getName()).matches()) { result.add(currStatus.getPath().toUri().getPath()); } else { LOG.debug( "Ignoring path {} since matches ignore regex {}", currStatus.getPath().toUri().getPath(), ignoreFileRegex); } } } } catch (IOException e) { logError("Unable to read the content of partition '" + part.getSd().getLocation() + "'", e); } return result; }
private void addPartition( String databaseName, String tableName, CatalogProtos.PartitionDescProto partitionDescProto) { HiveCatalogStoreClientPool.HiveCatalogStoreClient client = null; try { client = clientPool.getClient(); Partition partition = new Partition(); partition.setDbName(databaseName); partition.setTableName(tableName); List<String> values = Lists.newArrayList(); for (CatalogProtos.PartitionKeyProto keyProto : partitionDescProto.getPartitionKeysList()) { values.add(keyProto.getPartitionValue()); } partition.setValues(values); Table table = client.getHiveClient().getTable(databaseName, tableName); StorageDescriptor sd = table.getSd(); sd.setLocation(partitionDescProto.getPath()); partition.setSd(sd); client.getHiveClient().add_partition(partition); } catch (Exception e) { throw new TajoInternalError(e); } finally { if (client != null) { client.release(); } } }
@Override public Partition exchangePartition( Map<String, String> partitionSpecs, String sourceDb, String sourceTable, String destDb, String destinationTableName) throws HiveMetastoreException { final String partitionName = partitionSpecToName(partitionSpecs); final HiveObjectSpec exchangeFromPartitionSpec = new HiveObjectSpec(sourceDb, sourceTable, partitionName); final HiveObjectSpec exchangeToPartitionSpec = new HiveObjectSpec(destDb, destinationTableName, partitionName); if (!existsPartition(sourceDb, sourceTable, partitionName)) { throw new HiveMetastoreException( String.format("Unknown source partition %s.%s/%s", sourceDb, sourceTable, partitionName)); } if (!existsTable(destDb, destinationTableName)) { throw new HiveMetastoreException( String.format("Unknown destination table %s.%s", destDb, destinationTableName)); } Partition partition = specToPartition.remove(exchangeFromPartitionSpec); partition.setDbName(destDb); partition.setTableName(destinationTableName); specToPartition.put(exchangeToPartitionSpec, partition); return partition; }
private Map<HivePartitionName, Optional<Partition>> loadPartitionsByNames(
    Iterable<? extends HivePartitionName> partitionNames) throws Exception {
  requireNonNull(partitionNames, "partitionNames is null");
  checkArgument(!Iterables.isEmpty(partitionNames), "partitionNames is empty");

  HivePartitionName firstPartition = Iterables.get(partitionNames, 0);
  HiveTableName hiveTableName = firstPartition.getHiveTableName();
  String databaseName = hiveTableName.getDatabaseName();
  String tableName = hiveTableName.getTableName();

  List<String> partitionsToFetch = new ArrayList<>();
  for (HivePartitionName partitionName : partitionNames) {
    checkArgument(
        partitionName.getHiveTableName().equals(hiveTableName),
        "Expected table name %s but got %s",
        hiveTableName,
        partitionName.getHiveTableName());
    partitionsToFetch.add(partitionName.getPartitionName());
  }

  List<String> partitionColumnNames =
      ImmutableList.copyOf(Warehouse.makeSpecFromName(firstPartition.getPartitionName()).keySet());

  try {
    return retry()
        .stopOn(NoSuchObjectException.class)
        .stopOnIllegalExceptions()
        .run(
            "getPartitionsByNames",
            stats
                .getGetPartitionsByNames()
                .wrap(
                    () -> {
                      try (HiveMetastoreClient client = clientProvider.createMetastoreClient()) {
                        ImmutableMap.Builder<HivePartitionName, Optional<Partition>> partitions =
                            ImmutableMap.builder();
                        for (Partition partition :
                            client.getPartitionsByNames(databaseName, tableName, partitionsToFetch)) {
                          String partitionId =
                              FileUtils.makePartName(partitionColumnNames, partition.getValues(), null);
                          partitions.put(
                              HivePartitionName.partition(databaseName, tableName, partitionId),
                              Optional.of(partition));
                        }
                        return partitions.build();
                      }
                    }));
  } catch (NoSuchObjectException e) {
    // assume none of the partitions in the batch are available
    return stream(partitionNames.spliterator(), false)
        .collect(toMap(identity(), (name) -> Optional.empty()));
  } catch (TException e) {
    throw new PrestoException(HIVE_METASTORE_ERROR, e);
  }
}
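// The batch loader above derives the partition column names with Warehouse.makeSpecFromName and
// rebuilds partition ids with FileUtils.makePartName. A minimal, self-contained sketch of that
// round trip; the partition name "ds=2023-01-01/hr=00" is made up for illustration.
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import org.apache.hadoop.hive.common.FileUtils;
import org.apache.hadoop.hive.metastore.Warehouse;
import org.apache.hadoop.hive.metastore.api.MetaException;

public class PartitionNameRoundTrip {
  public static void main(String[] args) throws MetaException {
    String partitionName = "ds=2023-01-01/hr=00";

    // makeSpecFromName splits a metastore partition name into an ordered column -> value map.
    LinkedHashMap<String, String> spec = Warehouse.makeSpecFromName(partitionName);
    List<String> columns = new ArrayList<>(spec.keySet());   // [ds, hr]
    List<String> values = new ArrayList<>(spec.values());    // [2023-01-01, 00]

    // makePartName rebuilds the same name from the column names and values.
    String rebuilt = FileUtils.makePartName(columns, values, null);
    System.out.println(rebuilt);  // ds=2023-01-01/hr=00
  }
}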
@Override public synchronized Optional<Partition> getPartition( String databaseName, String tableName, List<String> partitionValues) { PartitionName name = PartitionName.partition(databaseName, tableName, partitionValues); Partition partition = partitions.get(name); if (partition == null) { return Optional.empty(); } return Optional.of(partition.deepCopy()); }
/**
 * Convenience method for working directly on a metastore partition. See
 * submitCachePartitionDirective(HdfsPartition, String, short) for more details.
 */
public static long submitCachePartitionDirective(
    org.apache.hadoop.hive.metastore.api.Partition part, String poolName, short replication)
    throws ImpalaRuntimeException {
  long id = HdfsCachingUtil.submitDirective(
      new Path(part.getSd().getLocation()), poolName, replication);
  // Record the cache parameters only when a directive id was actually assigned.
  if (id != -1) {
    part.putToParameters(CACHE_DIR_ID_PROP_NAME, Long.toString(id));
    part.putToParameters(CACHE_DIR_REPLICATION_PROP_NAME, Long.toString(replication));
  }
  return id;
}
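// The directive id and replication factor end up in the partition's parameter map. A small sketch
// of writing and reading such a parameter on a metastore Partition; "cache_directive_id" is a
// placeholder key standing in for CACHE_DIR_ID_PROP_NAME, whose actual value is defined elsewhere.
import org.apache.hadoop.hive.metastore.api.Partition;

public class CacheParameterSketch {
  private static final String CACHE_DIR_ID_PROP_NAME = "cache_directive_id"; // placeholder key

  public static void main(String[] args) {
    Partition part = new Partition();
    // putToParameters lazily creates the parameter map on the Thrift-generated Partition.
    part.putToParameters(CACHE_DIR_ID_PROP_NAME, Long.toString(42L));

    // The id can later be read back from the partition's parameter map.
    System.out.println(part.getParameters().get(CACHE_DIR_ID_PROP_NAME));  // 42
  }
}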
@Override
public void dropPartition(String databaseName, String tableName, List<String> parts) {
  // Remove through the entry-set view so removal during traversal is safe; removing from the map
  // inside a for-each loop risks a ConcurrentModificationException.
  partitions.entrySet().removeIf(entry ->
      entry.getKey().matches(databaseName, tableName)
          && entry.getValue().getValues().equals(parts));
}
private void validateAlterPartition( Partition expectedOldPartition, Partition expectedNewPartition, String actualOldPartitionDbName, String actualOldPartitionTblName, List<String> actualOldPartitionValues, Partition actualNewPartition) { assertEquals(expectedOldPartition.getValues(), actualOldPartitionValues); assertEquals(expectedOldPartition.getDbName(), actualOldPartitionDbName); assertEquals(expectedOldPartition.getTableName(), actualOldPartitionTblName); validatePartition(expectedNewPartition, actualNewPartition); }
@Override public synchronized List<Partition> getPartitionsByNames( String databaseName, String tableName, List<String> partitionNames) { ImmutableList.Builder<Partition> builder = ImmutableList.builder(); for (String name : partitionNames) { PartitionName partitionName = PartitionName.partition(databaseName, tableName, name); Partition partition = partitions.get(partitionName); if (partition == null) { return ImmutableList.of(); } builder.add(partition.deepCopy()); } return builder.build(); }
static Map<String, String> createPtnKeyValueMap(Table table, Partition ptn) throws IOException {
  List<String> values = ptn.getValues();
  if (values.size() != table.getPartitionKeys().size()) {
    throw new IOException(
        "Partition values in partition inconsistent with table definition, table "
            + table.getTableName()
            + " has "
            + table.getPartitionKeys().size()
            + " partition keys, partition has "
            + values.size()
            + " partition values");
  }
  Map<String, String> ptnKeyValues = new HashMap<String, String>();
  int i = 0;
  for (FieldSchema schema : table.getPartitionKeys()) {
    // CONCERN : the way this mapping goes, the order *needs* to be
    // preserved for table.getPartitionKeys() and ptn.getValues()
    ptnKeyValues.put(schema.getName().toLowerCase(), values.get(i));
    i++;
  }
  return ptnKeyValues;
}
@Override public void addPartitions(String databaseName, String tableName, List<Partition> partitions) { Optional<Table> table = getTable(databaseName, tableName); if (!table.isPresent()) { throw new TableNotFoundException(new SchemaTableName(databaseName, tableName)); } for (Partition partition : partitions) { String partitionName = createPartitionName(partition, table.get()); partition = partition.deepCopy(); if (partition.getParameters() == null) { partition.setParameters(ImmutableMap.of()); } this.partitions.put( PartitionName.partition(databaseName, tableName, partitionName), partition); } }
@Override public void onDropPartition(DropPartitionEvent partitionEvent) throws MetaException { if (partitionEvent != null && partitionEvent.getPartitionIterator() != null) { String authzObj = partitionEvent.getTable().getDbName() + "." + partitionEvent.getTable().getTableName(); Iterator<Partition> it = partitionEvent.getPartitionIterator(); while (it.hasNext()) { Partition part = it.next(); if (part.getSd() != null && part.getSd().getLocation() != null) { String path = part.getSd().getLocation(); for (SentryMetastoreListenerPlugin plugin : sentryPlugins) { plugin.removePath(authzObj, path); } } } } }
public static Partition fromMetastoreApiPartition( org.apache.hadoop.hive.metastore.api.Partition partition) { StorageDescriptor storageDescriptor = partition.getSd(); if (storageDescriptor == null) { throw new PrestoException( HIVE_INVALID_METADATA, "Partition does not contain a storage descriptor: " + partition); } Partition.Builder partitionBuilder = Partition.builder() .setDatabaseName(partition.getDbName()) .setTableName(partition.getTableName()) .setValues(partition.getValues()) .setColumns( storageDescriptor .getCols() .stream() .map(MetastoreUtil::fromMetastoreApiFieldSchema) .collect(toList())) .setParameters(partition.getParameters()); fromMetastoreApiStorageDescriptor( storageDescriptor, partitionBuilder.getStorageBuilder(), format("%s.%s", partition.getTableName(), partition.getValues())); return partitionBuilder.build(); }
private static boolean partitionMatches( Partition partition, String databaseName, String tableName, List<String> parts) { if (!partition.getDbName().equals(databaseName) || !partition.getTableName().equals(tableName)) { return false; } List<String> values = partition.getValues(); if (values.size() != parts.size()) { return false; } for (int i = 0; i < values.size(); i++) { String part = parts.get(i); if (!part.isEmpty() && !values.get(i).equals(part)) { return false; } } return true; }
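// A minimal sketch of the wildcard semantics above: an empty string in 'parts' matches any value
// at that position. The matching logic is repeated inline so the sketch is self-contained
// (partitionMatches itself is private); the database, table, and values are made up.
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.metastore.api.Partition;

public class PartitionMatchSketch {
  public static void main(String[] args) {
    Partition partition = new Partition();
    partition.setDbName("web");
    partition.setTableName("events");
    partition.setValues(Arrays.asList("2023-01-01", "00"));

    // ["2023-01-01", ""] matches every hour of that day; ["2023-01-02", ""] matches nothing here.
    System.out.println(matches(partition, "web", "events", Arrays.asList("2023-01-01", "")));  // true
    System.out.println(matches(partition, "web", "events", Arrays.asList("2023-01-02", "")));  // false
  }

  private static boolean matches(Partition partition, String db, String table, List<String> parts) {
    if (!partition.getDbName().equals(db) || !partition.getTableName().equals(table)) {
      return false;
    }
    List<String> values = partition.getValues();
    if (values.size() != parts.size()) {
      return false;
    }
    for (int i = 0; i < values.size(); i++) {
      if (!parts.get(i).isEmpty() && !values.get(i).equals(parts.get(i))) {
        return false;
      }
    }
    return true;
  }
}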
@Override public void alterPartition(String dbName, String tableName, Partition partition) throws HiveMetastoreException { HiveObjectSpec tableSpec = new HiveObjectSpec(partition.getDbName(), partition.getTableName()); if (!specToTable.containsKey(tableSpec)) { throw new HiveMetastoreException("Unknown table: " + tableSpec); } Table table = specToTable.get(tableSpec); String partitionName = getPartitionName(table, partition); HiveObjectSpec partitionSpec = new HiveObjectSpec(tableSpec.getDbName(), tableSpec.getTableName(), partitionName); if (!specToPartition.containsKey(partitionSpec)) { throw new HiveMetastoreException("Partition does not exist: " + partitionSpec); } specToPartition.put(partitionSpec, partition); }
@Override public Partition addPartition(Partition partition) throws HiveMetastoreException { HiveObjectSpec tableSpec = new HiveObjectSpec(partition.getDbName(), partition.getTableName()); if (!specToTable.containsKey(tableSpec)) { throw new HiveMetastoreException("Unknown table: " + tableSpec); } Table table = specToTable.get(tableSpec); String partitionName = getPartitionName(table, partition); HiveObjectSpec partitionSpec = new HiveObjectSpec(tableSpec.getDbName(), tableSpec.getTableName(), partitionName); if (specToPartition.containsKey(partitionSpec)) { throw new HiveMetastoreException("Partition already exists: " + partitionSpec); } specToPartition.put(partitionSpec, partition); return partition; }
/**
 * Returns the partition name (e.g. ds=1/hr=2) given a Table and Partition object. For simplicity,
 * this does not handle special characters properly.
 *
 * @param table the table that the partition belongs to
 * @param partition the partition to get the name for
 * @return the name of the partition
 * @throws HiveMetastoreException if the partition's values do not match the table's partition keys
 */
private String getPartitionName(Table table, Partition partition) throws HiveMetastoreException {
  if (table.getPartitionKeys().size() != partition.getValues().size()) {
    throw new HiveMetastoreException(
        "Partition column mismatch: "
            + "table has " + table.getPartitionKeys().size() + " columns "
            + "while partition has " + partition.getValues().size() + " values");
  }
  List<String> keyValues = new ArrayList<>();
  int keyValueIndex = 0;
  for (FieldSchema field : table.getPartitionKeys()) {
    keyValues.add(field.getName() + "=" + partition.getValues().get(keyValueIndex));
    keyValueIndex++;
  }
  return StringUtils.join(keyValues, "/");
}
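// The javadoc above notes that special characters are not handled. A hedged sketch of the
// difference: the naive key=value join passes a '/' inside a value straight through, while
// Warehouse.makePartName escapes it. Column names and values are made up for illustration.
import java.util.Arrays;
import org.apache.hadoop.hive.metastore.Warehouse;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.MetaException;

public class PartitionNameEscapingSketch {
  public static void main(String[] args) throws MetaException {
    FieldSchema ds = new FieldSchema("ds", "string", null);
    FieldSchema hr = new FieldSchema("hr", "string", null);

    // Naive join, as in getPartitionName above: the '/' in the value makes the result ambiguous.
    System.out.println("ds=" + "2023/01/01" + "/" + "hr=" + "00");  // ds=2023/01/01/hr=00

    // Warehouse.makePartName percent-escapes '/' and other unsafe characters.
    System.out.println(Warehouse.makePartName(Arrays.asList(ds, hr), Arrays.asList("2023/01/01", "00")));
    // -> ds=2023%2F01%2F01/hr=00
  }
}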
@Override public CatalogProtos.PartitionDescProto getPartition( String databaseName, String tableName, String partitionName) throws CatalogException { HiveCatalogStoreClientPool.HiveCatalogStoreClient client = null; CatalogProtos.PartitionDescProto.Builder builder = null; try { client = clientPool.getClient(); Partition partition = client.getHiveClient().getPartition(databaseName, tableName, partitionName); builder = CatalogProtos.PartitionDescProto.newBuilder(); builder.setPartitionName(partitionName); builder.setPath(partition.getSd().getLocation()); String[] partitionNames = partitionName.split("/"); for (int i = 0; i < partition.getValues().size(); i++) { String value = partition.getValues().get(i); CatalogProtos.PartitionKeyProto.Builder keyBuilder = CatalogProtos.PartitionKeyProto.newBuilder(); String columnName = partitionNames[i].split("=")[0]; keyBuilder.setColumnName(columnName); keyBuilder.setPartitionValue(value); builder.addPartitionKeys(keyBuilder); } } catch (NoSuchObjectException e) { return null; } catch (Exception e) { throw new TajoInternalError(e); } finally { if (client != null) { client.release(); } } return builder.build(); }
public static org.apache.hadoop.hive.metastore.api.Partition toMetastoreApiPartition( Partition partition) { org.apache.hadoop.hive.metastore.api.Partition result = new org.apache.hadoop.hive.metastore.api.Partition(); result.setDbName(partition.getDatabaseName()); result.setTableName(partition.getTableName()); result.setValues(partition.getValues()); result.setSd( makeStorageDescriptor( partition.getTableName(), partition.getColumns(), partition.getStorage())); result.setParameters(partition.getParameters()); return result; }
@Override
public Integer run() throws Exception {
  if (jobContext.getCoreExitCode() != 0) {
    log("Job failed; skipping output directory cleanup");
    return 0;
  }
  SimpleDateFormat format = new SimpleDateFormat("yyyyMMdd");
  Calendar cal = Calendar.getInstance();
  cal.add(Calendar.DAY_OF_YEAR, keepDays * (-1));
  Date limit = format.parse(format.format(cal.getTime()));
  for (String tableName : tables) {
    Table t = tableManager.getTable(tableName);
    // Locate the "pt" partition column; ptIndex stays -1 when the table has no such column.
    int ptIndex = -1;
    List<FieldSchema> partitionKeys = t.getPartitionKeys();
    for (int i = 0; i < partitionKeys.size(); i++) {
      if (partitionKeys.get(i).getName().equalsIgnoreCase("pt")) {
        ptIndex = i;
        break;
      }
    }
    if (ptIndex < 0) {
      log("Table " + tableName + " has no pt partition column; skipping historical partition cleanup");
      continue;
    }
    List<Partition> parts = tableManager.getPartitions(tableName, null);
    for (Partition p : parts) {
      Date ptDate = null;
      try {
        ptDate = format.parse(StringUtils.substring(p.getValues().get(ptIndex), 0, 8));
      } catch (Exception e) {
        log("Invalid partition value format:");
        log(e);
      }
      if (ptDate == null) {
        log("Failed to parse partition date: " + p.getValues().get(ptIndex));
        continue;
      }
      if (ptDate.before(limit)) {
        if (!tableManager.dropPartition(tableName, p.getValues(), true)) {
          log("drop partition failed. Table[" + tableName + "],part_vals=[" + p.getValues() + "]");
        } else {
          log("drop partition ok. Table[" + tableName + "],part_vals=[" + p.getValues() + "]");
        }
      }
    }
  }
  return 0;
}
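// The cleanup above computes its retention cutoff with SimpleDateFormat and Calendar. A hedged
// sketch of the same cutoff test using java.time; isExpired is an illustrative helper, not part of
// the job above, and the pt values and keepDays are made up.
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeParseException;

public class RetentionCutoffSketch {
  private static final DateTimeFormatter PT_FORMAT = DateTimeFormatter.ofPattern("yyyyMMdd");

  // True when the first 8 characters of the pt value parse to a date older than keepDays ago.
  static boolean isExpired(String ptValue, int keepDays, LocalDate today) {
    String day = ptValue.length() > 8 ? ptValue.substring(0, 8) : ptValue;
    try {
      LocalDate ptDate = LocalDate.parse(day, PT_FORMAT);
      return ptDate.isBefore(today.minusDays(keepDays));
    } catch (DateTimeParseException e) {
      return false;  // malformed pt values are never considered expired
    }
  }

  public static void main(String[] args) {
    LocalDate today = LocalDate.of(2023, 6, 15);
    System.out.println(isExpired("20230101", 30, today));    // true  (older than 30 days)
    System.out.println(isExpired("2023061412", 30, today));  // false (within the last 30 days)
  }
}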
private void validatePartition(Partition expectedPartition, Partition actualPartition) { assertEquals(expectedPartition.getValues(), actualPartition.getValues()); assertEquals(expectedPartition.getDbName(), actualPartition.getDbName()); assertEquals(expectedPartition.getTableName(), actualPartition.getTableName()); }
/**
 * Create HdfsPartition objects corresponding to 'partitions'.
 *
 * <p>If there are no partitions in the Hive metadata, a single partition is added with no
 * partition keys.
 *
 * <p>For files that have not been changed, reuses file descriptors from oldFileDescMap.
 */
private void loadPartitions(
    List<org.apache.hadoop.hive.metastore.api.Partition> msPartitions,
    org.apache.hadoop.hive.metastore.api.Table msTbl,
    Map<String, FileDescriptor> oldFileDescMap) throws IOException, CatalogException {
  partitions_.clear();
  hdfsBaseDir_ = msTbl.getSd().getLocation();
  List<FileDescriptor> newFileDescs = Lists.newArrayList();
  // INSERT statements need to refer to this if they try to write to new partitions.
  // Scans don't refer to this because by definition all partitions they refer to exist.
  addDefaultPartition(msTbl.getSd());
  if (msTbl.getPartitionKeysSize() == 0) {
    Preconditions.checkArgument(msPartitions == null || msPartitions.isEmpty());
    // This table has no partition key, which means it has no declared partitions.
    // We model partitions slightly differently to Hive - every file must exist in a
    // partition, so add a single partition with no keys which will get all the
    // files in the table's root directory.
    addPartition(msTbl.getSd(), null, new ArrayList<LiteralExpr>(), oldFileDescMap, newFileDescs);
    Path location = new Path(hdfsBaseDir_);
    if (DFS.exists(location)) {
      accessLevel_ = getAvailableAccessLevel(location);
    }
  } else {
    // keep track of distinct partition key values and how many nulls there are
    Set<String>[] uniquePartitionKeys = new HashSet[numClusteringCols_];
    long[] numNullKeys = new long[numClusteringCols_];
    for (int i = 0; i < numClusteringCols_; ++i) {
      uniquePartitionKeys[i] = new HashSet<String>();
      numNullKeys[i] = 0;
    }
    for (org.apache.hadoop.hive.metastore.api.Partition msPartition : msPartitions) {
      // load key values
      List<LiteralExpr> keyValues = Lists.newArrayList();
      int i = 0;
      for (String partitionKey : msPartition.getValues()) {
        uniquePartitionKeys[i].add(partitionKey);
        // Deal with Hive's special NULL partition key.
        if (partitionKey.equals(nullPartitionKeyValue_)) {
          keyValues.add(new NullLiteral());
          ++numNullKeys[i];
        } else {
          ColumnType type = colsByPos_.get(keyValues.size()).getType();
          try {
            Expr expr = LiteralExpr.create(partitionKey, type);
            // Force the literal to be of the type declared in the metadata.
            expr = expr.castTo(type);
            keyValues.add((LiteralExpr) expr);
          } catch (AnalysisException ex) {
            LOG.warn("Failed to create literal expression of type: " + type, ex);
            throw new InvalidStorageDescriptorException(ex);
          }
        }
        ++i;
      }
      HdfsPartition partition =
          addPartition(msPartition.getSd(), msPartition, keyValues, oldFileDescMap, newFileDescs);
      // If the partition is null, its HDFS path does not exist, and it was not added to
      // this table's partition list. Skip the partition.
      if (partition == null) continue;
      if (msPartition.getParameters() != null) {
        partition.setNumRows(getRowCount(msPartition.getParameters()));
      }
      if (!TAccessLevelUtil.impliesWriteAccess(partition.getAccessLevel())) {
        // TODO: READ_ONLY isn't exactly correct because it's possible the
        // partition does not have READ permissions either. When we start checking
        // whether we can READ from a table, this should be updated to set the
        // table's access level to the "lowest" effective level across all
        // partitions. That is, if one partition has READ_ONLY and another has
        // WRITE_ONLY the table's access level should be NONE.
        accessLevel_ = TAccessLevel.READ_ONLY;
      }
    }
    // update col stats for partition key cols
    for (int i = 0; i < numClusteringCols_; ++i) {
      ColumnStats stats = colsByPos_.get(i).getStats();
      stats.setNumNulls(numNullKeys[i]);
      stats.setNumDistinctValues(uniquePartitionKeys[i].size());
      LOG.debug("#col=" + Integer.toString(i) + " stats=" + stats.toString());
    }
  }
  if (newFileDescs.size() > 0) {
    loadBlockMd(newFileDescs);
  }
  uniqueHostPortsCount_ = countUniqueDataNetworkLocations(partitions_);
}
public void run(
    SessionState sess,
    Set<ReadEntity> inputs,
    Set<WriteEntity> outputs,
    LineageInfo linfo,
    UserGroupInformation ugi) throws Exception {
  LogHelper console = SessionState.getConsole();
  if (console == null) {
    return;
  }
  if (sess != null) {
    console.printError("POSTHOOK: query: " + sess.getCmd().trim());
    console.printError("POSTHOOK: type: " + sess.getCommandType());
  }
  PreExecutePrinter.printEntities(console, inputs, "POSTHOOK: Input: ");
  PreExecutePrinter.printEntities(console, outputs, "POSTHOOK: Output: ");
  // Also print out the generic lineage information if there is any
  if (linfo != null) {
    LinkedList<Map.Entry<DependencyKey, Dependency>> entry_list =
        new LinkedList<Map.Entry<DependencyKey, Dependency>>(linfo.entrySet());
    Collections.sort(entry_list, new DependencyKeyComp());
    Iterator<Map.Entry<DependencyKey, Dependency>> iter = entry_list.iterator();
    while (iter.hasNext()) {
      Map.Entry<DependencyKey, Dependency> it = iter.next();
      Dependency dep = it.getValue();
      DependencyKey depK = it.getKey();
      if (dep == null) {
        continue;
      }
      StringBuilder sb = new StringBuilder();
      sb.append("POSTHOOK: Lineage: ");
      if (depK.getDataContainer().isPartition()) {
        Partition part = depK.getDataContainer().getPartition();
        sb.append(part.getTableName());
        sb.append(" PARTITION(");
        int i = 0;
        for (FieldSchema fs : depK.getDataContainer().getTable().getPartitionKeys()) {
          if (i != 0) {
            sb.append(",");
          }
          sb.append(fs.getName() + "=" + part.getValues().get(i++));
        }
        sb.append(")");
      } else {
        sb.append(depK.getDataContainer().getTable().getTableName());
      }
      sb.append("." + depK.getFieldSchema().getName() + " " + dep.getType() + " ");
      sb.append("[");
      for (BaseColumnInfo col : dep.getBaseCols()) {
        sb.append(
            "(" + col.getTabAlias().getTable().getTableName() + ")"
                + col.getTabAlias().getAlias() + "." + col.getColumn() + ", ");
      }
      sb.append("]");
      console.printError(sb.toString());
    }
  }
}
private static String createPartitionName(Partition partition, Table table) { return makePartName(table.getPartitionKeys(), partition.getValues()); }