protected static List<String> getFilesInHivePartition(Partition part, JobConf jobConf) {
  List<String> result = newArrayList();

  String ignoreFileRegex = jobConf.get(HCatTap.IGNORE_FILE_IN_PARTITION_REGEX, "");
  Pattern ignoreFilePattern = Pattern.compile(ignoreFileRegex);

  try {
    Path partitionDirPath = new Path(part.getSd().getLocation());
    FileStatus[] partitionContent = partitionDirPath.getFileSystem(jobConf).listStatus(partitionDirPath);
    for (FileStatus currStatus : partitionContent) {
      if (!currStatus.isDir()) {
        if (!ignoreFilePattern.matcher(currStatus.getPath().getName()).matches()) {
          result.add(currStatus.getPath().toUri().getPath());
        } else {
          LOG.debug("Ignoring path {} since matches ignore regex {}",
              currStatus.getPath().toUri().getPath(), ignoreFileRegex);
        }
      }
    }
  } catch (IOException e) {
    logError("Unable to read the content of partition '" + part.getSd().getLocation() + "'", e);
  }

  return result;
}
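A minimal caller sketch (not part of the original code), assuming the tap's Hadoop configuration is available as a JobConf: the ignore pattern is read from HCatTap.IGNORE_FILE_IN_PARTITION_REGEX, so it must be set before the partition's files are listed. The regex value and the `partition` variable below are illustrative assumptions.

// Hypothetical usage sketch: configure the ignore regex, then list the partition's
// data files. The ".*\\.tmp" pattern is an illustrative value only.
JobConf jobConf = new JobConf();
jobConf.set(HCatTap.IGNORE_FILE_IN_PARTITION_REGEX, ".*\\.tmp");
List<String> dataFiles = getFilesInHivePartition(partition, jobConf);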
@Override
public void onAddPartition(AddPartitionEvent partitionEvent) throws MetaException {
  if (partitionEvent != null && partitionEvent.getPartitionIterator() != null) {
    Iterator<Partition> it = partitionEvent.getPartitionIterator();
    while (it.hasNext()) {
      Partition part = it.next();
      if (part.getSd() != null && part.getSd().getLocation() != null) {
        String authzObj = part.getDbName() + "." + part.getTableName();
        String path = part.getSd().getLocation();
        for (SentryMetastoreListenerPlugin plugin : sentryPlugins) {
          plugin.addPath(authzObj, path);
        }
      }
    }
  }
}
public static Partition fromMetastoreApiPartition(
    org.apache.hadoop.hive.metastore.api.Partition partition) {
  StorageDescriptor storageDescriptor = partition.getSd();
  if (storageDescriptor == null) {
    throw new PrestoException(
        HIVE_INVALID_METADATA,
        "Partition does not contain a storage descriptor: " + partition);
  }

  Partition.Builder partitionBuilder = Partition.builder()
      .setDatabaseName(partition.getDbName())
      .setTableName(partition.getTableName())
      .setValues(partition.getValues())
      .setColumns(storageDescriptor.getCols().stream()
          .map(MetastoreUtil::fromMetastoreApiFieldSchema)
          .collect(toList()))
      .setParameters(partition.getParameters());

  fromMetastoreApiStorageDescriptor(
      storageDescriptor,
      partitionBuilder.getStorageBuilder(),
      format("%s.%s", partition.getTableName(), partition.getValues()));

  return partitionBuilder.build();
}
/**
 * Convenience method for working directly on a metastore partition. See
 * submitCachePartitionDirective(HdfsPartition, String, short) for more details.
 */
public static long submitCachePartitionDirective(
    org.apache.hadoop.hive.metastore.api.Partition part,
    String poolName, short replication) throws ImpalaRuntimeException {
  long id = HdfsCachingUtil.submitDirective(
      new Path(part.getSd().getLocation()), poolName, replication);
  // Only record the directive id when one was actually submitted; the replication
  // parameter is recorded unconditionally.
  if (id != -1) part.putToParameters(CACHE_DIR_ID_PROP_NAME, Long.toString(id));
  part.putToParameters(CACHE_DIR_REPLICATION_PROP_NAME, Long.toString(replication));
  return id;
}
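A hedged usage sketch, not from the original source: since submitDirective() returns -1 when no directive was submitted, only the replication parameter is guaranteed to be set on the partition afterwards. The pool name, replication factor, and `msPartition` variable are illustrative assumptions.

// Hypothetical caller, assuming an Impala catalog context; "testPool" and the
// replication factor are illustrative values.
try {
  long directiveId = submitCachePartitionDirective(msPartition, "testPool", (short) 1);
  if (directiveId == -1) {
    // No caching directive was submitted; CACHE_DIR_ID_PROP_NAME was not set.
  }
} catch (ImpalaRuntimeException e) {
  // Submitting the HDFS caching directive failed; surface or log the error.
  LOG.error("Failed to submit cache directive", e);
}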
@Override
public CatalogProtos.PartitionDescProto getPartition(
    String databaseName, String tableName, String partitionName) throws CatalogException {
  HiveCatalogStoreClientPool.HiveCatalogStoreClient client = null;
  CatalogProtos.PartitionDescProto.Builder builder = null;

  try {
    client = clientPool.getClient();
    Partition partition = client.getHiveClient().getPartition(databaseName, tableName, partitionName);
    builder = CatalogProtos.PartitionDescProto.newBuilder();
    builder.setPartitionName(partitionName);
    builder.setPath(partition.getSd().getLocation());

    String[] partitionNames = partitionName.split("/");
    for (int i = 0; i < partition.getValues().size(); i++) {
      String value = partition.getValues().get(i);
      CatalogProtos.PartitionKeyProto.Builder keyBuilder = CatalogProtos.PartitionKeyProto.newBuilder();

      String columnName = partitionNames[i].split("=")[0];
      keyBuilder.setColumnName(columnName);
      keyBuilder.setPartitionValue(value);
      builder.addPartitionKeys(keyBuilder);
    }
  } catch (NoSuchObjectException e) {
    return null;
  } catch (Exception e) {
    throw new TajoInternalError(e);
  } finally {
    if (client != null) {
      client.release();
    }
  }
  return builder.build();
}
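In the method above, column names come from the partition name string while the values come from partition.getValues(), matched by position. A small, self-contained illustration of that decomposition, using hypothetical values and a hypothetical class name:

import java.util.Arrays;
import java.util.List;

public class PartitionNameDemo {
  public static void main(String[] args) {
    // Hypothetical inputs mirroring what getPartition() works with: the partition
    // name supplies the column names, getValues() supplies the values, in order.
    String partitionName = "ds=2015-01-01/hr=00";
    List<String> values = Arrays.asList("2015-01-01", "00");

    String[] keyValuePairs = partitionName.split("/");
    for (int i = 0; i < values.size(); i++) {
      String columnName = keyValuePairs[i].split("=")[0];
      System.out.println(columnName + " = " + values.get(i)); // ds = 2015-01-01, hr = 00
    }
  }
}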
/**
 * Create HdfsPartition objects corresponding to 'partitions'.
 *
 * <p>If there are no partitions in the Hive metadata, a single partition is added with no
 * partition keys.
 *
 * <p>For files that have not been changed, reuses file descriptors from oldFileDescMap.
 */
private void loadPartitions(
    List<org.apache.hadoop.hive.metastore.api.Partition> msPartitions,
    org.apache.hadoop.hive.metastore.api.Table msTbl,
    Map<String, FileDescriptor> oldFileDescMap) throws IOException, CatalogException {
  partitions_.clear();
  hdfsBaseDir_ = msTbl.getSd().getLocation();
  List<FileDescriptor> newFileDescs = Lists.newArrayList();

  // INSERT statements need to refer to this if they try to write to new partitions.
  // Scans don't refer to this because by definition all partitions they refer to exist.
  addDefaultPartition(msTbl.getSd());

  if (msTbl.getPartitionKeysSize() == 0) {
    Preconditions.checkArgument(msPartitions == null || msPartitions.isEmpty());
    // This table has no partition key, which means it has no declared partitions.
    // We model partitions slightly differently to Hive - every file must exist in a
    // partition, so add a single partition with no keys which will get all the
    // files in the table's root directory.
    addPartition(msTbl.getSd(), null, new ArrayList<LiteralExpr>(), oldFileDescMap, newFileDescs);
    Path location = new Path(hdfsBaseDir_);
    if (DFS.exists(location)) {
      accessLevel_ = getAvailableAccessLevel(location);
    }
  } else {
    // Keep track of distinct partition key values and how many nulls there are.
    Set<String>[] uniquePartitionKeys = new HashSet[numClusteringCols_];
    long[] numNullKeys = new long[numClusteringCols_];
    for (int i = 0; i < numClusteringCols_; ++i) {
      uniquePartitionKeys[i] = new HashSet<String>();
      numNullKeys[i] = 0;
    }

    for (org.apache.hadoop.hive.metastore.api.Partition msPartition : msPartitions) {
      // Load key values.
      List<LiteralExpr> keyValues = Lists.newArrayList();
      int i = 0;
      for (String partitionKey : msPartition.getValues()) {
        uniquePartitionKeys[i].add(partitionKey);
        // Deal with Hive's special NULL partition key.
        if (partitionKey.equals(nullPartitionKeyValue_)) {
          keyValues.add(new NullLiteral());
          ++numNullKeys[i];
        } else {
          ColumnType type = colsByPos_.get(keyValues.size()).getType();
          try {
            Expr expr = LiteralExpr.create(partitionKey, type);
            // Force the literal to be of type declared in the metadata.
            expr = expr.castTo(type);
            keyValues.add((LiteralExpr) expr);
          } catch (AnalysisException ex) {
            LOG.warn("Failed to create literal expression of type: " + type, ex);
            throw new InvalidStorageDescriptorException(ex);
          }
        }
        ++i;
      }

      HdfsPartition partition = addPartition(msPartition.getSd(), msPartition, keyValues,
          oldFileDescMap, newFileDescs);
      // If the partition is null, its HDFS path does not exist, and it was not added to
      // this table's partition list. Skip the partition.
      if (partition == null) continue;

      if (msPartition.getParameters() != null) {
        partition.setNumRows(getRowCount(msPartition.getParameters()));
      }
      if (!TAccessLevelUtil.impliesWriteAccess(partition.getAccessLevel())) {
        // TODO: READ_ONLY isn't exactly correct because it's possible the
        // partition does not have READ permissions either. When we start checking
        // whether we can READ from a table, this should be updated to set the
        // table's access level to the "lowest" effective level across all
        // partitions. That is, if one partition has READ_ONLY and another has
        // WRITE_ONLY the table's access level should be NONE.
        accessLevel_ = TAccessLevel.READ_ONLY;
      }
    }

    // Update column stats for the partition key columns.
    for (int i = 0; i < numClusteringCols_; ++i) {
      ColumnStats stats = colsByPos_.get(i).getStats();
      stats.setNumNulls(numNullKeys[i]);
      stats.setNumDistinctValues(uniquePartitionKeys[i].size());
      LOG.debug("#col=" + Integer.toString(i) + " stats=" + stats.toString());
    }
  }

  if (newFileDescs.size() > 0) {
    loadBlockMd(newFileDescs);
  }
  uniqueHostPortsCount_ = countUniqueDataNetworkLocations(partitions_);
}